| | | |
| --- | --- | --- |
| author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
| commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
| tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /compiler-rt/lib/scudo/standalone | |
| parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) | |
Diffstat (limited to 'compiler-rt/lib/scudo/standalone')
33 files changed, 1997 insertions, 501 deletions
diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h index 3a5aaae73674..ad2a17ef7014 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_config.h +++ b/compiler-rt/lib/scudo/standalone/allocator_config.h @@ -32,20 +32,23 @@ struct DefaultConfig { // 512KB regions typedef SizeClassAllocator32<SizeClassMap, 19U> Primary; #endif - typedef MapAllocator<> Secondary; + typedef MapAllocator<MapAllocatorCache<>> Secondary; template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive }; struct AndroidConfig { using SizeClassMap = AndroidSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 - // 1GB regions - typedef SizeClassAllocator64<SizeClassMap, 30U> Primary; + // 256MB regions + typedef SizeClassAllocator64<SizeClassMap, 28U, 1000, 1000, + /*MaySupportMemoryTagging=*/true> + Primary; #else - // 512KB regions - typedef SizeClassAllocator32<SizeClassMap, 19U> Primary; + // 256KB regions + typedef SizeClassAllocator32<SizeClassMap, 18U, 1000, 1000> Primary; #endif - typedef MapAllocator<> Secondary; + // Cache blocks up to 2MB + typedef MapAllocator<MapAllocatorCache<32U, 2UL << 20, 0, 1000>> Secondary; template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 2U>; // Shared, max 2 TSDs. }; @@ -53,13 +56,13 @@ struct AndroidConfig { struct AndroidSvelteConfig { using SizeClassMap = SvelteSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 - // 512MB regions - typedef SizeClassAllocator64<SizeClassMap, 29U> Primary; + // 128MB regions + typedef SizeClassAllocator64<SizeClassMap, 27U, 1000, 1000> Primary; #else // 64KB regions - typedef SizeClassAllocator32<SizeClassMap, 16U> Primary; + typedef SizeClassAllocator32<SizeClassMap, 16U, 1000, 1000> Primary; #endif - typedef MapAllocator<0U> Secondary; + typedef MapAllocator<MapAllocatorCache<4U, 1UL << 18, 0, 0>> Secondary; template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 1U>; // Shared, only 1 TSD. }; @@ -68,7 +71,7 @@ struct AndroidSvelteConfig { struct FuchsiaConfig { // 1GB Regions typedef SizeClassAllocator64<DefaultSizeClassMap, 30U> Primary; - typedef MapAllocator<0U> Secondary; + typedef MapAllocator<MapAllocatorNoCache> Secondary; template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U>; // Shared, max 8 TSDs. }; diff --git a/compiler-rt/lib/scudo/standalone/atomic_helpers.h b/compiler-rt/lib/scudo/standalone/atomic_helpers.h index 6c84ba86ed32..1ea1a86ae506 100644 --- a/compiler-rt/lib/scudo/standalone/atomic_helpers.h +++ b/compiler-rt/lib/scudo/standalone/atomic_helpers.h @@ -51,7 +51,7 @@ struct atomic_u32 { struct atomic_u64 { typedef u64 Type; // On 32-bit platforms u64 is not necessarily aligned on 8 bytes. 
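The config change above turns the secondary's cache into a template parameter: each platform config picks `MapAllocatorCache<...>` with its own entry count and block-size limit, or `MapAllocatorNoCache` to disable caching entirely. Below is a minimal, self-contained sketch of that policy-based composition; every class name and constant in it is an illustrative stand-in, not the real scudo type.

```cpp
#include <cstddef>
#include <iostream>

// Hypothetical stand-ins for MapAllocatorCache<> / MapAllocatorNoCache: a
// cache policy is just a set of compile-time limits.
template <std::size_t MaxEntries, std::size_t MaxBlockSize> struct CachePolicy {
  static constexpr bool Enabled = MaxEntries != 0;
  static constexpr std::size_t Entries = MaxEntries;
  static constexpr std::size_t BlockLimit = MaxBlockSize;
};
struct NoCachePolicy {
  static constexpr bool Enabled = false;
};

// The secondary is parameterized by its cache policy, mirroring
// MapAllocator<MapAllocatorCache<...>> vs MapAllocator<MapAllocatorNoCache>.
template <class CacheT> struct ToyMapAllocator {
  static constexpr bool CachesBlocks = CacheT::Enabled;
};

// Per-platform configs name their components, like DefaultConfig/FuchsiaConfig.
struct CachingConfig {
  using Secondary = ToyMapAllocator<CachePolicy<32, 2UL << 20>>; // cache up to 2 MB
};
struct NoCacheConfig {
  using Secondary = ToyMapAllocator<NoCachePolicy>;
};

// The combined allocator is instantiated from a config at compile time.
template <class Config> struct ToyAllocator {
  static constexpr bool SecondaryCaches = Config::Secondary::CachesBlocks;
};

int main() {
  std::cout << ToyAllocator<CachingConfig>::SecondaryCaches << '\n'; // 1
  std::cout << ToyAllocator<NoCacheConfig>::SecondaryCaches << '\n'; // 0
}
```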
- ALIGNED(8) volatile Type ValDoNotUse; + alignas(8) volatile Type ValDoNotUse; }; struct atomic_uptr { diff --git a/compiler-rt/lib/scudo/standalone/bytemap.h b/compiler-rt/lib/scudo/standalone/bytemap.h index a03a0c471062..e0d54f4e5971 100644 --- a/compiler-rt/lib/scudo/standalone/bytemap.h +++ b/compiler-rt/lib/scudo/standalone/bytemap.h @@ -17,12 +17,10 @@ namespace scudo { template <uptr Size> class FlatByteMap { public: - void initLinkerInitialized() { - Map = reinterpret_cast<u8 *>(map(nullptr, Size, "scudo:bytemap")); - } - void init() { initLinkerInitialized(); } + void initLinkerInitialized() {} + void init() { memset(Map, 0, sizeof(Map)); } - void unmapTestOnly() { unmap(reinterpret_cast<void *>(Map), Size); } + void unmapTestOnly() {} void set(uptr Index, u8 Value) { DCHECK_LT(Index, Size); @@ -38,78 +36,7 @@ public: void enable() {} private: - u8 *Map; -}; - -template <uptr Level1Size, uptr Level2Size> class TwoLevelByteMap { -public: - void initLinkerInitialized() { - Level1Map = reinterpret_cast<atomic_uptr *>( - map(nullptr, sizeof(atomic_uptr) * Level1Size, "scudo:bytemap")); - } - void init() { - Mutex.init(); - initLinkerInitialized(); - } - - void reset() { - for (uptr I = 0; I < Level1Size; I++) { - u8 *P = get(I); - if (!P) - continue; - unmap(P, Level2Size); - } - memset(Level1Map, 0, sizeof(atomic_uptr) * Level1Size); - } - - void unmapTestOnly() { - reset(); - unmap(reinterpret_cast<void *>(Level1Map), - sizeof(atomic_uptr) * Level1Size); - } - - uptr size() const { return Level1Size * Level2Size; } - - void set(uptr Index, u8 Value) { - DCHECK_LT(Index, Level1Size * Level2Size); - u8 *Level2Map = getOrCreate(Index / Level2Size); - DCHECK_EQ(0U, Level2Map[Index % Level2Size]); - Level2Map[Index % Level2Size] = Value; - } - - u8 operator[](uptr Index) const { - DCHECK_LT(Index, Level1Size * Level2Size); - u8 *Level2Map = get(Index / Level2Size); - if (!Level2Map) - return 0; - return Level2Map[Index % Level2Size]; - } - - void disable() { Mutex.lock(); } - void enable() { Mutex.unlock(); } - -private: - u8 *get(uptr Index) const { - DCHECK_LT(Index, Level1Size); - return reinterpret_cast<u8 *>( - atomic_load(&Level1Map[Index], memory_order_acquire)); - } - - u8 *getOrCreate(uptr Index) { - u8 *Res = get(Index); - if (!Res) { - ScopedLock L(Mutex); - if (!(Res = get(Index))) { - Res = reinterpret_cast<u8 *>(map(nullptr, Level2Size, "scudo:bytemap")); - atomic_store(&Level1Map[Index], reinterpret_cast<uptr>(Res), - memory_order_release); - } - } - return Res; - } - - atomic_uptr *Level1Map; - HybridMutex Mutex; + u8 Map[Size]; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/checksum.cpp b/compiler-rt/lib/scudo/standalone/checksum.cpp index 5de049a0931b..05d4ba54bfc8 100644 --- a/compiler-rt/lib/scudo/standalone/checksum.cpp +++ b/compiler-rt/lib/scudo/standalone/checksum.cpp @@ -31,6 +31,13 @@ Checksum HashAlgorithm = {Checksum::BSD}; #define bit_SSE4_2 bit_SSE42 // clang and gcc have different defines. #endif +#ifndef signature_HYGON_ebx // They are not defined in gcc. +// HYGON: "HygonGenuine". 
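The three HYGON constants added below are simply the CPU vendor string "HygonGenuine" packed into the EBX/EDX/ECX registers returned by CPUID leaf 0, four little-endian bytes per register. A small stand-alone sketch that decodes them back into the string:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // CPUID leaf 0 returns the 12-byte vendor string in the order EBX, EDX, ECX.
  const uint32_t Ebx = 0x6f677948; // "Hygo"
  const uint32_t Edx = 0x6e65476e; // "nGen"
  const uint32_t Ecx = 0x656e6975; // "uine"
  char Vendor[13] = {};
  std::memcpy(Vendor + 0, &Ebx, 4);
  std::memcpy(Vendor + 4, &Edx, 4);
  std::memcpy(Vendor + 8, &Ecx, 4);
  std::printf("%s\n", Vendor); // prints "HygonGenuine" on little-endian hosts
  return 0;
}
```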
+#define signature_HYGON_ebx 0x6f677948 +#define signature_HYGON_edx 0x6e65476e +#define signature_HYGON_ecx 0x656e6975 +#endif + bool hasHardwareCRC32() { u32 Eax, Ebx = 0, Ecx = 0, Edx = 0; __get_cpuid(0, &Eax, &Ebx, &Ecx, &Edx); @@ -39,7 +46,10 @@ bool hasHardwareCRC32() { (Ecx == signature_INTEL_ecx); const bool IsAMD = (Ebx == signature_AMD_ebx) && (Edx == signature_AMD_edx) && (Ecx == signature_AMD_ecx); - if (!IsIntel && !IsAMD) + const bool IsHygon = (Ebx == signature_HYGON_ebx) && + (Edx == signature_HYGON_edx) && + (Ecx == signature_HYGON_ecx); + if (!IsIntel && !IsAMD && !IsHygon) return false; __get_cpuid(1, &Eax, &Ebx, &Ecx, &Edx); return !!(Ecx & bit_SSE4_2); diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index a0b4b2973e96..3bb41eca88f7 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -13,28 +13,36 @@ #include "common.h" #include "flags.h" #include "flags_parser.h" -#include "interface.h" #include "local_cache.h" +#include "memtag.h" #include "quarantine.h" #include "report.h" #include "secondary.h" +#include "stack_depot.h" #include "string_utils.h" #include "tsd.h" +#include "scudo/interface.h" + #ifdef GWP_ASAN_HOOKS #include "gwp_asan/guarded_pool_allocator.h" -// GWP-ASan is declared here in order to avoid indirect call overhead. It's also -// instantiated outside of the Allocator class, as the allocator is only -// zero-initialised. GWP-ASan requires constant initialisation, and the Scudo -// allocator doesn't have a constexpr constructor (see discussion here: -// https://reviews.llvm.org/D69265#inline-624315). -static gwp_asan::GuardedPoolAllocator GuardedAlloc; +#include "gwp_asan/optional/backtrace.h" +#include "gwp_asan/optional/segv_handler.h" #endif // GWP_ASAN_HOOKS extern "C" inline void EmptyCallback() {} +#ifdef HAVE_ANDROID_UNSAFE_FRAME_POINTER_CHASE +// This function is not part of the NDK so it does not appear in any public +// header files. We only declare/use it when targeting the platform. +extern "C" size_t android_unsafe_frame_pointer_chase(scudo::uptr *buf, + size_t num_entries); +#endif + namespace scudo { +enum class Option { ReleaseInterval }; + template <class Params, void (*PostInitCallback)(void) = EmptyCallback> class Allocator { public: @@ -139,20 +147,29 @@ public: // Store some flags locally. Options.MayReturnNull = getFlags()->may_return_null; - Options.ZeroContents = getFlags()->zero_contents; + Options.FillContents = + getFlags()->zero_contents + ? ZeroFill + : (getFlags()->pattern_fill_contents ? PatternOrZeroFill : NoFill); Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch; Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch; + Options.TrackAllocationStacks = false; Options.QuarantineMaxChunkSize = static_cast<u32>(getFlags()->quarantine_max_chunk_size); Stats.initLinkerInitialized(); - Primary.initLinkerInitialized(getFlags()->release_to_os_interval_ms); - Secondary.initLinkerInitialized(&Stats); + const s32 ReleaseToOsIntervalMs = getFlags()->release_to_os_interval_ms; + Primary.initLinkerInitialized(ReleaseToOsIntervalMs); + Secondary.initLinkerInitialized(&Stats, ReleaseToOsIntervalMs); Quarantine.init( static_cast<uptr>(getFlags()->quarantine_size_kb << 10), static_cast<uptr>(getFlags()->thread_local_quarantine_size_kb << 10)); + } + // Initialize the embedded GWP-ASan instance. Requires the main allocator to + // be functional, best called from PostInitCallback. 
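initGwpAsan() is split out of init() so that the heavier, allocation-capable setup runs from the PostInitCallback template parameter once the core allocator is usable. A rough sketch of that deferred one-time callback pattern follows; ToyAllocator and initOptionalFeatures are made-up names used only for illustration.

```cpp
#include <cstddef>
#include <iostream>
#include <mutex>
#include <new>

extern "C" inline void EmptyCallback() {}

template <void (*PostInitCallback)() = EmptyCallback> class ToyAllocator {
public:
  void *allocate(std::size_t Size) {
    initThreadMaybe(); // make sure the one-time post-init has run
    return ::operator new(Size);
  }

private:
  void initThreadMaybe() {
    std::call_once(PostInitFlag, [] { PostInitCallback(); });
  }
  std::once_flag PostInitFlag;
};

void initOptionalFeatures() {
  std::cout << "post-init: set up optional features (e.g. GWP-ASan) here\n";
}

int main() {
  ToyAllocator<initOptionalFeatures> A;
  void *P = A.allocate(16); // first use triggers the callback exactly once
  ::operator delete(P);
}
```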
+ void initGwpAsan() { #ifdef GWP_ASAN_HOOKS gwp_asan::options::Options Opt; Opt.Enabled = getFlags()->GWP_ASAN_Enabled; @@ -165,8 +182,17 @@ public: getFlags()->GWP_ASAN_MaxSimultaneousAllocations; Opt.SampleRate = getFlags()->GWP_ASAN_SampleRate; Opt.InstallSignalHandlers = getFlags()->GWP_ASAN_InstallSignalHandlers; - Opt.Printf = Printf; + // Embedded GWP-ASan is locked through the Scudo atfork handler (via + // Allocator::disable calling GWPASan.disable). Disable GWP-ASan's atfork + // handler. + Opt.InstallForkHandlers = false; + Opt.Backtrace = gwp_asan::options::getBacktraceFunction(); GuardedAlloc.init(Opt); + + if (Opt.InstallSignalHandlers) + gwp_asan::crash_handler::installSignalHandlers( + &GuardedAlloc, Printf, gwp_asan::options::getPrintBacktraceFunction(), + Opt.Backtrace); #endif // GWP_ASAN_HOOKS } @@ -175,6 +201,11 @@ public: void unmapTestOnly() { TSDRegistry.unmapTestOnly(); Primary.unmapTestOnly(); +#ifdef GWP_ASAN_HOOKS + if (getFlags()->GWP_ASAN_InstallSignalHandlers) + gwp_asan::crash_handler::uninstallSignalHandlers(); + GuardedAlloc.uninitTestOnly(); +#endif // GWP_ASAN_HOOKS } TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } @@ -195,6 +226,27 @@ public: TSD->Cache.destroy(&Stats); } + ALWAYS_INLINE void *untagPointerMaybe(void *Ptr) { + if (Primary.SupportsMemoryTagging) + return reinterpret_cast<void *>( + untagPointer(reinterpret_cast<uptr>(Ptr))); + return Ptr; + } + + NOINLINE u32 collectStackTrace() { +#ifdef HAVE_ANDROID_UNSAFE_FRAME_POINTER_CHASE + // Discard collectStackTrace() frame and allocator function frame. + constexpr uptr DiscardFrames = 2; + uptr Stack[MaxTraceSize + DiscardFrames]; + uptr Size = + android_unsafe_frame_pointer_chase(Stack, MaxTraceSize + DiscardFrames); + Size = Min<uptr>(Size, MaxTraceSize + DiscardFrames); + return Depot.insert(Stack + Min<uptr>(DiscardFrames, Size), Stack + Size); +#else + return 0; +#endif + } + NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, uptr Alignment = MinAlignment, bool ZeroContents = false) { @@ -207,7 +259,8 @@ public: } #endif // GWP_ASAN_HOOKS - ZeroContents |= static_cast<bool>(Options.ZeroContents); + FillContentsMode FillContents = + ZeroContents ? ZeroFill : Options.FillContents; if (UNLIKELY(Alignment > MaxAlignment)) { if (Options.MayReturnNull) @@ -235,22 +288,36 @@ public: } DCHECK_LE(Size, NeededSize); - void *Block; - uptr ClassId; - uptr BlockEnd; + void *Block = nullptr; + uptr ClassId = 0; + uptr SecondaryBlockEnd; if (LIKELY(PrimaryT::canAllocate(NeededSize))) { ClassId = SizeClassMap::getClassIdBySize(NeededSize); DCHECK_NE(ClassId, 0U); bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); Block = TSD->Cache.allocate(ClassId); + // If the allocation failed, the most likely reason with a 32-bit primary + // is the region being full. In that event, retry in each successively + // larger class until it fits. If it fails to fit in the largest class, + // fallback to the Secondary. 
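The comment above describes the new primary fallback path: when the chosen size class cannot serve the request (typically a full region with the 32-bit primary), the allocator retries each larger class and only then falls back to the secondary. A simplified, self-contained sketch of that loop, with toy primary/secondary types standing in for the real ones:

```cpp
#include <array>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

constexpr std::size_t NumClasses = 4;
constexpr std::array<std::size_t, NumClasses> ClassSizes = {32, 64, 128, 256};

struct ToyPrimary {
  // Pretend the two smallest classes are exhausted (e.g. their regions are full).
  void *allocate(std::size_t ClassId) {
    return ClassId >= 2 ? std::malloc(ClassSizes[ClassId]) : nullptr;
  }
};
struct ToySecondary {
  void *allocate(std::size_t Size) { return std::malloc(Size); }
};

void *allocateWithFallback(ToyPrimary &Primary, ToySecondary &Secondary,
                           std::size_t Size, std::size_t ClassId) {
  void *Block = Primary.allocate(ClassId);
  if (!Block) {
    // Retry in each successively larger class, as the allocator above does.
    while (ClassId + 1 < NumClasses) {
      Block = Primary.allocate(++ClassId);
      if (Block)
        break;
    }
  }
  if (!Block) // even the largest class failed: fall back to the secondary
    Block = Secondary.allocate(Size);
  return Block;
}

int main() {
  ToyPrimary P;
  ToySecondary S;
  void *Ptr = allocateWithFallback(P, S, /*Size=*/48, /*ClassId=*/1);
  std::printf("%p\n", Ptr);
  std::free(Ptr);
}
```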
+ if (UNLIKELY(!Block)) { + while (ClassId < SizeClassMap::LargestClassId) { + Block = TSD->Cache.allocate(++ClassId); + if (LIKELY(Block)) { + break; + } + } + if (UNLIKELY(!Block)) { + ClassId = 0; + } + } if (UnlockRequired) TSD->unlock(); - } else { - ClassId = 0; - Block = - Secondary.allocate(NeededSize, Alignment, &BlockEnd, ZeroContents); } + if (UNLIKELY(ClassId == 0)) + Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd, + FillContents); if (UNLIKELY(!Block)) { if (Options.MayReturnNull) @@ -258,16 +325,88 @@ public: reportOutOfMemory(NeededSize); } - // We only need to zero the contents for Primary backed allocations. This - // condition is not necessarily unlikely, but since memset is costly, we - // might as well mark it as such. - if (UNLIKELY(ZeroContents && ClassId)) - memset(Block, 0, PrimaryT::getSizeByClassId(ClassId)); - - const uptr UnalignedUserPtr = - reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize(); + const uptr BlockUptr = reinterpret_cast<uptr>(Block); + const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize(); const uptr UserPtr = roundUpTo(UnalignedUserPtr, Alignment); + void *Ptr = reinterpret_cast<void *>(UserPtr); + void *TaggedPtr = Ptr; + if (ClassId) { + // We only need to zero or tag the contents for Primary backed + // allocations. We only set tags for primary allocations in order to avoid + // faulting potentially large numbers of pages for large secondary + // allocations. We assume that guard pages are enough to protect these + // allocations. + // + // FIXME: When the kernel provides a way to set the background tag of a + // mapping, we should be able to tag secondary allocations as well. + // + // When memory tagging is enabled, zeroing the contents is done as part of + // setting the tag. + if (UNLIKELY(useMemoryTagging())) { + uptr PrevUserPtr; + Chunk::UnpackedHeader Header; + const uptr BlockEnd = BlockUptr + PrimaryT::getSizeByClassId(ClassId); + // If possible, try to reuse the UAF tag that was set by deallocate(). + // For simplicity, only reuse tags if we have the same start address as + // the previous allocation. This handles the majority of cases since + // most allocations will not be more aligned than the minimum alignment. + // + // We need to handle situations involving reclaimed chunks, and retag + // the reclaimed portions if necessary. In the case where the chunk is + // fully reclaimed, the chunk's header will be zero, which will trigger + // the code path for new mappings and invalid chunks that prepares the + // chunk from scratch. There are three possibilities for partial + // reclaiming: + // + // (1) Header was reclaimed, data was partially reclaimed. + // (2) Header was not reclaimed, all data was reclaimed (e.g. because + // data started on a page boundary). + // (3) Header was not reclaimed, data was partially reclaimed. + // + // Case (1) will be handled in the same way as for full reclaiming, + // since the header will be zero. + // + // We can detect case (2) by loading the tag from the start + // of the chunk. If it is zero, it means that either all data was + // reclaimed (since we never use zero as the chunk tag), or that the + // previous allocation was of size zero. Either way, we need to prepare + // a new chunk from scratch. + // + // We can detect case (3) by moving to the next page (if covered by the + // chunk) and loading the tag of its first granule. If it is zero, it + // means that all following pages may need to be retagged. 
On the other + // hand, if it is nonzero, we can assume that all following pages are + // still tagged, according to the logic that if any of the pages + // following the next page were reclaimed, the next page would have been + // reclaimed as well. + uptr TaggedUserPtr; + if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) && + PrevUserPtr == UserPtr && + (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) { + uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes; + const uptr NextPage = roundUpTo(TaggedUserPtr, getPageSizeCached()); + if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) + PrevEnd = NextPage; + TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr); + resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd); + if (Size) { + // Clear any stack metadata that may have previously been stored in + // the chunk data. + memset(TaggedPtr, 0, archMemoryTagGranuleSize()); + } + } else { + TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd); + } + storeAllocationStackMaybe(Ptr); + } else if (UNLIKELY(FillContents != NoFill)) { + // This condition is not necessarily unlikely, but since memset is + // costly, we might as well mark it as such. + memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, + PrimaryT::getSizeByClassId(ClassId)); + } + } + Chunk::UnpackedHeader Header = {}; if (UNLIKELY(UnalignedUserPtr != UserPtr)) { const uptr Offset = UserPtr - UnalignedUserPtr; @@ -283,15 +422,15 @@ public: Header.ClassId = ClassId & Chunk::ClassIdMask; Header.State = Chunk::State::Allocated; Header.Origin = Origin & Chunk::OriginMask; - Header.SizeOrUnusedBytes = (ClassId ? Size : BlockEnd - (UserPtr + Size)) & - Chunk::SizeOrUnusedBytesMask; - void *Ptr = reinterpret_cast<void *>(UserPtr); + Header.SizeOrUnusedBytes = + (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) & + Chunk::SizeOrUnusedBytesMask; Chunk::storeHeader(Cookie, Ptr, &Header); if (&__scudo_allocate_hook) - __scudo_allocate_hook(Ptr, Size); + __scudo_allocate_hook(TaggedPtr, Size); - return Ptr; + return TaggedPtr; } NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0, @@ -319,6 +458,8 @@ public: if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Deallocating, Ptr); + Ptr = untagPointerMaybe(Ptr); + Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); @@ -346,6 +487,15 @@ public: void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) { initThreadMaybe(); + if (UNLIKELY(NewSize >= MaxAllowedMallocSize)) { + if (Options.MayReturnNull) + return nullptr; + reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize); + } + + void *OldTaggedPtr = OldPtr; + OldPtr = untagPointerMaybe(OldPtr); + // The following cases are handled by the C wrappers. DCHECK_NE(OldPtr, nullptr); DCHECK_NE(NewSize, 0); @@ -396,16 +546,20 @@ public: // reasonable delta), we just keep the old block, and update the chunk // header to reflect the size change. if (reinterpret_cast<uptr>(OldPtr) + NewSize <= BlockEnd) { - const uptr Delta = - OldSize < NewSize ? NewSize - OldSize : OldSize - NewSize; - if (Delta <= SizeClassMap::MaxSize / 2) { + if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) { Chunk::UnpackedHeader NewHeader = OldHeader; NewHeader.SizeOrUnusedBytes = (ClassId ? 
NewSize : BlockEnd - (reinterpret_cast<uptr>(OldPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - return OldPtr; + if (UNLIKELY(ClassId && useMemoryTagging())) { + resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize, + reinterpret_cast<uptr>(OldTaggedPtr) + NewSize, + BlockEnd); + storeAllocationStackMaybe(OldPtr); + } + return OldTaggedPtr; } } @@ -416,7 +570,7 @@ public: void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); if (NewPtr) { const uptr OldSize = getSize(OldPtr, &OldHeader); - memcpy(NewPtr, OldPtr, Min(NewSize, OldSize)); + memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize); } return NewPtr; @@ -427,6 +581,9 @@ public: // this function finishes. We will revisit that later. void disable() { initThreadMaybe(); +#ifdef GWP_ASAN_HOOKS + GuardedAlloc.disable(); +#endif TSDRegistry.disable(); Stats.disable(); Quarantine.disable(); @@ -441,6 +598,9 @@ public: Quarantine.enable(); Stats.enable(); TSDRegistry.enable(); +#ifdef GWP_ASAN_HOOKS + GuardedAlloc.enable(); +#endif } // The function returns the amount of bytes required to store the statistics, @@ -473,6 +633,7 @@ public: void releaseToOS() { initThreadMaybe(); Primary.releaseToOS(); + Secondary.releaseToOS(); } // Iterate over all chunks and call a callback for all busy chunks located @@ -489,11 +650,19 @@ public: uptr Chunk; Chunk::UnpackedHeader Header; if (getChunkFromBlock(Block, &Chunk, &Header) && - Header.State == Chunk::State::Allocated) - Callback(Chunk, getSize(reinterpret_cast<void *>(Chunk), &Header), Arg); + Header.State == Chunk::State::Allocated) { + uptr TaggedChunk = Chunk; + if (useMemoryTagging()) + TaggedChunk = loadTag(Chunk); + Callback(TaggedChunk, getSize(reinterpret_cast<void *>(Chunk), &Header), + Arg); + } }; Primary.iterateOverBlocks(Lambda); Secondary.iterateOverBlocks(Lambda); +#ifdef GWP_ASAN_HOOKS + GuardedAlloc.iterate(reinterpret_cast<void *>(Base), Size, Callback, Arg); +#endif } bool canReturnNull() { @@ -501,8 +670,14 @@ public: return Options.MayReturnNull; } - // TODO(kostyak): implement this as a "backend" to mallopt. - bool setOption(UNUSED uptr Option, UNUSED uptr Value) { return false; } + bool setOption(Option O, sptr Value) { + if (O == Option::ReleaseInterval) { + Primary.setReleaseToOsIntervalMs(static_cast<s32>(Value)); + Secondary.setReleaseToOsIntervalMs(static_cast<s32>(Value)); + return true; + } + return false; + } // Return the usable size for a given chunk. Technically we lie, as we just // report the actual size of a chunk. This is done to counteract code actively @@ -519,6 +694,7 @@ public: return GuardedAlloc.getSize(Ptr); #endif // GWP_ASAN_HOOKS + Ptr = untagPointerMaybe(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); // Getting the usable size of a chunk only makes sense if it's allocated. 
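setOption() above is the new mallopt-style backend: Option::ReleaseInterval is forwarded to both the primary and the secondary, and the backends clamp the value to their compile-time MinReleaseToOsIntervalMs/MaxReleaseToOsIntervalMs bounds. A hedged sketch of that plumbing, with placeholder bounds and simplified clamping rather than the exact scudo logic:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

enum class Option { ReleaseInterval };

struct ToyBackend {
  void setReleaseToOsIntervalMs(int32_t Interval) {
    // Clamp to the compile-time bounds, as the real backends do.
    IntervalMs = std::clamp(Interval, MinIntervalMs, MaxIntervalMs);
  }
  int32_t IntervalMs = 5000;
  static constexpr int32_t MinIntervalMs = 0;     // placeholder bound
  static constexpr int32_t MaxIntervalMs = 60000; // placeholder bound
};

struct ToyCombined {
  bool setOption(Option O, intptr_t Value) {
    if (O == Option::ReleaseInterval) {
      Primary.setReleaseToOsIntervalMs(static_cast<int32_t>(Value));
      Secondary.setReleaseToOsIntervalMs(static_cast<int32_t>(Value));
      return true;
    }
    return false; // unknown option
  }
  ToyBackend Primary, Secondary;
};

int main() {
  ToyCombined A;
  A.setOption(Option::ReleaseInterval, 120000); // clamped to 60000 here
  std::cout << A.Primary.IntervalMs << '\n';
}
```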
@@ -543,11 +719,151 @@ public: #endif // GWP_ASAN_HOOKS if (!Ptr || !isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment)) return false; + Ptr = untagPointerMaybe(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; return Chunk::isValid(Cookie, Ptr, &Header) && Header.State == Chunk::State::Allocated; } + bool useMemoryTagging() { return Primary.useMemoryTagging(); } + + void disableMemoryTagging() { Primary.disableMemoryTagging(); } + + void setTrackAllocationStacks(bool Track) { + initThreadMaybe(); + Options.TrackAllocationStacks = Track; + } + + void setFillContents(FillContentsMode FillContents) { + initThreadMaybe(); + Options.FillContents = FillContents; + } + + const char *getStackDepotAddress() const { + return reinterpret_cast<const char *>(&Depot); + } + + const char *getRegionInfoArrayAddress() const { + return Primary.getRegionInfoArrayAddress(); + } + + static uptr getRegionInfoArraySize() { + return PrimaryT::getRegionInfoArraySize(); + } + + static void getErrorInfo(struct scudo_error_info *ErrorInfo, + uintptr_t FaultAddr, const char *DepotPtr, + const char *RegionInfoPtr, const char *Memory, + const char *MemoryTags, uintptr_t MemoryAddr, + size_t MemorySize) { + *ErrorInfo = {}; + if (!PrimaryT::SupportsMemoryTagging || + MemoryAddr + MemorySize < MemoryAddr) + return; + + uptr UntaggedFaultAddr = untagPointer(FaultAddr); + u8 FaultAddrTag = extractTag(FaultAddr); + BlockInfo Info = + PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr); + + auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool { + if (Addr < MemoryAddr || + Addr + archMemoryTagGranuleSize() < Addr || + Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize) + return false; + *Data = &Memory[Addr - MemoryAddr]; + *Tag = static_cast<u8>( + MemoryTags[(Addr - MemoryAddr) / archMemoryTagGranuleSize()]); + return true; + }; + + auto ReadBlock = [&](uptr Addr, uptr *ChunkAddr, + Chunk::UnpackedHeader *Header, const u32 **Data, + u8 *Tag) { + const char *BlockBegin; + u8 BlockBeginTag; + if (!GetGranule(Addr, &BlockBegin, &BlockBeginTag)) + return false; + uptr ChunkOffset = getChunkOffsetFromBlock(BlockBegin); + *ChunkAddr = Addr + ChunkOffset; + + const char *ChunkBegin; + if (!GetGranule(*ChunkAddr, &ChunkBegin, Tag)) + return false; + *Header = *reinterpret_cast<const Chunk::UnpackedHeader *>( + ChunkBegin - Chunk::getHeaderSize()); + *Data = reinterpret_cast<const u32 *>(ChunkBegin); + return true; + }; + + auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr); + + auto MaybeCollectTrace = [&](uintptr_t(&Trace)[MaxTraceSize], u32 Hash) { + uptr RingPos, Size; + if (!Depot->find(Hash, &RingPos, &Size)) + return; + for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) + Trace[I] = (*Depot)[RingPos + I]; + }; + + size_t NextErrorReport = 0; + + // First, check for UAF. 
+ { + uptr ChunkAddr; + Chunk::UnpackedHeader Header; + const u32 *Data; + uint8_t Tag; + if (ReadBlock(Info.BlockBegin, &ChunkAddr, &Header, &Data, &Tag) && + Header.State != Chunk::State::Allocated && + Data[MemTagPrevTagIndex] == FaultAddrTag) { + auto *R = &ErrorInfo->reports[NextErrorReport++]; + R->error_type = USE_AFTER_FREE; + R->allocation_address = ChunkAddr; + R->allocation_size = Header.SizeOrUnusedBytes; + MaybeCollectTrace(R->allocation_trace, + Data[MemTagAllocationTraceIndex]); + R->allocation_tid = Data[MemTagAllocationTidIndex]; + MaybeCollectTrace(R->deallocation_trace, + Data[MemTagDeallocationTraceIndex]); + R->deallocation_tid = Data[MemTagDeallocationTidIndex]; + } + } + + auto CheckOOB = [&](uptr BlockAddr) { + if (BlockAddr < Info.RegionBegin || BlockAddr >= Info.RegionEnd) + return false; + + uptr ChunkAddr; + Chunk::UnpackedHeader Header; + const u32 *Data; + uint8_t Tag; + if (!ReadBlock(BlockAddr, &ChunkAddr, &Header, &Data, &Tag) || + Header.State != Chunk::State::Allocated || Tag != FaultAddrTag) + return false; + + auto *R = &ErrorInfo->reports[NextErrorReport++]; + R->error_type = + UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW; + R->allocation_address = ChunkAddr; + R->allocation_size = Header.SizeOrUnusedBytes; + MaybeCollectTrace(R->allocation_trace, Data[MemTagAllocationTraceIndex]); + R->allocation_tid = Data[MemTagAllocationTidIndex]; + return NextErrorReport == + sizeof(ErrorInfo->reports) / sizeof(ErrorInfo->reports[0]); + }; + + if (CheckOOB(Info.BlockBegin)) + return; + + // Check for OOB in the 30 surrounding blocks. Beyond that we are likely to + // hit false positives. + for (int I = 1; I != 16; ++I) + if (CheckOOB(Info.BlockBegin + I * Info.BlockSize) || + CheckOOB(Info.BlockBegin - I * Info.BlockSize)) + return; + } + private: using SecondaryT = typename Params::Secondary; typedef typename PrimaryT::SizeClassMap SizeClassMap; @@ -561,9 +877,32 @@ private: static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), "Minimal alignment must at least cover a chunk header."); + static_assert(!PrimaryT::SupportsMemoryTagging || + MinAlignment >= archMemoryTagGranuleSize(), + ""); static const u32 BlockMarker = 0x44554353U; + // These are indexes into an "array" of 32-bit values that store information + // inline with a chunk that is relevant to diagnosing memory tag faults, where + // 0 corresponds to the address of the user memory. This means that negative + // indexes may be used to store information about allocations, while positive + // indexes may only be used to store information about deallocations, because + // the user memory is in use until it has been deallocated. The smallest index + // that may be used is -2, which corresponds to 8 bytes before the user + // memory, because the chunk header size is 8 bytes and in allocators that + // support memory tagging the minimum alignment is at least the tag granule + // size (16 on aarch64), and the largest index that may be used is 3 because + // we are only guaranteed to have at least a granule's worth of space in the + // user memory. 
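To make the index scheme described above concrete, here is a small stand-alone illustration of the 32-bit metadata slots addressed relative to the user pointer: negative indexes live in the 16-byte header granule, non-negative indexes reuse the first granule of the freed user memory. The buffer and the stored values are invented for the example.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // One 16-byte "header granule" followed by one 16-byte "user" granule.
  alignas(16) unsigned char Block[32] = {};
  unsigned char *UserPtr = Block + 16;
  auto *Ptr32 = reinterpret_cast<uint32_t *>(UserPtr);

  // While the chunk is live: allocation info goes just before the user memory.
  Ptr32[-2] = 0x11111111; // allocation stack trace hash
  Ptr32[-1] = 1234;       // allocation thread id

  // After deallocation: the first user granule is free to hold dealloc info.
  Ptr32[0] = 0x22222222;  // deallocation stack trace hash
  Ptr32[1] = 5678;        // deallocation thread id
  Ptr32[2] = 7;           // previous memory tag

  std::printf("alloc trace sits at byte offset %td from the user pointer\n",
              reinterpret_cast<unsigned char *>(&Ptr32[-2]) - UserPtr); // -8
  return 0;
}
```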
+ static const sptr MemTagAllocationTraceIndex = -2; + static const sptr MemTagAllocationTidIndex = -1; + static const sptr MemTagDeallocationTraceIndex = 0; + static const sptr MemTagDeallocationTidIndex = 1; + static const sptr MemTagPrevTagIndex = 2; + + static const uptr MaxTraceSize = 64; + GlobalStats Stats; TSDRegistryT TSDRegistry; PrimaryT Primary; @@ -574,12 +913,19 @@ private: struct { u8 MayReturnNull : 1; // may_return_null - u8 ZeroContents : 1; // zero_contents + FillContentsMode FillContents : 2; // zero_contents, pattern_fill_contents u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch u8 DeleteSizeMismatch : 1; // delete_size_mismatch + u8 TrackAllocationStacks : 1; u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size } Options; +#ifdef GWP_ASAN_HOOKS + gwp_asan::GuardedPoolAllocator GuardedAlloc; +#endif // GWP_ASAN_HOOKS + + StackDepot Depot; + // The following might get optimized out by the compiler. NOINLINE void performSanityChecks() { // Verify that the header offset field can hold the maximum offset. In the @@ -638,6 +984,14 @@ private: void quarantineOrDeallocateChunk(void *Ptr, Chunk::UnpackedHeader *Header, uptr Size) { Chunk::UnpackedHeader NewHeader = *Header; + if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) { + u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr))); + uptr TaggedBegin, TaggedEnd; + // Exclude the previous tag so that immediate use after free is detected + // 100% of the time. + setRandomTag(Ptr, Size, 1UL << PrevTag, &TaggedBegin, &TaggedEnd); + storeDeallocationStackMaybe(Ptr, PrevTag); + } // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. // Logical Or can be short-circuited, which introduces unnecessary @@ -672,13 +1026,39 @@ private: bool getChunkFromBlock(uptr Block, uptr *Chunk, Chunk::UnpackedHeader *Header) { - u32 Offset = 0; - if (reinterpret_cast<u32 *>(Block)[0] == BlockMarker) - Offset = reinterpret_cast<u32 *>(Block)[1]; - *Chunk = Block + Offset + Chunk::getHeaderSize(); + *Chunk = + Block + getChunkOffsetFromBlock(reinterpret_cast<const char *>(Block)); return Chunk::isValid(Cookie, reinterpret_cast<void *>(*Chunk), Header); } + static uptr getChunkOffsetFromBlock(const char *Block) { + u32 Offset = 0; + if (reinterpret_cast<const u32 *>(Block)[0] == BlockMarker) + Offset = reinterpret_cast<const u32 *>(Block)[1]; + return Offset + Chunk::getHeaderSize(); + } + + void storeAllocationStackMaybe(void *Ptr) { + if (!UNLIKELY(Options.TrackAllocationStacks)) + return; + auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); + Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(); + Ptr32[MemTagAllocationTidIndex] = getThreadID(); + } + + void storeDeallocationStackMaybe(void *Ptr, uint8_t PrevTag) { + if (!UNLIKELY(Options.TrackAllocationStacks)) + return; + + // Disable tag checks here so that we don't need to worry about zero sized + // allocations. 
+ ScopedDisableMemoryTagChecks x; + auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); + Ptr32[MemTagDeallocationTraceIndex] = collectStackTrace(); + Ptr32[MemTagDeallocationTidIndex] = getThreadID(); + Ptr32[MemTagPrevTagIndex] = PrevTag; + } + uptr getStats(ScopedString *Str) { Primary.getStats(Str); Secondary.getStats(Str); diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index a76eb6bbc164..9037f92b4976 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -126,12 +126,15 @@ inline uptr getPageSizeCached() { return getPageSizeSlow(); } +// Returns 0 if the number of CPUs could not be determined. u32 getNumberOfCPUs(); const char *getEnv(const char *Name); u64 getMonotonicTime(); +u32 getThreadID(); + // Our randomness gathering function is limited to 256 bytes to ensure we get // as many bytes as requested, and avoid interruptions (on Linux). constexpr uptr MaxRandomLength = 256U; @@ -142,6 +145,7 @@ bool getRandom(void *Buffer, uptr Length, bool Blocking = false); #define MAP_ALLOWNOMEM (1U << 0) #define MAP_NOACCESS (1U << 1) #define MAP_RESIZABLE (1U << 2) +#define MAP_MEMTAG (1U << 3) // Our platform memory mapping use is restricted to 3 scenarios: // - reserve memory at a random address (MAP_NOACCESS); @@ -171,6 +175,22 @@ void NORETURN dieOnMapUnmapError(bool OutOfMemory = false); void setAbortMessage(const char *Message); +struct BlockInfo { + uptr BlockBegin; + uptr BlockSize; + uptr RegionBegin; + uptr RegionEnd; +}; + +constexpr unsigned char PatternFillByte = 0xAB; + +enum FillContentsMode { + NoFill = 0, + ZeroFill = 1, + PatternOrZeroFill = 2 // Pattern fill unless the memory is known to be + // zero-initialized already. +}; + } // namespace scudo #endif // SCUDO_COMMON_H_ diff --git a/compiler-rt/lib/scudo/standalone/flags.cpp b/compiler-rt/lib/scudo/standalone/flags.cpp index dd9f050a2d20..de5153b288b1 100644 --- a/compiler-rt/lib/scudo/standalone/flags.cpp +++ b/compiler-rt/lib/scudo/standalone/flags.cpp @@ -9,7 +9,8 @@ #include "flags.h" #include "common.h" #include "flags_parser.h" -#include "interface.h" + +#include "scudo/interface.h" namespace scudo { diff --git a/compiler-rt/lib/scudo/standalone/flags.inc b/compiler-rt/lib/scudo/standalone/flags.inc index 25b86e14fa94..b5cab4734166 100644 --- a/compiler-rt/lib/scudo/standalone/flags.inc +++ b/compiler-rt/lib/scudo/standalone/flags.inc @@ -34,6 +34,9 @@ SCUDO_FLAG(bool, delete_size_mismatch, true, SCUDO_FLAG(bool, zero_contents, false, "Zero chunk contents on allocation.") +SCUDO_FLAG(bool, pattern_fill_contents, false, + "Pattern fill chunk contents on allocation.") + SCUDO_FLAG(int, rss_limit_mb, -1, "Enforce an upper limit (in megabytes) to the process RSS. The " "allocator will terminate or return NULL when allocations are " @@ -45,6 +48,6 @@ SCUDO_FLAG(bool, may_return_null, true, "returning NULL in otherwise non-fatal error scenarios, eg: OOM, " "invalid allocation alignments, etc.") -SCUDO_FLAG(int, release_to_os_interval_ms, 5000, +SCUDO_FLAG(int, release_to_os_interval_ms, SCUDO_ANDROID ? INT32_MIN : 5000, "Interval (in milliseconds) at which to attempt release of unused " "memory to the OS. 
Negative values disable the feature.") diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp index b3d72de158cf..d4ea33277941 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -170,6 +170,8 @@ u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } +u32 getThreadID() { return 0; } + bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { static_assert(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN, ""); if (UNLIKELY(!Buffer || !Length || Length > MaxRandomLength)) diff --git a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp new file mode 100644 index 000000000000..d29f515215e6 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp @@ -0,0 +1,48 @@ +//===-- get_error_info_fuzzer.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SCUDO_FUZZ +#include "allocator_config.h" +#include "combined.h" + +#include <fuzzer/FuzzedDataProvider.h> + +#include <string> +#include <vector> + +extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { + using AllocatorT = scudo::Allocator<scudo::AndroidConfig>; + FuzzedDataProvider FDP(Data, Size); + + uintptr_t FaultAddr = FDP.ConsumeIntegral<uintptr_t>(); + uintptr_t MemoryAddr = FDP.ConsumeIntegral<uintptr_t>(); + + std::string MemoryAndTags = FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + const char *Memory = MemoryAndTags.c_str(); + // Assume 16-byte alignment. + size_t MemorySize = (MemoryAndTags.length() / 17) * 16; + const char *MemoryTags = Memory + MemorySize; + + std::string StackDepotBytes = FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + std::vector<char> StackDepot(sizeof(scudo::StackDepot), 0); + for (size_t i = 0; i < StackDepotBytes.length() && i < StackDepot.size(); ++i) { + StackDepot[i] = StackDepotBytes[i]; + } + + std::string RegionInfoBytes = FDP.ConsumeRemainingBytesAsString(); + std::vector<char> RegionInfo(AllocatorT::getRegionInfoArraySize(), 0); + for (size_t i = 0; i < RegionInfoBytes.length() && i < RegionInfo.size(); ++i) { + RegionInfo[i] = RegionInfoBytes[i]; + } + + scudo_error_info ErrorInfo; + AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), + RegionInfo.data(), Memory, MemoryTags, MemoryAddr, + MemorySize); + return 0; +} diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h new file mode 100644 index 000000000000..d30fb6514a14 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h @@ -0,0 +1,110 @@ +//===-- scudo/interface.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_INTERFACE_H_ +#define SCUDO_INTERFACE_H_ + +#include <stddef.h> + +extern "C" { + +__attribute__((weak)) const char *__scudo_default_options(); + +// Post-allocation & pre-deallocation hooks. +// They must be thread-safe and not use heap related functions. +__attribute__((weak)) void __scudo_allocate_hook(void *ptr, size_t size); +__attribute__((weak)) void __scudo_deallocate_hook(void *ptr); + +void __scudo_print_stats(void); + +typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); + +// Determine the likely cause of a tag check fault or other memory protection +// error on a system with memory tagging support. The results are returned via +// the error_info data structure. Up to three possible causes are returned in +// the reports array, in decreasing order of probability. The remaining elements +// of reports are zero-initialized. +// +// This function may be called from a different process from the one that +// crashed. In this case, various data structures must be copied from the +// crashing process to the process that analyzes the crash. +// +// This interface is not guaranteed to be stable and may change at any time. +// Furthermore, the version of scudo in the crashing process must be the same as +// the version in the process that analyzes the crash. +// +// fault_addr is the fault address. On aarch64 this is available in the system +// register FAR_ELx, or far_context.far in an upcoming release of the Linux +// kernel. This address must include the pointer tag; note that the kernel +// strips the tag from the fields siginfo.si_addr and sigcontext.fault_address, +// so these addresses are not suitable to be passed as fault_addr. +// +// stack_depot is a pointer to the stack depot data structure, which may be +// obtained by calling the function __scudo_get_stack_depot_addr() in the +// crashing process. The size of the stack depot is available by calling the +// function __scudo_get_stack_depot_size(). +// +// region_info is a pointer to the region info data structure, which may be +// obtained by calling the function __scudo_get_region_info_addr() in the +// crashing process. The size of the region info is available by calling the +// function __scudo_get_region_info_size(). +// +// memory is a pointer to a region of memory surrounding the fault address. +// The more memory available via this pointer, the more likely it is that the +// function will be able to analyze a crash correctly. It is recommended to +// provide an amount of memory equal to 16 * the primary allocator's largest +// size class either side of the fault address. +// +// memory_tags is a pointer to an array of memory tags for the memory accessed +// via memory. Each byte of this array corresponds to a region of memory of size +// equal to the architecturally defined memory tag granule size (16 on aarch64). +// +// memory_addr is the start address of memory in the crashing process's address +// space. +// +// memory_size is the size of the memory region referred to by the memory +// pointer. 
+void __scudo_get_error_info(struct scudo_error_info *error_info, + uintptr_t fault_addr, const char *stack_depot, + const char *region_info, const char *memory, + const char *memory_tags, uintptr_t memory_addr, + size_t memory_size); + +enum scudo_error_type { + UNKNOWN, + USE_AFTER_FREE, + BUFFER_OVERFLOW, + BUFFER_UNDERFLOW, +}; + +struct scudo_error_report { + enum scudo_error_type error_type; + + uintptr_t allocation_address; + uintptr_t allocation_size; + + uint32_t allocation_tid; + uintptr_t allocation_trace[64]; + + uint32_t deallocation_tid; + uintptr_t deallocation_trace[64]; +}; + +struct scudo_error_info { + struct scudo_error_report reports[3]; +}; + +const char *__scudo_get_stack_depot_addr(); +size_t __scudo_get_stack_depot_size(); + +const char *__scudo_get_region_info_addr(); +size_t __scudo_get_region_info_size(); + +} // extern "C" + +#endif // SCUDO_INTERFACE_H_ diff --git a/compiler-rt/lib/scudo/standalone/interface.h b/compiler-rt/lib/scudo/standalone/interface.h deleted file mode 100644 index e2639823f426..000000000000 --- a/compiler-rt/lib/scudo/standalone/interface.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- interface.h ---------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SCUDO_INTERFACE_H_ -#define SCUDO_INTERFACE_H_ - -#include "internal_defs.h" - -extern "C" { - -WEAK INTERFACE const char *__scudo_default_options(); - -// Post-allocation & pre-deallocation hooks. -// They must be thread-safe and not use heap related functions. -WEAK INTERFACE void __scudo_allocate_hook(void *ptr, size_t size); -WEAK INTERFACE void __scudo_deallocate_hook(void *ptr); - -WEAK INTERFACE void __scudo_print_stats(void); - -typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); - -} // extern "C" - -#endif // SCUDO_INTERFACE_H_ diff --git a/compiler-rt/lib/scudo/standalone/internal_defs.h b/compiler-rt/lib/scudo/standalone/internal_defs.h index 8f6a89ecba73..a884f1f3a40e 100644 --- a/compiler-rt/lib/scudo/standalone/internal_defs.h +++ b/compiler-rt/lib/scudo/standalone/internal_defs.h @@ -29,12 +29,10 @@ // Attributes & builtins related macros. #define INTERFACE __attribute__((visibility("default"))) +#define HIDDEN __attribute__((visibility("hidden"))) #define WEAK __attribute__((weak)) #define ALWAYS_INLINE inline __attribute__((always_inline)) #define ALIAS(X) __attribute__((alias(X))) -// Please only use the ALIGNED macro before the type. Using ALIGNED after the -// variable declaration is not portable. 
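The removed ALIGNED macro wrapped the GCC-specific `__attribute__((aligned(X)))`, which is position-sensitive; the diff replaces its uses with standard `alignas`, as in the atomic_u64 and SizeClassInfo changes. A short demonstration of the portable form:

```cpp
#include <cstdint>
#include <iostream>

struct AtomicU64Like {
  // Guarantees 8-byte alignment even on 32-bit targets where uint64_t may
  // otherwise only be 4-byte aligned.
  alignas(8) volatile uint64_t ValDoNotUse;
};

struct alignas(64) CacheLinePadded { // 64 stands in for SCUDO_CACHE_LINE_SIZE
  char Data[64];
};

int main() {
  std::cout << alignof(AtomicU64Like) << ' ' << alignof(CacheLinePadded)
            << '\n'; // prints: 8 64
}
```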
-#define ALIGNED(X) __attribute__((aligned(X))) #define FORMAT(F, A) __attribute__((format(printf, F, A))) #define NOINLINE __attribute__((noinline)) #define NORETURN __attribute__((noreturn)) diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp index 8266a528f42c..69ffdd9a165b 100644 --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -35,6 +35,10 @@ #define ANDROID_PR_SET_VMA_ANON_NAME 0 #endif +#ifdef ANDROID_EXPERIMENTAL_MTE +#include <bionic/mte_kernel.h> +#endif + namespace scudo { uptr getPageSize() { return static_cast<uptr>(sysconf(_SC_PAGESIZE)); } @@ -50,6 +54,10 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, MmapProt = PROT_NONE; } else { MmapProt = PROT_READ | PROT_WRITE; +#if defined(__aarch64__) && defined(ANDROID_EXPERIMENTAL_MTE) + if (Flags & MAP_MEMTAG) + MmapProt |= PROT_MTE; +#endif } if (Addr) { // Currently no scenario for a noaccess mapping with a fixed address. @@ -124,10 +132,21 @@ u64 getMonotonicTime() { u32 getNumberOfCPUs() { cpu_set_t CPUs; - CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &CPUs), 0); + // sched_getaffinity can fail for a variety of legitimate reasons (lack of + // CAP_SYS_NICE, syscall filtering, etc), in which case we shall return 0. + if (sched_getaffinity(0, sizeof(cpu_set_t), &CPUs) != 0) + return 0; return static_cast<u32>(CPU_COUNT(&CPUs)); } +u32 getThreadID() { +#if SCUDO_ANDROID + return static_cast<u32>(gettid()); +#else + return static_cast<u32>(syscall(SYS_gettid)); +#endif +} + // Blocking is possibly unused if the getrandom block is not compiled in. bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { if (!Buffer || !Length || Length > MaxRandomLength) @@ -153,10 +172,34 @@ bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { return (ReadBytes == static_cast<ssize_t>(Length)); } +// Allocation free syslog-like API. +extern "C" WEAK int async_safe_write_log(int pri, const char *tag, + const char *msg); + void outputRaw(const char *Buffer) { - static HybridMutex Mutex; - ScopedLock L(Mutex); - write(2, Buffer, strlen(Buffer)); + if (&async_safe_write_log) { + constexpr s32 AndroidLogInfo = 4; + constexpr uptr MaxLength = 1024U; + char LocalBuffer[MaxLength]; + while (strlen(Buffer) > MaxLength) { + uptr P; + for (P = MaxLength - 1; P > 0; P--) { + if (Buffer[P] == '\n') { + memcpy(LocalBuffer, Buffer, P); + LocalBuffer[P] = '\0'; + async_safe_write_log(AndroidLogInfo, "scudo", LocalBuffer); + Buffer = &Buffer[P + 1]; + break; + } + } + // If no newline was found, just log the buffer. 
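The new outputRaw path above splits long messages at newline boundaries so each piece fits the Android logger's per-call buffer. A self-contained sketch of that splitting loop, with printf and a 32-byte limit standing in for async_safe_write_log and its real limit:

```cpp
#include <cstdio>
#include <cstring>

constexpr size_t MaxLength = 32; // stand-in for the 1024-byte logger limit

void logChunk(const char *Msg) { std::printf("[log] %s\n", Msg); }

void outputRawSketch(const char *Buffer) {
  char Local[MaxLength];
  while (std::strlen(Buffer) > MaxLength) {
    size_t P;
    // Find the last newline that fits in the window and emit up to it.
    for (P = MaxLength - 1; P > 0; P--) {
      if (Buffer[P] == '\n') {
        std::memcpy(Local, Buffer, P);
        Local[P] = '\0';
        logChunk(Local);
        Buffer = &Buffer[P + 1]; // continue after the newline
        break;
      }
    }
    if (P == 0) // no newline found in the window: give up splitting
      break;
  }
  logChunk(Buffer); // the remainder (or the whole unsplittable buffer)
}

int main() {
  outputRawSketch("first line of a long report\nsecond line\nthird line\n");
}
```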
+ if (P == 0) + break; + } + async_safe_write_log(AndroidLogInfo, "scudo", Buffer); + } else { + write(2, Buffer, strlen(Buffer)); + } } extern "C" WEAK void android_set_abort_message(const char *); diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h index b08abd3e5d9b..a6425fc6d1ea 100644 --- a/compiler-rt/lib/scudo/standalone/local_cache.h +++ b/compiler-rt/lib/scudo/standalone/local_cache.h @@ -165,13 +165,14 @@ private: NOINLINE void drain(PerClass *C, uptr ClassId) { const u32 Count = Min(C->MaxCount / 2, C->Count); - const uptr FirstIndexToDrain = C->Count - Count; - TransferBatch *B = createBatch(ClassId, C->Chunks[FirstIndexToDrain]); + TransferBatch *B = createBatch(ClassId, C->Chunks[0]); if (UNLIKELY(!B)) reportOutOfMemory( SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId)); - B->setFromArray(&C->Chunks[FirstIndexToDrain], Count); + B->setFromArray(&C->Chunks[0], Count); C->Count -= Count; + for (uptr I = 0; I < C->Count; I++) + C->Chunks[I] = C->Chunks[I + Count]; Allocator->pushBatch(ClassId, B); } }; diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h new file mode 100644 index 000000000000..6f347f4694e8 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -0,0 +1,261 @@ +//===-- memtag.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEMTAG_H_ +#define SCUDO_MEMTAG_H_ + +#include "internal_defs.h" + +#if SCUDO_LINUX +#include <sys/auxv.h> +#include <sys/prctl.h> +#if defined(ANDROID_EXPERIMENTAL_MTE) +#include <bionic/mte_kernel.h> +#endif +#endif + +namespace scudo { + +#if defined(__aarch64__) || defined(SCUDO_FUZZ) + +inline constexpr bool archSupportsMemoryTagging() { return true; } +inline constexpr uptr archMemoryTagGranuleSize() { return 16; } + +inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } + +inline uint8_t extractTag(uptr Ptr) { + return (Ptr >> 56) & 0xf; +} + +#else + +inline constexpr bool archSupportsMemoryTagging() { return false; } + +inline uptr archMemoryTagGranuleSize() { + UNREACHABLE("memory tagging not supported"); +} + +inline uptr untagPointer(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +inline uint8_t extractTag(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +#endif + +#if defined(__aarch64__) + +inline bool systemSupportsMemoryTagging() { +#if defined(ANDROID_EXPERIMENTAL_MTE) + return getauxval(AT_HWCAP2) & HWCAP2_MTE; +#else + return false; +#endif +} + +inline bool systemDetectsMemoryTagFaultsTestOnly() { +#if defined(ANDROID_EXPERIMENTAL_MTE) + return (prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0) & PR_MTE_TCF_MASK) != + PR_MTE_TCF_NONE; +#else + return false; +#endif +} + +inline void disableMemoryTagChecksTestOnly() { + __asm__ __volatile__(".arch_extension mte; msr tco, #1"); +} + +inline void enableMemoryTagChecksTestOnly() { + __asm__ __volatile__(".arch_extension mte; msr tco, #0"); +} + +class ScopedDisableMemoryTagChecks { + size_t PrevTCO; + + public: + ScopedDisableMemoryTagChecks() { + __asm__ __volatile__(".arch_extension mte; mrs %0, tco; msr tco, #1" + : "=r"(PrevTCO)); + } + + 
~ScopedDisableMemoryTagChecks() { + __asm__ __volatile__(".arch_extension mte; msr tco, %0" : : "r"(PrevTCO)); + } +}; + +inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, + uptr *TaggedBegin, uptr *TaggedEnd) { + void *End; + __asm__ __volatile__( + R"( + .arch_extension mte + + // Set a random tag for Ptr in TaggedPtr. This needs to happen even if + // Size = 0 so that TaggedPtr ends up pointing at a valid address. + irg %[TaggedPtr], %[Ptr], %[ExcludeMask] + mov %[Cur], %[TaggedPtr] + + // Skip the loop if Size = 0. We don't want to do any tagging in this case. + cbz %[Size], 2f + + // Set the memory tag of the region + // [TaggedPtr, TaggedPtr + roundUpTo(Size, 16)) + // to the pointer tag stored in TaggedPtr. + add %[End], %[TaggedPtr], %[Size] + + 1: + stzg %[Cur], [%[Cur]], #16 + cmp %[Cur], %[End] + b.lt 1b + + 2: + )" + : + [TaggedPtr] "=&r"(*TaggedBegin), [Cur] "=&r"(*TaggedEnd), [End] "=&r"(End) + : [Ptr] "r"(Ptr), [Size] "r"(Size), [ExcludeMask] "r"(ExcludeMask) + : "memory"); +} + +inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) { + // Prepare the granule before the chunk to store the chunk header by setting + // its tag to 0. Normally its tag will already be 0, but in the case where a + // chunk holding a low alignment allocation is reused for a higher alignment + // allocation, the chunk may already have a non-zero tag from the previous + // allocation. + __asm__ __volatile__(".arch_extension mte; stg %0, [%0, #-16]" + : + : "r"(Ptr) + : "memory"); + + uptr TaggedBegin, TaggedEnd; + setRandomTag(Ptr, Size, 0, &TaggedBegin, &TaggedEnd); + + // Finally, set the tag of the granule past the end of the allocation to 0, + // to catch linear overflows even if a previous larger allocation used the + // same block and tag. Only do this if the granule past the end is in our + // block, because this would otherwise lead to a SEGV if the allocation + // covers the entire block and our block is at the end of a mapping. The tag + // of the next block's header granule will be set to 0, so it will serve the + // purpose of catching linear overflows in this case. + uptr UntaggedEnd = untagPointer(TaggedEnd); + if (UntaggedEnd != BlockEnd) + __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" + : + : "r"(UntaggedEnd) + : "memory"); + return reinterpret_cast<void *>(TaggedBegin); +} + +inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { + uptr RoundOldPtr = roundUpTo(OldPtr, 16); + if (RoundOldPtr >= NewPtr) { + // If the allocation is shrinking we just need to set the tag past the end + // of the allocation to 0. See explanation in prepareTaggedChunk above. + uptr RoundNewPtr = untagPointer(roundUpTo(NewPtr, 16)); + if (RoundNewPtr != BlockEnd) + __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" + : + : "r"(RoundNewPtr) + : "memory"); + return; + } + + __asm__ __volatile__(R"( + .arch_extension mte + + // Set the memory tag of the region + // [roundUpTo(OldPtr, 16), roundUpTo(NewPtr, 16)) + // to the pointer tag stored in OldPtr. + 1: + stzg %[Cur], [%[Cur]], #16 + cmp %[Cur], %[End] + b.lt 1b + + // Finally, set the tag of the granule past the end of the allocation to 0. 
+ and %[Cur], %[Cur], #(1 << 56) - 1 + cmp %[Cur], %[BlockEnd] + b.eq 2f + stg %[Cur], [%[Cur]] + + 2: + )" + : [ Cur ] "+&r"(RoundOldPtr), [ End ] "+&r"(NewPtr) + : [ BlockEnd ] "r"(BlockEnd) + : "memory"); +} + +inline uptr loadTag(uptr Ptr) { + uptr TaggedPtr = Ptr; + __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" + : "+r"(TaggedPtr) + : + : "memory"); + return TaggedPtr; +} + +#else + +inline bool systemSupportsMemoryTagging() { + UNREACHABLE("memory tagging not supported"); +} + +inline bool systemDetectsMemoryTagFaultsTestOnly() { + UNREACHABLE("memory tagging not supported"); +} + +inline void disableMemoryTagChecksTestOnly() { + UNREACHABLE("memory tagging not supported"); +} + +inline void enableMemoryTagChecksTestOnly() { + UNREACHABLE("memory tagging not supported"); +} + +struct ScopedDisableMemoryTagChecks { + ScopedDisableMemoryTagChecks() {} +}; + +inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, + uptr *TaggedBegin, uptr *TaggedEnd) { + (void)Ptr; + (void)Size; + (void)ExcludeMask; + (void)TaggedBegin; + (void)TaggedEnd; + UNREACHABLE("memory tagging not supported"); +} + +inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) { + (void)Ptr; + (void)Size; + (void)BlockEnd; + UNREACHABLE("memory tagging not supported"); +} + +inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { + (void)OldPtr; + (void)NewPtr; + (void)BlockEnd; + UNREACHABLE("memory tagging not supported"); +} + +inline uptr loadTag(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +#endif + +} // namespace scudo + +#endif diff --git a/compiler-rt/lib/scudo/standalone/mutex.h b/compiler-rt/lib/scudo/standalone/mutex.h index b26b2df06627..d6e6a5b33aae 100644 --- a/compiler-rt/lib/scudo/standalone/mutex.h +++ b/compiler-rt/lib/scudo/standalone/mutex.h @@ -22,7 +22,7 @@ namespace scudo { class HybridMutex { public: - void init() { memset(this, 0, sizeof(*this)); } + void init() { M = {}; } bool tryLock(); NOINLINE void lock() { if (LIKELY(tryLock())) diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index e296a78778e0..29a268098185 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -38,14 +38,23 @@ namespace scudo { // Memory used by this allocator is never unmapped but can be partially // reclaimed if the platform allows for it. -template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator32 { +template <class SizeClassMapT, uptr RegionSizeLog, + s32 MinReleaseToOsIntervalMs = INT32_MIN, + s32 MaxReleaseToOsIntervalMs = INT32_MAX> +class SizeClassAllocator32 { public: typedef SizeClassMapT SizeClassMap; + // The bytemap can only track UINT8_MAX - 1 classes. + static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); // Regions should be large enough to hold the largest Block. static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, ""); - typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog> ThisT; + typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog, + MinReleaseToOsIntervalMs, + MaxReleaseToOsIntervalMs> + ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; typedef typename CacheT::TransferBatch TransferBatch; + static const bool SupportsMemoryTagging = false; static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -63,20 +72,21 @@ public: MinRegionIndex = NumRegions; // MaxRegionIndex is already initialized to 0. 
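Stepping back to the memtag.h helpers added above: untagPointer and extractTag treat bits 56-59 of an AArch64 pointer as the memory tag. A portable illustration of that bit layout, using uint64_t instead of uptr so it also builds on 32-bit hosts; the address value is arbitrary.

```cpp
#include <cstdint>
#include <cstdio>

using uptr = uint64_t; // stand-in for scudo::uptr, fixed at 64 bits here

constexpr uptr untagPointerSketch(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); }
constexpr uint8_t extractTagSketch(uptr Ptr) { return (Ptr >> 56) & 0xf; }
constexpr uptr addTagSketch(uptr Ptr, uint8_t Tag) {
  return untagPointerSketch(Ptr) | (static_cast<uptr>(Tag) << 56);
}

int main() {
  const uptr Untagged = 0x0000007fdeadbee0ULL;
  const uptr Tagged = addTagSketch(Untagged, 0xA);
  std::printf("tag=%x untagged=%llx\n", extractTagSketch(Tagged),
              static_cast<unsigned long long>(untagPointerSketch(Tagged)));
  // prints: tag=a untagged=7fdeadbee0
}
```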
u32 Seed; + const u64 Time = getMonotonicTime(); if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))) - Seed = - static_cast<u32>(getMonotonicTime() ^ - (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6)); + Seed = static_cast<u32>( + Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6)); const uptr PageSize = getPageSizeCached(); for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); Sci->RandState = getRandomU32(&Seed); // See comment in the 64-bit primary about releasing smaller size classes. - Sci->CanRelease = (ReleaseToOsInterval >= 0) && - (I != SizeClassMap::BatchClassId) && + Sci->CanRelease = (I != SizeClassMap::BatchClassId) && (getSizeByClassId(I) >= (PageSize / 32)); + if (Sci->CanRelease) + Sci->ReleaseInfo.LastReleaseAtNs = Time; } - ReleaseToOsIntervalMs = ReleaseToOsInterval; + setReleaseToOsIntervalMs(ReleaseToOsInterval); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -87,8 +97,7 @@ public: while (NumberOfStashedRegions > 0) unmap(reinterpret_cast<void *>(RegionsStash[--NumberOfStashedRegions]), RegionSize); - // TODO(kostyak): unmap the TransferBatch regions as well. - for (uptr I = 0; I < NumRegions; I++) + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) if (PossibleRegions[I]) unmap(reinterpret_cast<void *>(I * RegionSize), RegionSize); PossibleRegions.unmapTestOnly(); @@ -147,8 +156,9 @@ public: template <typename F> void iterateOverBlocks(F Callback) { for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) - if (PossibleRegions[I]) { - const uptr BlockSize = getSizeByClassId(PossibleRegions[I]); + if (PossibleRegions[I] && + (PossibleRegions[I] - 1U) != SizeClassMap::BatchClassId) { + const uptr BlockSize = getSizeByClassId(PossibleRegions[I] - 1U); const uptr From = I * RegionSize; const uptr To = From + (RegionSize / BlockSize) * BlockSize; for (uptr Block = From; Block < To; Block += BlockSize) @@ -174,11 +184,18 @@ public: getStats(Str, I, 0); } + void setReleaseToOsIntervalMs(s32 Interval) { + if (Interval >= MaxReleaseToOsIntervalMs) { + Interval = MaxReleaseToOsIntervalMs; + } else if (Interval <= MinReleaseToOsIntervalMs) { + Interval = MinReleaseToOsIntervalMs; + } + atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed); + } + uptr releaseToOS() { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); @@ -186,15 +203,24 @@ public: return TotalReleasedBytes; } + bool useMemoryTagging() { return false; } + void disableMemoryTagging() {} + + const char *getRegionInfoArrayAddress() const { return nullptr; } + static uptr getRegionInfoArraySize() { return 0; } + + static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + (void)RegionInfoData; + (void)Ptr; + return {}; + } + private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumRegions = SCUDO_MMAP_RANGE_SIZE >> RegionSizeLog; -#if SCUDO_WORDSIZE == 32U + static const u32 MaxNumBatches = SCUDO_ANDROID ? 
4U : 8U; typedef FlatByteMap<NumRegions> ByteMap; -#else - typedef TwoLevelByteMap<(NumRegions >> 12), 1UL << 12> ByteMap; -#endif struct SizeClassStats { uptr PoppedBlocks; @@ -208,9 +234,11 @@ private: u64 LastReleaseAtNs; }; - struct ALIGNED(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { + struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { HybridMutex Mutex; SinglyLinkedList<TransferBatch> FreeList; + uptr CurrentRegion; + uptr CurrentRegionAllocated; SizeClassStats Stats; bool CanRelease; u32 RandState; @@ -261,14 +289,12 @@ private: if (!Region) Region = allocateRegionSlow(); if (LIKELY(Region)) { - if (ClassId) { - const uptr RegionIndex = computeRegionId(Region); - if (RegionIndex < MinRegionIndex) - MinRegionIndex = RegionIndex; - if (RegionIndex > MaxRegionIndex) - MaxRegionIndex = RegionIndex; - PossibleRegions.set(RegionIndex, static_cast<u8>(ClassId)); - } + const uptr RegionIndex = computeRegionId(Region); + if (RegionIndex < MinRegionIndex) + MinRegionIndex = RegionIndex; + if (RegionIndex > MaxRegionIndex) + MaxRegionIndex = RegionIndex; + PossibleRegions.set(RegionIndex, static_cast<u8>(ClassId + 1U)); } return Region; } @@ -303,21 +329,50 @@ private: NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) { - const uptr Region = allocateRegion(ClassId); - if (UNLIKELY(!Region)) - return nullptr; - C->getStats().add(StatMapped, RegionSize); + uptr Region; + uptr Offset; + // If the size-class currently has a region associated to it, use it. The + // newly created blocks will be located after the currently allocated memory + // for that region (up to RegionSize). Otherwise, create a new region, where + // the new blocks will be carved from the beginning. + if (Sci->CurrentRegion) { + Region = Sci->CurrentRegion; + DCHECK_GT(Sci->CurrentRegionAllocated, 0U); + Offset = Sci->CurrentRegionAllocated; + } else { + DCHECK_EQ(Sci->CurrentRegionAllocated, 0U); + Region = allocateRegion(ClassId); + if (UNLIKELY(!Region)) + return nullptr; + C->getStats().add(StatMapped, RegionSize); + Sci->CurrentRegion = Region; + Offset = 0; + } + const uptr Size = getSizeByClassId(ClassId); const u32 MaxCount = TransferBatch::getMaxCached(Size); - DCHECK_GT(MaxCount, 0); - const uptr NumberOfBlocks = RegionSize / Size; - DCHECK_GT(NumberOfBlocks, 0); + DCHECK_GT(MaxCount, 0U); + // The maximum number of blocks we should carve in the region is dictated + // by the maximum number of batches we want to fill, and the amount of + // memory left in the current region (we use the lowest of the two). This + // will not be 0 as we ensure that a region can at least hold one block (via + // static_assert and at the end of this function). + const u32 NumberOfBlocks = + Min(MaxNumBatches * MaxCount, + static_cast<u32>((RegionSize - Offset) / Size)); + DCHECK_GT(NumberOfBlocks, 0U); + TransferBatch *B = nullptr; - constexpr u32 ShuffleArraySize = 8U * TransferBatch::MaxNumCached; + constexpr u32 ShuffleArraySize = + MaxNumBatches * TransferBatch::MaxNumCached; + // Fill the transfer batches and put them in the size-class freelist. We + // need to randomize the blocks for security purposes, so we first fill a + // local array that we then shuffle before populating the batches. 
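For illustration, a rough standalone sketch of the fill/shuffle/batch scheme described in the comment above; std::shuffle stands in for scudo's own shuffle() and all addresses and sizes are made up:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <random>
    #include <vector>

    int main() {
      // Hypothetical region: 16 blocks of 64 bytes carved from a base address.
      const uintptr_t Region = 0x100000;
      const uintptr_t BlockSize = 64;
      const uint32_t NumberOfBlocks = 16;
      const uint32_t MaxCachedPerBatch = 5;

      std::vector<uintptr_t> ShuffleArray;
      for (uint32_t I = 0; I < NumberOfBlocks; I++)
        ShuffleArray.push_back(Region + I * BlockSize);

      // Shuffle so that blocks handed out back-to-back are not adjacent in
      // memory (scudo seeds its own shuffle() from getRandomU32()).
      std::mt19937 Rng(12345);
      std::shuffle(ShuffleArray.begin(), ShuffleArray.end(), Rng);

      // Split the shuffled pointers into transfer batches of bounded size.
      for (size_t I = 0; I < ShuffleArray.size(); I += MaxCachedPerBatch) {
        const size_t End =
            std::min<size_t>(I + MaxCachedPerBatch, ShuffleArray.size());
        std::printf("batch:");
        for (size_t J = I; J < End; J++)
          std::printf(" %#zx", static_cast<size_t>(ShuffleArray[J]));
        std::printf("\n");
      }
      return 0;
    }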
void *ShuffleArray[ShuffleArraySize]; u32 Count = 0; const uptr AllocatedUser = Size * NumberOfBlocks; - for (uptr I = Region; I < Region + AllocatedUser; I += Size) { + for (uptr I = Region + Offset; I < Region + Offset + AllocatedUser; + I += Size) { ShuffleArray[Count++] = reinterpret_cast<void *>(I); if (Count == ShuffleArraySize) { if (UNLIKELY(!populateBatches(C, Sci, ClassId, &B, MaxCount, @@ -340,9 +395,18 @@ private: DCHECK_GT(B->getCount(), 0); C->getStats().add(StatFree, AllocatedUser); + DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize); + // If there is not enough room in the region currently associated to fit + // more blocks, we deassociate the region by resetting CurrentRegion and + // CurrentRegionAllocated. Otherwise, update the allocated amount. + if (RegionSize - (Sci->CurrentRegionAllocated + AllocatedUser) < Size) { + Sci->CurrentRegion = 0; + Sci->CurrentRegionAllocated = 0; + } else { + Sci->CurrentRegionAllocated += AllocatedUser; + } Sci->AllocatedUser += AllocatedUser; - if (Sci->CanRelease) - Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + return B; } @@ -353,10 +417,14 @@ private: const uptr InUse = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; const uptr AvailableChunks = Sci->AllocatedUser / getSizeByClassId(ClassId); Str->append(" %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu avail: %6zu rss: %6zuK\n", + "inuse: %6zu avail: %6zu rss: %6zuK releases: %6zu\n", ClassId, getSizeByClassId(ClassId), Sci->AllocatedUser >> 10, Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks, InUse, - AvailableChunks, Rss >> 10); + AvailableChunks, Rss >> 10, Sci->ReleaseInfo.RangesReleased); + } + + s32 getReleaseToOsIntervalMs() { + return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed); } NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, @@ -370,18 +438,18 @@ private: (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; if (BytesInFreeList < PageSize) return 0; // No chance to release anything. - if ((Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize < - PageSize) { + const uptr BytesPushed = + (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * + BlockSize; + if (BytesPushed < PageSize) return 0; // Nothing new to release. - } if (!Force) { - const s32 IntervalMs = ReleaseToOsIntervalMs; + const s32 IntervalMs = getReleaseToOsIntervalMs(); if (IntervalMs < 0) return 0; if (Sci->ReleaseInfo.LastReleaseAtNs + - static_cast<uptr>(IntervalMs) * 1000000ULL > + static_cast<u64>(IntervalMs) * 1000000 > getMonotonicTime()) { return 0; // Memory was returned recently. } @@ -391,11 +459,18 @@ private: // iterate multiple times over the same freelist if a ClassId spans multiple // regions. But it will have to do for now. uptr TotalReleasedBytes = 0; + const uptr MaxSize = (RegionSize / BlockSize) * BlockSize; for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { - if (PossibleRegions[I] == ClassId) { - ReleaseRecorder Recorder(I * RegionSize); - releaseFreeMemoryToOS(Sci->FreeList, I * RegionSize, - RegionSize / PageSize, BlockSize, &Recorder); + if (PossibleRegions[I] - 1U == ClassId) { + const uptr Region = I * RegionSize; + // If the region is the one currently associated to the size-class, we + // only need to release up to CurrentRegionAllocated, MaxSize otherwise. + const uptr Size = (Region == Sci->CurrentRegion) + ? 
Sci->CurrentRegionAllocated + : MaxSize; + ReleaseRecorder Recorder(Region); + releaseFreeMemoryToOS(Sci->FreeList, Region, Size, BlockSize, + &Recorder); if (Recorder.getReleasedRangesCount() > 0) { Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); @@ -410,12 +485,13 @@ private: SizeClassInfo SizeClassInfoArray[NumClasses]; + // Track the regions in use, 0 is unused, otherwise store ClassId + 1. ByteMap PossibleRegions; // Keep track of the lowest & highest regions allocated to avoid looping // through the whole NumRegions. uptr MinRegionIndex; uptr MaxRegionIndex; - s32 ReleaseToOsIntervalMs; + atomic_s32 ReleaseToOsIntervalMs; // Unless several threads request regions simultaneously from different size // classes, the stash rarely contains more than 1 entry. static constexpr uptr MaxStashedRegions = 4; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index ef02f0b772d6..d4767882ba2c 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -13,6 +13,7 @@ #include "common.h" #include "list.h" #include "local_cache.h" +#include "memtag.h" #include "release.h" #include "stats.h" #include "string_utils.h" @@ -38,12 +39,21 @@ namespace scudo { // The memory used by this allocator is never unmapped, but can be partially // released if the platform allows for it. -template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 { +template <class SizeClassMapT, uptr RegionSizeLog, + s32 MinReleaseToOsIntervalMs = INT32_MIN, + s32 MaxReleaseToOsIntervalMs = INT32_MAX, + bool MaySupportMemoryTagging = false> +class SizeClassAllocator64 { public: typedef SizeClassMapT SizeClassMap; - typedef SizeClassAllocator64<SizeClassMap, RegionSizeLog> ThisT; + typedef SizeClassAllocator64< + SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs, + MaxReleaseToOsIntervalMs, MaySupportMemoryTagging> + ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; typedef typename CacheT::TransferBatch TransferBatch; + static const bool SupportsMemoryTagging = + MaySupportMemoryTagging && archSupportsMemoryTagging(); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -58,20 +68,17 @@ public: PrimaryBase = reinterpret_cast<uptr>( map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data)); - RegionInfoArray = reinterpret_cast<RegionInfo *>( - map(nullptr, sizeof(RegionInfo) * NumClasses, "scudo:regioninfo")); - DCHECK_EQ(reinterpret_cast<uptr>(RegionInfoArray) % SCUDO_CACHE_LINE_SIZE, - 0); - u32 Seed; + const u64 Time = getMonotonicTime(); if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))) - Seed = static_cast<u32>(getMonotonicTime() ^ (PrimaryBase >> 12)); + Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12)); const uptr PageSize = getPageSizeCached(); for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); // The actual start of a region is offseted by a random number of pages. Region->RegionBeg = getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize; + Region->RandState = getRandomU32(&Seed); // Releasing smaller size classes doesn't necessarily yield to a // meaningful RSS impact: there are more blocks per page, they are // randomized around, and thus pages are less likely to be entirely empty. @@ -79,12 +86,15 @@ public: // memory accesses which ends up being fairly costly. 
The current lower // limit is mostly arbitrary and based on empirical observations. // TODO(kostyak): make the lower limit a runtime option - Region->CanRelease = (ReleaseToOsInterval >= 0) && - (I != SizeClassMap::BatchClassId) && + Region->CanRelease = (I != SizeClassMap::BatchClassId) && (getSizeByClassId(I) >= (PageSize / 32)); - Region->RandState = getRandomU32(&Seed); + if (Region->CanRelease) + Region->ReleaseInfo.LastReleaseAtNs = Time; } - ReleaseToOsIntervalMs = ReleaseToOsInterval; + setReleaseToOsIntervalMs(ReleaseToOsInterval); + + if (SupportsMemoryTagging) + UseMemoryTagging = systemSupportsMemoryTagging(); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -93,8 +103,6 @@ public: void unmapTestOnly() { unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data); - unmap(reinterpret_cast<void *>(RegionInfoArray), - sizeof(RegionInfo) * NumClasses); } TransferBatch *popBatch(CacheT *C, uptr ClassId) { @@ -143,7 +151,7 @@ public: } } - template <typename F> void iterateOverBlocks(F Callback) const { + template <typename F> void iterateOverBlocks(F Callback) { for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; @@ -156,7 +164,7 @@ public: } } - void getStats(ScopedString *Str) const { + void getStats(ScopedString *Str) { // TODO(kostyak): get the RSS per region. uptr TotalMapped = 0; uptr PoppedBlocks = 0; @@ -177,11 +185,18 @@ public: getStats(Str, I, 0); } + void setReleaseToOsIntervalMs(s32 Interval) { + if (Interval >= MaxReleaseToOsIntervalMs) { + Interval = MaxReleaseToOsIntervalMs; + } else if (Interval <= MinReleaseToOsIntervalMs) { + Interval = MinReleaseToOsIntervalMs; + } + atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed); + } + uptr releaseToOS() { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; RegionInfo *Region = getRegionInfo(I); ScopedLock L(Region->Mutex); TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); @@ -189,15 +204,72 @@ public: return TotalReleasedBytes; } + bool useMemoryTagging() const { + return SupportsMemoryTagging && UseMemoryTagging; + } + void disableMemoryTagging() { UseMemoryTagging = false; } + + const char *getRegionInfoArrayAddress() const { + return reinterpret_cast<const char *>(RegionInfoArray); + } + + static uptr getRegionInfoArraySize() { + return sizeof(RegionInfoArray); + } + + static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + const RegionInfo *RegionInfoArray = + reinterpret_cast<const RegionInfo *>(RegionInfoData); + uptr ClassId; + uptr MinDistance = -1UL; + for (uptr I = 0; I != NumClasses; ++I) { + if (I == SizeClassMap::BatchClassId) + continue; + uptr Begin = RegionInfoArray[I].RegionBeg; + uptr End = Begin + RegionInfoArray[I].AllocatedUser; + if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) + continue; + uptr RegionDistance; + if (Begin <= Ptr) { + if (Ptr < End) + RegionDistance = 0; + else + RegionDistance = Ptr - End; + } else { + RegionDistance = Begin - Ptr; + } + + if (RegionDistance < MinDistance) { + MinDistance = RegionDistance; + ClassId = I; + } + } + + BlockInfo B = {}; + if (MinDistance <= 8192) { + B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; + B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser; + B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); + B.BlockBegin = + B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * + sptr(B.BlockSize)); + 
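    // Worked example of the rounding just above, with made-up numbers:
    //   RegionBegin = 0x1000, BlockSize = 48, Ptr = 0x10a0
    //   (Ptr - RegionBegin) / BlockSize = 0xa0 / 48 = 3  (signed, truncates
    //   toward zero)
    //   BlockBegin = 0x1000 + 3 * 48 = 0x1090
    // The two loops that follow only adjust BlockBegin when Ptr fell slightly
    // before the region, or landed in the truncated block at the region's end.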
while (B.BlockBegin < B.RegionBegin) + B.BlockBegin += B.BlockSize; + while (B.RegionEnd < B.BlockBegin + B.BlockSize) + B.BlockBegin -= B.BlockSize; + } + return B; + } + private: static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr PrimarySize = RegionSize * NumClasses; // Call map for user memory with at least this size. - static const uptr MapSizeIncrement = 1UL << 17; + static const uptr MapSizeIncrement = 1UL << 18; // Fill at most this number of batches from the newly map'd memory. - static const u32 MaxNumBatches = 8U; + static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; struct RegionStats { uptr PoppedBlocks; @@ -211,7 +283,7 @@ private: u64 LastReleaseAtNs; }; - struct ALIGNED(SCUDO_CACHE_LINE_SIZE) RegionInfo { + struct UnpaddedRegionInfo { HybridMutex Mutex; SinglyLinkedList<TransferBatch> FreeList; RegionStats Stats; @@ -224,14 +296,19 @@ private: MapPlatformData Data; ReleaseToOsInfo ReleaseInfo; }; + struct RegionInfo : UnpaddedRegionInfo { + char Padding[SCUDO_CACHE_LINE_SIZE - + (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)]; + }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr PrimaryBase; - RegionInfo *RegionInfoArray; MapPlatformData Data; - s32 ReleaseToOsIntervalMs; + atomic_s32 ReleaseToOsIntervalMs; + bool UseMemoryTagging; + alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; - RegionInfo *getRegionInfo(uptr ClassId) const { + RegionInfo *getRegionInfo(uptr ClassId) { DCHECK_LT(ClassId, NumClasses); return &RegionInfoArray[ClassId]; } @@ -294,7 +371,9 @@ private: Region->Data = Data; if (UNLIKELY(!map(reinterpret_cast<void *>(RegionBeg + MappedUser), UserMapSize, "scudo:primary", - MAP_ALLOWNOMEM | MAP_RESIZABLE, &Region->Data))) + MAP_ALLOWNOMEM | MAP_RESIZABLE | + (useMemoryTagging() ? MAP_MEMTAG : 0), + &Region->Data))) return nullptr; Region->MappedUser += UserMapSize; C->getStats().add(StatMapped, UserMapSize); @@ -337,13 +416,11 @@ private: C->getStats().add(StatFree, AllocatedUser); Region->AllocatedUser += AllocatedUser; Region->Exhausted = false; - if (Region->CanRelease) - Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); return B; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) const { + void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { RegionInfo *Region = getRegionInfo(ClassId); if (Region->MappedUser == 0) return; @@ -360,6 +437,10 @@ private: getRegionBaseByClassId(ClassId)); } + s32 getReleaseToOsIntervalMs() { + return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed); + } + NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, bool Force = false) { const uptr BlockSize = getSizeByClassId(ClassId); @@ -371,19 +452,18 @@ private: (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; if (BytesInFreeList < PageSize) return 0; // No chance to release anything. - if ((Region->Stats.PushedBlocks - - Region->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize < - PageSize) { + const uptr BytesPushed = (Region->Stats.PushedBlocks - + Region->ReleaseInfo.PushedBlocksAtLastRelease) * + BlockSize; + if (BytesPushed < PageSize) return 0; // Nothing new to release. 
- } if (!Force) { - const s32 IntervalMs = ReleaseToOsIntervalMs; + const s32 IntervalMs = getReleaseToOsIntervalMs(); if (IntervalMs < 0) return 0; if (Region->ReleaseInfo.LastReleaseAtNs + - static_cast<uptr>(IntervalMs) * 1000000ULL > + static_cast<u64>(IntervalMs) * 1000000 > getMonotonicTime()) { return 0; // Memory was returned recently. } @@ -391,8 +471,7 @@ private: ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg, - roundUpTo(Region->AllocatedUser, PageSize) / PageSize, - BlockSize, &Recorder); + Region->AllocatedUser, BlockSize, &Recorder); if (Recorder.getReleasedRangesCount() > 0) { Region->ReleaseInfo.PushedBlocksAtLastRelease = diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h index 406a0e23804d..27aa4bfec91a 100644 --- a/compiler-rt/lib/scudo/standalone/quarantine.h +++ b/compiler-rt/lib/scudo/standalone/quarantine.h @@ -187,7 +187,12 @@ public: Cache.initLinkerInitialized(); } void init(uptr Size, uptr CacheSize) { - memset(this, 0, sizeof(*this)); + CacheMutex.init(); + Cache.init(); + RecycleMutex.init(); + MinSize = {}; + MaxSize = {}; + MaxCacheSize = {}; initLinkerInitialized(Size, CacheSize); } diff --git a/compiler-rt/lib/scudo/standalone/release.cpp b/compiler-rt/lib/scudo/standalone/release.cpp new file mode 100644 index 000000000000..e144b354b258 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/release.cpp @@ -0,0 +1,16 @@ +//===-- release.cpp ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "release.h" + +namespace scudo { + +HybridMutex PackedCounterArray::Mutex = {}; +uptr PackedCounterArray::StaticBuffer[1024]; + +} // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h index 4b5c56ce7c19..323bf9db6dca 100644 --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -11,6 +11,7 @@ #include "common.h" #include "list.h" +#include "mutex.h" namespace scudo { @@ -39,11 +40,13 @@ private: }; // A packed array of Counters. Each counter occupies 2^N bits, enough to store -// counter's MaxValue. Ctor will try to allocate the required Buffer via map() -// and the caller is expected to check whether the initialization was successful -// by checking isAllocated() result. For the performance sake, none of the -// accessors check the validity of the arguments, It is assumed that Index is -// always in [0, N) range and the value is not incremented past MaxValue. +// counter's MaxValue. Ctor will try to use a static buffer first, and if that +// fails (the buffer is too small or already locked), will allocate the +// required Buffer via map(). The caller is expected to check whether the +// initialization was successful by checking isAllocated() result. For +// performance sake, none of the accessors check the validity of the arguments, +// It is assumed that Index is always in [0, N) range and the value is not +// incremented past MaxValue. 
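The packing described in the comment above can be illustrated with a small self-contained class. This is only a sketch of the arithmetic (power-of-two counter width, several counters packed per 64-bit word), not scudo's actual implementation:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Each counter gets the smallest power-of-two number of bits able to hold
    // MaxValue, so counters never straddle a 64-bit word boundary.
    class PackedCounters {
    public:
      PackedCounters(size_t NumCounters, uint64_t MaxValue) {
        size_t BitsNeeded = 1;
        while (BitsNeeded < 64 && (MaxValue >> BitsNeeded) != 0)
          BitsNeeded++;
        CounterBits = 1;
        while (CounterBits < BitsNeeded)
          CounterBits <<= 1;
        CountersPerWord = 64 / CounterBits;
        Mask = (CounterBits == 64) ? ~0ULL : ((1ULL << CounterBits) - 1);
        Words.resize((NumCounters + CountersPerWord - 1) / CountersPerWord, 0);
      }
      uint64_t get(size_t I) const {
        return (Words[I / CountersPerWord] >> shift(I)) & Mask;
      }
      // The caller guarantees a counter never exceeds MaxValue, so a plain add
      // cannot carry into the neighbouring counter.
      void inc(size_t I) { Words[I / CountersPerWord] += 1ULL << shift(I); }

    private:
      size_t shift(size_t I) const { return (I % CountersPerWord) * CounterBits; }
      size_t CounterBits, CountersPerWord;
      uint64_t Mask;
      std::vector<uint64_t> Words;
    };

    int main() {
      PackedCounters C(/*NumCounters=*/100, /*MaxValue=*/5); // 4 bits each
      C.inc(7);
      C.inc(7);
      C.inc(42);
      assert(C.get(7) == 2 && C.get(42) == 1 && C.get(0) == 0);
    }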
class PackedCounterArray { public: PackedCounterArray(uptr NumCounters, uptr MaxValue) : N(NumCounters) { @@ -66,11 +69,20 @@ public: BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >> PackingRatioLog) * sizeof(*Buffer); - Buffer = reinterpret_cast<uptr *>( - map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM)); + if (BufferSize <= StaticBufferSize && Mutex.tryLock()) { + Buffer = &StaticBuffer[0]; + memset(Buffer, 0, BufferSize); + } else { + Buffer = reinterpret_cast<uptr *>( + map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM)); + } } ~PackedCounterArray() { - if (isAllocated()) + if (!isAllocated()) + return; + if (Buffer == &StaticBuffer[0]) + Mutex.unlock(); + else unmap(reinterpret_cast<void *>(Buffer), BufferSize); } @@ -95,7 +107,8 @@ public: void incRange(uptr From, uptr To) const { DCHECK_LE(From, To); - for (uptr I = From; I <= To; I++) + const uptr Top = Min(To + 1, N); + for (uptr I = From; I < Top; I++) inc(I); } @@ -110,6 +123,10 @@ private: uptr BufferSize; uptr *Buffer; + + static HybridMutex Mutex; + static const uptr StaticBufferSize = 1024U; + static uptr StaticBuffer[StaticBufferSize]; }; template <class ReleaseRecorderT> class FreePagesRangeTracker { @@ -150,8 +167,7 @@ private: template <class TransferBatchT, class ReleaseRecorderT> NOINLINE void releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, - uptr AllocatedPagesCount, uptr BlockSize, - ReleaseRecorderT *Recorder) { + uptr Size, uptr BlockSize, ReleaseRecorderT *Recorder) { const uptr PageSize = getPageSizeCached(); // Figure out the number of chunks per page and whether we can take a fast @@ -188,34 +204,51 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, } } - PackedCounterArray Counters(AllocatedPagesCount, FullPagesBlockCountMax); + const uptr PagesCount = roundUpTo(Size, PageSize) / PageSize; + PackedCounterArray Counters(PagesCount, FullPagesBlockCountMax); if (!Counters.isAllocated()) return; const uptr PageSizeLog = getLog2(PageSize); - const uptr End = Base + AllocatedPagesCount * PageSize; + const uptr RoundedSize = PagesCount << PageSizeLog; // Iterate over free chunks and count how many free chunks affect each // allocated page. if (BlockSize <= PageSize && PageSize % BlockSize == 0) { // Each chunk affects one page only. for (const auto &It : FreeList) { - for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast<uptr>(It.get(I)); - if (P >= Base && P < End) - Counters.inc((P - Base) >> PageSizeLog); + // If dealing with a TransferBatch, the first pointer of the batch will + // point to the batch itself, we do not want to mark this for release as + // the batch is in use, so skip the first entry. + const bool IsTransferBatch = + (It.getCount() != 0) && + (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It)); + for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { + const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; + // This takes care of P < Base and P >= Base + RoundedSize. + if (P < RoundedSize) + Counters.inc(P >> PageSizeLog); } } + for (uptr P = Size; P < RoundedSize; P += BlockSize) + Counters.inc(P >> PageSizeLog); } else { // In all other cases chunks might affect more than one page. 
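    // Worked example of the incRange() arithmetic below, with made-up numbers:
    //   PageSize = 4096, BlockSize = 6000, so a block can straddle pages.
    //   For a free block at offset P = 20000 from Base:
    //     first page = 20000 >> 12                = 4
    //     last page  = (20000 + 6000 - 1) >> 12   = 25999 >> 12 = 6
    //   so the counters for pages 4, 5 and 6 are each incremented, and a page
    //   can only be considered for release once every block overlapping it has
    //   been counted as free.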
for (const auto &It : FreeList) { - for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast<uptr>(It.get(I)); - if (P >= Base && P < End) - Counters.incRange((P - Base) >> PageSizeLog, - (P - Base + BlockSize - 1) >> PageSizeLog); + // See TransferBatch comment above. + const bool IsTransferBatch = + (It.getCount() != 0) && + (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It)); + for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { + const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; + // This takes care of P < Base and P >= Base + RoundedSize. + if (P < RoundedSize) + Counters.incRange(P >> PageSizeLog, + (P + BlockSize - 1) >> PageSizeLog); } } + for (uptr P = Size; P < RoundedSize; P += BlockSize) + Counters.incRange(P >> PageSizeLog, (P + BlockSize - 1) >> PageSizeLog); } // Iterate over pages detecting ranges of pages with chunk Counters equal diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index ab68e5a1d38d..84eaa5091b43 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -48,24 +48,195 @@ static Header *getHeader(const void *Ptr) { } // namespace LargeBlock -template <uptr MaxFreeListSize = 32U> class MapAllocator { +class MapAllocatorNoCache { public: - // Ensure the freelist is disabled on Fuchsia, since it doesn't support - // releasing Secondary blocks yet. - static_assert(!SCUDO_FUCHSIA || MaxFreeListSize == 0U, ""); + void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {} + void init(UNUSED s32 ReleaseToOsInterval) {} + bool retrieve(UNUSED uptr Size, UNUSED LargeBlock::Header **H) { + return false; + } + bool store(UNUSED LargeBlock::Header *H) { return false; } + static bool canCache(UNUSED uptr Size) { return false; } + void disable() {} + void enable() {} + void releaseToOS() {} + void setReleaseToOsIntervalMs(UNUSED s32 Interval) {} +}; + +template <uptr MaxEntriesCount = 32U, uptr MaxEntrySize = 1UL << 19, + s32 MinReleaseToOsIntervalMs = INT32_MIN, + s32 MaxReleaseToOsIntervalMs = INT32_MAX> +class MapAllocatorCache { +public: + // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length + // arrays are an extension for some compilers. + // FIXME(kostyak): support (partially) the cache on Fuchsia. 
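The retrieve() method further down only hands back a cached mapping when the request both fits in it and wastes at most four pages of it. A minimal sketch of that fit check, mirroring the two "continue" conditions (the sizes used in main are arbitrary):

    #include <cassert>
    #include <cstdint>

    using uptr = uintptr_t;

    // A cached block is reused only if the request fits in it and the block is
    // not more than four pages larger than the request.
    bool fitsCachedBlock(uptr RequestedSize, uptr CachedBlockSize, uptr PageSize) {
      if (RequestedSize > CachedBlockSize)
        return false; // too small to satisfy the request
      if (RequestedSize < CachedBlockSize - PageSize * 4U)
        return false; // reusing it would waste more than four pages
      return true;
    }

    int main() {
      const uptr PageSize = 4096;
      assert(fitsCachedBlock(64 * 1024, 64 * 1024, PageSize));            // exact fit
      assert(fitsCachedBlock(64 * 1024, 64 * 1024 + 3 * 4096, PageSize)); // 3 pages slack
      assert(!fitsCachedBlock(64 * 1024, 128 * 1024, PageSize));          // too wasteful
      assert(!fitsCachedBlock(64 * 1024, 60 * 1024, PageSize));           // too small
    }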
+ static_assert(!SCUDO_FUCHSIA || MaxEntriesCount == 0U, ""); + + void initLinkerInitialized(s32 ReleaseToOsInterval) { + setReleaseToOsIntervalMs(ReleaseToOsInterval); + } + void init(s32 ReleaseToOsInterval) { + memset(this, 0, sizeof(*this)); + initLinkerInitialized(ReleaseToOsInterval); + } + + bool store(LargeBlock::Header *H) { + bool EntryCached = false; + bool EmptyCache = false; + const u64 Time = getMonotonicTime(); + { + ScopedLock L(Mutex); + if (EntriesCount == MaxEntriesCount) { + if (IsFullEvents++ == 4U) + EmptyCache = true; + } else { + for (uptr I = 0; I < MaxEntriesCount; I++) { + if (Entries[I].Block) + continue; + if (I != 0) + Entries[I] = Entries[0]; + Entries[0].Block = reinterpret_cast<uptr>(H); + Entries[0].BlockEnd = H->BlockEnd; + Entries[0].MapBase = H->MapBase; + Entries[0].MapSize = H->MapSize; + Entries[0].Data = H->Data; + Entries[0].Time = Time; + EntriesCount++; + EntryCached = true; + break; + } + } + } + s32 Interval; + if (EmptyCache) + empty(); + else if ((Interval = getReleaseToOsIntervalMs()) >= 0) + releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000); + return EntryCached; + } + + bool retrieve(uptr Size, LargeBlock::Header **H) { + const uptr PageSize = getPageSizeCached(); + ScopedLock L(Mutex); + if (EntriesCount == 0) + return false; + for (uptr I = 0; I < MaxEntriesCount; I++) { + if (!Entries[I].Block) + continue; + const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block; + if (Size > BlockSize) + continue; + if (Size < BlockSize - PageSize * 4U) + continue; + *H = reinterpret_cast<LargeBlock::Header *>(Entries[I].Block); + Entries[I].Block = 0; + (*H)->BlockEnd = Entries[I].BlockEnd; + (*H)->MapBase = Entries[I].MapBase; + (*H)->MapSize = Entries[I].MapSize; + (*H)->Data = Entries[I].Data; + EntriesCount--; + return true; + } + return false; + } + + static bool canCache(uptr Size) { + return MaxEntriesCount != 0U && Size <= MaxEntrySize; + } + + void setReleaseToOsIntervalMs(s32 Interval) { + if (Interval >= MaxReleaseToOsIntervalMs) { + Interval = MaxReleaseToOsIntervalMs; + } else if (Interval <= MinReleaseToOsIntervalMs) { + Interval = MinReleaseToOsIntervalMs; + } + atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed); + } + + void releaseToOS() { releaseOlderThan(UINT64_MAX); } + + void disable() { Mutex.lock(); } + + void enable() { Mutex.unlock(); } + +private: + void empty() { + struct { + void *MapBase; + uptr MapSize; + MapPlatformData Data; + } MapInfo[MaxEntriesCount]; + uptr N = 0; + { + ScopedLock L(Mutex); + for (uptr I = 0; I < MaxEntriesCount; I++) { + if (!Entries[I].Block) + continue; + MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase); + MapInfo[N].MapSize = Entries[I].MapSize; + MapInfo[N].Data = Entries[I].Data; + Entries[I].Block = 0; + N++; + } + EntriesCount = 0; + IsFullEvents = 0; + } + for (uptr I = 0; I < N; I++) + unmap(MapInfo[I].MapBase, MapInfo[I].MapSize, UNMAP_ALL, + &MapInfo[I].Data); + } + + void releaseOlderThan(u64 Time) { + ScopedLock L(Mutex); + if (!EntriesCount) + return; + for (uptr I = 0; I < MaxEntriesCount; I++) { + if (!Entries[I].Block || !Entries[I].Time || Entries[I].Time > Time) + continue; + releasePagesToOS(Entries[I].Block, 0, + Entries[I].BlockEnd - Entries[I].Block, + &Entries[I].Data); + Entries[I].Time = 0; + } + } + + s32 getReleaseToOsIntervalMs() { + return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed); + } - void initLinkerInitialized(GlobalStats *S) { + struct CachedBlock { + uptr Block; + uptr BlockEnd; + uptr MapBase; 
+ uptr MapSize; + MapPlatformData Data; + u64 Time; + }; + + HybridMutex Mutex; + CachedBlock Entries[MaxEntriesCount]; + u32 EntriesCount; + uptr LargestSize; + u32 IsFullEvents; + atomic_s32 ReleaseToOsIntervalMs; +}; + +template <class CacheT> class MapAllocator { +public: + void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) { + Cache.initLinkerInitialized(ReleaseToOsInterval); Stats.initLinkerInitialized(); if (LIKELY(S)) S->link(&Stats); } - void init(GlobalStats *S) { + void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) { memset(this, 0, sizeof(*this)); - initLinkerInitialized(S); + initLinkerInitialized(S, ReleaseToOsInterval); } void *allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr, - bool ZeroContents = false); + FillContentsMode FillContents = NoFill); void deallocate(void *Ptr); @@ -79,22 +250,34 @@ public: void getStats(ScopedString *Str) const; - void disable() { Mutex.lock(); } + void disable() { + Mutex.lock(); + Cache.disable(); + } - void enable() { Mutex.unlock(); } + void enable() { + Cache.enable(); + Mutex.unlock(); + } template <typename F> void iterateOverBlocks(F Callback) const { for (const auto &H : InUseBlocks) Callback(reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize()); } - static uptr getMaxFreeListSize(void) { return MaxFreeListSize; } + static uptr canCache(uptr Size) { return CacheT::canCache(Size); } + + void setReleaseToOsIntervalMs(s32 Interval) { + Cache.setReleaseToOsIntervalMs(Interval); + } + + void releaseToOS() { Cache.releaseToOS(); } private: + CacheT Cache; + HybridMutex Mutex; DoublyLinkedList<LargeBlock::Header> InUseBlocks; - // The free list is sorted based on the committed size of blocks. - DoublyLinkedList<LargeBlock::Header> FreeBlocks; uptr AllocatedBytes; uptr FreedBytes; uptr LargestSize; @@ -114,35 +297,34 @@ private: // For allocations requested with an alignment greater than or equal to a page, // the committed memory will amount to something close to Size - AlignmentHint // (pending rounding and headers). -template <uptr MaxFreeListSize> -void *MapAllocator<MaxFreeListSize>::allocate(uptr Size, uptr AlignmentHint, - uptr *BlockEnd, - bool ZeroContents) { +template <class CacheT> +void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, + uptr *BlockEnd, + FillContentsMode FillContents) { DCHECK_GE(Size, AlignmentHint); const uptr PageSize = getPageSizeCached(); const uptr RoundedSize = roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize); - if (MaxFreeListSize && AlignmentHint < PageSize) { - ScopedLock L(Mutex); - for (auto &H : FreeBlocks) { - const uptr FreeBlockSize = H.BlockEnd - reinterpret_cast<uptr>(&H); - if (FreeBlockSize < RoundedSize) - continue; - // Candidate free block should only be at most 4 pages larger. - if (FreeBlockSize > RoundedSize + 4 * PageSize) - break; - FreeBlocks.remove(&H); - InUseBlocks.push_back(&H); - AllocatedBytes += FreeBlockSize; - NumberOfAllocs++; - Stats.add(StatAllocated, FreeBlockSize); + if (AlignmentHint < PageSize && CacheT::canCache(RoundedSize)) { + LargeBlock::Header *H; + if (Cache.retrieve(RoundedSize, &H)) { if (BlockEnd) - *BlockEnd = H.BlockEnd; - void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(&H) + + *BlockEnd = H->BlockEnd; + void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(H) + LargeBlock::getHeaderSize()); - if (ZeroContents) - memset(Ptr, 0, H.BlockEnd - reinterpret_cast<uptr>(Ptr)); + if (FillContents) + memset(Ptr, FillContents == ZeroFill ? 
0 : PatternFillByte, + H->BlockEnd - reinterpret_cast<uptr>(Ptr)); + const uptr BlockSize = H->BlockEnd - reinterpret_cast<uptr>(H); + { + ScopedLock L(Mutex); + InUseBlocks.push_back(H); + AllocatedBytes += BlockSize; + NumberOfAllocs++; + Stats.add(StatAllocated, BlockSize); + Stats.add(StatMapped, H->MapSize); + } return Ptr; } } @@ -191,6 +373,8 @@ void *MapAllocator<MaxFreeListSize>::allocate(uptr Size, uptr AlignmentHint, H->MapSize = MapEnd - MapBase; H->BlockEnd = CommitBase + CommitSize; H->Data = Data; + if (BlockEnd) + *BlockEnd = CommitBase + CommitSize; { ScopedLock L(Mutex); InUseBlocks.push_back(H); @@ -201,52 +385,31 @@ void *MapAllocator<MaxFreeListSize>::allocate(uptr Size, uptr AlignmentHint, Stats.add(StatAllocated, CommitSize); Stats.add(StatMapped, H->MapSize); } - if (BlockEnd) - *BlockEnd = CommitBase + CommitSize; return reinterpret_cast<void *>(Ptr + LargeBlock::getHeaderSize()); } -template <uptr MaxFreeListSize> -void MapAllocator<MaxFreeListSize>::deallocate(void *Ptr) { +template <class CacheT> void MapAllocator<CacheT>::deallocate(void *Ptr) { LargeBlock::Header *H = LargeBlock::getHeader(Ptr); const uptr Block = reinterpret_cast<uptr>(H); + const uptr CommitSize = H->BlockEnd - Block; { ScopedLock L(Mutex); InUseBlocks.remove(H); - const uptr CommitSize = H->BlockEnd - Block; FreedBytes += CommitSize; NumberOfFrees++; Stats.sub(StatAllocated, CommitSize); - if (MaxFreeListSize && FreeBlocks.size() < MaxFreeListSize) { - bool Inserted = false; - for (auto &F : FreeBlocks) { - const uptr FreeBlockSize = F.BlockEnd - reinterpret_cast<uptr>(&F); - if (FreeBlockSize >= CommitSize) { - FreeBlocks.insert(H, &F); - Inserted = true; - break; - } - } - if (!Inserted) - FreeBlocks.push_back(H); - const uptr RoundedAllocationStart = - roundUpTo(Block + LargeBlock::getHeaderSize(), getPageSizeCached()); - MapPlatformData Data = H->Data; - // TODO(kostyak): use release_to_os_interval_ms - releasePagesToOS(Block, RoundedAllocationStart - Block, - H->BlockEnd - RoundedAllocationStart, &Data); - return; - } Stats.sub(StatMapped, H->MapSize); } + if (CacheT::canCache(CommitSize) && Cache.store(H)) + return; void *Addr = reinterpret_cast<void *>(H->MapBase); const uptr Size = H->MapSize; MapPlatformData Data = H->Data; unmap(Addr, Size, UNMAP_ALL, &Data); } -template <uptr MaxFreeListSize> -void MapAllocator<MaxFreeListSize>::getStats(ScopedString *Str) const { +template <class CacheT> +void MapAllocator<CacheT>::getStats(ScopedString *Str) const { Str->append( "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times " "(%zuK), remains %zu (%zuK) max %zuM\n", diff --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h index 947526e8aea1..5ed8e2845b38 100644 --- a/compiler-rt/lib/scudo/standalone/size_class_map.h +++ b/compiler-rt/lib/scudo/standalone/size_class_map.h @@ -9,11 +9,32 @@ #ifndef SCUDO_SIZE_CLASS_MAP_H_ #define SCUDO_SIZE_CLASS_MAP_H_ +#include "chunk.h" #include "common.h" #include "string_utils.h" namespace scudo { +inline uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) { + const uptr L = getMostSignificantSetBitIndex(Size); + const uptr LBits = (Size >> (L - LogBits)) - (1 << LogBits); + const uptr HBits = (L - ZeroLog) << LogBits; + return LBits + HBits; +} + +template <typename Config> struct SizeClassMapBase { + static u32 getMaxCachedHint(uptr Size) { + DCHECK_NE(Size, 0); + u32 N; + // Force a 32-bit division if the template parameters allow for it. 
+ if (Config::MaxBytesCachedLog > 31 || Config::MaxSizeLog > 31) + N = static_cast<u32>((1UL << Config::MaxBytesCachedLog) / Size); + else + N = (1U << Config::MaxBytesCachedLog) / static_cast<u32>(Size); + return Max(1U, Min(Config::MaxNumCachedHint, N)); + } +}; + // SizeClassMap maps allocation sizes into size classes and back, in an // efficient table-free manner. // @@ -33,22 +54,24 @@ namespace scudo { // of chunks that can be cached per-thread: // - MaxNumCachedHint is a hint for the max number of chunks cached per class. // - 2^MaxBytesCachedLog is the max number of bytes cached per class. +template <typename Config> +class FixedSizeClassMap : public SizeClassMapBase<Config> { + typedef SizeClassMapBase<Config> Base; -template <u8 NumBits, u8 MinSizeLog, u8 MidSizeLog, u8 MaxSizeLog, - u32 MaxNumCachedHintT, u8 MaxBytesCachedLog> -class SizeClassMap { - static const uptr MinSize = 1UL << MinSizeLog; - static const uptr MidSize = 1UL << MidSizeLog; + static const uptr MinSize = 1UL << Config::MinSizeLog; + static const uptr MidSize = 1UL << Config::MidSizeLog; static const uptr MidClass = MidSize / MinSize; - static const u8 S = NumBits - 1; + static const u8 S = Config::NumBits - 1; static const uptr M = (1UL << S) - 1; + static const uptr SizeDelta = Chunk::getHeaderSize(); + public: - static const u32 MaxNumCachedHint = MaxNumCachedHintT; + static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; - static const uptr MaxSize = 1UL << MaxSizeLog; + static const uptr MaxSize = (1UL << Config::MaxSizeLog) + SizeDelta; static const uptr NumClasses = - MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1; + MidClass + ((Config::MaxSizeLog - Config::MidSizeLog) << S) + 1; static_assert(NumClasses <= 256, ""); static const uptr LargestClassId = NumClasses - 1; static const uptr BatchClassId = 0; @@ -56,97 +79,213 @@ public: static uptr getSizeByClassId(uptr ClassId) { DCHECK_NE(ClassId, BatchClassId); if (ClassId <= MidClass) - return ClassId << MinSizeLog; + return (ClassId << Config::MinSizeLog) + SizeDelta; ClassId -= MidClass; const uptr T = MidSize << (ClassId >> S); - return T + (T >> S) * (ClassId & M); + return T + (T >> S) * (ClassId & M) + SizeDelta; } static uptr getClassIdBySize(uptr Size) { + if (Size <= SizeDelta + (1 << Config::MinSizeLog)) + return 1; + Size -= SizeDelta; DCHECK_LE(Size, MaxSize); if (Size <= MidSize) - return (Size + MinSize - 1) >> MinSizeLog; - const uptr L = getMostSignificantSetBitIndex(Size); - const uptr HBits = (Size >> (L - S)) & M; - const uptr LBits = Size & ((1UL << (L - S)) - 1); - const uptr L1 = L - MidSizeLog; - return MidClass + (L1 << S) + HBits + (LBits > 0); + return (Size + MinSize - 1) >> Config::MinSizeLog; + return MidClass + 1 + scaledLog2(Size - 1, Config::MidSizeLog, S); } static u32 getMaxCachedHint(uptr Size) { DCHECK_LE(Size, MaxSize); - DCHECK_NE(Size, 0); - u32 N; - // Force a 32-bit division if the template parameters allow for it. 
- if (MaxBytesCachedLog > 31 || MaxSizeLog > 31) - N = static_cast<u32>((1UL << MaxBytesCachedLog) / Size); - else - N = (1U << MaxBytesCachedLog) / static_cast<u32>(Size); - return Max(1U, Min(MaxNumCachedHint, N)); + return Base::getMaxCachedHint(Size); } +}; + +template <typename Config> +class TableSizeClassMap : public SizeClassMapBase<Config> { + typedef SizeClassMapBase<Config> Base; + + static const u8 S = Config::NumBits - 1; + static const uptr M = (1UL << S) - 1; + static const uptr ClassesSize = + sizeof(Config::Classes) / sizeof(Config::Classes[0]); - static void print() { - ScopedString Buffer(1024); - uptr PrevS = 0; - uptr TotalCached = 0; - for (uptr I = 0; I < NumClasses; I++) { - if (I == BatchClassId) - continue; - const uptr S = getSizeByClassId(I); - if (S >= MidSize / 2 && (S & (S - 1)) == 0) - Buffer.append("\n"); - const uptr D = S - PrevS; - const uptr P = PrevS ? (D * 100 / PrevS) : 0; - const uptr L = S ? getMostSignificantSetBitIndex(S) : 0; - const uptr Cached = getMaxCachedHint(S) * S; - Buffer.append( - "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n", - I, getSizeByClassId(I), D, P, L, getMaxCachedHint(S), Cached, - getClassIdBySize(S)); - TotalCached += Cached; - PrevS = S; + struct SizeTable { + constexpr SizeTable() { + uptr Pos = 1 << Config::MidSizeLog; + uptr Inc = 1 << (Config::MidSizeLog - S); + for (uptr i = 0; i != getTableSize(); ++i) { + Pos += Inc; + if ((Pos & (Pos - 1)) == 0) + Inc *= 2; + Tab[i] = computeClassId(Pos + Config::SizeDelta); + } } - Buffer.append("Total Cached: %zu\n", TotalCached); - Buffer.output(); - } - static void validate() { - for (uptr C = 0; C < NumClasses; C++) { - if (C == BatchClassId) - continue; - const uptr S = getSizeByClassId(C); - CHECK_NE(S, 0U); - CHECK_EQ(getClassIdBySize(S), C); - if (C < LargestClassId) - CHECK_EQ(getClassIdBySize(S + 1), C + 1); - CHECK_EQ(getClassIdBySize(S - 1), C); - if (C - 1 != BatchClassId) - CHECK_GT(getSizeByClassId(C), getSizeByClassId(C - 1)); + constexpr static u8 computeClassId(uptr Size) { + for (uptr i = 0; i != ClassesSize; ++i) { + if (Size <= Config::Classes[i]) + return static_cast<u8>(i + 1); + } + return static_cast<u8>(-1); } - // Do not perform the loop if the maximum size is too large. 
- if (MaxSizeLog > 19) - return; - for (uptr S = 1; S <= MaxSize; S++) { - const uptr C = getClassIdBySize(S); - CHECK_LT(C, NumClasses); - CHECK_GE(getSizeByClassId(C), S); - if (C - 1 != BatchClassId) - CHECK_LT(getSizeByClassId(C - 1), S); + + constexpr static uptr getTableSize() { + return (Config::MaxSizeLog - Config::MidSizeLog) << S; } + + u8 Tab[getTableSize()] = {}; + }; + + static constexpr SizeTable Table = {}; + +public: + static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; + + static const uptr NumClasses = ClassesSize + 1; + static_assert(NumClasses < 256, ""); + static const uptr LargestClassId = NumClasses - 1; + static const uptr BatchClassId = 0; + static const uptr MaxSize = Config::Classes[LargestClassId - 1]; + + static uptr getSizeByClassId(uptr ClassId) { + return Config::Classes[ClassId - 1]; } + + static uptr getClassIdBySize(uptr Size) { + if (Size <= Config::Classes[0]) + return 1; + Size -= Config::SizeDelta; + DCHECK_LE(Size, MaxSize); + if (Size <= (1 << Config::MidSizeLog)) + return ((Size - 1) >> Config::MinSizeLog) + 1; + return Table.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; + } + + static u32 getMaxCachedHint(uptr Size) { + DCHECK_LE(Size, MaxSize); + return Base::getMaxCachedHint(Size); + } +}; + +struct AndroidSizeClassConfig { +#if SCUDO_WORDSIZE == 64U + static const uptr NumBits = 7; + static const uptr MinSizeLog = 4; + static const uptr MidSizeLog = 6; + static const uptr MaxSizeLog = 16; + static const u32 MaxNumCachedHint = 14; + static const uptr MaxBytesCachedLog = 13; + + static constexpr u32 Classes[] = { + 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000b0, + 0x000c0, 0x000e0, 0x00120, 0x00160, 0x001c0, 0x00250, 0x00320, 0x00450, + 0x00670, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01210, 0x01bd0, 0x02210, + 0x02d90, 0x03790, 0x04010, 0x04810, 0x05a10, 0x07310, 0x08210, 0x10010, + }; + static const uptr SizeDelta = 16; +#else + static const uptr NumBits = 8; + static const uptr MinSizeLog = 4; + static const uptr MidSizeLog = 7; + static const uptr MaxSizeLog = 16; + static const u32 MaxNumCachedHint = 14; + static const uptr MaxBytesCachedLog = 13; + + static constexpr u32 Classes[] = { + 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090, + 0x000a0, 0x000b0, 0x000c0, 0x000e0, 0x000f0, 0x00110, 0x00120, 0x00130, + 0x00150, 0x00160, 0x00170, 0x00190, 0x001d0, 0x00210, 0x00240, 0x002a0, + 0x00330, 0x00370, 0x003a0, 0x00400, 0x00430, 0x004a0, 0x00530, 0x00610, + 0x00730, 0x00840, 0x00910, 0x009c0, 0x00a60, 0x00b10, 0x00ca0, 0x00e00, + 0x00fb0, 0x01030, 0x01130, 0x011f0, 0x01490, 0x01650, 0x01930, 0x02010, + 0x02190, 0x02490, 0x02850, 0x02d50, 0x03010, 0x03210, 0x03c90, 0x04090, + 0x04510, 0x04810, 0x05c10, 0x06f10, 0x07310, 0x08010, 0x0c010, 0x10010, + }; + static const uptr SizeDelta = 16; +#endif +}; + +typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap; + +struct DefaultSizeClassConfig { + static const uptr NumBits = 3; + static const uptr MinSizeLog = 5; + static const uptr MidSizeLog = 8; + static const uptr MaxSizeLog = 17; + static const u32 MaxNumCachedHint = 8; + static const uptr MaxBytesCachedLog = 10; }; -typedef SizeClassMap<3, 5, 8, 17, 8, 10> DefaultSizeClassMap; +typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap; -// TODO(kostyak): further tune class maps for Android & Fuchsia. 
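To make the FixedSizeClassMap progression concrete, here is a small standalone program reproducing the getSizeByClassId() formula above for a DefaultSizeClassConfig-shaped config (NumBits = 3, MinSizeLog = 5, MidSizeLog = 8); the 16-byte SizeDelta is an assumption standing in for Chunk::getHeaderSize():

    #include <cstdint>
    #include <cstdio>

    using uptr = uintptr_t;

    int main() {
      const uptr NumBits = 3, MinSizeLog = 5, MidSizeLog = 8, SizeDelta = 16;
      const uptr MinSize = 1UL << MinSizeLog;  // 32
      const uptr MidSize = 1UL << MidSizeLog;  // 256
      const uptr MidClass = MidSize / MinSize; // 8 linearly spaced classes
      const uptr S = NumBits - 1;              // 4 sub-classes per power of two
      const uptr M = (1UL << S) - 1;

      // Class 0 is the batch class, so start at 1.
      for (uptr ClassId = 1; ClassId <= MidClass + 8; ClassId++) {
        uptr Size;
        if (ClassId <= MidClass) {
          Size = (ClassId << MinSizeLog) + SizeDelta; // linear: 48, 80, 112, ...
        } else {
          const uptr Id = ClassId - MidClass;
          const uptr T = MidSize << (Id >> S);
          Size = T + (T >> S) * (Id & M) + SizeDelta; // geometric, 4 steps/octave
        }
        std::printf("class %2zu -> %zu bytes\n", (size_t)ClassId, (size_t)Size);
      }
    }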
+struct SvelteSizeClassConfig { #if SCUDO_WORDSIZE == 64U -typedef SizeClassMap<4, 4, 8, 14, 4, 10> SvelteSizeClassMap; -typedef SizeClassMap<3, 5, 8, 17, 14, 14> AndroidSizeClassMap; + static const uptr NumBits = 4; + static const uptr MinSizeLog = 4; + static const uptr MidSizeLog = 8; + static const uptr MaxSizeLog = 14; + static const u32 MaxNumCachedHint = 4; + static const uptr MaxBytesCachedLog = 10; #else -typedef SizeClassMap<4, 3, 7, 14, 5, 10> SvelteSizeClassMap; -typedef SizeClassMap<3, 5, 8, 17, 14, 14> AndroidSizeClassMap; + static const uptr NumBits = 4; + static const uptr MinSizeLog = 3; + static const uptr MidSizeLog = 7; + static const uptr MaxSizeLog = 14; + static const u32 MaxNumCachedHint = 5; + static const uptr MaxBytesCachedLog = 10; #endif +}; + +typedef FixedSizeClassMap<SvelteSizeClassConfig> SvelteSizeClassMap; + +template <typename SCMap> inline void printMap() { + ScopedString Buffer(1024); + uptr PrevS = 0; + uptr TotalCached = 0; + for (uptr I = 0; I < SCMap::NumClasses; I++) { + if (I == SCMap::BatchClassId) + continue; + const uptr S = SCMap::getSizeByClassId(I); + const uptr D = S - PrevS; + const uptr P = PrevS ? (D * 100 / PrevS) : 0; + const uptr L = S ? getMostSignificantSetBitIndex(S) : 0; + const uptr Cached = SCMap::getMaxCachedHint(S) * S; + Buffer.append( + "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n", + I, S, D, P, L, SCMap::getMaxCachedHint(S), Cached, + SCMap::getClassIdBySize(S)); + TotalCached += Cached; + PrevS = S; + } + Buffer.append("Total Cached: %zu\n", TotalCached); + Buffer.output(); +} +template <typename SCMap> static void validateMap() { + for (uptr C = 0; C < SCMap::NumClasses; C++) { + if (C == SCMap::BatchClassId) + continue; + const uptr S = SCMap::getSizeByClassId(C); + CHECK_NE(S, 0U); + CHECK_EQ(SCMap::getClassIdBySize(S), C); + if (C < SCMap::LargestClassId) + CHECK_EQ(SCMap::getClassIdBySize(S + 1), C + 1); + CHECK_EQ(SCMap::getClassIdBySize(S - 1), C); + if (C - 1 != SCMap::BatchClassId) + CHECK_GT(SCMap::getSizeByClassId(C), SCMap::getSizeByClassId(C - 1)); + } + // Do not perform the loop if the maximum size is too large. + if (SCMap::MaxSize > (1 << 19)) + return; + for (uptr S = 1; S <= SCMap::MaxSize; S++) { + const uptr C = SCMap::getClassIdBySize(S); + CHECK_LT(C, SCMap::NumClasses); + CHECK_GE(SCMap::getSizeByClassId(C), S); + if (C - 1 != SCMap::BatchClassId) + CHECK_LT(SCMap::getSizeByClassId(C - 1), S); + } +} } // namespace scudo #endif // SCUDO_SIZE_CLASS_MAP_H_ diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h new file mode 100644 index 000000000000..f2f4d9597795 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/stack_depot.h @@ -0,0 +1,144 @@ +//===-- stack_depot.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_STACK_DEPOT_H_ +#define SCUDO_STACK_DEPOT_H_ + +#include "atomic_helpers.h" +#include "mutex.h" + +namespace scudo { + +class MurMur2HashBuilder { + static const u32 M = 0x5bd1e995; + static const u32 Seed = 0x9747b28c; + static const u32 R = 24; + u32 H; + + public: + explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; } + void add(u32 K) { + K *= M; + K ^= K >> R; + K *= M; + H *= M; + H ^= K; + } + u32 get() { + u32 X = H; + X ^= X >> 13; + X *= M; + X ^= X >> 15; + return X; + } +}; + +class StackDepot { + HybridMutex RingEndMu; + u32 RingEnd; + + // This data structure stores a stack trace for each allocation and + // deallocation when stack trace recording is enabled, that may be looked up + // using a hash of the stack trace. The lower bits of the hash are an index + // into the Tab array, which stores an index into the Ring array where the + // stack traces are stored. As the name implies, Ring is a ring buffer, so a + // stack trace may wrap around to the start of the array. + // + // Each stack trace in Ring is prefixed by a stack trace marker consisting of + // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames + // and stack trace markers in the case where instruction pointers are 4-byte + // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the + // size of the stack trace in bits 33-63. + // + // The insert() function is potentially racy in its accesses to the Tab and + // Ring arrays, but find() is resilient to races in the sense that, barring + // hash collisions, it will either return the correct stack trace or no stack + // trace at all, even if two instances of insert() raced with one another. + // This is achieved by re-checking the hash of the stack trace before + // returning the trace. + +#ifdef SCUDO_FUZZ + // Use smaller table sizes for fuzzing in order to reduce input size. + static const uptr TabBits = 4; +#else + static const uptr TabBits = 16; +#endif + static const uptr TabSize = 1 << TabBits; + static const uptr TabMask = TabSize - 1; + atomic_u32 Tab[TabSize]; + +#ifdef SCUDO_FUZZ + static const uptr RingBits = 4; +#else + static const uptr RingBits = 19; +#endif + static const uptr RingSize = 1 << RingBits; + static const uptr RingMask = RingSize - 1; + atomic_u64 Ring[RingSize]; + +public: + // Insert hash of the stack trace [Begin, End) into the stack depot, and + // return the hash. + u32 insert(uptr *Begin, uptr *End) { + MurMur2HashBuilder B; + for (uptr *I = Begin; I != End; ++I) + B.add(u32(*I) >> 2); + u32 Hash = B.get(); + + u32 Pos = Hash & TabMask; + u32 RingPos = atomic_load_relaxed(&Tab[Pos]); + u64 Entry = atomic_load_relaxed(&Ring[RingPos]); + u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1; + if (Entry == Id) + return Hash; + + ScopedLock Lock(RingEndMu); + RingPos = RingEnd; + atomic_store_relaxed(&Tab[Pos], RingPos); + atomic_store_relaxed(&Ring[RingPos], Id); + for (uptr *I = Begin; I != End; ++I) { + RingPos = (RingPos + 1) & RingMask; + atomic_store_relaxed(&Ring[RingPos], *I); + } + RingEnd = (RingPos + 1) & RingMask; + return Hash; + } + + // Look up a stack trace by hash. Returns true if successful. The trace may be + // accessed via operator[] passing indexes between *RingPosPtr and + // *RingPosPtr + *SizePtr. 
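A tiny check of the marker layout described above (a fixed 1 in bit 0, the 32-bit hash in bits 1-32, the trace size in bits 33-63); the hash and size values are arbitrary:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Hash = 0xdeadbeef;
      const uint64_t Size = 12; // number of frames in the trace
      const uint64_t Id = (Size << 33) | (uint64_t(Hash) << 1) | 1;

      assert((Id & 1) == 1);                    // marker bit distinguishes it
                                                // from 4-byte-aligned frames
      assert(((Id >> 1) & 0xffffffff) == Hash); // bits 1-32
      assert((Id >> 33) == Size);               // bits 33-63
    }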
+ bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const { + u32 Pos = Hash & TabMask; + u32 RingPos = atomic_load_relaxed(&Tab[Pos]); + if (RingPos >= RingSize) + return false; + u64 Entry = atomic_load_relaxed(&Ring[RingPos]); + u64 HashWithTagBit = (u64(Hash) << 1) | 1; + if ((Entry & 0x1ffffffff) != HashWithTagBit) + return false; + u32 Size = u32(Entry >> 33); + if (Size >= RingSize) + return false; + *RingPosPtr = (RingPos + 1) & RingMask; + *SizePtr = Size; + MurMur2HashBuilder B; + for (uptr I = 0; I != Size; ++I) { + RingPos = (RingPos + 1) & RingMask; + B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2); + } + return B.get() == Hash; + } + + u64 operator[](uptr RingPos) const { + return atomic_load_relaxed(&Ring[RingPos & RingMask]); + } +}; + +} // namespace scudo + +#endif // SCUDO_STACK_DEPOT_H_ diff --git a/compiler-rt/lib/scudo/standalone/stats.h b/compiler-rt/lib/scudo/standalone/stats.h index 38481e98e48d..d76b904949ea 100644 --- a/compiler-rt/lib/scudo/standalone/stats.h +++ b/compiler-rt/lib/scudo/standalone/stats.h @@ -58,7 +58,9 @@ class GlobalStats : public LocalStats { public: void initLinkerInitialized() {} void init() { - memset(this, 0, sizeof(*this)); + LocalStats::init(); + Mutex.init(); + StatsList = {}; initLinkerInitialized(); } diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h index 20f0d69cabfd..b3701c63f8a9 100644 --- a/compiler-rt/lib/scudo/standalone/tsd.h +++ b/compiler-rt/lib/scudo/standalone/tsd.h @@ -23,7 +23,7 @@ namespace scudo { -template <class Allocator> struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { +template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { typename Allocator::CacheT Cache; typename Allocator::QuarantineCacheT QuarantineCache; u8 DestructorIterations; diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h index 69479ea7bdf4..3492509b5a8e 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h +++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h @@ -25,9 +25,7 @@ template <class Allocator> struct TSDRegistryExT { void initLinkerInitialized(Allocator *Instance) { Instance->initLinkerInitialized(); CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0); - FallbackTSD = reinterpret_cast<TSD<Allocator> *>( - map(nullptr, sizeof(TSD<Allocator>), "scudo:tsd")); - FallbackTSD->initLinkerInitialized(Instance); + FallbackTSD.initLinkerInitialized(Instance); Initialized = true; } void init(Allocator *Instance) { @@ -35,9 +33,7 @@ template <class Allocator> struct TSDRegistryExT { initLinkerInitialized(Instance); } - void unmapTestOnly() { - unmap(reinterpret_cast<void *>(FallbackTSD), sizeof(TSD<Allocator>)); - } + void unmapTestOnly() {} ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) { if (LIKELY(State != ThreadState::NotInitialized)) @@ -51,23 +47,22 @@ template <class Allocator> struct TSDRegistryExT { *UnlockRequired = false; return &ThreadTSD; } - DCHECK(FallbackTSD); - FallbackTSD->lock(); + FallbackTSD.lock(); *UnlockRequired = true; - return FallbackTSD; + return &FallbackTSD; } // To disable the exclusive TSD registry, we effectively lock the fallback TSD // and force all threads to attempt to use it instead of their local one. 
void disable() { Mutex.lock(); - FallbackTSD->lock(); + FallbackTSD.lock(); atomic_store(&Disabled, 1U, memory_order_release); } void enable() { atomic_store(&Disabled, 0U, memory_order_release); - FallbackTSD->unlock(); + FallbackTSD.unlock(); Mutex.unlock(); } @@ -96,7 +91,7 @@ private: pthread_key_t PThreadKey; bool Initialized; atomic_u8 Disabled; - TSD<Allocator> *FallbackTSD; + TSD<Allocator> FallbackTSD; HybridMutex Mutex; static THREADLOCAL ThreadState State; static THREADLOCAL TSD<Allocator> ThreadTSD; diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h index 5ab8269519a9..038a5905ff48 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -18,9 +18,10 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT { void initLinkerInitialized(Allocator *Instance) { Instance->initLinkerInitialized(); CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS - NumberOfTSDs = Min(Max(1U, getNumberOfCPUs()), MaxTSDCount); - TSDs = reinterpret_cast<TSD<Allocator> *>( - map(nullptr, sizeof(TSD<Allocator>) * NumberOfTSDs, "scudo:tsd")); + const u32 NumberOfCPUs = getNumberOfCPUs(); + NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0) + ? MaxTSDCount + : Min(NumberOfCPUs, MaxTSDCount); for (u32 I = 0; I < NumberOfTSDs; I++) TSDs[I].initLinkerInitialized(Instance); // Compute all the coprimes of NumberOfTSDs. This will be used to walk the @@ -46,8 +47,6 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT { } void unmapTestOnly() { - unmap(reinterpret_cast<void *>(TSDs), - sizeof(TSD<Allocator>) * NumberOfTSDs); setCurrentTSD(nullptr); pthread_key_delete(PThreadKey); } @@ -77,7 +76,7 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT { } void enable() { - for (s32 I = NumberOfTSDs - 1; I >= 0; I--) + for (s32 I = static_cast<s32>(NumberOfTSDs - 1); I >= 0; I--) TSDs[I].unlock(); Mutex.unlock(); } @@ -160,11 +159,11 @@ private: pthread_key_t PThreadKey; atomic_u32 CurrentIndex; u32 NumberOfTSDs; - TSD<Allocator> *TSDs; u32 NumberOfCoPrimes; u32 CoPrimes[MaxTSDCount]; bool Initialized; HybridMutex Mutex; + TSD<Allocator> TSDs[MaxTSDCount]; #if SCUDO_LINUX && !_BIONIC static THREADLOCAL TSD<Allocator> *ThreadTSD; #endif diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp index 93a666c4d61e..098cc089a1ca 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp @@ -22,13 +22,11 @@ #define SCUDO_ALLOCATOR Allocator extern "C" void SCUDO_PREFIX(malloc_postinit)(); -static scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> - SCUDO_ALLOCATOR; -// Pointer to the static allocator so that the C++ wrappers can access it. + +// Export the static allocator so that the C++ wrappers can access it. // Technically we could have a completely separated heap for C & C++ but in // reality the amount of cross pollination between the two is staggering. 
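The comment about computing "all the coprimes of NumberOfTSDs" in the shared-registry hunk above refers to probing TSD slots with a stride that is co-prime to NumberOfTSDs, so a contended thread visits every slot once before repeating. A rough sketch of that idea, illustrative rather than the patch's code:

static scudo::u32 computeGCD(scudo::u32 A, scudo::u32 B) {
  while (B != 0) {
    const scudo::u32 T = A % B;
    A = B;
    B = T;
  }
  return A;
}

// Striding by any value co-prime to N cycles through all N slots exactly once:
//   Index = (Index + Stride) % N, with computeGCD(Stride, N) == 1.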
-scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> * - CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR; +scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; #include "wrappers_c.inc" diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc index 2fd709eaa1f6..4396dfc50d1d 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -150,13 +150,25 @@ INTERFACE WEAK void SCUDO_PREFIX(malloc_disable)() { } void SCUDO_PREFIX(malloc_postinit)() { + SCUDO_ALLOCATOR.initGwpAsan(); pthread_atfork(SCUDO_PREFIX(malloc_disable), SCUDO_PREFIX(malloc_enable), SCUDO_PREFIX(malloc_enable)); } INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, UNUSED int value) { if (param == M_DECAY_TIME) { - // TODO(kostyak): set release_to_os_interval_ms accordingly. + if (SCUDO_ANDROID) { + if (value == 0) { + // Will set the release values to their minimum values. + value = INT32_MIN; + } else { + // Will set the release values to their maximum values. + value = INT32_MAX; + } + } + + SCUDO_ALLOCATOR.setOption(scudo::Option::ReleaseInterval, + static_cast<scudo::sptr>(value)); return 1; } else if (param == M_PURGE) { SCUDO_ALLOCATOR.releaseToOS(); @@ -179,9 +191,56 @@ INTERFACE WEAK void *SCUDO_PREFIX(aligned_alloc)(size_t alignment, } INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) { - fputs("<malloc version=\"scudo-1\">", stream); - fputs("</malloc>", stream); + const scudo::uptr max_size = + decltype(SCUDO_ALLOCATOR)::PrimaryT::SizeClassMap::MaxSize; + auto *sizes = static_cast<scudo::uptr *>( + SCUDO_PREFIX(calloc)(max_size, sizeof(scudo::uptr))); + auto callback = [](uintptr_t, size_t size, void *arg) { + auto *sizes = reinterpret_cast<scudo::uptr *>(arg); + if (size < max_size) + sizes[size]++; + }; + SCUDO_ALLOCATOR.iterateOverChunks(0, -1ul, callback, sizes); + + fputs("<malloc version=\"scudo-1\">\n", stream); + for (scudo::uptr i = 0; i != max_size; ++i) + if (sizes[i]) + fprintf(stream, "<alloc size=\"%lu\" count=\"%lu\"/>\n", i, sizes[i]); + fputs("</malloc>\n", stream); + SCUDO_PREFIX(free)(sizes); return 0; } +// Disable memory tagging for the heap. The caller must disable memory tag +// checks globally (e.g. by clearing TCF0 on aarch64) before calling this +// function, and may not re-enable them after calling the function. The program +// must be single threaded at the point when the function is called. +INTERFACE WEAK void SCUDO_PREFIX(malloc_disable_memory_tagging)() { + SCUDO_ALLOCATOR.disableMemoryTagging(); +} + +// Sets whether scudo records stack traces and other metadata for allocations +// and deallocations. This function only has an effect if the allocator and +// hardware support memory tagging. The program must be single threaded at the +// point when the function is called. +INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_track_allocation_stacks)(int track) { + SCUDO_ALLOCATOR.setTrackAllocationStacks(track); +} + +// Sets whether scudo zero-initializes all allocated memory. The program must +// be single threaded at the point when the function is called. +INTERFACE WEAK void SCUDO_PREFIX(malloc_set_zero_contents)(int zero_contents) { + SCUDO_ALLOCATOR.setFillContents(zero_contents ? scudo::ZeroFill + : scudo::NoFill); +} + +// Sets whether scudo pattern-initializes all allocated memory. The program must +// be single threaded at the point when the function is called. 
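The mallopt() and malloc_info() changes above are visible to ordinary client code. A hedged usage sketch, assuming an Android-style <malloc.h> that defines M_DECAY_TIME and M_PURGE; the exact XML output depends on what is currently allocated:

#include <malloc.h>
#include <stdio.h>

int main() {
  // Non-zero decay time: release intervals are pushed to their maximum.
  mallopt(M_DECAY_TIME, 1);
  // Zero decay time: release intervals are pushed to their minimum.
  mallopt(M_DECAY_TIME, 0);
  // Release free memory back to the OS right away.
  mallopt(M_PURGE, 0);
  // Emits <malloc version="scudo-1"> with one <alloc size=... count=.../> entry
  // per in-use chunk size below the primary's MaxSize.
  malloc_info(0, stdout);
  return 0;
}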
+INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_pattern_fill_contents)(int pattern_fill_contents) { + SCUDO_ALLOCATOR.setFillContents( + pattern_fill_contents ? scudo::PatternOrZeroFill : scudo::NoFill); +} + } // extern "C" diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp index f004369d96cb..4298e69b5774 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp @@ -25,11 +25,6 @@ extern "C" void SCUDO_PREFIX(malloc_postinit)(); static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; -// Pointer to the static allocator so that the C++ wrappers can access it. -// Technically we could have a completely separated heap for C & C++ but in -// reality the amount of cross pollination between the two is staggering. -scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> * - CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR; #include "wrappers_c.inc" @@ -44,22 +39,37 @@ extern "C" void SCUDO_PREFIX(malloc_postinit)(); static scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; -// Pointer to the static allocator so that the C++ wrappers can access it. -// Technically we could have a completely separated heap for C & C++ but in -// reality the amount of cross pollination between the two is staggering. -scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> * - CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR; #include "wrappers_c.inc" #undef SCUDO_ALLOCATOR #undef SCUDO_PREFIX -// The following is the only function that will end up initializing both -// allocators, which will result in a slight increase in memory footprint. -INTERFACE void __scudo_print_stats(void) { - Allocator.printStats(); - SvelteAllocator.printStats(); +// TODO(kostyak): support both allocators. 
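The memory-tagging and fill-content knobs defined above are meant to be flipped while the process is still single-threaded, typically during early startup. A hedged sketch of such a sequence; the un-prefixed names assume the default SCUDO_PREFIX expansion, and the declarations stand in for the scudo interface header:

extern "C" void malloc_set_track_allocation_stacks(int track);
extern "C" void malloc_set_zero_contents(int zero_contents);
extern "C" void malloc_set_pattern_fill_contents(int pattern_fill_contents);
extern "C" void malloc_disable_memory_tagging(void);

int main() {
  // Still single-threaded here, as the comments above require.
  malloc_set_track_allocation_stacks(1); // record alloc/dealloc stacks on MTE-capable builds
  malloc_set_zero_contents(1);           // zero-initialize every allocation
  // Alternatively: malloc_set_pattern_fill_contents(1); or, once tag checks are
  // disabled globally, malloc_disable_memory_tagging();
  // ... create threads and run the application ...
  return 0;
}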
+INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); } + +INTERFACE void __scudo_get_error_info( + struct scudo_error_info *error_info, uintptr_t fault_addr, + const char *stack_depot, const char *region_info, const char *memory, + const char *memory_tags, uintptr_t memory_addr, size_t memory_size) { + Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info, + memory, memory_tags, memory_addr, memory_size); +} + +INTERFACE const char *__scudo_get_stack_depot_addr() { + return Allocator.getStackDepotAddress(); +} + +INTERFACE size_t __scudo_get_stack_depot_size() { + return sizeof(scudo::StackDepot); +} + +INTERFACE const char *__scudo_get_region_info_addr() { + return Allocator.getRegionInfoArrayAddress(); +} + +INTERFACE size_t __scudo_get_region_info_size() { + return Allocator.getRegionInfoArraySize(); } #endif // SCUDO_ANDROID && _BIONIC diff --git a/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp b/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp index 1da5385c7789..adb104118123 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp @@ -16,7 +16,7 @@ #include <stdint.h> extern "C" void malloc_postinit(); -extern scudo::Allocator<scudo::Config, malloc_postinit> *AllocatorPtr; +extern HIDDEN scudo::Allocator<scudo::Config, malloc_postinit> Allocator; namespace std { struct nothrow_t {}; @@ -24,85 +24,85 @@ enum class align_val_t : size_t {}; } // namespace std INTERFACE WEAK void *operator new(size_t size) { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New); + return Allocator.allocate(size, scudo::Chunk::Origin::New); } INTERFACE WEAK void *operator new[](size_t size) { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray); + return Allocator.allocate(size, scudo::Chunk::Origin::NewArray); } INTERFACE WEAK void *operator new(size_t size, std::nothrow_t const &) NOEXCEPT { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New); + return Allocator.allocate(size, scudo::Chunk::Origin::New); } INTERFACE WEAK void *operator new[](size_t size, std::nothrow_t const &) NOEXCEPT { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray); + return Allocator.allocate(size, scudo::Chunk::Origin::NewArray); } INTERFACE WEAK void *operator new(size_t size, std::align_val_t align) { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New, - static_cast<scudo::uptr>(align)); + return Allocator.allocate(size, scudo::Chunk::Origin::New, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align) { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray, - static_cast<scudo::uptr>(align)); + return Allocator.allocate(size, scudo::Chunk::Origin::NewArray, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void *operator new(size_t size, std::align_val_t align, std::nothrow_t const &) NOEXCEPT { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New, - static_cast<scudo::uptr>(align)); + return Allocator.allocate(size, scudo::Chunk::Origin::New, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align, std::nothrow_t const &) NOEXCEPT { - return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray, - static_cast<scudo::uptr>(align)); + return Allocator.allocate(size, scudo::Chunk::Origin::NewArray, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete(void *ptr)NOEXCEPT { - 
AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New); + Allocator.deallocate(ptr, scudo::Chunk::Origin::New); } INTERFACE WEAK void operator delete[](void *ptr) NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray); + Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray); } INTERFACE WEAK void operator delete(void *ptr, std::nothrow_t const &)NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New); + Allocator.deallocate(ptr, scudo::Chunk::Origin::New); } INTERFACE WEAK void operator delete[](void *ptr, std::nothrow_t const &) NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray); + Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray); } INTERFACE WEAK void operator delete(void *ptr, size_t size)NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, size); + Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size); } INTERFACE WEAK void operator delete[](void *ptr, size_t size) NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, size); + Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size); } INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align)NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, 0, - static_cast<scudo::uptr>(align)); + Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, 0, - static_cast<scudo::uptr>(align)); + Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, 0, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align, std::nothrow_t const &)NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, 0, - static_cast<scudo::uptr>(align)); + Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete[](void *ptr, std::align_val_t align, std::nothrow_t const &) NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, 0, - static_cast<scudo::uptr>(align)); + Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, 0, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete(void *ptr, size_t size, std::align_val_t align)NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, size, - static_cast<scudo::uptr>(align)); + Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size, + static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete[](void *ptr, size_t size, std::align_val_t align) NOEXCEPT { - AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, size, - static_cast<scudo::uptr>(align)); + Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size, + static_cast<scudo::uptr>(align)); } #endif // !SCUDO_ANDROID || !_BIONIC |
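Finally, the Bionic-only accessors added above let an out-of-process crash handler replay a tagging fault through scudo's diagnostics: it snapshots the ranges reported by __scudo_get_stack_depot_addr()/__scudo_get_stack_depot_size() and __scudo_get_region_info_addr()/__scudo_get_region_info_size() in the crashed process and feeds them to __scudo_get_error_info(). A hedged sketch of that last step; the *Copy buffers, the fault details, and the "scudo/interface.h" include path are assumptions:

#include "scudo/interface.h" // assumed to declare scudo_error_info and the accessors
#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Hypothetical snapshots captured by the crash handler from the crashed process.
extern const char *DepotCopy, *RegionInfoCopy, *MemoryCopy, *MemoryTagsCopy;
extern uintptr_t FaultAddr, MemoryAddr;
extern size_t MemorySize;

void diagnoseTagFault() {
  scudo_error_info Info;
  memset(&Info, 0, sizeof(Info));
  __scudo_get_error_info(&Info, FaultAddr, DepotCopy, RegionInfoCopy, MemoryCopy,
                         MemoryTagsCopy, MemoryAddr, MemorySize);
  // Info now carries scudo's best-effort diagnosis for the faulting access,
  // including recorded allocation/deallocation stacks when tracking is enabled.
}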