vendor/compiler-rt/compiler-rt-trunk-r321017

author: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:11:54 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:11:54 +0000
commit: cdf4f3055e964bb585f294cf77cb549ead82783f (patch)
tree: 7bceeca766b3fbe491245bc926a083f78c35d1de /lib/scudo
parent: 625108084a3ec7c19c7745004c5af0ed7aa417a9 (diff)
24 files changed, 915 insertions, 895 deletions
diff --git a/lib/scudo/CMakeLists.txt b/lib/scudo/CMakeLists.txt
index 14c199fa82270..4d26a3477feb6 100644
--- a/lib/scudo/CMakeLists.txt
+++ b/lib/scudo/CMakeLists.txt
@@ -12,12 +12,14 @@ set(SCUDO_SOURCES
   scudo_flags.cpp
   scudo_crc32.cpp
   scudo_interceptors.cpp
-  scudo_new_delete.cpp
   scudo_termination.cpp
-  scudo_tls_android.cpp
-  scudo_tls_linux.cpp
+  scudo_tsd_exclusive.cpp
+  scudo_tsd_shared.cpp
   scudo_utils.cpp)
 
+set(SCUDO_CXX_SOURCES
+  scudo_new_delete.cpp)
+
 # Enable the SSE 4.2 instruction set for scudo_crc32.cpp, if available.
 if (COMPILER_RT_HAS_MSSE4_2_FLAG)
   set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
@@ -30,15 +32,41 @@ if (COMPILER_RT_HAS_MCRC_FLAG)
 endif()
 
 if(COMPILER_RT_HAS_SCUDO)
-  foreach(arch ${SCUDO_SUPPORTED_ARCH})
-    add_compiler_rt_runtime(clang_rt.scudo
-      STATIC
-      ARCHS ${arch}
-      SOURCES ${SCUDO_SOURCES}
-              $<TARGET_OBJECTS:RTInterception.${arch}>
-              $<TARGET_OBJECTS:RTSanitizerCommonNoTermination.${arch}>
-              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
-      CFLAGS ${SCUDO_CFLAGS}
-      PARENT_TARGET scudo)
-  endforeach()
+  set(SCUDO_DYNAMIC_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+  append_list_if(COMPILER_RT_HAS_LIBDL dl SCUDO_DYNAMIC_LIBS)
+  append_list_if(COMPILER_RT_HAS_LIBRT rt SCUDO_DYNAMIC_LIBS)
+  append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread SCUDO_DYNAMIC_LIBS)
+  append_list_if(COMPILER_RT_HAS_LIBLOG log SCUDO_DYNAMIC_LIBS)
+
+  add_compiler_rt_runtime(clang_rt.scudo
+    STATIC
+    ARCHS ${SCUDO_SUPPORTED_ARCH}
+    SOURCES ${SCUDO_SOURCES}
+    OBJECT_LIBS RTSanitizerCommonNoTermination
+                RTSanitizerCommonLibc
+                RTInterception
+                RTUbsan
+    CFLAGS ${SCUDO_CFLAGS}
+    PARENT_TARGET scudo)
+
+  add_compiler_rt_runtime(clang_rt.scudo_cxx
+    STATIC
+    ARCHS ${SCUDO_SUPPORTED_ARCH}
+    SOURCES ${SCUDO_CXX_SOURCES}
+    OBJECT_LIBS RTUbsan_cxx
+    CFLAGS ${SCUDO_CFLAGS}
+    PARENT_TARGET scudo)
+
+  add_compiler_rt_runtime(clang_rt.scudo
+    SHARED
+    ARCHS ${SCUDO_SUPPORTED_ARCH}
+    SOURCES ${SCUDO_SOURCES} ${SCUDO_CXX_SOURCES}
+    OBJECT_LIBS RTSanitizerCommonNoTermination
+                RTSanitizerCommonLibc
+                RTInterception
+                RTUbsan
+                RTUbsan_cxx
+    CFLAGS ${SCUDO_CFLAGS}
+    LINK_LIBS ${SCUDO_DYNAMIC_LIBS}
+    PARENT_TARGET scudo)
 endif()
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp
index 6f30ee9875137..e5a4d714c66e4 100644
--- a/lib/scudo/scudo_allocator.cpp
+++ b/lib/scudo/scudo_allocator.cpp
@@ -16,26 +16,27 @@
 
 #include "scudo_allocator.h"
 #include "scudo_crc32.h"
-#include "scudo_tls.h"
+#include "scudo_flags.h"
+#include "scudo_tsd.h"
 #include "scudo_utils.h"
 
 #include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
-#include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_quarantine.h"
 
+#include <errno.h>
 #include <string.h>
 
 namespace __scudo {
 
 // Global static cookie, initialized at start-up.
-static uptr Cookie;
+static u32 Cookie;
 
 // We default to software CRC32 if the alternatives are not supported, either
 // at compilation or at runtime.
 static atomic_uint8_t HashAlgorithm = { CRC32Software };
 
-INLINE u32 computeCRC32(uptr Crc, uptr Value, uptr *Array, uptr ArraySize) {
+INLINE u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
   // If the hardware CRC32 feature is defined here, it was enabled everywhere,
   // as opposed to only for scudo_crc32.cpp. This means that other hardware
   // specific instructions were likely emitted at other places, and as a
@@ -61,46 +62,60 @@ INLINE u32 computeCRC32(uptr Crc, uptr Value, uptr *Array, uptr ArraySize) {
 
 static ScudoBackendAllocator &getBackendAllocator();
 
-struct ScudoChunk : UnpackedHeader {
+namespace Chunk {
   // We can't use the offset member of the chunk itself, as we would double
   // fetch it without any warranty that it wouldn't have been tampered. To
   // prevent this, we work with a local copy of the header.
-  void *getAllocBeg(UnpackedHeader *Header) {
-    return reinterpret_cast<void *>(
-        reinterpret_cast<uptr>(this) - (Header->Offset << MinAlignmentLog));
+  static INLINE void *getBackendPtr(const void *Ptr, UnpackedHeader *Header) {
+    return reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
+                                    AlignedChunkHeaderSize -
+                                    (Header->Offset << MinAlignmentLog));
+  }
+
+  static INLINE AtomicPackedHeader *getAtomicHeader(void *Ptr) {
+    return reinterpret_cast<AtomicPackedHeader *>(reinterpret_cast<uptr>(Ptr) -
+                                                  AlignedChunkHeaderSize);
+  }
+  static INLINE
+  const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) {
+    return reinterpret_cast<const AtomicPackedHeader *>(
+        reinterpret_cast<uptr>(Ptr) - AlignedChunkHeaderSize);
+  }
+
+  static INLINE bool isAligned(const void *Ptr) {
+    return IsAligned(reinterpret_cast<uptr>(Ptr), MinAlignment);
   }
 
   // Returns the usable size for a chunk, meaning the amount of bytes from the
   // beginning of the user data to the end of the backend allocated chunk.
-  uptr getUsableSize(UnpackedHeader *Header) {
-    uptr Size =
-        getBackendAllocator().getActuallyAllocatedSize(getAllocBeg(Header),
-                                                       Header->FromPrimary);
+  static INLINE uptr getUsableSize(const void *Ptr, UnpackedHeader *Header) {
+    const uptr Size = getBackendAllocator().getActuallyAllocatedSize(
+        getBackendPtr(Ptr, Header), Header->ClassId);
     if (Size == 0)
       return 0;
     return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog);
   }
 
-  // Compute the checksum of the Chunk pointer and its ChunkHeader.
-  u16 computeChecksum(UnpackedHeader *Header) const {
+  // Compute the checksum of the chunk pointer and its header.
+  static INLINE u16 computeChecksum(const void *Ptr, UnpackedHeader *Header) {
     UnpackedHeader ZeroChecksumHeader = *Header;
     ZeroChecksumHeader.Checksum = 0;
     uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)];
     memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder));
-    u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(this), HeaderHolder,
-                           ARRAY_SIZE(HeaderHolder));
+    const u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(Ptr),
+                                 HeaderHolder, ARRAY_SIZE(HeaderHolder));
     return static_cast<u16>(Crc);
   }
 
   // Checks the validity of a chunk by verifying its checksum. It doesn't
   // incur termination in the event of an invalid chunk.
-  bool isValid() {
-    UnpackedHeader NewUnpackedHeader;
-    const AtomicPackedHeader *AtomicHeader =
-        reinterpret_cast<const AtomicPackedHeader *>(this);
-    PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader);
-    NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
-    return (NewUnpackedHeader.Checksum == computeChecksum(&NewUnpackedHeader));
+  static INLINE bool isValid(const void *Ptr) {
+    PackedHeader NewPackedHeader =
+        atomic_load_relaxed(getConstAtomicHeader(Ptr));
+    UnpackedHeader NewUnpackedHeader =
+        bit_cast<UnpackedHeader>(NewPackedHeader);
+    return (NewUnpackedHeader.Checksum ==
+            computeChecksum(Ptr, &NewUnpackedHeader));
   }
 
   // Nulls out a chunk header. When returning the chunk to the backend, there
@@ -109,114 +124,46 @@ struct ScudoChunk : UnpackedHeader {
   // the header invalid. In the extremely rare event where 0 would be a valid
   // checksum for the chunk, the state of the chunk is ChunkAvailable anyway.
   COMPILER_CHECK(ChunkAvailable == 0);
-  void eraseHeader() {
-    PackedHeader NullPackedHeader = 0;
-    AtomicPackedHeader *AtomicHeader =
-        reinterpret_cast<AtomicPackedHeader *>(this);
-    atomic_store_relaxed(AtomicHeader, NullPackedHeader);
+  static INLINE void eraseHeader(void *Ptr) {
+    const PackedHeader NullPackedHeader = 0;
+    atomic_store_relaxed(getAtomicHeader(Ptr), NullPackedHeader);
   }
 
   // Loads and unpacks the header, verifying the checksum in the process.
-  void loadHeader(UnpackedHeader *NewUnpackedHeader) const {
-    const AtomicPackedHeader *AtomicHeader =
-        reinterpret_cast<const AtomicPackedHeader *>(this);
-    PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader);
+  static INLINE
+  void loadHeader(const void *Ptr, UnpackedHeader *NewUnpackedHeader) {
+    PackedHeader NewPackedHeader =
+        atomic_load_relaxed(getConstAtomicHeader(Ptr));
     *NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
     if (UNLIKELY(NewUnpackedHeader->Checksum !=
-        computeChecksum(NewUnpackedHeader))) {
-      dieWithMessage("ERROR: corrupted chunk header at address %p\n", this);
+        computeChecksum(Ptr, NewUnpackedHeader))) {
+      dieWithMessage("ERROR: corrupted chunk header at address %p\n", Ptr);
     }
   }
 
   // Packs and stores the header, computing the checksum in the process.
-  void storeHeader(UnpackedHeader *NewUnpackedHeader) {
-    NewUnpackedHeader->Checksum = computeChecksum(NewUnpackedHeader);
+  static INLINE void storeHeader(void *Ptr, UnpackedHeader *NewUnpackedHeader) {
+    NewUnpackedHeader->Checksum = computeChecksum(Ptr, NewUnpackedHeader);
     PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
-    AtomicPackedHeader *AtomicHeader =
-        reinterpret_cast<AtomicPackedHeader *>(this);
-    atomic_store_relaxed(AtomicHeader, NewPackedHeader);
+    atomic_store_relaxed(getAtomicHeader(Ptr), NewPackedHeader);
   }
 
   // Packs and stores the header, computing the checksum in the process. We
   // compare the current header with the expected provided one to ensure that
   // we are not being raced by a corruption occurring in another thread.
-  void compareExchangeHeader(UnpackedHeader *NewUnpackedHeader,
-                             UnpackedHeader *OldUnpackedHeader) {
-    NewUnpackedHeader->Checksum = computeChecksum(NewUnpackedHeader);
+  static INLINE void compareExchangeHeader(void *Ptr,
+                                           UnpackedHeader *NewUnpackedHeader,
+                                           UnpackedHeader *OldUnpackedHeader) {
+    NewUnpackedHeader->Checksum = computeChecksum(Ptr, NewUnpackedHeader);
     PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
     PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader);
-    AtomicPackedHeader *AtomicHeader =
-        reinterpret_cast<AtomicPackedHeader *>(this);
-    if (UNLIKELY(!atomic_compare_exchange_strong(AtomicHeader,
-                                                 &OldPackedHeader,
-                                                 NewPackedHeader,
-                                                 memory_order_relaxed))) {
-      dieWithMessage("ERROR: race on chunk header at address %p\n", this);
+    if (UNLIKELY(!atomic_compare_exchange_strong(
+            getAtomicHeader(Ptr), &OldPackedHeader, NewPackedHeader,
+            memory_order_relaxed))) {
+      dieWithMessage("ERROR: race on chunk header at address %p\n", Ptr);
     }
   }
-};
-
-ScudoChunk *getScudoChunk(uptr UserBeg) {
-  return reinterpret_cast<ScudoChunk *>(UserBeg - AlignedChunkHeaderSize);
-}
-
-struct AllocatorOptions {
-  u32 QuarantineSizeMb;
-  u32 ThreadLocalQuarantineSizeKb;
-  bool MayReturnNull;
-  s32 ReleaseToOSIntervalMs;
-  bool DeallocationTypeMismatch;
-  bool DeleteSizeMismatch;
-  bool ZeroContents;
-
-  void setFrom(const Flags *f, const CommonFlags *cf);
-  void copyTo(Flags *f, CommonFlags *cf) const;
-};
-
-void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) {
-  MayReturnNull = cf->allocator_may_return_null;
-  ReleaseToOSIntervalMs = cf->allocator_release_to_os_interval_ms;
-  QuarantineSizeMb = f->QuarantineSizeMb;
-  ThreadLocalQuarantineSizeKb = f->ThreadLocalQuarantineSizeKb;
-  DeallocationTypeMismatch = f->DeallocationTypeMismatch;
-  DeleteSizeMismatch = f->DeleteSizeMismatch;
-  ZeroContents = f->ZeroContents;
-}
-
-void AllocatorOptions::copyTo(Flags *f, CommonFlags *cf) const {
-  cf->allocator_may_return_null = MayReturnNull;
-  cf->allocator_release_to_os_interval_ms = ReleaseToOSIntervalMs;
-  f->QuarantineSizeMb = QuarantineSizeMb;
-  f->ThreadLocalQuarantineSizeKb = ThreadLocalQuarantineSizeKb;
-  f->DeallocationTypeMismatch = DeallocationTypeMismatch;
-  f->DeleteSizeMismatch = DeleteSizeMismatch;
-  f->ZeroContents = ZeroContents;
-}
-
-static void initScudoInternal(const AllocatorOptions &Options);
-
-static bool ScudoInitIsRunning = false;
-
-void initScudo() {
-  SanitizerToolName = "Scudo";
-  CHECK(!ScudoInitIsRunning && "Scudo init calls itself!");
-  ScudoInitIsRunning = true;
-
-  // Check if hardware CRC32 is supported in the binary and by the platform, if
-  // so, opt for the CRC32 hardware version of the checksum.
-  if (computeHardwareCRC32 && testCPUFeature(CRC32CPUFeature))
-    atomic_store_relaxed(&HashAlgorithm, CRC32Hardware);
-
-  initFlags();
-
-  AllocatorOptions Options;
-  Options.setFrom(getFlags(), common_flags());
-  initScudoInternal(Options);
-
-  // TODO(kostyak): determine if MaybeStartBackgroudThread could be of some use.
-
-  ScudoInitIsRunning = false;
-}
+}  // namespace Chunk
 
 struct QuarantineCallback {
   explicit QuarantineCallback(AllocatorCache *Cache)
@@ -224,52 +171,46 @@ struct QuarantineCallback {
 
   // Chunk recycling function, returns a quarantined chunk to the backend,
   // first making sure it hasn't been tampered with.
-  void Recycle(ScudoChunk *Chunk) {
+  void Recycle(void *Ptr) {
     UnpackedHeader Header;
-    Chunk->loadHeader(&Header);
+    Chunk::loadHeader(Ptr, &Header);
     if (UNLIKELY(Header.State != ChunkQuarantine)) {
       dieWithMessage("ERROR: invalid chunk state when recycling address %p\n",
-                     Chunk);
+                     Ptr);
     }
-    Chunk->eraseHeader();
-    void *Ptr = Chunk->getAllocBeg(&Header);
-    if (Header.FromPrimary)
-      getBackendAllocator().deallocatePrimary(Cache_, Ptr);
+    Chunk::eraseHeader(Ptr);
+    void *BackendPtr = Chunk::getBackendPtr(Ptr, &Header);
+    if (Header.ClassId)
+      getBackendAllocator().deallocatePrimary(Cache_, BackendPtr,
+                                              Header.ClassId);
     else
-      getBackendAllocator().deallocateSecondary(Ptr);
+      getBackendAllocator().deallocateSecondary(BackendPtr);
   }
 
   // Internal quarantine allocation and deallocation functions. We first check
   // that the batches are indeed serviced by the Primary.
   // TODO(kostyak): figure out the best way to protect the batches.
-  COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize);
   void *Allocate(uptr Size) {
-    return getBackendAllocator().allocatePrimary(Cache_, Size);
+    return getBackendAllocator().allocatePrimary(Cache_, BatchClassId);
   }
 
   void Deallocate(void *Ptr) {
-    getBackendAllocator().deallocatePrimary(Cache_, Ptr);
+    getBackendAllocator().deallocatePrimary(Cache_, Ptr, BatchClassId);
   }
 
   AllocatorCache *Cache_;
+  COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize);
+  const uptr BatchClassId = SizeClassMap::ClassID(sizeof(QuarantineBatch));
 };
 
-typedef Quarantine<QuarantineCallback, ScudoChunk> ScudoQuarantine;
+typedef Quarantine<QuarantineCallback, void> ScudoQuarantine;
 typedef ScudoQuarantine::Cache ScudoQuarantineCache;
 COMPILER_CHECK(sizeof(ScudoQuarantineCache) <=
-               sizeof(ScudoThreadContext::QuarantineCachePlaceHolder));
+               sizeof(ScudoTSD::QuarantineCachePlaceHolder));
 
-AllocatorCache *getAllocatorCache(ScudoThreadContext *ThreadContext) {
-  return &ThreadContext->Cache;
-}
-
-ScudoQuarantineCache *getQuarantineCache(ScudoThreadContext *ThreadContext) {
-  return reinterpret_cast<
-      ScudoQuarantineCache *>(ThreadContext->QuarantineCachePlaceHolder);
-}
-
-ScudoPrng *getPrng(ScudoThreadContext *ThreadContext) {
-  return &ThreadContext->Prng;
+ScudoQuarantineCache *getQuarantineCache(ScudoTSD *TSD) {
+  return reinterpret_cast<ScudoQuarantineCache *>(
+      TSD->QuarantineCachePlaceHolder);
 }
 
 struct ScudoAllocator {
@@ -281,26 +222,22 @@ struct ScudoAllocator {
   ScudoBackendAllocator BackendAllocator;
   ScudoQuarantine AllocatorQuarantine;
 
-  StaticSpinMutex GlobalPrngMutex;
-  ScudoPrng GlobalPrng;
-
-  // The fallback caches are used when the thread local caches have been
-  // 'detroyed' on thread tear-down. They are protected by a Mutex as they can
-  // be accessed by different threads.
-  StaticSpinMutex FallbackMutex;
-  AllocatorCache FallbackAllocatorCache;
-  ScudoQuarantineCache FallbackQuarantineCache;
-  ScudoPrng FallbackPrng;
+  u32 QuarantineChunksUpToSize;
 
   bool DeallocationTypeMismatch;
   bool ZeroContents;
   bool DeleteSizeMismatch;
 
+  bool CheckRssLimit;
+  uptr HardRssLimitMb;
+  uptr SoftRssLimitMb;
+  atomic_uint8_t RssLimitExceeded;
+  atomic_uint64_t RssLastCheckedAtNS;
+
   explicit ScudoAllocator(LinkerInitialized)
-    : AllocatorQuarantine(LINKER_INITIALIZED),
-      FallbackQuarantineCache(LINKER_INITIALIZED) {}
+    : AllocatorQuarantine(LINKER_INITIALIZED) {}
 
-  void init(const AllocatorOptions &Options) {
+  void performSanityChecks() {
     // Verify that the header offset field can hold the maximum offset. In the
     // case of the Secondary allocator, it takes care of alignment and the
     // offset will always be 0. In the case of the Primary, the worst case
@@ -310,9 +247,9 @@ struct ScudoAllocator {
     // result, the maximum offset will be at most the maximum alignment for the
     // last size class minus the header size, in multiples of MinAlignment.
     UnpackedHeader Header = {};
-    uptr MaxPrimaryAlignment =
+    const uptr MaxPrimaryAlignment =
         1 << MostSignificantSetBitIndex(SizeClassMap::kMaxSize - MinAlignment);
-    uptr MaxOffset =
+    const uptr MaxOffset =
         (MaxPrimaryAlignment - AlignedChunkHeaderSize) >> MinAlignmentLog;
     Header.Offset = MaxOffset;
     if (Header.Offset != MaxOffset) {
@@ -324,36 +261,97 @@ struct ScudoAllocator {
     // case scenario happens in the Primary. It will depend on the second to
     // last and last class sizes, as well as the dynamic base for the Primary.
     // The following is an over-approximation that works for our needs.
-    uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1;
+    const uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1;
     Header.SizeOrUnusedBytes = MaxSizeOrUnusedBytes;
     if (Header.SizeOrUnusedBytes != MaxSizeOrUnusedBytes) {
       dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in "
                      "the header\n");
     }
 
-    DeallocationTypeMismatch = Options.DeallocationTypeMismatch;
-    DeleteSizeMismatch = Options.DeleteSizeMismatch;
-    ZeroContents = Options.ZeroContents;
-    SetAllocatorMayReturnNull(Options.MayReturnNull);
-    BackendAllocator.init(Options.ReleaseToOSIntervalMs);
+    const uptr LargestClassId = SizeClassMap::kLargestClassID;
+    Header.ClassId = LargestClassId;
+    if (Header.ClassId != LargestClassId) {
+      dieWithMessage("ERROR: the largest class ID doesn't fit in the header\n");
+    }
+  }
+
+  void init() {
+    SanitizerToolName = "Scudo";
+    initFlags();
+
+    performSanityChecks();
+
+    // Check if hardware CRC32 is supported in the binary and by the platform,
+    // if so, opt for the CRC32 hardware version of the checksum.
+    if (&computeHardwareCRC32 && hasHardwareCRC32())
+      atomic_store_relaxed(&HashAlgorithm, CRC32Hardware);
+
+    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+    BackendAllocator.init(common_flags()->allocator_release_to_os_interval_ms);
+    HardRssLimitMb = common_flags()->hard_rss_limit_mb;
+    SoftRssLimitMb = common_flags()->soft_rss_limit_mb;
     AllocatorQuarantine.Init(
-        static_cast<uptr>(Options.QuarantineSizeMb) << 20,
-        static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10);
-    GlobalPrng.init();
-    Cookie = GlobalPrng.getU64();
-    BackendAllocator.initCache(&FallbackAllocatorCache);
-    FallbackPrng.init();
+        static_cast<uptr>(getFlags()->QuarantineSizeKb) << 10,
+        static_cast<uptr>(getFlags()->ThreadLocalQuarantineSizeKb) << 10);
+    QuarantineChunksUpToSize = getFlags()->QuarantineChunksUpToSize;
+    DeallocationTypeMismatch = getFlags()->DeallocationTypeMismatch;
+    DeleteSizeMismatch = getFlags()->DeleteSizeMismatch;
+    ZeroContents = getFlags()->ZeroContents;
+
+    if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(&Cookie), sizeof(Cookie),
+                            /*blocking=*/false))) {
+      Cookie = static_cast<u32>((NanoTime() >> 12) ^
+                                (reinterpret_cast<uptr>(this) >> 4));
+    }
+
+    CheckRssLimit = HardRssLimitMb || SoftRssLimitMb;
+    if (CheckRssLimit)
+      atomic_store_relaxed(&RssLastCheckedAtNS, MonotonicNanoTime());
   }
 
   // Helper function that checks for a valid Scudo chunk. nullptr isn't.
-  bool isValidPointer(const void *UserPtr) {
+  bool isValidPointer(const void *Ptr) {
     initThreadMaybe();
-    if (UNLIKELY(!UserPtr))
+    if (UNLIKELY(!Ptr))
       return false;
-    uptr UserBeg = reinterpret_cast<uptr>(UserPtr);
-    if (!IsAligned(UserBeg, MinAlignment))
+    if (!Chunk::isAligned(Ptr))
       return false;
-    return getScudoChunk(UserBeg)->isValid();
+    return Chunk::isValid(Ptr);
+  }
+
+  // Opportunistic RSS limit check. This will update the RSS limit status, if
+  // it can, every 100ms, otherwise it will just return the current one.
+  bool isRssLimitExceeded() {
+    u64 LastCheck = atomic_load_relaxed(&RssLastCheckedAtNS);
+    const u64 CurrentCheck = MonotonicNanoTime();
+    if (LIKELY(CurrentCheck < LastCheck + (100ULL * 1000000ULL)))
+      return atomic_load_relaxed(&RssLimitExceeded);
+    if (!atomic_compare_exchange_weak(&RssLastCheckedAtNS, &LastCheck,
+                                      CurrentCheck, memory_order_relaxed))
+      return atomic_load_relaxed(&RssLimitExceeded);
+    // TODO(kostyak): We currently use sanitizer_common's GetRSS which reads the
+    //                RSS from /proc/self/statm by default. We might want to
+    //                call getrusage directly, even if it's less accurate.
+    const uptr CurrentRssMb = GetRSS() >> 20;
+    if (HardRssLimitMb && HardRssLimitMb < CurrentRssMb) {
+      Report("%s: hard RSS limit exhausted (%zdMb vs %zdMb)\n",
+             SanitizerToolName, HardRssLimitMb, CurrentRssMb);
+      DumpProcessMap();
+      Die();
+    }
+    if (SoftRssLimitMb) {
+      if (atomic_load_relaxed(&RssLimitExceeded)) {
+        if (CurrentRssMb <= SoftRssLimitMb)
+          atomic_store_relaxed(&RssLimitExceeded, false);
+      } else {
+        if (CurrentRssMb > SoftRssLimitMb) {
+          atomic_store_relaxed(&RssLimitExceeded, true);
+          Report("%s: soft RSS limit exhausted (%zdMb vs %zdMb)\n",
+                 SanitizerToolName, SoftRssLimitMb, CurrentRssMb);
+        }
+      }
+    }
+    return atomic_load_relaxed(&RssLimitExceeded);
   }
 
   // Allocates a chunk.
@@ -375,207 +373,185 @@ struct ScudoAllocator {
     if (UNLIKELY(AlignedSize >= MaxAllowedMallocSize))
       return FailureHandler::OnBadRequest();
 
+    if (CheckRssLimit && UNLIKELY(isRssLimitExceeded()))
+      return FailureHandler::OnOOM();
+
     // Primary and Secondary backed allocations have a different treatment. We
     // deal with alignment requirements of Primary serviced allocations here,
     // but the Secondary will take care of its own alignment needs.
-    bool FromPrimary = PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment);
-
-    void *Ptr;
-    u8 Salt;
-    uptr AllocSize;
-    if (FromPrimary) {
-      AllocSize = AlignedSize;
-      ScudoThreadContext *ThreadContext = getThreadContextAndLock();
-      if (LIKELY(ThreadContext)) {
-        Salt = getPrng(ThreadContext)->getU8();
-        Ptr = BackendAllocator.allocatePrimary(getAllocatorCache(ThreadContext),
-                                               AllocSize);
-        ThreadContext->unlock();
-      } else {
-        SpinMutexLock l(&FallbackMutex);
-        Salt = FallbackPrng.getU8();
-        Ptr = BackendAllocator.allocatePrimary(&FallbackAllocatorCache,
-                                               AllocSize);
-      }
+    void *BackendPtr;
+    uptr BackendSize;
+    u8 ClassId;
+    if (PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment)) {
+      BackendSize = AlignedSize;
+      ClassId = SizeClassMap::ClassID(BackendSize);
+      ScudoTSD *TSD = getTSDAndLock();
+      BackendPtr = BackendAllocator.allocatePrimary(&TSD->Cache, ClassId);
+      TSD->unlock();
     } else {
-      {
-        SpinMutexLock l(&GlobalPrngMutex);
-        Salt = GlobalPrng.getU8();
-      }
-      AllocSize = NeededSize;
-      Ptr = BackendAllocator.allocateSecondary(AllocSize, Alignment);
+      BackendSize = NeededSize;
+      ClassId = 0;
+      BackendPtr = BackendAllocator.allocateSecondary(BackendSize, Alignment);
     }
-    if (UNLIKELY(!Ptr))
+    if (UNLIKELY(!BackendPtr))
       return FailureHandler::OnOOM();
 
     // If requested, we will zero out the entire contents of the returned chunk.
-    if ((ForceZeroContents || ZeroContents) && FromPrimary)
-      memset(Ptr, 0, BackendAllocator.getActuallyAllocatedSize(
-          Ptr, /*FromPrimary=*/true));
+    if ((ForceZeroContents || ZeroContents) && ClassId)
+      memset(BackendPtr, 0,
+             BackendAllocator.getActuallyAllocatedSize(BackendPtr, ClassId));
 
     UnpackedHeader Header = {};
-    uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
-    uptr UserBeg = AllocBeg + AlignedChunkHeaderSize;
-    if (UNLIKELY(!IsAligned(UserBeg, Alignment))) {
+    uptr UserPtr = reinterpret_cast<uptr>(BackendPtr) + AlignedChunkHeaderSize;
+    if (UNLIKELY(!IsAligned(UserPtr, Alignment))) {
       // Since the Secondary takes care of alignment, a non-aligned pointer
       // means it is from the Primary. It is also the only case where the offset
       // field of the header would be non-zero.
-      CHECK(FromPrimary);
-      UserBeg = RoundUpTo(UserBeg, Alignment);
-      uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg;
-      Header.Offset = Offset >> MinAlignmentLog;
+      DCHECK(ClassId);
+      const uptr AlignedUserPtr = RoundUpTo(UserPtr, Alignment);
+      Header.Offset = (AlignedUserPtr - UserPtr) >> MinAlignmentLog;
+      UserPtr = AlignedUserPtr;
     }
-    CHECK_LE(UserBeg + Size, AllocBeg + AllocSize);
+    CHECK_LE(UserPtr + Size, reinterpret_cast<uptr>(BackendPtr) + BackendSize);
     Header.State = ChunkAllocated;
     Header.AllocType = Type;
-    if (FromPrimary) {
-      Header.FromPrimary = 1;
+    if (ClassId) {
+      Header.ClassId = ClassId;
       Header.SizeOrUnusedBytes = Size;
     } else {
       // The secondary fits the allocations to a page, so the amount of unused
       // bytes is the difference between the end of the user allocation and the
       // next page boundary.
-      uptr PageSize = GetPageSizeCached();
-      uptr TrailingBytes = (UserBeg + Size) & (PageSize - 1);
+      const uptr PageSize = GetPageSizeCached();
+      const uptr TrailingBytes = (UserPtr + Size) & (PageSize - 1);
       if (TrailingBytes)
         Header.SizeOrUnusedBytes = PageSize - TrailingBytes;
     }
-    Header.Salt = Salt;
-    getScudoChunk(UserBeg)->storeHeader(&Header);
-    void *UserPtr = reinterpret_cast<void *>(UserBeg);
-    // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size);
-    return UserPtr;
-  }
-
-  // Place a chunk in the quarantine. In the event of a zero-sized quarantine,
-  // we directly deallocate the chunk, otherwise the flow would lead to the
-  // chunk being loaded (and checked) twice, and stored (and checksummed) once,
-  // with no additional security value.
-  void quarantineOrDeallocateChunk(ScudoChunk *Chunk, UnpackedHeader *Header,
+    void *Ptr = reinterpret_cast<void *>(UserPtr);
+    Chunk::storeHeader(Ptr, &Header);
+    // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(Ptr, Size);
+    return Ptr;
+  }
+
+  // Place a chunk in the quarantine or directly deallocate it in the event of
+  // a zero-sized quarantine, or if the size of the chunk is greater than the
+  // quarantine chunk size threshold.
+  void quarantineOrDeallocateChunk(void *Ptr, UnpackedHeader *Header,
                                    uptr Size) {
-    bool FromPrimary = Header->FromPrimary;
-    bool BypassQuarantine = (AllocatorQuarantine.GetCacheSize() == 0);
+    const bool BypassQuarantine = (AllocatorQuarantine.GetCacheSize() == 0) ||
+        (Size > QuarantineChunksUpToSize);
     if (BypassQuarantine) {
-      Chunk->eraseHeader();
-      void *Ptr = Chunk->getAllocBeg(Header);
-      if (FromPrimary) {
-        ScudoThreadContext *ThreadContext = getThreadContextAndLock();
-        if (LIKELY(ThreadContext)) {
-          getBackendAllocator().deallocatePrimary(
-              getAllocatorCache(ThreadContext), Ptr);
-          ThreadContext->unlock();
-        } else {
-          SpinMutexLock Lock(&FallbackMutex);
-          getBackendAllocator().deallocatePrimary(&FallbackAllocatorCache, Ptr);
-        }
+      Chunk::eraseHeader(Ptr);
+      void *BackendPtr = Chunk::getBackendPtr(Ptr, Header);
+      if (Header->ClassId) {
+        ScudoTSD *TSD = getTSDAndLock();
+        getBackendAllocator().deallocatePrimary(&TSD->Cache, BackendPtr,
+                                                Header->ClassId);
+        TSD->unlock();
       } else {
-        getBackendAllocator().deallocateSecondary(Ptr);
+        getBackendAllocator().deallocateSecondary(BackendPtr);
       }
     } else {
+      // If a small memory amount was allocated with a larger alignment, we want
+      // to take that into account. Otherwise the Quarantine would be filled
+      // with tiny chunks, taking a lot of VA memory. This is an approximation
+      // of the usable size, that allows us to not call
+      // GetActuallyAllocatedSize.
+      uptr EstimatedSize = Size + (Header->Offset << MinAlignmentLog);
       UnpackedHeader NewHeader = *Header;
       NewHeader.State = ChunkQuarantine;
-      Chunk->compareExchangeHeader(&NewHeader, Header);
-      ScudoThreadContext *ThreadContext = getThreadContextAndLock();
-      if (LIKELY(ThreadContext)) {
-        AllocatorQuarantine.Put(getQuarantineCache(ThreadContext),
-                                QuarantineCallback(
-                                    getAllocatorCache(ThreadContext)),
-                                Chunk, Size);
-        ThreadContext->unlock();
-      } else {
-        SpinMutexLock l(&FallbackMutex);
-        AllocatorQuarantine.Put(&FallbackQuarantineCache,
-                                QuarantineCallback(&FallbackAllocatorCache),
-                                Chunk, Size);
-      }
+      Chunk::compareExchangeHeader(Ptr, &NewHeader, Header);
+      ScudoTSD *TSD = getTSDAndLock();
+      AllocatorQuarantine.Put(getQuarantineCache(TSD),
+                              QuarantineCallback(&TSD->Cache), Ptr,
+                              EstimatedSize);
+      TSD->unlock();
     }
   }
 
-  // Deallocates a Chunk, which means adding it to the delayed free list (or
-  // Quarantine).
-  void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) {
-    initThreadMaybe();
-    // if (&__sanitizer_free_hook) __sanitizer_free_hook(UserPtr);
-    if (UNLIKELY(!UserPtr))
+  // Deallocates a Chunk, which means either adding it to the quarantine or
+  // directly returning it to the backend if criteria are met.
+  void deallocate(void *Ptr, uptr DeleteSize, AllocType Type) {
+    // For a deallocation, we only ensure minimal initialization, meaning thread
+    // local data will be left uninitialized for now (when using ELF TLS). The
+    // fallback cache will be used instead. This is a workaround for a situation
+    // where the only heap operation performed in a thread would be a free past
+    // the TLS destructors, ending up in initialized thread specific data never
+    // being destroyed properly. Any other heap operation will do a full init.
+    initThreadMaybe(/*MinimalInit=*/true);
+    // if (&__sanitizer_free_hook) __sanitizer_free_hook(Ptr);
+    if (UNLIKELY(!Ptr))
       return;
-    uptr UserBeg = reinterpret_cast<uptr>(UserPtr);
-    if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) {
+    if (UNLIKELY(!Chunk::isAligned(Ptr))) {
       dieWithMessage("ERROR: attempted to deallocate a chunk not properly "
-                     "aligned at address %p\n", UserPtr);
+                     "aligned at address %p\n", Ptr);
     }
-    ScudoChunk *Chunk = getScudoChunk(UserBeg);
-    UnpackedHeader OldHeader;
-    Chunk->loadHeader(&OldHeader);
-    if (UNLIKELY(OldHeader.State != ChunkAllocated)) {
+    UnpackedHeader Header;
+    Chunk::loadHeader(Ptr, &Header);
+    if (UNLIKELY(Header.State != ChunkAllocated)) {
       dieWithMessage("ERROR: invalid chunk state when deallocating address "
-                     "%p\n", UserPtr);
+                     "%p\n", Ptr);
     }
     if (DeallocationTypeMismatch) {
       // The deallocation type has to match the allocation one.
-      if (OldHeader.AllocType != Type) {
+      if (Header.AllocType != Type) {
         // With the exception of memalign'd Chunks, that can be still be free'd.
-        if (OldHeader.AllocType != FromMemalign || Type != FromMalloc) {
-          dieWithMessage("ERROR: allocation type mismatch on address %p\n",
-                         UserPtr);
+        if (Header.AllocType != FromMemalign || Type != FromMalloc) {
+          dieWithMessage("ERROR: allocation type mismatch when deallocating "
+                         "address %p\n", Ptr);
         }
       }
     }
-    uptr Size = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes :
-        Chunk->getUsableSize(&OldHeader) - OldHeader.SizeOrUnusedBytes;
+    uptr Size = Header.ClassId ? Header.SizeOrUnusedBytes :
+        Chunk::getUsableSize(Ptr, &Header) - Header.SizeOrUnusedBytes;
     if (DeleteSizeMismatch) {
       if (DeleteSize && DeleteSize != Size) {
         dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n",
-                       UserPtr);
+                       Ptr);
       }
     }
-
-    // If a small memory amount was allocated with a larger alignment, we want
-    // to take that into account. Otherwise the Quarantine would be filled with
-    // tiny chunks, taking a lot of VA memory. This is an approximation of the
-    // usable size, that allows us to not call GetActuallyAllocatedSize.
-    uptr LiableSize = Size + (OldHeader.Offset << MinAlignment);
-    quarantineOrDeallocateChunk(Chunk, &OldHeader, LiableSize);
+    quarantineOrDeallocateChunk(Ptr, &Header, Size);
   }
 
   // Reallocates a chunk. We can save on a new allocation if the new requested
   // size still fits in the chunk.
   void *reallocate(void *OldPtr, uptr NewSize) {
     initThreadMaybe();
-    uptr UserBeg = reinterpret_cast<uptr>(OldPtr);
-    if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) {
+    if (UNLIKELY(!Chunk::isAligned(OldPtr))) {
       dieWithMessage("ERROR: attempted to reallocate a chunk not properly "
                      "aligned at address %p\n", OldPtr);
     }
-    ScudoChunk *Chunk = getScudoChunk(UserBeg);
     UnpackedHeader OldHeader;
-    Chunk->loadHeader(&OldHeader);
+    Chunk::loadHeader(OldPtr, &OldHeader);
     if (UNLIKELY(OldHeader.State != ChunkAllocated)) {
       dieWithMessage("ERROR: invalid chunk state when reallocating address "
                      "%p\n", OldPtr);
     }
-    if (UNLIKELY(OldHeader.AllocType != FromMalloc)) {
-      dieWithMessage("ERROR: invalid chunk type when reallocating address %p\n",
-                     OldPtr);
+    if (DeallocationTypeMismatch) {
+      if (UNLIKELY(OldHeader.AllocType != FromMalloc)) {
+        dieWithMessage("ERROR: allocation type mismatch when reallocating "
+                       "address %p\n", OldPtr);
+      }
     }
-    uptr UsableSize = Chunk->getUsableSize(&OldHeader);
+    const uptr UsableSize = Chunk::getUsableSize(OldPtr, &OldHeader);
     // The new size still fits in the current chunk, and the size difference
     // is reasonable.
     if (NewSize <= UsableSize &&
         (UsableSize - NewSize) < (SizeClassMap::kMaxSize / 2)) {
       UnpackedHeader NewHeader = OldHeader;
       NewHeader.SizeOrUnusedBytes =
-                OldHeader.FromPrimary ? NewSize : UsableSize - NewSize;
-      Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
+          OldHeader.ClassId ? NewSize : UsableSize - NewSize;
+      Chunk::compareExchangeHeader(OldPtr, &NewHeader, &OldHeader);
       return OldPtr;
     }
     // Otherwise, we have to allocate a new chunk and copy the contents of the
     // old one.
     void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc);
     if (NewPtr) {
-      uptr OldSize = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes :
+      uptr OldSize = OldHeader.ClassId ? OldHeader.SizeOrUnusedBytes :
           UsableSize - OldHeader.SizeOrUnusedBytes;
-      memcpy(NewPtr, OldPtr, Min(NewSize, OldSize));
-      quarantineOrDeallocateChunk(Chunk, &OldHeader, UsableSize);
+      memcpy(NewPtr, OldPtr, Min(NewSize, UsableSize));
+      quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize);
     }
     return NewPtr;
   }
@@ -585,16 +561,14 @@ struct ScudoAllocator {
     initThreadMaybe();
     if (UNLIKELY(!Ptr))
       return 0;
-    uptr UserBeg = reinterpret_cast<uptr>(Ptr);
-    ScudoChunk *Chunk = getScudoChunk(UserBeg);
     UnpackedHeader Header;
-    Chunk->loadHeader(&Header);
+    Chunk::loadHeader(Ptr, &Header);
     // Getting the usable size of a chunk only makes sense if it's allocated.
     if (UNLIKELY(Header.State != ChunkAllocated)) {
       dieWithMessage("ERROR: invalid chunk state when sizing address %p\n",
                      Ptr);
     }
-    return Chunk->getUsableSize(&Header);
+    return Chunk::getUsableSize(Ptr, &Header);
   }
 
   void *calloc(uptr NMemB, uptr Size) {
@@ -604,11 +578,10 @@ struct ScudoAllocator {
     return allocate(NMemB * Size, MinAlignment, FromMalloc, true);
   }
 
-  void commitBack(ScudoThreadContext *ThreadContext) {
-    AllocatorCache *Cache = getAllocatorCache(ThreadContext);
-    AllocatorQuarantine.Drain(getQuarantineCache(ThreadContext),
-                              QuarantineCallback(Cache));
-    BackendAllocator.destroyCache(Cache);
+  void commitBack(ScudoTSD *TSD) {
+    AllocatorQuarantine.Drain(getQuarantineCache(TSD),
+                              QuarantineCallback(&TSD->Cache));
+    BackendAllocator.destroyCache(&TSD->Cache);
   }
 
   uptr getStats(AllocatorStat StatType) {
@@ -617,6 +590,19 @@ struct ScudoAllocator {
     BackendAllocator.getStats(stats);
     return stats[StatType];
   }
+
+  void *handleBadRequest() {
+    initThreadMaybe();
+    return FailureHandler::OnBadRequest();
+  }
+
+  void setRssLimit(uptr LimitMb, bool HardLimit) {
+    if (HardLimit)
+      HardRssLimitMb = LimitMb;
+    else
+      SoftRssLimitMb = LimitMb;
+    CheckRssLimit = HardRssLimitMb || SoftRssLimitMb;
+  }
 };
 
 static ScudoAllocator Instance(LINKER_INITIALIZED);
@@ -625,17 +611,17 @@ static ScudoBackendAllocator &getBackendAllocator() {
   return Instance.BackendAllocator;
 }
 
-static void initScudoInternal(const AllocatorOptions &Options) {
-  Instance.init(Options);
+void initScudo() {
+  Instance.init();
 }
 
-void ScudoThreadContext::init() {
+void ScudoTSD::init(bool Shared) {
+  UnlockRequired = Shared;
   getBackendAllocator().initCache(&Cache);
-  Prng.init();
   memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
 }
 
-void ScudoThreadContext::commitBack() {
+void ScudoTSD::commitBack() {
   Instance.commitBack(this);
 }
 
@@ -672,6 +658,10 @@ void *scudoValloc(uptr Size) {
 
 void *scudoPvalloc(uptr Size) {
   uptr PageSize = GetPageSizeCached();
+  if (UNLIKELY(CheckForPvallocOverflow(Size, PageSize))) {
+    errno = ENOMEM;
+    return Instance.handleBadRequest();
+  }
   // pvalloc(0) should allocate one page.
   Size = Size ? RoundUpTo(Size, PageSize) : PageSize;
   return SetErrnoOnNull(Instance.allocate(Size, PageSize, FromMemalign));
@@ -679,28 +669,28 @@ void *scudoPvalloc(uptr Size) {
 
 void *scudoMemalign(uptr Alignment, uptr Size) {
   if (UNLIKELY(!IsPowerOfTwo(Alignment))) {
-    errno = errno_EINVAL;
-    return ScudoAllocator::FailureHandler::OnBadRequest();
+    errno = EINVAL;
+    return Instance.handleBadRequest();
   }
   return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMemalign));
 }
 
 int scudoPosixMemalign(void **MemPtr, uptr Alignment, uptr Size) {
   if (UNLIKELY(!CheckPosixMemalignAlignment(Alignment))) {
-    ScudoAllocator::FailureHandler::OnBadRequest();
-    return errno_EINVAL;
+    Instance.handleBadRequest();
+    return EINVAL;
   }
   void *Ptr = Instance.allocate(Size, Alignment, FromMemalign);
   if (UNLIKELY(!Ptr))
-    return errno_ENOMEM;
+    return ENOMEM;
   *MemPtr = Ptr;
   return 0;
 }
 
 void *scudoAlignedAlloc(uptr Alignment, uptr Size) {
   if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(Alignment, Size))) {
-    errno = errno_EINVAL;
-    return ScudoAllocator::FailureHandler::OnBadRequest();
+    errno = EINVAL;
+    return Instance.handleBadRequest();
   }
   return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMalloc));
 }
@@ -742,3 +732,13 @@ int __sanitizer_get_ownership(const void *Ptr) {
 uptr __sanitizer_get_allocated_size(const void *Ptr) {
   return Instance.getUsableSize(Ptr);
 }
+
+// Interface functions
+
+extern "C" {
+void __scudo_set_rss_limit(unsigned long LimitMb, int HardLimit) {  // NOLINT
+  if (!SCUDO_CAN_USE_PUBLIC_INTERFACE)
+    return;
+  Instance.setRssLimit(LimitMb, !!HardLimit);
+}
+}  // extern "C"
diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h
index 29d85995a3eea..a561247def9c8 100644
--- a/lib/scudo/scudo_allocator.h
+++ b/lib/scudo/scudo_allocator.h
@@ -14,21 +14,15 @@
 #ifndef SCUDO_ALLOCATOR_H_
 #define SCUDO_ALLOCATOR_H_
 
-#include "scudo_flags.h"
-
-#include "sanitizer_common/sanitizer_allocator.h"
-
-#if !SANITIZER_LINUX
-# error "The Scudo hardened allocator is currently only supported on Linux."
-#endif
+#include "scudo_platform.h"
 
 namespace __scudo {
 
 enum AllocType : u8 {
-  FromMalloc    = 0, // Memory block came from malloc, realloc, calloc, etc.
-  FromNew       = 1, // Memory block came from operator new.
-  FromNewArray  = 2, // Memory block came from operator new [].
-  FromMemalign  = 3, // Memory block came from memalign, posix_memalign, etc.
+  FromMalloc    = 0,  // Memory block came from malloc, realloc, calloc, etc.
+  FromNew       = 1,  // Memory block came from operator new.
+  FromNewArray  = 2,  // Memory block came from operator new [].
+  FromMemalign  = 3,  // Memory block came from memalign, posix_memalign, etc.
 };
 
 enum ChunkState : u8 {
@@ -45,16 +39,15 @@ enum ChunkState : u8 {
 typedef u64 PackedHeader;
 struct UnpackedHeader {
   u64 Checksum          : 16;
-  u64 SizeOrUnusedBytes : 19; // Size for Primary backed allocations, amount of
-                              // unused bytes in the chunk for Secondary ones.
-  u64 FromPrimary       : 1;
-  u64 State             : 2;  // available, allocated, or quarantined
-  u64 AllocType         : 2;  // malloc, new, new[], or memalign
-  u64 Offset            : 16; // Offset from the beginning of the backend
-                              // allocation to the beginning of the chunk
-                              // itself, in multiples of MinAlignment. See
-                              // comment about its maximum value and in init().
-  u64 Salt              : 8;
+  u64 ClassId           : 8;
+  u64 SizeOrUnusedBytes : 20;  // Size for Primary backed allocations, amount of
+                               // unused bytes in the chunk for Secondary ones.
+  u64 State             : 2;   // available, allocated, or quarantined
+  u64 AllocType         : 2;   // malloc, new, new[], or memalign
+  u64 Offset            : 16;  // Offset from the beginning of the backend
+                               // allocation to the beginning of the chunk
+                               // itself, in multiples of MinAlignment. See
+                               // comment about its maximum value and in init().
 };
 
 typedef atomic_uint64_t AtomicPackedHeader;
@@ -72,14 +65,6 @@ const uptr AlignedChunkHeaderSize =
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
 const uptr AllocatorSpace = ~0ULL;
-# if defined(__aarch64__) && SANITIZER_ANDROID
-const uptr AllocatorSize = 0x4000000000ULL;  // 256G.
-# elif defined(__aarch64__)
-const uptr AllocatorSize = 0x10000000000ULL;  // 1T.
-# else
-const uptr AllocatorSize = 0x40000000000ULL;  // 4T.
-# endif
-typedef DefaultSizeClassMap SizeClassMap;
 struct AP64 {
   static const uptr kSpaceBeg = AllocatorSpace;
   static const uptr kSpaceSize = AllocatorSize;
@@ -91,17 +76,12 @@ struct AP64 {
 };
 typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 #else
-// Currently, the 32-bit Sanitizer allocator has not yet benefited from all the
-// security improvements brought to the 64-bit one. This makes the 32-bit
-// version of Scudo slightly less toughened.
-static const uptr RegionSizeLog = 20;
 static const uptr NumRegions = SANITIZER_MMAP_RANGE_SIZE >> RegionSizeLog;
 # if SANITIZER_WORDSIZE == 32
 typedef FlatByteMap<NumRegions> ByteMap;
 # elif SANITIZER_WORDSIZE == 64
 typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
 # endif  // SANITIZER_WORDSIZE
-typedef DefaultSizeClassMap SizeClassMap;
 struct AP32 {
   static const uptr kSpaceBeg = 0;
   static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
@@ -111,7 +91,8 @@ struct AP32 {
   typedef __scudo::ByteMap ByteMap;
   typedef NoOpMapUnmapCallback MapUnmapCallback;
   static const uptr kFlags =
-      SizeClassAllocator32FlagMasks::kRandomShuffleChunks;
+      SizeClassAllocator32FlagMasks::kRandomShuffleChunks |
+      SizeClassAllocator32FlagMasks::kUseSeparateSizeClassForBatch;
 };
 typedef SizeClassAllocator32<AP32> PrimaryAllocator;
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
diff --git a/lib/scudo/scudo_allocator_combined.h b/lib/scudo/scudo_allocator_combined.h
index 7599c12abb6d8..25e273114c238 100644
--- a/lib/scudo/scudo_allocator_combined.h
+++ b/lib/scudo/scudo_allocator_combined.h
@@ -31,8 +31,8 @@ class ScudoCombinedAllocator {
 
   // Primary allocations are always MinAlignment aligned, and as such do not
   // require an Alignment parameter.
-  void *allocatePrimary(AllocatorCache *Cache, uptr Size) {
-    return Cache->Allocate(&Primary, Primary.ClassID(Size));
+  void *allocatePrimary(AllocatorCache *Cache, uptr ClassId) {
+    return Cache->Allocate(&Primary, ClassId);
   }
 
   // Secondary allocations do not require a Cache, but do require an Alignment
@@ -41,17 +41,17 @@ class ScudoCombinedAllocator {
     return Secondary.Allocate(&Stats, Size, Alignment);
   }
 
-  void deallocatePrimary(AllocatorCache *Cache, void *Ptr) {
-    Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr);
+  void deallocatePrimary(AllocatorCache *Cache, void *Ptr, uptr ClassId) {
+    Cache->Deallocate(&Primary, ClassId, Ptr);
   }
 
   void deallocateSecondary(void *Ptr) {
     Secondary.Deallocate(&Stats, Ptr);
   }
 
-  uptr getActuallyAllocatedSize(void *Ptr, bool FromPrimary) {
-    if (FromPrimary)
-      return PrimaryAllocator::ClassIdToSize(Primary.GetSizeClass(Ptr));
+  uptr getActuallyAllocatedSize(void *Ptr, uptr ClassId) {
+    if (ClassId)
+      return PrimaryAllocator::ClassIdToSize(ClassId);
     return Secondary.GetActuallyAllocatedSize(Ptr);
   }
 
diff --git a/lib/scudo/scudo_allocator_secondary.h b/lib/scudo/scudo_allocator_secondary.h
index dbfb22565f9c1..f2002ed986c31 100644
--- a/lib/scudo/scudo_allocator_secondary.h
+++ b/lib/scudo/scudo_allocator_secondary.h
@@ -23,23 +23,24 @@
 
 class ScudoLargeMmapAllocator {
  public:
-
   void Init() {
-    PageSize = GetPageSizeCached();
+    PageSizeCached = GetPageSizeCached();
   }
 
   void *Allocate(AllocatorStats *Stats, uptr Size, uptr Alignment) {
-    uptr UserSize = Size - AlignedChunkHeaderSize;
+    const uptr UserSize = Size - AlignedChunkHeaderSize;
     // The Scudo frontend prevents us from allocating more than
     // MaxAllowedMallocSize, so integer overflow checks would be superfluous.
-    uptr MapSize = Size + SecondaryHeaderSize;
+    uptr MapSize = Size + AlignedReservedAddressRangeSize;
     if (Alignment > MinAlignment)
       MapSize += Alignment;
+    const uptr PageSize = PageSizeCached;
     MapSize = RoundUpTo(MapSize, PageSize);
     // Account for 2 guard pages, one before and one after the chunk.
     MapSize += 2 * PageSize;
 
-    uptr MapBeg = reinterpret_cast<uptr>(MmapNoAccess(MapSize));
+    ReservedAddressRange AddressRange;
+    uptr MapBeg = AddressRange.Init(MapSize);
     if (MapBeg == ~static_cast<uptr>(0))
       return ReturnNullOrDieOnFailure::OnOOM();
     // A page-aligned pointer is assumed after that, so check it now.
@@ -62,27 +63,27 @@ class ScudoLargeMmapAllocator {
             PageSize;
         CHECK_GE(NewMapBeg, MapBeg);
         if (NewMapBeg != MapBeg) {
-          UnmapOrDie(reinterpret_cast<void *>(MapBeg), NewMapBeg - MapBeg);
+          AddressRange.Unmap(MapBeg, NewMapBeg - MapBeg);
           MapBeg = NewMapBeg;
         }
         UserEnd = UserBeg + UserSize;
       }
       uptr NewMapEnd = RoundUpTo(UserEnd, PageSize) + PageSize;
       if (NewMapEnd != MapEnd) {
-        UnmapOrDie(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd);
+        AddressRange.Unmap(NewMapEnd, MapEnd - NewMapEnd);
         MapEnd = NewMapEnd;
       }
       MapSize = MapEnd - MapBeg;
     }
 
     CHECK_LE(UserEnd, MapEnd - PageSize);
-    // Actually mmap the memory, preserving the guard pages on either side.
-    CHECK_EQ(MapBeg + PageSize, reinterpret_cast<uptr>(
-        MmapFixedOrDie(MapBeg + PageSize, MapSize - 2 * PageSize)));
-    uptr Ptr = UserBeg - AlignedChunkHeaderSize;
-    SecondaryHeader *Header = getHeader(Ptr);
-    Header->MapBeg = MapBeg;
-    Header->MapSize = MapSize;
+    // Actually mmap the memory, preserving the guard pages on either side
+    CHECK_EQ(MapBeg + PageSize,
+             AddressRange.Map(MapBeg + PageSize, MapSize - 2 * PageSize));
+    const uptr Ptr = UserBeg - AlignedChunkHeaderSize;
+    ReservedAddressRange *StoredRange = getReservedAddressRange(Ptr);
+    *StoredRange = AddressRange;
+
     // The primary adds the whole class size to the stats when allocating a
     // chunk, so we will do something similar here. But we will not account for
     // the guard pages.
@@ -96,42 +97,43 @@ class ScudoLargeMmapAllocator {
   }
 
   void Deallocate(AllocatorStats *Stats, void *Ptr) {
-    SecondaryHeader *Header = getHeader(Ptr);
+    // Since we're unmapping the memory that holds the ReservedAddressRange
+    // itself, copy it onto the stack first.
+    const uptr PageSize = PageSizeCached;
+    ReservedAddressRange AddressRange = *getReservedAddressRange(Ptr);
     {
       SpinMutexLock l(&StatsMutex);
-      Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize);
-      Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize);
+      Stats->Sub(AllocatorStatAllocated, AddressRange.size() - 2 * PageSize);
+      Stats->Sub(AllocatorStatMapped, AddressRange.size() - 2 * PageSize);
     }
-    UnmapOrDie(reinterpret_cast<void *>(Header->MapBeg), Header->MapSize);
+    AddressRange.Unmap(reinterpret_cast<uptr>(AddressRange.base()),
+                       AddressRange.size());
   }
 
   uptr GetActuallyAllocatedSize(void *Ptr) {
-    SecondaryHeader *Header = getHeader(Ptr);
-    // Deduct PageSize as MapSize includes the trailing guard page.
-    uptr MapEnd = Header->MapBeg + Header->MapSize - PageSize;
+    ReservedAddressRange *StoredRange = getReservedAddressRange(Ptr);
+    // Deduct PageSize as ReservedAddressRange size includes the trailing guard
+    // page.
+    uptr MapEnd = reinterpret_cast<uptr>(StoredRange->base()) +
+        StoredRange->size() - PageSizeCached;
     return MapEnd - reinterpret_cast<uptr>(Ptr);
   }
 
  private:
-  // A Secondary allocated chunk header contains the base of the mapping and
-  // its size, which comprises the guard pages.
-  struct SecondaryHeader {
-    uptr MapBeg;
-    uptr MapSize;
-  };
-  // Check that sizeof(SecondaryHeader) is a multiple of MinAlignment.
-  COMPILER_CHECK((sizeof(SecondaryHeader) & (MinAlignment - 1)) == 0);
-
-  SecondaryHeader *getHeader(uptr Ptr) {
-    return reinterpret_cast<SecondaryHeader*>(Ptr - sizeof(SecondaryHeader));
+  ReservedAddressRange *getReservedAddressRange(uptr Ptr) {
+    return reinterpret_cast<ReservedAddressRange*>(
+        Ptr - sizeof(ReservedAddressRange));
   }
-  SecondaryHeader *getHeader(const void *Ptr) {
-    return getHeader(reinterpret_cast<uptr>(Ptr));
+  ReservedAddressRange *getReservedAddressRange(const void *Ptr) {
+    return getReservedAddressRange(reinterpret_cast<uptr>(Ptr));
   }
 
-  const uptr SecondaryHeaderSize = sizeof(SecondaryHeader);
-  const uptr HeadersSize = SecondaryHeaderSize + AlignedChunkHeaderSize;
-  uptr PageSize;
+  static constexpr uptr AlignedReservedAddressRangeSize =
+      (sizeof(ReservedAddressRange) + MinAlignment - 1) & ~(MinAlignment - 1);
+  static constexpr uptr HeadersSize =
+      AlignedReservedAddressRangeSize + AlignedChunkHeaderSize;
+
+  uptr PageSizeCached;
   SpinMutex StatsMutex;
 };
 
diff --git a/lib/scudo/scudo_crc32.h b/lib/scudo/scudo_crc32.h
index 5ffcc62658cc0..e89e430f4085a 100644
--- a/lib/scudo/scudo_crc32.h
+++ b/lib/scudo/scudo_crc32.h
@@ -40,7 +40,7 @@ enum : u8 {
   CRC32Hardware = 1,
 };
 
-const static u32 CRC32Table[] = {
+static const u32 CRC32Table[] = {
   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
   0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
   0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
diff --git a/lib/scudo/scudo_flags.cpp b/lib/scudo/scudo_flags.cpp
index 90f0cbf4bb864..2aff3ef1e8fae 100644
--- a/lib/scudo/scudo_flags.cpp
+++ b/lib/scudo/scudo_flags.cpp
@@ -17,12 +17,11 @@
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_flag_parser.h"
 
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-const char* __scudo_default_options();
+SANITIZER_INTERFACE_WEAK_DEF(const char*, __scudo_default_options, void);
 
 namespace __scudo {
 
-Flags ScudoFlags;  // Use via getFlags().
+static Flags ScudoFlags;  // Use via getFlags().
 
 void Flags::setDefaults() {
 #define SCUDO_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
@@ -37,7 +36,7 @@ static void RegisterScudoFlags(FlagParser *parser, Flags *f) {
 #undef SCUDO_FLAG
 }
 
-static const char *callGetScudoDefaultOptions() {
+static const char *getScudoDefaultOptions() {
   return (&__scudo_default_options) ? __scudo_default_options() : "";
 }
 
@@ -57,8 +56,7 @@ void initFlags() {
   RegisterCommonFlags(&ScudoParser);
 
   // Override from user-specified string.
-  const char *ScudoDefaultOptions = callGetScudoDefaultOptions();
-  ScudoParser.ParseString(ScudoDefaultOptions);
+  ScudoParser.ParseString(getScudoDefaultOptions());
 
   // Override from environment.
   ScudoParser.ParseString(GetEnv("SCUDO_OPTIONS"));
@@ -67,27 +65,52 @@ void initFlags() {
 
   // Sanity checks and default settings for the Quarantine parameters.
 
-  if (f->QuarantineSizeMb < 0) {
-    const int DefaultQuarantineSizeMb = FIRST_32_SECOND_64(4, 16);
-    f->QuarantineSizeMb = DefaultQuarantineSizeMb;
+  if (f->QuarantineSizeMb >= 0) {
+    // Backward compatible logic if QuarantineSizeMb is set.
+    if (f->QuarantineSizeKb >= 0) {
+      dieWithMessage("ERROR: please use either QuarantineSizeMb (deprecated) "
+          "or QuarantineSizeKb, but not both\n");
+    }
+    if (f->QuarantineChunksUpToSize >= 0) {
+      dieWithMessage("ERROR: QuarantineChunksUpToSize cannot be used in "
+          "conjunction with the deprecated QuarantineSizeMb option\n");
+    }
+    // If everything is in order, update QuarantineSizeKb accordingly.
+    f->QuarantineSizeKb = f->QuarantineSizeMb * 1024;
+  } else {
+    // Otherwise proceed with the new options.
+    if (f->QuarantineSizeKb < 0) {
+      const int DefaultQuarantineSizeKb = FIRST_32_SECOND_64(64, 256);
+      f->QuarantineSizeKb = DefaultQuarantineSizeKb;
+    }
+    if (f->QuarantineChunksUpToSize < 0) {
+      const int DefaultQuarantineChunksUpToSize = FIRST_32_SECOND_64(512, 2048);
+      f->QuarantineChunksUpToSize = DefaultQuarantineChunksUpToSize;
+    }
   }
-  // We enforce an upper limit for the quarantine size of 4Gb.
-  if (f->QuarantineSizeMb > (4 * 1024)) {
+
+  // We enforce an upper limit for the chunk quarantine threshold of 4Mb.
+  if (f->QuarantineChunksUpToSize > (4 * 1024 * 1024)) {
+    dieWithMessage("ERROR: the chunk quarantine threshold is too large\n");
+  }
+
+  // We enforce an upper limit for the quarantine size of 32Mb.
+  if (f->QuarantineSizeKb > (32 * 1024)) {
     dieWithMessage("ERROR: the quarantine size is too large\n");
   }
+
   if (f->ThreadLocalQuarantineSizeKb < 0) {
-    const int DefaultThreadLocalQuarantineSizeKb =
-        FIRST_32_SECOND_64(64, 256);
+    const int DefaultThreadLocalQuarantineSizeKb = FIRST_32_SECOND_64(16, 64);
     f->ThreadLocalQuarantineSizeKb = DefaultThreadLocalQuarantineSizeKb;
   }
-  // And an upper limit of 128Mb for the thread quarantine cache.
-  if (f->ThreadLocalQuarantineSizeKb > (128 * 1024)) {
+  // And an upper limit of 8Mb for the thread quarantine cache.
+  if (f->ThreadLocalQuarantineSizeKb > (8 * 1024)) {
     dieWithMessage("ERROR: the per thread quarantine cache size is too "
-                   "large\n");
+        "large\n");
   }
-  if (f->ThreadLocalQuarantineSizeKb == 0 && f->QuarantineSizeMb > 0) {
+  if (f->ThreadLocalQuarantineSizeKb == 0 && f->QuarantineSizeKb > 0) {
     dieWithMessage("ERROR: ThreadLocalQuarantineSizeKb can be set to 0 only "
-                   "when QuarantineSizeMb is set to 0\n");
+        "when QuarantineSizeKb is set to 0\n");
   }
 }
 
diff --git a/lib/scudo/scudo_flags.inc b/lib/scudo/scudo_flags.inc
index 45f9ea846e1ab..f180478fdac38 100644
--- a/lib/scudo/scudo_flags.inc
+++ b/lib/scudo/scudo_flags.inc
@@ -15,17 +15,27 @@
 # error "Define SCUDO_FLAG prior to including this file!"
 #endif
 
-// Default value is set in scudo_flags.cpp based on architecture.
 SCUDO_FLAG(int, QuarantineSizeMb, -1,
-           "Size (in Mb) of quarantine used to delay the actual deallocation "
-           "of chunks. Lower value may reduce memory usage but decrease the "
-           "effectiveness of the mitigation.")
+           "Deprecated. Please use QuarantineSizeKb.")
+
+// Default value is set in scudo_flags.cpp based on architecture.
+SCUDO_FLAG(int, QuarantineSizeKb, -1,
+           "Size in KB of quarantine used to delay the actual deallocation of "
+           "chunks. Lower value may reduce memory usage but decrease the "
+           "effectiveness of the mitigation. Defaults to 64KB (32-bit) or "
+           "256KB (64-bit)")
 
 // Default value is set in scudo_flags.cpp based on architecture.
 SCUDO_FLAG(int, ThreadLocalQuarantineSizeKb, -1,
-          "Size (in Kb) of per-thread cache used to offload the global "
+          "Size in KB of per-thread cache used to offload the global "
           "quarantine. Lower value may reduce memory usage but might increase "
-          "the contention on the global quarantine.")
+          "the contention on the global quarantine. Defaults to 16KB (32-bit) "
+          "or 64KB (64-bit)")
+
+// Default value is set in scudo_flags.cpp based on architecture.
+SCUDO_FLAG(int, QuarantineChunksUpToSize, -1,
+          "Size in bytes up to which chunks will be quarantined (if lower than "
+          "or equal to). Defaults to 512 (32-bit) or 2048 (64-bit)")
 
 SCUDO_FLAG(bool, DeallocationTypeMismatch, true,
           "Report errors on malloc/delete, new/free, new/delete[], etc.")
diff --git a/lib/scudo/scudo_interface_internal.h b/lib/scudo/scudo_interface_internal.h
new file mode 100644
index 0000000000000..3f39e0c4ee0bb
--- /dev/null
+++ b/lib/scudo/scudo_interface_internal.h
@@ -0,0 +1,22 @@
+//===-- scudo_interface_internal.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Private Scudo interface header.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_INTERFACE_INTERNAL_H_
+#define SCUDO_INTERFACE_INTERNAL_H_
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __scudo_set_rss_limit(unsigned long LimitMb, int HardLimit);  // NOLINT
+}  // extern "C"
+
+#endif  // SCUDO_INTERFACE_INTERNAL_H_
diff --git a/lib/scudo/scudo_new_delete.cpp b/lib/scudo/scudo_new_delete.cpp
index cdefb127b9651..c5a1abbed82ba 100644
--- a/lib/scudo/scudo_new_delete.cpp
+++ b/lib/scudo/scudo_new_delete.cpp
@@ -15,7 +15,7 @@
 
 #include "interception/interception.h"
 
-#include <cstddef>
+#include <stddef.h>
 
 using namespace __scudo;
 
diff --git a/lib/scudo/scudo_platform.h b/lib/scudo/scudo_platform.h
new file mode 100644
index 0000000000000..e1c9c32e9a623
--- /dev/null
+++ b/lib/scudo/scudo_platform.h
@@ -0,0 +1,80 @@
+//===-- scudo_platform.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo platform specific definitions.
+/// TODO(kostyak): add tests for the compile time defines.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_PLATFORM_H_
+#define SCUDO_PLATFORM_H_
+
+#include "sanitizer_common/sanitizer_allocator.h"
+
+#if !SANITIZER_LINUX && !SANITIZER_FUCHSIA
+# error "The Scudo hardened allocator is not supported on this platform."
+#endif
+
+#define SCUDO_TSD_EXCLUSIVE_SUPPORTED (!SANITIZER_ANDROID && !SANITIZER_FUCHSIA)
+
+#ifndef SCUDO_TSD_EXCLUSIVE
+// SCUDO_TSD_EXCLUSIVE wasn't defined, use a default TSD model for the platform.
+# if SANITIZER_ANDROID || SANITIZER_FUCHSIA
+// Android and Fuchsia use a pool of TSDs shared between threads.
+#  define SCUDO_TSD_EXCLUSIVE 0
+# elif SANITIZER_LINUX && !SANITIZER_ANDROID
+// Non-Android Linux uses an exclusive TSD per thread.
+#  define SCUDO_TSD_EXCLUSIVE 1
+# else
+#  error "No default TSD model defined for this platform."
+# endif  // SANITIZER_ANDROID || SANITIZER_FUCHSIA
+#endif  // SCUDO_TSD_EXCLUSIVE
+
+// If the exclusive TSD model is chosen, make sure the platform supports it.
+#if SCUDO_TSD_EXCLUSIVE && !SCUDO_TSD_EXCLUSIVE_SUPPORTED
+# error "The exclusive TSD model is not supported on this platform."
+#endif
+
+// Maximum number of TSDs that can be created for the Shared model.
+#ifndef SCUDO_SHARED_TSD_POOL_SIZE
+# define SCUDO_SHARED_TSD_POOL_SIZE 32U
+#endif  // SCUDO_SHARED_TSD_POOL_SIZE
+
+// The following allows the public interface functions to be disabled.
+#ifndef SCUDO_CAN_USE_PUBLIC_INTERFACE
+# define SCUDO_CAN_USE_PUBLIC_INTERFACE 1
+#endif
+
+namespace __scudo {
+
+#if SANITIZER_CAN_USE_ALLOCATOR64
+# if defined(__aarch64__) && SANITIZER_ANDROID
+const uptr AllocatorSize = 0x4000000000ULL;  // 256G.
+# elif defined(__aarch64__)
+const uptr AllocatorSize = 0x10000000000ULL;  // 1T.
+# else
+const uptr AllocatorSize = 0x40000000000ULL;  // 4T.
+# endif
+#else
+const uptr RegionSizeLog = SANITIZER_ANDROID ? 19 : 20;
+#endif  // SANITIZER_CAN_USE_ALLOCATOR64
+
+#if !defined(SCUDO_SIZE_CLASS_MAP)
+# define SCUDO_SIZE_CLASS_MAP Default
+#endif
+
+#define SIZE_CLASS_MAP_TYPE SIZE_CLASS_MAP_TYPE_(SCUDO_SIZE_CLASS_MAP)
+#define SIZE_CLASS_MAP_TYPE_(T) SIZE_CLASS_MAP_TYPE__(T)
+#define SIZE_CLASS_MAP_TYPE__(T) T##SizeClassMap
+
+typedef SIZE_CLASS_MAP_TYPE SizeClassMap;
+
+}  // namespace __scudo
+
+#endif // SCUDO_PLATFORM_H_
diff --git a/lib/scudo/scudo_tls.h b/lib/scudo/scudo_tls.h
deleted file mode 100644
index 20c49204cf134..0000000000000
--- a/lib/scudo/scudo_tls.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//===-- scudo_tls.h ---------------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// Scudo thread local structure definition.
-/// Implementation will differ based on the thread local storage primitives
-/// offered by the underlying platform.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef SCUDO_TLS_H_
-#define SCUDO_TLS_H_
-
-#include "scudo_allocator.h"
-#include "scudo_utils.h"
-
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_platform.h"
-
-namespace __scudo {
-
-// Platform specific base thread context definitions.
-#include "scudo_tls_context_android.inc"
-#include "scudo_tls_context_linux.inc"
-
-struct ALIGNED(64) ScudoThreadContext : public ScudoThreadContextPlatform {
-  AllocatorCache Cache;
-  ScudoPrng Prng;
-  uptr QuarantineCachePlaceHolder[4];
-  void init();
-  void commitBack();
-};
-
-void initThread();
-
-// Platform specific dastpath functions definitions.
-#include "scudo_tls_android.inc"
-#include "scudo_tls_linux.inc"
-
-}  // namespace __scudo
-
-#endif  // SCUDO_TLS_H_
diff --git a/lib/scudo/scudo_tls_android.cpp b/lib/scudo/scudo_tls_android.cpp
deleted file mode 100644
index ec74e37c8dbc0..0000000000000
--- a/lib/scudo/scudo_tls_android.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-//===-- scudo_tls_android.cpp -----------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// Scudo thread local structure implementation for Android.
-///
-//===----------------------------------------------------------------------===//
-
-#include "sanitizer_common/sanitizer_platform.h"
-
-#if SANITIZER_LINUX && SANITIZER_ANDROID
-
-#include "scudo_tls.h"
-
-#include <pthread.h>
-
-namespace __scudo {
-
-static pthread_once_t GlobalInitialized = PTHREAD_ONCE_INIT;
-static pthread_key_t PThreadKey;
-
-static atomic_uint32_t ThreadContextCurrentIndex;
-static ScudoThreadContext *ThreadContexts;
-static uptr NumberOfContexts;
-
-// sysconf(_SC_NPROCESSORS_{CONF,ONLN}) cannot be used as they allocate memory.
-static uptr getNumberOfCPUs() {
-  cpu_set_t CPUs;
-  CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &CPUs), 0);
-  return CPU_COUNT(&CPUs);
-}
-
-static void initOnce() {
-  // Hack: TLS_SLOT_TSAN was introduced in N. To be able to use it on M for
-  // testing, we create an unused key. Since the key_data array follows the tls
-  // array, it basically gives us the extra entry we need.
-  // TODO(kostyak): remove and restrict to N and above.
-  CHECK_EQ(pthread_key_create(&PThreadKey, NULL), 0);
-  initScudo();
-  NumberOfContexts = getNumberOfCPUs();
-  ThreadContexts = reinterpret_cast<ScudoThreadContext *>(
-      MmapOrDie(sizeof(ScudoThreadContext) * NumberOfContexts, __func__));
-  for (uptr i = 0; i < NumberOfContexts; i++)
-    ThreadContexts[i].init();
-}
-
-void initThread() {
-  pthread_once(&GlobalInitialized, initOnce);
-  // Initial context assignment is done in a plain round-robin fashion.
-  u32 Index = atomic_fetch_add(&ThreadContextCurrentIndex, 1,
-                               memory_order_relaxed);
-  ScudoThreadContext *ThreadContext =
-      &ThreadContexts[Index % NumberOfContexts];
-  *get_android_tls_ptr() = reinterpret_cast<uptr>(ThreadContext);
-}
-
-ScudoThreadContext *getThreadContextAndLockSlow() {
-  ScudoThreadContext *ThreadContext;
-  // Go through all the contexts and find the first unlocked one. 
-  for (u32 i = 0; i < NumberOfContexts; i++) {
-    ThreadContext = &ThreadContexts[i];
-    if (ThreadContext->tryLock()) {
-      *get_android_tls_ptr() = reinterpret_cast<uptr>(ThreadContext);
-      return ThreadContext;
-    }
-  }
-  // No luck, find the one with the lowest precedence, and slow lock it.
-  u64 Precedence = UINT64_MAX;
-  for (u32 i = 0; i < NumberOfContexts; i++) {
-    u64 SlowLockPrecedence = ThreadContexts[i].getSlowLockPrecedence();
-    if (SlowLockPrecedence && SlowLockPrecedence < Precedence) {
-      ThreadContext = &ThreadContexts[i];
-      Precedence = SlowLockPrecedence;
-    }
-  }
-  if (LIKELY(Precedence != UINT64_MAX)) {
-    ThreadContext->lock();
-    *get_android_tls_ptr() = reinterpret_cast<uptr>(ThreadContext);
-    return ThreadContext;
-  }
-  // Last resort (can this happen?), stick with the current one.
-  ThreadContext =
-      reinterpret_cast<ScudoThreadContext *>(*get_android_tls_ptr());
-  ThreadContext->lock();
-  return ThreadContext;
-}
-
-}  // namespace __scudo
-
-#endif  // SANITIZER_LINUX && SANITIZER_ANDROID
diff --git a/lib/scudo/scudo_tls_android.inc b/lib/scudo/scudo_tls_android.inc
deleted file mode 100644
index 8ecad7a30a6c7..0000000000000
--- a/lib/scudo/scudo_tls_android.inc
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- scudo_tls_android.inc -----------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// Scudo thread local structure fastpath functions implementation for Android.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef SCUDO_TLS_ANDROID_H_
-#define SCUDO_TLS_ANDROID_H_
-
-#ifndef SCUDO_TLS_H_
-# error "This file must be included inside scudo_tls.h."
-#endif  // SCUDO_TLS_H_
-
-#if SANITIZER_LINUX && SANITIZER_ANDROID
-
-ALWAYS_INLINE void initThreadMaybe() {
-  if (LIKELY(*get_android_tls_ptr()))
-    return;
-  initThread();
-}
-
-ScudoThreadContext *getThreadContextAndLockSlow();
-
-ALWAYS_INLINE ScudoThreadContext *getThreadContextAndLock() {
-  ScudoThreadContext *ThreadContext =
-      reinterpret_cast<ScudoThreadContext *>(*get_android_tls_ptr());
-  CHECK(ThreadContext);
-  // Try to lock the currently associated context.
-  if (ThreadContext->tryLock())
-    return ThreadContext;
-  // If it failed, go the slow path.
-  return getThreadContextAndLockSlow();
-}
-
-#endif  // SANITIZER_LINUX && SANITIZER_ANDROID
-
-#endif  // SCUDO_TLS_ANDROID_H_
diff --git a/lib/scudo/scudo_tls_context_android.inc b/lib/scudo/scudo_tls_context_android.inc
deleted file mode 100644
index f1951319d487c..0000000000000
--- a/lib/scudo/scudo_tls_context_android.inc
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- scudo_tls_context_android.inc ---------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// Android specific base thread context definition.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef SCUDO_TLS_CONTEXT_ANDROID_INC_
-#define SCUDO_TLS_CONTEXT_ANDROID_INC_
-
-#ifndef SCUDO_TLS_H_
-# error "This file must be included inside scudo_tls.h."
-#endif  // SCUDO_TLS_H_
-
-#if SANITIZER_LINUX && SANITIZER_ANDROID
-
-struct ScudoThreadContextPlatform {
-  INLINE bool tryLock() {
-    if (Mutex.TryLock()) {
-      atomic_store_relaxed(&SlowLockPrecedence, 0);
-      return true;
-    }
-    if (atomic_load_relaxed(&SlowLockPrecedence) == 0)
-      atomic_store_relaxed(&SlowLockPrecedence, NanoTime());
-    return false;
-  }
-
-  INLINE void lock() {
-    Mutex.Lock();
-    atomic_store_relaxed(&SlowLockPrecedence, 0);
-  }
-
-  INLINE void unlock() {
-    Mutex.Unlock();
-  }
-
-  INLINE u64 getSlowLockPrecedence() {
-    return atomic_load_relaxed(&SlowLockPrecedence);
-  }
-
- private:
-  StaticSpinMutex Mutex;
-  atomic_uint64_t SlowLockPrecedence;
-};
-
-#endif  // SANITIZER_LINUX && SANITIZER_ANDROID
-
-#endif  // SCUDO_TLS_CONTEXT_ANDROID_INC_
diff --git a/lib/scudo/scudo_tls_context_linux.inc b/lib/scudo/scudo_tls_context_linux.inc
deleted file mode 100644
index 8d292bdbc932b..0000000000000
--- a/lib/scudo/scudo_tls_context_linux.inc
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- scudo_tls_context_linux.inc -----------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// Linux specific base thread context definition.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef SCUDO_TLS_CONTEXT_LINUX_INC_
-#define SCUDO_TLS_CONTEXT_LINUX_INC_
-
-#ifndef SCUDO_TLS_H_
-# error "This file must be included inside scudo_tls.h."
-#endif  // SCUDO_TLS_H_
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-
-struct ScudoThreadContextPlatform {
-  ALWAYS_INLINE void unlock() {}
-};
-
-#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
-
-#endif  // SCUDO_TLS_CONTEXT_LINUX_INC_
diff --git a/lib/scudo/scudo_tls_linux.inc b/lib/scudo/scudo_tls_linux.inc
deleted file mode 100644
index 242ee3329ea86..0000000000000
--- a/lib/scudo/scudo_tls_linux.inc
+++ /dev/null
@@ -1,48 +0,0 @@
-//===-- scudo_tls_linux.inc -------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// Scudo thread local structure fastpath functions implementation for platforms
-/// supporting thread_local.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef SCUDO_TLS_LINUX_H_
-#define SCUDO_TLS_LINUX_H_
-
-#ifndef SCUDO_TLS_H_
-# error "This file must be included inside scudo_tls.h."
-#endif  // SCUDO_TLS_H_
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-
-enum ThreadState : u8 {
-  ThreadNotInitialized = 0,
-  ThreadInitialized,
-  ThreadTornDown,
-};
-__attribute__((tls_model("initial-exec")))
-extern THREADLOCAL ThreadState ScudoThreadState;
-__attribute__((tls_model("initial-exec")))
-extern THREADLOCAL ScudoThreadContext ThreadLocalContext;
-
-ALWAYS_INLINE void initThreadMaybe() {
-  if (LIKELY(ScudoThreadState != ThreadNotInitialized))
-    return;
-  initThread();
-}
-
-ALWAYS_INLINE ScudoThreadContext *getThreadContextAndLock() {
-  if (UNLIKELY(ScudoThreadState == ThreadTornDown))
-    return nullptr;
-  return &ThreadLocalContext;
-}
-
-#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
-
-#endif  // SCUDO_TLS_LINUX_H_
diff --git a/lib/scudo/scudo_tsd.h b/lib/scudo/scudo_tsd.h
new file mode 100644
index 0000000000000..80464b5ea1e4a
--- /dev/null
+++ b/lib/scudo/scudo_tsd.h
@@ -0,0 +1,72 @@
+//===-- scudo_tsd.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo thread specific data definition.
+/// Implementation will differ based on the thread local storage primitives
+/// offered by the underlying platform.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TSD_H_
+#define SCUDO_TSD_H_
+
+#include "scudo_allocator.h"
+#include "scudo_utils.h"
+
+#include <pthread.h>
+
+namespace __scudo {
+
+struct ALIGNED(64) ScudoTSD {
+  AllocatorCache Cache;
+  uptr QuarantineCachePlaceHolder[4];
+
+  void init(bool Shared);
+  void commitBack();
+
+  INLINE bool tryLock() {
+    if (Mutex.TryLock()) {
+      atomic_store_relaxed(&Precedence, 0);
+      return true;
+    }
+    if (atomic_load_relaxed(&Precedence) == 0)
+      atomic_store_relaxed(&Precedence, MonotonicNanoTime());
+    return false;
+  }
+
+  INLINE void lock() {
+    Mutex.Lock();
+    atomic_store_relaxed(&Precedence, 0);
+  }
+
+  INLINE void unlock() {
+    if (!UnlockRequired)
+      return;
+    Mutex.Unlock();
+  }
+
+  INLINE u64 getPrecedence() {
+    return atomic_load_relaxed(&Precedence);
+  }
+
+ private:
+  bool UnlockRequired;
+  StaticSpinMutex Mutex;
+  atomic_uint64_t Precedence;
+};
+
+void initThread(bool MinimalInit);
+
+// TSD model specific fastpath function definitions.
+#include "scudo_tsd_exclusive.inc"
+#include "scudo_tsd_shared.inc"
+
+}  // namespace __scudo
+
+#endif  // SCUDO_TSD_H_
diff --git a/lib/scudo/scudo_tls_linux.cpp b/lib/scudo/scudo_tsd_exclusive.cpp
index 1e38233f339c4..1084dfac91e1b 100644
--- a/lib/scudo/scudo_tls_linux.cpp
+++ b/lib/scudo/scudo_tsd_exclusive.cpp
@@ -1,4 +1,4 @@
-//===-- scudo_tls_linux.cpp -------------------------------------*- C++ -*-===//
+//===-- scudo_tsd_exclusive.cpp ---------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,18 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 ///
-/// Scudo thread local structure implementation for platforms supporting
-/// thread_local.
+/// Scudo exclusive TSD implementation.
 ///
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_common/sanitizer_platform.h"
+#include "scudo_tsd.h"
 
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-
-#include "scudo_tls.h"
-
-#include <pthread.h>
+#if SCUDO_TSD_EXCLUSIVE
 
 namespace __scudo {
 
@@ -28,7 +23,11 @@ static pthread_key_t PThreadKey;
 __attribute__((tls_model("initial-exec")))
 THREADLOCAL ThreadState ScudoThreadState = ThreadNotInitialized;
 __attribute__((tls_model("initial-exec")))
-THREADLOCAL ScudoThreadContext ThreadLocalContext;
+THREADLOCAL ScudoTSD TSD;
+
+// Fallback TSD for when the thread isn't initialized yet or is torn down. It
+// can be shared between multiple threads and as such must be locked.
+ScudoTSD FallbackTSD;
 
 static void teardownThread(void *Ptr) {
   uptr I = reinterpret_cast<uptr>(Ptr);
@@ -43,7 +42,7 @@ static void teardownThread(void *Ptr) {
                                    reinterpret_cast<void *>(I - 1)) == 0))
       return;
   }
-  ThreadLocalContext.commitBack();
+  TSD.commitBack();
   ScudoThreadState = ThreadTornDown;
 }
 
@@ -51,16 +50,19 @@ static void teardownThread(void *Ptr) {
 static void initOnce() {
   CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread), 0);
   initScudo();
+  FallbackTSD.init(/*Shared=*/true);
 }
 
-void initThread() {
+void initThread(bool MinimalInit) {
   CHECK_EQ(pthread_once(&GlobalInitialized, initOnce), 0);
+  if (UNLIKELY(MinimalInit))
+    return;
   CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(
       GetPthreadDestructorIterations())), 0);
-  ThreadLocalContext.init();
+  TSD.init(/*Shared=*/false);
   ScudoThreadState = ThreadInitialized;
 }
 
 }  // namespace __scudo
 
-#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
+#endif  // SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_tsd_exclusive.inc b/lib/scudo/scudo_tsd_exclusive.inc
new file mode 100644
index 0000000000000..567b6a1edd12b
--- /dev/null
+++ b/lib/scudo/scudo_tsd_exclusive.inc
@@ -0,0 +1,46 @@
+//===-- scudo_tsd_exclusive.inc ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo exclusive TSD fastpath functions implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TSD_H_
+# error "This file must be included inside scudo_tsd.h."
+#endif  // SCUDO_TSD_H_
+
+#if SCUDO_TSD_EXCLUSIVE
+
+enum ThreadState : u8 {
+  ThreadNotInitialized = 0,
+  ThreadInitialized,
+  ThreadTornDown,
+};
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL ThreadState ScudoThreadState;
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL ScudoTSD TSD;
+
+extern ScudoTSD FallbackTSD;
+
+ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
+  if (LIKELY(ScudoThreadState != ThreadNotInitialized))
+    return;
+  initThread(MinimalInit);
+}
+
+ALWAYS_INLINE ScudoTSD *getTSDAndLock() {
+  if (UNLIKELY(ScudoThreadState != ThreadInitialized)) {
+    FallbackTSD.lock();
+    return &FallbackTSD;
+  }
+  return &TSD;
+}
+
+#endif  // SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_tsd_shared.cpp b/lib/scudo/scudo_tsd_shared.cpp
new file mode 100644
index 0000000000000..3e13e5d3a109b
--- /dev/null
+++ b/lib/scudo/scudo_tsd_shared.cpp
@@ -0,0 +1,87 @@
+//===-- scudo_tsd_shared.cpp ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo shared TSD implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_tsd.h"
+
+#if !SCUDO_TSD_EXCLUSIVE
+
+namespace __scudo {
+
+static pthread_once_t GlobalInitialized = PTHREAD_ONCE_INIT;
+pthread_key_t PThreadKey;
+
+static atomic_uint32_t CurrentIndex;
+static ScudoTSD *TSDs;
+static u32 NumberOfTSDs;
+
+static void initOnce() {
+  CHECK_EQ(pthread_key_create(&PThreadKey, NULL), 0);
+  initScudo();
+  NumberOfTSDs = Min(Max(1U, GetNumberOfCPUsCached()),
+                     static_cast<u32>(SCUDO_SHARED_TSD_POOL_SIZE));
+  TSDs = reinterpret_cast<ScudoTSD *>(
+      MmapOrDie(sizeof(ScudoTSD) * NumberOfTSDs, "ScudoTSDs"));
+  for (u32 i = 0; i < NumberOfTSDs; i++)
+    TSDs[i].init(/*Shared=*/true);
+}
+
+ALWAYS_INLINE void setCurrentTSD(ScudoTSD *TSD) {
+#if SANITIZER_ANDROID
+  *get_android_tls_ptr() = reinterpret_cast<uptr>(TSD);
+#else
+  CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(TSD)), 0);
+#endif  // SANITIZER_ANDROID
+}
+
+void initThread(bool MinimalInit) {
+  pthread_once(&GlobalInitialized, initOnce);
+  // Initial context assignment is done in a plain round-robin fashion.
+  u32 Index = atomic_fetch_add(&CurrentIndex, 1, memory_order_relaxed);
+  setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
+}
+
+ScudoTSD *getTSDAndLockSlow() {
+  ScudoTSD *TSD;
+  if (NumberOfTSDs > 1) {
+    // Go through all the contexts and find the first unlocked one.
+    for (u32 i = 0; i < NumberOfTSDs; i++) {
+      TSD = &TSDs[i];
+      if (TSD->tryLock()) {
+        setCurrentTSD(TSD);
+        return TSD;
+      }
+    }
+    // No luck, find the one with the lowest Precedence, and slow lock it.
+    u64 LowestPrecedence = UINT64_MAX;
+    for (u32 i = 0; i < NumberOfTSDs; i++) {
+      u64 Precedence = TSDs[i].getPrecedence();
+      if (Precedence && Precedence < LowestPrecedence) {
+        TSD = &TSDs[i];
+        LowestPrecedence = Precedence;
+      }
+    }
+    if (LIKELY(LowestPrecedence != UINT64_MAX)) {
+      TSD->lock();
+      setCurrentTSD(TSD);
+      return TSD;
+    }
+  }
+  // Last resort, stick with the current one.
+  TSD = getCurrentTSD();
+  TSD->lock();
+  return TSD;
+}
+
+}  // namespace __scudo
+
+#endif  // !SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_tsd_shared.inc b/lib/scudo/scudo_tsd_shared.inc
new file mode 100644
index 0000000000000..79fcd651ed2da
--- /dev/null
+++ b/lib/scudo/scudo_tsd_shared.inc
@@ -0,0 +1,48 @@
+//===-- scudo_tsd_shared.inc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo shared TSD fastpath functions implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TSD_H_
+# error "This file must be included inside scudo_tsd.h."
+#endif  // SCUDO_TSD_H_
+
+#if !SCUDO_TSD_EXCLUSIVE
+
+extern pthread_key_t PThreadKey;
+
+ALWAYS_INLINE ScudoTSD* getCurrentTSD() {
+#if SANITIZER_ANDROID
+  return reinterpret_cast<ScudoTSD *>(*get_android_tls_ptr());
+#else
+  return reinterpret_cast<ScudoTSD *>(pthread_getspecific(PThreadKey));
+#endif  // SANITIZER_ANDROID
+}
+
+ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
+  if (LIKELY(getCurrentTSD()))
+    return;
+  initThread(MinimalInit);
+}
+
+ScudoTSD *getTSDAndLockSlow();
+
+ALWAYS_INLINE ScudoTSD *getTSDAndLock() {
+  ScudoTSD *TSD = getCurrentTSD();
+  CHECK(TSD && "No TSD associated with the current thread!");
+  // Try to lock the currently associated context.
+  if (TSD->tryLock())
+    return TSD;
+  // If it failed, go the slow path.
+  return getTSDAndLockSlow();
+}
+
+#endif  // !SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp
index f7903ff34c73c..2f936bf9e780e 100644
--- a/lib/scudo/scudo_utils.cpp
+++ b/lib/scudo/scudo_utils.cpp
@@ -13,17 +13,18 @@
 
 #include "scudo_utils.h"
 
-#include <errno.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <unistd.h>
 #if defined(__x86_64__) || defined(__i386__)
 # include <cpuid.h>
-#endif
-#if defined(__arm__) || defined(__aarch64__)
-# include <sys/auxv.h>
+#elif defined(__arm__) || defined(__aarch64__)
+# include "sanitizer_common/sanitizer_getauxval.h"
+# if SANITIZER_POSIX
+#  include "sanitizer_common/sanitizer_posix.h"
+#  include <fcntl.h>
+# endif
 #endif
 
+#include <stdarg.h>
+
 // TODO(kostyak): remove __sanitizer *Printf uses in favor for our own less
 //                complicated string formatting code. The following is a
 //                temporary workaround to be able to use __sanitizer::VSNPrintf.
@@ -36,13 +37,12 @@ extern int VSNPrintf(char *buff, int buff_length, const char *format,
 
 namespace __scudo {
 
-FORMAT(1, 2)
-void NORETURN dieWithMessage(const char *Format, ...) {
+FORMAT(1, 2) void NORETURN dieWithMessage(const char *Format, ...) {
   // Our messages are tiny, 256 characters is more than enough.
   char Message[256];
   va_list Args;
   va_start(Args, Format);
-  __sanitizer::VSNPrintf(Message, sizeof(Message), Format, Args);
+  VSNPrintf(Message, sizeof(Message), Format, Args);
   va_end(Args);
   RawWrite(Message);
   Die();
@@ -51,76 +51,68 @@ void NORETURN dieWithMessage(const char *Format, ...) {
 #if defined(__x86_64__) || defined(__i386__)
 // i386 and x86_64 specific code to detect CRC32 hardware support via CPUID.
 // CRC32 requires the SSE 4.2 instruction set.
-typedef struct {
-  u32 Eax;
-  u32 Ebx;
-  u32 Ecx;
-  u32 Edx;
-} CPUIDRegs;
-
-static void getCPUID(CPUIDRegs *Regs, u32 Level)
-{
-  __get_cpuid(Level, &Regs->Eax, &Regs->Ebx, &Regs->Ecx, &Regs->Edx);
+# ifndef bit_SSE4_2
+#  define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
+# endif
+bool hasHardwareCRC32() {
+  u32 Eax, Ebx, Ecx, Edx;
+  __get_cpuid(0, &Eax, &Ebx, &Ecx, &Edx);
+  const bool IsIntel = (Ebx == signature_INTEL_ebx) &&
+                       (Edx == signature_INTEL_edx) &&
+                       (Ecx == signature_INTEL_ecx);
+  const bool IsAMD = (Ebx == signature_AMD_ebx) &&
+                     (Edx == signature_AMD_edx) &&
+                     (Ecx == signature_AMD_ecx);
+  if (!IsIntel && !IsAMD)
+    return false;
+  __get_cpuid(1, &Eax, &Ebx, &Ecx, &Edx);
+  return !!(Ecx & bit_SSE4_2);
 }
-
-CPUIDRegs getCPUFeatures() {
-  CPUIDRegs VendorRegs = {};
-  getCPUID(&VendorRegs, 0);
-  bool IsIntel =
-      (VendorRegs.Ebx == signature_INTEL_ebx) &&
-      (VendorRegs.Edx == signature_INTEL_edx) &&
-      (VendorRegs.Ecx == signature_INTEL_ecx);
-  bool IsAMD =
-      (VendorRegs.Ebx == signature_AMD_ebx) &&
-      (VendorRegs.Edx == signature_AMD_edx) &&
-      (VendorRegs.Ecx == signature_AMD_ecx);
-  // Default to an empty feature set if not on a supported CPU.
-  CPUIDRegs FeaturesRegs = {};
-  if (IsIntel || IsAMD) {
-    getCPUID(&FeaturesRegs, 1);
-  }
-  return FeaturesRegs;
-}
-
-#ifndef bit_SSE4_2
-# define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
-#endif
-
-bool testCPUFeature(CPUFeature Feature)
-{
-  CPUIDRegs FeaturesRegs = getCPUFeatures();
-
-  switch (Feature) {
-    case CRC32CPUFeature:  // CRC32 is provided by SSE 4.2.
-      return !!(FeaturesRegs.Ecx & bit_SSE4_2);
-    default:
+#elif defined(__arm__) || defined(__aarch64__)
+// For ARM and AArch64, hardware CRC32 support is indicated in the AT_HWCAP
+// auxiliary vector.
+# ifndef AT_HWCAP
+#  define AT_HWCAP 16
+# endif
+# ifndef HWCAP_CRC32
+#  define HWCAP_CRC32 (1 << 7)  // HWCAP_CRC32 is missing on older platforms.
+# endif
+# if SANITIZER_POSIX
+bool hasHardwareCRC32ARMPosix() {
+  uptr F = internal_open("/proc/self/auxv", O_RDONLY);
+  if (internal_iserror(F))
+    return false;
+  struct { uptr Tag; uptr Value; } Entry = { 0, 0 };
+  for (;;) {
+    uptr N = internal_read(F, &Entry, sizeof(Entry));
+    if (internal_iserror(N) || N != sizeof(Entry) ||
+        (Entry.Tag == 0 && Entry.Value == 0) || Entry.Tag == AT_HWCAP)
       break;
   }
-  return false;
+  internal_close(F);
+  return (Entry.Tag == AT_HWCAP && (Entry.Value & HWCAP_CRC32) != 0);
+}
+# else
+bool hasHardwareCRC32ARMPosix() { return false; }
+# endif  // SANITIZER_POSIX
+
+// Bionic doesn't initialize its globals early enough. This causes issues when
+// trying to access them from a preinit_array (b/25751302) or from another
+// constructor called before the libc one (b/68046352). __progname is
+// initialized after the other globals, so we can check its value to know if
+// calling getauxval is safe.
+extern "C" SANITIZER_WEAK_ATTRIBUTE char *__progname;
+INLINE bool areBionicGlobalsInitialized() {
+  return !SANITIZER_ANDROID || (&__progname && __progname);
 }
-#elif defined(__arm__) || defined(__aarch64__)
-// For ARM and AArch64, hardware CRC32 support is indicated in the
-// AT_HWVAL auxiliary vector.
-
-#ifndef HWCAP_CRC32
-# define HWCAP_CRC32 (1<<7)  // HWCAP_CRC32 is missing on older platforms.
-#endif
-
-bool testCPUFeature(CPUFeature Feature) {
-  uptr HWCap = getauxval(AT_HWCAP);
 
-  switch (Feature) {
-    case CRC32CPUFeature:
-      return !!(HWCap & HWCAP_CRC32);
-    default:
-      break;
-  }
-  return false;
+bool hasHardwareCRC32() {
+  if (&getauxval && areBionicGlobalsInitialized())
+    return !!(getauxval(AT_HWCAP) & HWCAP_CRC32);
+  return hasHardwareCRC32ARMPosix();
 }
 #else
-bool testCPUFeature(CPUFeature Feature) {
-  return false;
-}
+bool hasHardwareCRC32() { return false; }
 #endif  // defined(__x86_64__) || defined(__i386__)
 
 }  // namespace __scudo
diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h
index 6c6c9d8934045..43448e0831e83 100644
--- a/lib/scudo/scudo_utils.h
+++ b/lib/scudo/scudo_utils.h
@@ -14,14 +14,14 @@
 #ifndef SCUDO_UTILS_H_
 #define SCUDO_UTILS_H_
 
-#include <string.h>
-
 #include "sanitizer_common/sanitizer_common.h"
 
+#include <string.h>
+
 namespace __scudo {
 
 template <class Dest, class Source>
-inline Dest bit_cast(const Source& source) {
+INLINE Dest bit_cast(const Source& source) {
   static_assert(sizeof(Dest) == sizeof(Source), "Sizes are not equal!");
   Dest dest;
   memcpy(&dest, &source, sizeof(dest));
@@ -30,63 +30,7 @@ inline Dest bit_cast(const Source& source) {
 
 void NORETURN dieWithMessage(const char *Format, ...);
 
-enum CPUFeature {
-  CRC32CPUFeature = 0,
-  MaxCPUFeature,
-};
-bool testCPUFeature(CPUFeature feature);
-
-INLINE u64 rotl(const u64 X, int K) {
-  return (X << K) | (X >> (64 - K));
-}
-
-// XoRoShiRo128+ PRNG (http://xoroshiro.di.unimi.it/).
-struct XoRoShiRo128Plus {
- public:
-  void init() {
-    if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(State), sizeof(State)))) {
-      // Early processes (eg: init) do not have /dev/urandom yet, but we still
-      // have to provide them with some degree of entropy. Not having a secure
-      // seed is not as problematic for them, as they are less likely to be
-      // the target of heap based vulnerabilities exploitation attempts.
-      State[0] = NanoTime();
-      State[1] = 0;
-    }
-    fillCache();
-  }
-  u8 getU8() {
-    if (UNLIKELY(isCacheEmpty()))
-      fillCache();
-    const u8 Result = static_cast<u8>(CachedBytes & 0xff);
-    CachedBytes >>= 8;
-    CachedBytesAvailable--;
-    return Result;
-  }
-  u64 getU64() { return next(); }
-
- private:
-  u8 CachedBytesAvailable;
-  u64 CachedBytes;
-  u64 State[2];
-  u64 next() {
-    const u64 S0 = State[0];
-    u64 S1 = State[1];
-    const u64 Result = S0 + S1;
-    S1 ^= S0;
-    State[0] = rotl(S0, 55) ^ S1 ^ (S1 << 14);
-    State[1] = rotl(S1, 36);
-    return Result;
-  }
-  bool isCacheEmpty() {
-    return CachedBytesAvailable == 0;
-  }
-  void fillCache() {
-    CachedBytes = next();
-    CachedBytesAvailable = sizeof(CachedBytes);
-  }
-};
-
-typedef XoRoShiRo128Plus ScudoPrng;
+bool hasHardwareCRC32();
 
 }  // namespace __scudo
author	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:11:54 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:11:54 +0000
commit	cdf4f3055e964bb585f294cf77cb549ead82783f (patch)
tree	7bceeca766b3fbe491245bc926a083f78c35d1de /lib/scudo
parent	625108084a3ec7c19c7745004c5af0ed7aa417a9 (diff)