Diffstat (limited to 'lib/xray/xray_buffer_queue.cc')
-rw-r--r-- | lib/xray/xray_buffer_queue.cc | 281
1 file changed, 174 insertions, 107 deletions
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index 3ce728900787..7d0e5a1f323c 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -13,141 +13,206 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_buffer_queue.h"
+#include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#if !SANITIZER_FUCHSIA
 #include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_allocator.h"
+#include "xray_defs.h"
 
 #include <memory>
 #include <sys/mman.h>
 
-#ifndef MAP_NORESERVE
-// no-op on NetBSD (at least), unsupported flag on FreeBSD
-#define MAP_NORESERVE 0
-#endif
-
 using namespace __xray;
-using namespace __sanitizer;
-
-template <class T> static T *allocRaw(size_t N) {
-  // TODO: Report errors?
-  // We use MAP_NORESERVE on platforms where it's supported to ensure that the
-  // pages we're allocating for XRay never end up in pages that can be swapped
-  // in/out. We're doing this because for FDR mode, we want to ensure that
-  // writes to the buffers stay resident in memory to prevent XRay itself from
-  // causing swapping/thrashing.
-  //
-  // In the case when XRay pages cannot be swapped in/out or there's not enough
-  // RAM to back these pages, we're willing to cause a segmentation fault
-  // instead of introducing latency in the measurement. We assume here that
-  // there are enough pages that are swappable in/out outside of the buffers
-  // being used by FDR mode (which are bounded and configurable anyway) to allow
-  // us to keep using always-resident memory.
-  //
-  // TODO: Make this configurable?
-  void *A = reinterpret_cast<void *>(
-      internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ,
-                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0));
-  return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A);
-}
 
-template <class T> static void deallocRaw(T *ptr, size_t N) {
-  // TODO: Report errors?
-  if (ptr != nullptr)
-    internal_munmap(ptr, N);
+namespace {
+
+BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) {
+  auto B =
+      allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count));
+  return B == nullptr ? nullptr
+                      : reinterpret_cast<BufferQueue::ControlBlock *>(B);
 }
 
-template <class T> static T *initArray(size_t N) {
-  auto A = allocRaw<T>(N);
-  if (A != nullptr)
-    while (N > 0)
-      new (A + (--N)) T();
-  return A;
+void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size,
+                         size_t Count) {
+  deallocateBuffer(reinterpret_cast<unsigned char *>(C),
+                   (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count));
 }
 
-BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
-    : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)),
-      BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)),
-      Next(Buffers), First(Buffers), LiveBuffers(0) {
-  if (Buffers == nullptr) {
-    Success = false;
+void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) {
+  if (C == nullptr)
     return;
-  }
-  if (OwnedBuffers == nullptr) {
-    // Clean up the buffers we've already allocated.
-    for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
-      B->~BufferRep();
-    deallocRaw(Buffers, N);
-    Success = false;
+  if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1)
+    deallocControlBlock(C, Size, Count);
+}
+
+void incRefCount(BufferQueue::ControlBlock *C) {
+  if (C == nullptr)
     return;
+  atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel);
+}
+
+// We use a struct to ensure that we are allocating one atomic_uint64_t per
+// cache line. This allows us to not worry about false-sharing among atomic
+// objects being updated (constantly) by different threads.
+struct ExtentsPadded {
+  union {
+    atomic_uint64_t Extents;
+    unsigned char Storage[kCacheLineSize];
   };
+};
 
-  for (size_t i = 0; i < N; ++i) {
-    auto &T = Buffers[i];
-    void *Tmp = allocRaw<char>(BufferSize);
-    if (Tmp == nullptr) {
-      Success = false;
+constexpr size_t kExtentsSize = sizeof(ExtentsPadded);
+
+} // namespace
+
+BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) {
+  SpinMutexLock Guard(&Mutex);
+
+  if (!finalizing())
+    return BufferQueue::ErrorCode::AlreadyInitialized;
+
+  cleanupBuffers();
+
+  bool Success = false;
+  BufferSize = BS;
+  BufferCount = BC;
+
+  BackingStore = allocControlBlock(BufferSize, BufferCount);
+  if (BackingStore == nullptr)
+    return BufferQueue::ErrorCode::NotEnoughMemory;
+
+  auto CleanupBackingStore = at_scope_exit([&, this] {
+    if (Success)
       return;
-    }
-    auto *Extents = allocRaw<BufferExtents>(1);
-    if (Extents == nullptr) {
-      Success = false;
+    deallocControlBlock(BackingStore, BufferSize, BufferCount);
+    BackingStore = nullptr;
+  });
+
+  // Initialize enough atomic_uint64_t instances, each
+  ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount);
+  if (ExtentsBackingStore == nullptr)
+    return BufferQueue::ErrorCode::NotEnoughMemory;
+
+  auto CleanupExtentsBackingStore = at_scope_exit([&, this] {
+    if (Success)
      return;
-    }
+    deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount);
+    ExtentsBackingStore = nullptr;
+  });
+
+  Buffers = initArray<BufferRep>(BufferCount);
+  if (Buffers == nullptr)
+    return BufferQueue::ErrorCode::NotEnoughMemory;
+
+  // At this point we increment the generation number to associate the buffers
+  // to the new generation.
+  atomic_fetch_add(&Generation, 1, memory_order_acq_rel);
+
+  // First, we initialize the refcount in the ControlBlock, which we treat as
+  // being at the start of the BackingStore pointer.
+  atomic_store(&BackingStore->RefCount, 1, memory_order_release);
+  atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release);
+
+  // Then we initialise the individual buffers that sub-divide the whole backing
+  // store. Each buffer will start at the `Data` member of the ControlBlock, and
+  // will be offsets from these locations.
+  for (size_t i = 0; i < BufferCount; ++i) {
+    auto &T = Buffers[i];
     auto &Buf = T.Buff;
-    Buf.Data = Tmp;
-    Buf.Size = B;
-    Buf.Extents = Extents;
-    OwnedBuffers[i] = Tmp;
+    auto *E = reinterpret_cast<ExtentsPadded *>(&ExtentsBackingStore->Data +
+                                                (kExtentsSize * i));
+    Buf.Extents = &E->Extents;
+    atomic_store(Buf.Extents, 0, memory_order_release);
+    Buf.Generation = generation();
+    Buf.Data = &BackingStore->Data + (BufferSize * i);
+    Buf.Size = BufferSize;
+    Buf.BackingStore = BackingStore;
+    Buf.ExtentsBackingStore = ExtentsBackingStore;
+    Buf.Count = BufferCount;
+    T.Used = false;
   }
+
+  Next = Buffers;
+  First = Buffers;
+  LiveBuffers = 0;
+  atomic_store(&Finalizing, 0, memory_order_release);
   Success = true;
+  return BufferQueue::ErrorCode::Ok;
+}
+
+BufferQueue::BufferQueue(size_t B, size_t N,
+                         bool &Success) XRAY_NEVER_INSTRUMENT
+    : BufferSize(B),
+      BufferCount(N),
+      Mutex(),
+      Finalizing{1},
+      BackingStore(nullptr),
+      ExtentsBackingStore(nullptr),
+      Buffers(nullptr),
+      Next(Buffers),
+      First(Buffers),
+      LiveBuffers(0),
+      Generation{0} {
+  Success = init(B, N) == BufferQueue::ErrorCode::Ok;
 }
 
 BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
   if (atomic_load(&Finalizing, memory_order_acquire))
     return ErrorCode::QueueFinalizing;
-  SpinMutexLock Guard(&Mutex);
-  if (LiveBuffers == BufferCount)
-    return ErrorCode::NotEnoughMemory;
 
-  auto &T = *Next;
-  auto &B = T.Buff;
-  Buf = B;
-  T.Used = true;
-  ++LiveBuffers;
-
-  if (++Next == (Buffers + BufferCount))
-    Next = Buffers;
+  BufferRep *B = nullptr;
+  {
+    SpinMutexLock Guard(&Mutex);
+    if (LiveBuffers == BufferCount)
+      return ErrorCode::NotEnoughMemory;
+    B = Next++;
+    if (Next == (Buffers + BufferCount))
+      Next = Buffers;
+    ++LiveBuffers;
+  }
 
+  incRefCount(BackingStore);
+  incRefCount(ExtentsBackingStore);
+  Buf = B->Buff;
+  Buf.Generation = generation();
+  B->Used = true;
   return ErrorCode::Ok;
 }
 
 BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
-  // Blitz through the buffers array to find the buffer.
-  bool Found = false;
-  for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
-    if (*I == Buf.Data) {
-      Found = true;
-      break;
+  // Check whether the buffer being referred to is within the bounds of the
+  // backing store's range.
+  BufferRep *B = nullptr;
+  {
+    SpinMutexLock Guard(&Mutex);
+    if (Buf.Generation != generation() || LiveBuffers == 0) {
+      Buf = {};
+      decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+      decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+      return BufferQueue::ErrorCode::Ok;
     }
-  }
-  if (!Found)
-    return ErrorCode::UnrecognizedBuffer;
 
-  SpinMutexLock Guard(&Mutex);
+    if (Buf.Data < &BackingStore->Data ||
+        Buf.Data > &BackingStore->Data + (BufferCount * BufferSize))
+      return BufferQueue::ErrorCode::UnrecognizedBuffer;
 
-  // This points to a semantic bug, we really ought to not be releasing more
-  // buffers than we actually get.
-  if (LiveBuffers == 0)
-    return ErrorCode::NotEnoughMemory;
+    --LiveBuffers;
+    B = First++;
+    if (First == (Buffers + BufferCount))
+      First = Buffers;
+  }
 
   // Now that the buffer has been released, we mark it as "used".
-  First->Buff = Buf;
-  First->Used = true;
-  Buf.Data = nullptr;
-  Buf.Size = 0;
-  --LiveBuffers;
-  if (++First == (Buffers + BufferCount))
-    First = Buffers;
-
+  B->Buff = Buf;
+  B->Used = true;
+  decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+  decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+  atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire),
+               memory_order_release);
+  Buf = {};
   return ErrorCode::Ok;
 }
 
@@ -157,15 +222,17 @@ BufferQueue::ErrorCode BufferQueue::finalize() {
   return ErrorCode::Ok;
 }
 
-BufferQueue::~BufferQueue() {
-  for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
-    auto &T = *I;
-    auto &Buf = T.Buff;
-    deallocRaw(Buf.Data, Buf.Size);
-    deallocRaw(Buf.Extents, 1);
-  }
+void BufferQueue::cleanupBuffers() {
   for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
     B->~BufferRep();
-  deallocRaw(Buffers, BufferCount);
-  deallocRaw(OwnedBuffers, BufferCount);
+  deallocateBuffer(Buffers, BufferCount);
+  decRefCount(BackingStore, BufferSize, BufferCount);
+  decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount);
+  BackingStore = nullptr;
+  ExtentsBackingStore = nullptr;
+  Buffers = nullptr;
+  BufferCount = 0;
+  BufferSize = 0;
 }
+
+BufferQueue::~BufferQueue() { cleanupBuffers(); }
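
The patch replaces per-buffer mmap allocations with one reference-counted backing store per generation: getBuffer takes a reference on the control blocks, releaseBuffer drops it, and the storage is freed only when the last outstanding buffer lets go, even if the queue has since been re-initialized. The standalone sketch below mirrors that pattern with standard C++ atomics and malloc/free instead of the sanitizer_common primitives (atomic_uint64_t, allocateBuffer, deallocateBuffer); the names ControlBlock, incRef, and decRef here are illustrative stand-ins, not the declarations from xray_buffer_queue.h.

// Sketch of the control-block refcounting used in the diff, expressed with
// standard C++ facilities. Assumption: plain malloc/free stand in for
// allocateBuffer/deallocateBuffer, and the "Data" tail is modelled with an
// over-allocated one-byte array, as in the real ControlBlock.
#include <atomic>
#include <cstddef>
#include <cstdlib>
#include <new>

struct ControlBlock {
  std::atomic<std::size_t> RefCount;
  char Data[1]; // Over-allocated: buffer i starts at Data + i * BufferSize.
};

ControlBlock *allocControlBlock(std::size_t BufferSize, std::size_t Count) {
  void *Raw = std::malloc(sizeof(ControlBlock) - 1 + BufferSize * Count);
  if (Raw == nullptr)
    return nullptr;
  auto *C = new (Raw) ControlBlock;                 // Start the object's lifetime.
  C->RefCount.store(1, std::memory_order_release);  // The queue's own reference.
  return C;
}

void incRef(ControlBlock *C) {
  if (C != nullptr)
    C->RefCount.fetch_add(1, std::memory_order_acq_rel);
}

void decRef(ControlBlock *C) {
  // Free the whole backing store only when the last reference goes away,
  // which may be a buffer released long after the queue re-initialized.
  if (C != nullptr &&
      C->RefCount.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    C->~ControlBlock();
    std::free(C);
  }
}

int main() {
  ControlBlock *C = allocControlBlock(/*BufferSize=*/4096, /*Count=*/8);
  incRef(C); // A buffer handed out by getBuffer takes a reference.
  decRef(C); // The holder releases its buffer.
  decRef(C); // The queue drops its own reference; the storage is freed here.
}

The acquire-release fetch_sub matches the diff's decRefCount: whichever thread drops the final reference observes all prior writes into the buffers before the storage is reclaimed.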
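
The ExtentsPadded union exists only to give each per-buffer extents counter its own cache line, so threads advancing different counters never contend on the same line. A minimal equivalent is sketched below using alignas, which is an assumed but equally valid way to express the same padding outside the sanitizer runtime (the real code sizes a union with kCacheLineSize instead, and 64 bytes is an assumed line size).

// One extents counter per cache line: concurrent writers to different buffers
// never false-share.
#include <atomic>
#include <cstdint>

struct alignas(64) PaddedExtents {
  std::atomic<std::uint64_t> Extents{0};
};

static_assert(sizeof(PaddedExtents) == 64,
              "each counter should occupy exactly one cache line");

// The moral equivalent of ExtentsBackingStore: element i records how many
// bytes buffer i has written so far.
PaddedExtents ExtentsForBuffers[8];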
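
For context, here is a hypothetical caller-side round trip through the interface visible in this diff (constructor, getBuffer, releaseBuffer, finalize). Field and enum names are taken from the code above, but the surrounding usage is illustrative rather than lifted from the FDR controller.

// Assumed usage sketch of BufferQueue; error handling abbreviated.
#include "xray_buffer_queue.h"

using namespace __xray;

void exampleRoundTrip() {
  bool Success = false;
  BufferQueue Q(/*BufferSize=*/4096, /*BufferCount=*/8, Success);
  if (!Success)
    return;

  BufferQueue::Buffer B;
  if (Q.getBuffer(B) != BufferQueue::ErrorCode::Ok)
    return;

  // ... append FDR records into B.Data and publish the number of valid bytes
  // through the atomic pointed to by B.Extents ...

  // releaseBuffer drops the references taken by getBuffer and clears B. If the
  // queue was re-init()-ed in between, the stale generation is detected and the
  // old backing store is simply unreferenced instead of being recycled.
  Q.releaseBuffer(B);

  Q.finalize(); // Stop handing out buffers so readers can drain what remains.
}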