Diffstat (limited to 'lib/xray/xray_buffer_queue.cc')
-rw-r--r--  lib/xray/xray_buffer_queue.cc | 281
1 file changed, 174 insertions(+), 107 deletions(-)
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index 3ce728900787..7d0e5a1f323c 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -13,141 +13,206 @@
//
//===----------------------------------------------------------------------===//
#include "xray_buffer_queue.h"
+#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
+#if !SANITIZER_FUCHSIA
#include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_allocator.h"
+#include "xray_defs.h"
#include <memory>
#include <sys/mman.h>
-#ifndef MAP_NORESERVE
-// no-op on NetBSD (at least), unsupported flag on FreeBSD
-#define MAP_NORESERVE 0
-#endif
-
using namespace __xray;
-using namespace __sanitizer;
-
-template <class T> static T *allocRaw(size_t N) {
- // TODO: Report errors?
- // We use MAP_NORESERVE on platforms where it's supported to ensure that the
- // pages we're allocating for XRay never end up in pages that can be swapped
- // in/out. We're doing this because for FDR mode, we want to ensure that
- // writes to the buffers stay resident in memory to prevent XRay itself from
- // causing swapping/thrashing.
- //
- // In the case when XRay pages cannot be swapped in/out or there's not enough
- // RAM to back these pages, we're willing to cause a segmentation fault
- // instead of introducing latency in the measurement. We assume here that
- // there are enough pages that are swappable in/out outside of the buffers
- // being used by FDR mode (which are bounded and configurable anyway) to allow
- // us to keep using always-resident memory.
- //
- // TODO: Make this configurable?
- void *A = reinterpret_cast<void *>(
- internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0));
- return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A);
-}
-template <class T> static void deallocRaw(T *ptr, size_t N) {
- // TODO: Report errors?
- if (ptr != nullptr)
- internal_munmap(ptr, N);
+namespace {
+
+BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) {
+ auto B =
+ allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count));
+ return B == nullptr ? nullptr
+ : reinterpret_cast<BufferQueue::ControlBlock *>(B);
}
-template <class T> static T *initArray(size_t N) {
- auto A = allocRaw<T>(N);
- if (A != nullptr)
- while (N > 0)
- new (A + (--N)) T();
- return A;
+void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size,
+ size_t Count) {
+ deallocateBuffer(reinterpret_cast<unsigned char *>(C),
+ (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count));
}
-BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
- : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)),
- BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)),
- Next(Buffers), First(Buffers), LiveBuffers(0) {
- if (Buffers == nullptr) {
- Success = false;
+void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) {
+ if (C == nullptr)
return;
- }
- if (OwnedBuffers == nullptr) {
- // Clean up the buffers we've already allocated.
- for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
- B->~BufferRep();
- deallocRaw(Buffers, N);
- Success = false;
+ if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1)
+ deallocControlBlock(C, Size, Count);
+}
+
+void incRefCount(BufferQueue::ControlBlock *C) {
+ if (C == nullptr)
return;
+ atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel);
+}
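
The allocControlBlock/incRefCount/decRefCount trio above implements a small intrusive refcount on each backing allocation: the queue holds one reference, every outstanding Buffer holds one more, and whichever decrement reaches zero frees the block. A minimal standalone sketch of the same scheme, using std::atomic and malloc/free in place of the sanitizer_common atomics and the XRay allocator (names mirror the patch but the code is illustrative only):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <new>

    struct ControlBlock {
      std::atomic<std::uint64_t> RefCount;
      char Data[1]; // First byte of the contiguous backing store.
    };

    ControlBlock *allocControlBlock(std::size_t Size, std::size_t Count) {
      // One header plus Size * Count bytes of payload; the "- 1" accounts for
      // the byte already reserved by Data[1].
      void *P = std::malloc((sizeof(ControlBlock) - 1) + Size * Count);
      if (P == nullptr)
        return nullptr;
      auto *C = new (P) ControlBlock;
      C->RefCount.store(1, std::memory_order_release); // The queue's reference.
      return C;
    }

    void incRefCount(ControlBlock *C) {
      if (C != nullptr)
        C->RefCount.fetch_add(1, std::memory_order_acq_rel);
    }

    void decRefCount(ControlBlock *C) {
      // Whoever drops the count to zero frees the block, so the storage stays
      // valid for as long as any live Buffer still points into it.
      if (C != nullptr &&
          C->RefCount.fetch_sub(1, std::memory_order_acq_rel) == 1) {
        C->~ControlBlock();
        std::free(C);
      }
    }
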
+
+// We use a struct to ensure that we allocate one atomic_uint64_t per cache
+// line. This prevents false sharing among the atomic counters, which are
+// constantly updated by different threads.
+struct ExtentsPadded {
+ union {
+ atomic_uint64_t Extents;
+ unsigned char Storage[kCacheLineSize];
};
+};
- for (size_t i = 0; i < N; ++i) {
- auto &T = Buffers[i];
- void *Tmp = allocRaw<char>(BufferSize);
- if (Tmp == nullptr) {
- Success = false;
+constexpr size_t kExtentsSize = sizeof(ExtentsPadded);
+
+} // namespace
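
The union above pads each per-buffer extents counter out to a full cache line, so two threads bumping the extents of different buffers never contend on the same line. A compile-time sanity check of that layout, assuming a 64-byte cache line and std::atomic in place of the sanitizer atomic type (the real kCacheLineSize comes from sanitizer_common):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kCacheLineSize = 64; // Assumption; platform-dependent.

    struct ExtentsPadded {
      union {
        std::atomic<std::uint64_t> Extents;
        unsigned char Storage[kCacheLineSize]; // Pads the counter to a full line.
      };
    };

    static_assert(sizeof(ExtentsPadded) == kCacheLineSize,
                  "exactly one extents counter per cache line");
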
+
+BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) {
+ SpinMutexLock Guard(&Mutex);
+
+ if (!finalizing())
+ return BufferQueue::ErrorCode::AlreadyInitialized;
+
+ cleanupBuffers();
+
+ bool Success = false;
+ BufferSize = BS;
+ BufferCount = BC;
+
+ BackingStore = allocControlBlock(BufferSize, BufferCount);
+ if (BackingStore == nullptr)
+ return BufferQueue::ErrorCode::NotEnoughMemory;
+
+ auto CleanupBackingStore = at_scope_exit([&, this] {
+ if (Success)
return;
- }
- auto *Extents = allocRaw<BufferExtents>(1);
- if (Extents == nullptr) {
- Success = false;
+ deallocControlBlock(BackingStore, BufferSize, BufferCount);
+ BackingStore = nullptr;
+ });
+
+ // Initialize one cache-line-padded atomic_uint64_t extents counter per
+ // buffer (see ExtentsPadded above).
+ ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount);
+ if (ExtentsBackingStore == nullptr)
+ return BufferQueue::ErrorCode::NotEnoughMemory;
+
+ auto CleanupExtentsBackingStore = at_scope_exit([&, this] {
+ if (Success)
return;
- }
+ deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount);
+ ExtentsBackingStore = nullptr;
+ });
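
Both cleanup guards above use at_scope_exit, a helper defined elsewhere in the XRay runtime: a small RAII wrapper that runs a callable when the enclosing scope exits, which is what lets init() bail out with plain `return` statements and still unwind the partially built state unless Success has been set. A generic C++17 sketch of the pattern (not the actual XRay helper; initSketch and its malloc'd block are illustrative):

    #include <cstdlib>
    #include <utility>

    template <class Fn> struct ScopeExit {
      Fn Cleanup;
      ~ScopeExit() { Cleanup(); } // Runs on every path out of the scope.
    };

    template <class Fn> ScopeExit<Fn> at_scope_exit(Fn F) {
      return {std::move(F)}; // C++17 guaranteed elision: constructed in place.
    }

    bool initSketch(bool FailHalfway) {
      bool Success = false;
      void *Backing = std::malloc(4096);
      if (Backing == nullptr)
        return false;
      auto Cleanup = at_scope_exit([&] {
        if (!Success) // Undo only if we never reached the end of init.
          std::free(Backing);
      });
      if (FailHalfway)
        return false; // Cleanup frees Backing on this early return.
      Success = true;
      return true; // Cleanup becomes a no-op; the caller now owns Backing.
    }
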
+
+ Buffers = initArray<BufferRep>(BufferCount);
+ if (Buffers == nullptr)
+ return BufferQueue::ErrorCode::NotEnoughMemory;
+
+ // At this point we increment the generation number to associate the buffers
+ // to the new generation.
+ atomic_fetch_add(&Generation, 1, memory_order_acq_rel);
+
+ // First, we initialize the refcount in the ControlBlock, which we treat as
+ // being at the start of the BackingStore pointer.
+ atomic_store(&BackingStore->RefCount, 1, memory_order_release);
+ atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release);
+
+ // Then we initialize the individual buffers that sub-divide the whole backing
+ // store. Each buffer's data starts at an offset from its ControlBlock's `Data`
+ // member, and its extents counter at the matching offset in the extents block.
+ for (size_t i = 0; i < BufferCount; ++i) {
+ auto &T = Buffers[i];
auto &Buf = T.Buff;
- Buf.Data = Tmp;
- Buf.Size = B;
- Buf.Extents = Extents;
- OwnedBuffers[i] = Tmp;
+ auto *E = reinterpret_cast<ExtentsPadded *>(&ExtentsBackingStore->Data +
+ (kExtentsSize * i));
+ Buf.Extents = &E->Extents;
+ atomic_store(Buf.Extents, 0, memory_order_release);
+ Buf.Generation = generation();
+ Buf.Data = &BackingStore->Data + (BufferSize * i);
+ Buf.Size = BufferSize;
+ Buf.BackingStore = BackingStore;
+ Buf.ExtentsBackingStore = ExtentsBackingStore;
+ Buf.Count = BufferCount;
+ T.Used = false;
}
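
The loop above gives each buffer its slice of the two shared allocations: data at &BackingStore->Data + (BufferSize * i) and an extents counter at &ExtentsBackingStore->Data + (kExtentsSize * i). A standalone sketch of the same carving arithmetic, with an illustrative BufferView type in place of the patch's Buffer:

    #include <cstddef>
    #include <vector>

    struct BufferView {
      char *Data;       // Start of this buffer's slice of the backing store.
      std::size_t Size; // Always the fixed per-buffer size.
    };

    // Carve one contiguous allocation of Size * Count bytes into Count
    // fixed-size views, mirroring the offset arithmetic in the loop above.
    std::vector<BufferView> subdivide(char *Base, std::size_t Size,
                                      std::size_t Count) {
      std::vector<BufferView> Views;
      Views.reserve(Count);
      for (std::size_t i = 0; i < Count; ++i)
        Views.push_back({Base + Size * i, Size});
      return Views;
    }
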
+
+ Next = Buffers;
+ First = Buffers;
+ LiveBuffers = 0;
+ atomic_store(&Finalizing, 0, memory_order_release);
Success = true;
+ return BufferQueue::ErrorCode::Ok;
+}
+
+BufferQueue::BufferQueue(size_t B, size_t N,
+ bool &Success) XRAY_NEVER_INSTRUMENT
+ : BufferSize(B),
+ BufferCount(N),
+ Mutex(),
+ Finalizing{1},
+ BackingStore(nullptr),
+ ExtentsBackingStore(nullptr),
+ Buffers(nullptr),
+ Next(Buffers),
+ First(Buffers),
+ LiveBuffers(0),
+ Generation{0} {
+ Success = init(B, N) == BufferQueue::ErrorCode::Ok;
}
BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
if (atomic_load(&Finalizing, memory_order_acquire))
return ErrorCode::QueueFinalizing;
- SpinMutexLock Guard(&Mutex);
- if (LiveBuffers == BufferCount)
- return ErrorCode::NotEnoughMemory;
- auto &T = *Next;
- auto &B = T.Buff;
- Buf = B;
- T.Used = true;
- ++LiveBuffers;
-
- if (++Next == (Buffers + BufferCount))
- Next = Buffers;
+ BufferRep *B = nullptr;
+ {
+ SpinMutexLock Guard(&Mutex);
+ if (LiveBuffers == BufferCount)
+ return ErrorCode::NotEnoughMemory;
+ B = Next++;
+ if (Next == (Buffers + BufferCount))
+ Next = Buffers;
+ ++LiveBuffers;
+ }
+ incRefCount(BackingStore);
+ incRefCount(ExtentsBackingStore);
+ Buf = B->Buff;
+ Buf.Generation = generation();
+ B->Used = true;
return ErrorCode::Ok;
}
BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
- // Blitz through the buffers array to find the buffer.
- bool Found = false;
- for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
- if (*I == Buf.Data) {
- Found = true;
- break;
+ // Check whether the buffer being referred to is within the bounds of the
+ // backing store's range.
+ BufferRep *B = nullptr;
+ {
+ SpinMutexLock Guard(&Mutex);
+ if (Buf.Generation != generation() || LiveBuffers == 0) {
+ Buf = {};
+ decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+ decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+ return BufferQueue::ErrorCode::Ok;
}
- }
- if (!Found)
- return ErrorCode::UnrecognizedBuffer;
- SpinMutexLock Guard(&Mutex);
+ if (Buf.Data < &BackingStore->Data ||
+ Buf.Data > &BackingStore->Data + (BufferCount * BufferSize))
+ return BufferQueue::ErrorCode::UnrecognizedBuffer;
- // This points to a semantic bug, we really ought to not be releasing more
- // buffers than we actually get.
- if (LiveBuffers == 0)
- return ErrorCode::NotEnoughMemory;
+ --LiveBuffers;
+ B = First++;
+ if (First == (Buffers + BufferCount))
+ First = Buffers;
+ }
// Now that the buffer has been released, we mark it as "used".
- First->Buff = Buf;
- First->Used = true;
- Buf.Data = nullptr;
- Buf.Size = 0;
- --LiveBuffers;
- if (++First == (Buffers + BufferCount))
- First = Buffers;
-
+ B->Buff = Buf;
+ B->Used = true;
+ decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+ decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+ atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire),
+ memory_order_release);
+ Buf = {};
return ErrorCode::Ok;
}
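
Taken together, getBuffer() hands out buffers round-robin while bumping the backing stores' refcounts, and releaseBuffer() validates the generation, records the extents, and drops those references. A hedged sketch of how a writer thread would drive this interface (the record writing and byte count are placeholders, not the actual FDR controller logic):

    #include "xray_buffer_queue.h"
    #include "sanitizer_common/sanitizer_atomic.h"

    void producerSketch(__xray::BufferQueue &BQ) {
      __xray::BufferQueue::Buffer Buf;
      if (BQ.getBuffer(Buf) != __xray::BufferQueue::ErrorCode::Ok)
        return; // Queue is finalizing or all buffers are live.
      // ... write records into Buf.Data, tracking BytesWritten ...
      __sanitizer::u64 BytesWritten = 0; // Placeholder for the real count.
      // Publish the extents so readers know how much of the buffer is valid.
      __sanitizer::atomic_store(Buf.Extents, BytesWritten,
                                __sanitizer::memory_order_release);
      BQ.releaseBuffer(Buf); // Returns the buffer to the queue and clears Buf.
    }
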
@@ -157,15 +222,17 @@ BufferQueue::ErrorCode BufferQueue::finalize() {
return ErrorCode::Ok;
}
-BufferQueue::~BufferQueue() {
- for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
- auto &T = *I;
- auto &Buf = T.Buff;
- deallocRaw(Buf.Data, Buf.Size);
- deallocRaw(Buf.Extents, 1);
- }
+void BufferQueue::cleanupBuffers() {
for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
B->~BufferRep();
- deallocRaw(Buffers, BufferCount);
- deallocRaw(OwnedBuffers, BufferCount);
+ deallocateBuffer(Buffers, BufferCount);
+ decRefCount(BackingStore, BufferSize, BufferCount);
+ decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount);
+ BackingStore = nullptr;
+ ExtentsBackingStore = nullptr;
+ Buffers = nullptr;
+ BufferCount = 0;
+ BufferSize = 0;
}
+
+BufferQueue::~BufferQueue() { cleanupBuffers(); }
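
The init()/finalize()/cleanupBuffers() pieces give the queue a generational lifecycle: it can be finalized and later re-initialized in place, with buffers from the old generation kept alive by the refcounted backing stores until they are returned. A hedged usage sketch, assuming init() is exposed by the header as the re-initialization path above implies (sizes are illustrative):

    #include "xray_buffer_queue.h"

    void lifecycleSketch() {
      bool Success = false;
      __xray::BufferQueue BQ(/*B=*/4096, /*N=*/8, Success); // Constructor calls init().
      if (!Success)
        return;
      // ... hand buffers out with getBuffer()/releaseBuffer() ...
      BQ.finalize(); // Further getBuffer() calls now return QueueFinalizing.
      // Re-arm the same object for a new session; init() increments the
      // generation so stale Buffer handles from the old session are detected.
      if (BQ.init(/*BS=*/4096, /*BC=*/8) != __xray::BufferQueue::ErrorCode::Ok)
        return;
      // The destructor runs cleanupBuffers() when BQ goes out of scope.
    }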