src - FreeBSD source tree

diff options


context:
space:
mode:

author	Allan Jude <allanjude@FreeBSD.org>	2023-01-25 01:20:16 +0000
committer	Allan Jude <allanjude@FreeBSD.org>	2023-01-25 01:20:16 +0000
commit	b3392d84da5bf2162baf937c77e0557f3fd8a52b (patch)
tree	d9bfa8081b46f82d07e21f8ff8e70627a2aec35f /lib/compress/zstdmt_compress.c
parent	f6ae97673c28bdb9ae795bd235ab6f26f2536a2d (diff)

vendor/zstd/1.5.2 vendor/zstd

Diffstat (limited to 'lib/compress/zstdmt_compress.c')

-rw-r--r--

lib/compress/zstdmt_compress.c

146

1 files changed, 97 insertions, 49 deletions

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 50454a50b9b7..6bc14b035e17 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c

@@ -1,5 +1,5 @@

+ * Copyright (c) Yann Collet, Facebook, Inc.

* This source code is licensed under both the BSD-style license (found in the

@@ -102,9 +102,8 @@ typedef struct ZSTDMT_bufferPool_s {

buffer_t bTable[1]; /* variable size */

} ZSTDMT_bufferPool;

-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)

+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)

{

- unsigned const maxNbBuffers = 2*nbWorkers + 3;

ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(

sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);

if (bufPool==NULL) return NULL;

@@ -160,9 +159,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const

}

-static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)

+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)

{

- unsigned const maxNbBuffers = 2*nbWorkers + 3;

if (srcBufPool==NULL) return NULL;

if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */

return srcBufPool;

@@ -171,7 +169,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,

size_t const bSize = srcBufPool->bufferSize; /* forward parameters */

ZSTDMT_bufferPool* newBufPool;

ZSTDMT_freeBufferPool(srcBufPool);

- newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);

+ newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);

if (newBufPool==NULL) return newBufPool;

ZSTDMT_setBufferSize(newBufPool, bSize);

return newBufPool;

@@ -263,6 +261,16 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)

ZSTD_customFree(buf.start, bufPool->cMem);

}

+/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.

+ * The 3 additional buffers are as follows:

+ * 1 buffer for input loading

+ * 1 buffer for "next input" when submitting current one

+ * 1 buffer stuck in queue */

+#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3

+/* After a worker releases its rawSeqStore, it is immediately ready for reuse.

+ * So we only need one seq buffer per worker. */

+#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers

/* ===== Seq Pool Wrapper ====== */

@@ -316,7 +324,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)

static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)

{

- ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);

+ ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);

if (seqPool == NULL) return NULL;

ZSTDMT_setNbSeq(seqPool, 0);

return seqPool;

@@ -329,7 +337,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)

static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)

{

- return ZSTDMT_expandBufferPool(pool, nbWorkers);

+ return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));

}

@@ -467,29 +475,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,

ZSTD_dictContentType_e dictContentType)

{

/* Adjust parameters */

- if (params.ldmParams.enableLdm) {

+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {

DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);

ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);

assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);

assert(params.ldmParams.hashRateLog < 32);

- serialState->ldmState.hashPower =

- ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);

} else {

ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));

}

serialState->nextJobID = 0;

if (params.fParams.checksumFlag)

XXH64_reset(&serialState->xxhState, 0);

- if (params.ldmParams.enableLdm) {

+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {

ZSTD_customMem cMem = params.customMem;

unsigned const hashLog = params.ldmParams.hashLog;

size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);

unsigned const bucketLog =

params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;

- size_t const bucketSize = (size_t)1 << bucketLog;

unsigned const prevBucketLog =

serialState->params.ldmParams.hashLog -

serialState->params.ldmParams.bucketSizeLog;

+ size_t const numBuckets = (size_t)1 << bucketLog;

/* Size the seq pool tables */

ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));

/* Reset the window */

@@ -501,20 +507,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,

}

if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {

ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);

- serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);

+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);

}

if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)

return 1;

/* Zero the tables */

ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);

- ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);

+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);

/* Update window state and fill hash table with dict */

serialState->ldmState.loadedDictEnd = 0;

if (dictSize > 0) {

if (dictContentType == ZSTD_dct_rawContent) {

BYTE const* const dictEnd = (const BYTE*)dict + dictSize;

- ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);

+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);

ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);

serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);

} else {

@@ -566,12 +572,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,

/* A future job may error and skip our job */

if (serialState->nextJobID == jobID) {

/* It is now our turn, do any processing necessary */

- if (serialState->params.ldmParams.enableLdm) {

+ if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {

size_t error;

assert(seqStore.seq != NULL && seqStore.pos == 0 &&

seqStore.size == 0 && seqStore.capacity > 0);

assert(src.size <= serialState->params.jobSize);

- ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);

+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);

error = ZSTD_ldm_generateSequences(

&serialState->ldmState, &seqStore,

&serialState->params.ldmParams, src.start, src.size);

@@ -596,7 +602,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,

if (seqStore.size > 0) {

size_t const err = ZSTD_referenceExternalSequences(

jobCCtx, seqStore.seq, seqStore.size);

- assert(serialState->params.ldmParams.enableLdm);

+ assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);

assert(!ZSTD_isError(err));

(void)err;

}

@@ -674,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)

if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));

job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */

}

- if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)

+ if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)

JOB_ERROR(ERROR(memory_allocation));

/* Don't compute the checksum for chunks, since we compute it externally,

@@ -682,7 +688,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)

if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;

/* Don't run LDM for the chunks, since we handle it externally */

- jobParams.ldmParams.enableLdm = 0;

+ jobParams.ldmParams.enableLdm = ZSTD_ps_disable;

+ /* Correct nbWorkers to 0. */

+ jobParams.nbWorkers = 0;

/* init */

@@ -695,6 +703,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)

{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);

if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);

}

+ if (!job->firstJob) {

+ size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);

+ if (ZSTD_isError(err)) JOB_ERROR(err);

+ }

{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,

job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */

ZSTD_dtlm_fast,

@@ -750,6 +762,13 @@ static void ZSTDMT_compressionJob(void* jobDescription)

if (ZSTD_isError(cSize)) JOB_ERROR(cSize);

lastCBlockSize = cSize;

} }

+ if (!job->firstJob) {

+ /* Double check that we don't have an ext-dict, because then our

+ * repcode invalidation doesn't work.

+ */

+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));

+ }

+ ZSTD_CCtx_trace(cctx, 0);

_endJob:

ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);

@@ -796,6 +815,15 @@ typedef struct {

static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

#define RSYNC_LENGTH 32

+/* Don't create chunks smaller than the zstd block size.

+ * This stops us from regressing compression ratio too much,

+ * and ensures our output fits in ZSTD_compressBound().

+ *

+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then

+ * ZSTD_COMPRESSBOUND() will need to be updated.

+ */

+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX

+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)

typedef struct {

U64 hash;

@@ -916,7 +944,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,

mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);

assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */

mtctx->jobIDMask = nbJobs - 1;

- mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);

+ mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);

mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);

mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);

initError = ZSTDMT_serialState_init(&mtctx->serial);

@@ -1019,7 +1047,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)

{

if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);

FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");

- mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);

+ mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));

if (mtctx->bufPool == NULL) return ERROR(memory_allocation);

mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);

if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);

@@ -1124,7 +1152,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)

static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)

{

unsigned jobLog;

- if (params->ldmParams.enableLdm) {

+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {

/* In Long Range Mode, the windowLog is typically oversized.

* In which case, it's preferable to determine the jobSize

* based on cycleLog instead. */

@@ -1168,7 +1196,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)

int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);

int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);

assert(0 <= overlapRLog && overlapRLog <= 8);

- if (params->ldmParams.enableLdm) {

+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {

/* In Long Range Mode, the windowLog is typically oversized.

* In which case, it's preferable to determine the jobSize

* based on chainLog instead.

@@ -1239,9 +1267,11 @@ size_t ZSTDMT_initCStream_internal(

if (params.rsyncable) {

/* Aim for the targetsectionSize as the average job size. */

- U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);

- U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;

- assert(jobSizeMB >= 1);

+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);

+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);

+ /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our

+ * expected job size is at least 4x larger. */

+ assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);

DEBUGLOG(4, "rsyncLog = %u", rsyncBits);

mtctx->rsync.hash = 0;

mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;

@@ -1253,7 +1283,7 @@ size_t ZSTDMT_initCStream_internal(

ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));

{

/* If ldm is enabled we need windowSize space. */

- size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;

+ size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;

/* Two buffers of slack, plus extra space for the overlap

* This is the minimum slack that LDM works with. One extra because

* flush might waste up to targetSectionSize-1 bytes. Another extra

@@ -1528,17 +1558,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)

static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)

{

BYTE const* const bufferStart = (BYTE const*)buffer.start;

- BYTE const* const bufferEnd = bufferStart + buffer.capacity;

BYTE const* const rangeStart = (BYTE const*)range.start;

- BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

if (rangeStart == NULL || bufferStart == NULL)

return 0;

- /* Empty ranges cannot overlap */

- if (bufferStart == bufferEnd || rangeStart == rangeEnd)

- return 0;

- return bufferStart < rangeEnd && rangeStart < bufferEnd;

+ {

+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;

+ BYTE const* const rangeEnd = rangeStart + range.size;

+ /* Empty ranges cannot overlap */

+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)

+ return 0;

+ return bufferStart < rangeEnd && rangeStart < bufferEnd;

+ }

}

static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

@@ -1565,7 +1599,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)

{

- if (mtctx->params.ldmParams.enableLdm) {

+ if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {

ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;

DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");

DEBUGLOG(5, "source [0x%zx, 0x%zx)",

@@ -1668,6 +1702,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)

if (!mtctx->params.rsyncable)

/* Rsync is disabled. */

return syncPoint;

+ if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)

+ /* We don't emit synchronization points if it would produce too small blocks.

+ * We don't have enough input to find a synchronization point, so don't look.

+ */

+ return syncPoint;

if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)

/* Not enough to compute the hash.

* We will miss any synchronization points in this RSYNC_LENGTH byte

@@ -1678,10 +1717,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)

return syncPoint;

/* Initialize the loop variables. */

- if (mtctx->inBuff.filled >= RSYNC_LENGTH) {

- /* We have enough bytes buffered to initialize the hash.

+ if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {

+ /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions

+ * because they can't possibly be a sync point. So we can start

+ * part way through the input buffer.

+ */

+ pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;

+ if (pos >= RSYNC_LENGTH) {

+ prev = istart + pos - RSYNC_LENGTH;

+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);

+ } else {

+ assert(mtctx->inBuff.filled >= RSYNC_LENGTH);

+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;

+ hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));

+ hash = ZSTD_rollingHash_append(hash, istart, pos);

+ }

+ } else {

+ /* We have enough bytes buffered to initialize the hash,

+ * and have processed enough bytes to find a sync point.

* Start scanning at the beginning of the input.

*/

+ assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);

+ assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);

pos = 0;

prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;

hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);

@@ -1695,16 +1752,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)

syncPoint.flush = 1;

return syncPoint;

}

- } else {

- /* We don't have enough bytes buffered to initialize the hash, but

- * we know we have at least RSYNC_LENGTH bytes total.

- * Start scanning after the first RSYNC_LENGTH bytes less the bytes

- * already buffered.

- */

- pos = RSYNC_LENGTH - mtctx->inBuff.filled;

- prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;

- hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);

- hash = ZSTD_rollingHash_append(hash, istart, pos);

}

/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll

* through the input. If we hit a synchronization point, then cut the

@@ -1716,8 +1763,9 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)

for (; pos < syncPoint.toLoad; ++pos) {

BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];

- /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */

+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);

hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);

+ assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);

if ((hash & hitMask) == hitMask) {

syncPoint.toLoad = pos + 1;

syncPoint.flush = 1;