diff options
Diffstat (limited to 'lib/dictBuilder/cover.c')
-rw-r--r-- | lib/dictBuilder/cover.c | 55 |
1 files changed, 33 insertions, 22 deletions
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 3d445ae8b81d..efdffddbf930 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -5,6 +5,7 @@ * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* ***************************************************************************** @@ -382,7 +383,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group, typedef struct { U32 begin; U32 end; - double score; + U32 score; } COVER_segment_t; /** @@ -479,11 +480,16 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, * Check the validity of the parameters. * Returns non-zero if the parameters are valid and 0 otherwise. */ -static int COVER_checkParameters(ZDICT_cover_params_t parameters) { +static int COVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize) { /* k and d are required parameters */ if (parameters.d == 0 || parameters.k == 0) { return 0; } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } /* d <= k */ if (parameters.d > parameters.k) { return 0; @@ -622,9 +628,13 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, /* Select a segment */ COVER_segment_t segment = COVER_selectSegment( ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); - /* Trim the segment if necessary and if it is empty then we are done */ + /* If the segment covers no dmers, then we are out of content */ + if (segment.score == 0) { + break; + } + /* Trim the segment if necessary and if it is too small then we are done */ segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); - if (segmentSize == 0) { + if (segmentSize < parameters.d) { break; } /* We fill the dictionary from the back to allow the best segments to be @@ -648,7 +658,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( COVER_ctx_t ctx; COVER_map_t activeDmers; /* Checks */ - if (!COVER_checkParameters(parameters)) { + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n"); return ERROR(GENERIC); } @@ -701,8 +711,8 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( * compiled with multithreaded support. */ typedef struct COVER_best_s { - pthread_mutex_t mutex; - pthread_cond_t cond; + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; size_t liveJobs; void *dict; size_t dictSize; @@ -715,8 +725,8 @@ typedef struct COVER_best_s { */ static void COVER_best_init(COVER_best_t *best) { if (best==NULL) return; /* compatible with init on NULL */ - (void)pthread_mutex_init(&best->mutex, NULL); - (void)pthread_cond_init(&best->cond, NULL); + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); best->liveJobs = 0; best->dict = NULL; best->dictSize = 0; @@ -731,11 +741,11 @@ static void COVER_best_wait(COVER_best_t *best) { if (!best) { return; } - pthread_mutex_lock(&best->mutex); + ZSTD_pthread_mutex_lock(&best->mutex); while (best->liveJobs != 0) { - pthread_cond_wait(&best->cond, &best->mutex); + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); } - pthread_mutex_unlock(&best->mutex); + ZSTD_pthread_mutex_unlock(&best->mutex); } /** @@ -749,8 +759,8 @@ static void COVER_best_destroy(COVER_best_t *best) { if (best->dict) { free(best->dict); } - pthread_mutex_destroy(&best->mutex); - pthread_cond_destroy(&best->cond); + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); } /** @@ -761,9 +771,9 @@ static void COVER_best_start(COVER_best_t *best) { if (!best) { return; } - pthread_mutex_lock(&best->mutex); + ZSTD_pthread_mutex_lock(&best->mutex); ++best->liveJobs; - pthread_mutex_unlock(&best->mutex); + ZSTD_pthread_mutex_unlock(&best->mutex); } /** @@ -779,7 +789,7 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, } { size_t liveJobs; - pthread_mutex_lock(&best->mutex); + ZSTD_pthread_mutex_lock(&best->mutex); --best->liveJobs; liveJobs = best->liveJobs; /* If the new dictionary is better */ @@ -802,9 +812,9 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, best->parameters = parameters; best->compressedSize = compressedSize; } - pthread_mutex_unlock(&best->mutex); + ZSTD_pthread_mutex_unlock(&best->mutex); if (liveJobs == 0) { - pthread_cond_broadcast(&best->cond); + ZSTD_pthread_cond_broadcast(&best->cond); } } } @@ -884,7 +894,7 @@ static void COVER_tryParameters(void *opaque) { goto _compressCleanup; } /* Compress each sample and sum their sizes (or error) */ - totalCompressedSize = 0; + totalCompressedSize = dictBufferCapacity; for (i = 0; i < ctx->nbSamples; ++i) { const size_t size = ZSTD_compress_usingCDict( cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i], @@ -960,7 +970,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Initialization */ COVER_best_init(&best); /* Turn down global display level to clean up display at level 2 and below */ - g_displayLevel = parameters->zParams.notificationLevel - 1; + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; /* Loop through d first because each new value needs a new context */ LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", kIterations); @@ -994,8 +1004,9 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( data->parameters.k = k; data->parameters.d = d; data->parameters.steps = kSteps; + data->parameters.zParams.notificationLevel = g_displayLevel; /* Check the parameters */ - if (!COVER_checkParameters(data->parameters)) { + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n"); free(data); continue; |