summary | refs | log | tree | commit | diff
path: root/lib/dictBuilder/cover.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/dictBuilder/cover.c')
-rw-r--r-- lib/dictBuilder/cover.c 55
1 files changed, 33 insertions, 22 deletions
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 3d445ae8b81d..efdffddbf930 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -5,6 +5,7 @@
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
/* *****************************************************************************
@@ -382,7 +383,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
typedef struct {
U32 begin;
U32 end;
- double score;
+ U32 score;
} COVER_segment_t;
/**
@@ -479,11 +480,16 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
* Check the validity of the parameters.
* Returns non-zero if the parameters are valid and 0 otherwise.
*/
-static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
+static int COVER_checkParameters(ZDICT_cover_params_t parameters,
+ size_t maxDictSize) {
/* k and d are required parameters */
if (parameters.d == 0 || parameters.k == 0) {
return 0;
}
+ /* k <= maxDictSize */
+ if (parameters.k > maxDictSize) {
+ return 0;
+ }
/* d <= k */
if (parameters.d > parameters.k) {
return 0;
@@ -622,9 +628,13 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
/* Select a segment */
COVER_segment_t segment = COVER_selectSegment(
ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
- /* Trim the segment if necessary and if it is empty then we are done */
+ /* If the segment covers no dmers, then we are out of content */
+ if (segment.score == 0) {
+ break;
+ }
+ /* Trim the segment if necessary and if it is too small then we are done */
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
- if (segmentSize == 0) {
+ if (segmentSize < parameters.d) {
break;
}
/* We fill the dictionary from the back to allow the best segments to be
@@ -648,7 +658,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
COVER_ctx_t ctx;
COVER_map_t activeDmers;
/* Checks */
- if (!COVER_checkParameters(parameters)) {
+ if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
return ERROR(GENERIC);
}
@@ -701,8 +711,8 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
* compiled with multithreaded support.
*/
typedef struct COVER_best_s {
- pthread_mutex_t mutex;
- pthread_cond_t cond;
+ ZSTD_pthread_mutex_t mutex;
+ ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
@@ -715,8 +725,8 @@ typedef struct COVER_best_s {
*/
static void COVER_best_init(COVER_best_t *best) {
if (best==NULL) return; /* compatible with init on NULL */
- (void)pthread_mutex_init(&best->mutex, NULL);
- (void)pthread_cond_init(&best->cond, NULL);
+ (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
+ (void)ZSTD_pthread_cond_init(&best->cond, NULL);
best->liveJobs = 0;
best->dict = NULL;
best->dictSize = 0;
@@ -731,11 +741,11 @@ static void COVER_best_wait(COVER_best_t *best) {
if (!best) {
return;
}
- pthread_mutex_lock(&best->mutex);
+ ZSTD_pthread_mutex_lock(&best->mutex);
while (best->liveJobs != 0) {
- pthread_cond_wait(&best->cond, &best->mutex);
+ ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
}
- pthread_mutex_unlock(&best->mutex);
+ ZSTD_pthread_mutex_unlock(&best->mutex);
}
/**
@@ -749,8 +759,8 @@ static void COVER_best_destroy(COVER_best_t *best) {
if (best->dict) {
free(best->dict);
}
- pthread_mutex_destroy(&best->mutex);
- pthread_cond_destroy(&best->cond);
+ ZSTD_pthread_mutex_destroy(&best->mutex);
+ ZSTD_pthread_cond_destroy(&best->cond);
}
/**
@@ -761,9 +771,9 @@ static void COVER_best_start(COVER_best_t *best) {
if (!best) {
return;
}
- pthread_mutex_lock(&best->mutex);
+ ZSTD_pthread_mutex_lock(&best->mutex);
++best->liveJobs;
- pthread_mutex_unlock(&best->mutex);
+ ZSTD_pthread_mutex_unlock(&best->mutex);
}
/**
@@ -779,7 +789,7 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
}
{
size_t liveJobs;
- pthread_mutex_lock(&best->mutex);
+ ZSTD_pthread_mutex_lock(&best->mutex);
--best->liveJobs;
liveJobs = best->liveJobs;
/* If the new dictionary is better */
@@ -802,9 +812,9 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
best->parameters = parameters;
best->compressedSize = compressedSize;
}
- pthread_mutex_unlock(&best->mutex);
+ ZSTD_pthread_mutex_unlock(&best->mutex);
if (liveJobs == 0) {
- pthread_cond_broadcast(&best->cond);
+ ZSTD_pthread_cond_broadcast(&best->cond);
}
}
}
@@ -884,7 +894,7 @@ static void COVER_tryParameters(void *opaque) {
goto _compressCleanup;
}
/* Compress each sample and sum their sizes (or error) */
- totalCompressedSize = 0;
+ totalCompressedSize = dictBufferCapacity;
for (i = 0; i < ctx->nbSamples; ++i) {
const size_t size = ZSTD_compress_usingCDict(
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
@@ -960,7 +970,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* Initialization */
COVER_best_init(&best);
/* Turn down global display level to clean up display at level 2 and below */
- g_displayLevel = parameters->zParams.notificationLevel - 1;
+ g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
/* Loop through d first because each new value needs a new context */
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
kIterations);
@@ -994,8 +1004,9 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
data->parameters.k = k;
data->parameters.d = d;
data->parameters.steps = kSteps;
+ data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
- if (!COVER_checkParameters(data->parameters)) {
+ if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
free(data);
continue;