diff options
| author | Conrad Meyer <cem@FreeBSD.org> | 2018-10-22 20:00:30 +0000 | 
|---|---|---|
| committer | Conrad Meyer <cem@FreeBSD.org> | 2018-10-22 20:00:30 +0000 | 
| commit | 706cfae467a217cc786fd96a72cc2e33c61987e4 (patch) | |
| tree | e7673904660df47b5abd9a1c33cf982a514dac66 /programs/dibio.c | |
| parent | 42239e68a5cfba3b37b054425eace8d14e0844e3 (diff) | |
Diffstat (limited to 'programs/dibio.c')
| -rw-r--r-- | programs/dibio.c | 55 | 
1 files changed, 39 insertions, 16 deletions
diff --git a/programs/dibio.c b/programs/dibio.c index 112259ddcd05..d3fd8cc053de 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -27,6 +27,7 @@  #include <string.h>         /* memset */  #include <stdio.h>          /* fprintf, fopen, ftello64 */  #include <errno.h>          /* errno */ +#include <assert.h>  #include "mem.h"            /* read */  #include "error_private.h" @@ -43,6 +44,7 @@  #define SAMPLESIZE_MAX (128 KB)  #define MEMMULT 11    /* rough estimation : memory cost to analyze 1 byte of sample */  #define COVER_MEMMULT 9    /* rough estimation : memory cost to analyze 1 byte of sample */ +#define FASTCOVER_MEMMULT 1    /* rough estimation : memory cost to analyze 1 byte of sample */  static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));  #define NOISELENGTH 32 @@ -82,10 +84,6 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;  /* ********************************************************  *  Helper functions  **********************************************************/ -unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); } - -const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } -  #undef MIN  #define MIN(a,b)    ((a) < (b) ? (a) : (b)) @@ -165,6 +163,7 @@ static U32 DiB_rand(U32* src)  static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {      U32 seed = 0xFD2FB528;      unsigned i; +    assert(nbFiles >= 1);      for (i = nbFiles - 1; i > 0; --i) {          unsigned const j = DiB_rand(&seed) % (i + 1);          const char* const tmp = fileNamesTable[j]; @@ -269,16 +268,19 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCa  int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,                         const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, -                       ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams, -                       int optimizeCover) +                       ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams, +                       ZDICT_fastCover_params_t* fastCoverParams, int optimize)  {      unsigned const displayLevel = params ? params->zParams.notificationLevel :                          coverParams ? coverParams->zParams.notificationLevel : +                        fastCoverParams ? fastCoverParams->zParams.notificationLevel :                          0;   /* should never happen */      void* const dictBuffer = malloc(maxDictSize);      fileStats const fs = DiB_fileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);      size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t)); -    size_t const memMult = params ? MEMMULT : COVER_MEMMULT; +    size_t const memMult = params ? MEMMULT : +                           coverParams ? COVER_MEMMULT: +                           FASTCOVER_MEMMULT;      size_t const maxMem =  DiB_findMaxMem(fs.totalSizeToLoad * memMult) / memMult;      size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);      void* const srcBuffer = malloc(loadedSize+NOISELENGTH); @@ -310,7 +312,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,      /* Load input buffer */      DISPLAYLEVEL(3, "Shuffling input files\n");      DiB_shuffle(fileNamesTable, nbFiles); -    nbFiles = DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel); + +    DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel);      {   size_t dictSize;          if (params) { @@ -318,16 +321,36 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,              dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize,                                                             srcBuffer, sampleSizes, fs.nbSamples,                                                             *params); -        } else if (optimizeCover) { -            dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, -                                                           srcBuffer, sampleSizes, fs.nbSamples, -                                                           coverParams); -            if (!ZDICT_isError(dictSize)) { -                DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps); +        } else if (coverParams) { +            if (optimize) { +              dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, +                                                             srcBuffer, sampleSizes, fs.nbSamples, +                                                             coverParams); +              if (!ZDICT_isError(dictSize)) { +                  unsigned splitPercentage = (unsigned)(coverParams->splitPoint * 100); +                  DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParams->k, coverParams->d, +                              coverParams->steps, splitPercentage); +              } +            } else { +              dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, +                                                     sampleSizes, fs.nbSamples, *coverParams);              }          } else { -            dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, -                                                   sampleSizes, fs.nbSamples, *coverParams); +            assert(fastCoverParams != NULL); +            if (optimize) { +              dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, +                                                              srcBuffer, sampleSizes, fs.nbSamples, +                                                              fastCoverParams); +              if (!ZDICT_isError(dictSize)) { +                unsigned splitPercentage = (unsigned)(fastCoverParams->splitPoint * 100); +                DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastCoverParams->k, +                            fastCoverParams->d, fastCoverParams->f, fastCoverParams->steps, splitPercentage, +                            fastCoverParams->accel); +              } +            } else { +              dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer, +                                                        sampleSizes, fs.nbSamples, *fastCoverParams); +            }          }          if (ZDICT_isError(dictSize)) {              DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */  | 
