summaryrefslogtreecommitdiff
path: root/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c')
-rw-r--r--contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c442
1 files changed, 0 insertions, 442 deletions
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
deleted file mode 100644
index cd943797bdead..0000000000000
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
+++ /dev/null
@@ -1,442 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include <time.h>
-#include "random.h"
-#include "dictBuilder.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-#define DEFAULT_DISPLAYLEVEL 2
-
-
-/*-*************************************
-* Struct
-***************************************/
-typedef struct {
- const void* dictBuffer;
- size_t dictSize;
-} dictInfo;
-
-
-/*-*************************************
-* Dictionary related operations
-***************************************/
-/** createDictFromFiles() :
- * Based on type of param given, train dictionary using the corresponding algorithm
- * @return dictInfo containing dictionary buffer and dictionary size
- */
-dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
- ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
- ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
- unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
- coverParams ? coverParams->zParams.notificationLevel :
- legacyParams ? legacyParams->zParams.notificationLevel :
- fastParams ? fastParams->zParams.notificationLevel :
- DEFAULT_DISPLAYLEVEL; /* no dict */
- void* const dictBuffer = malloc(maxDictSize);
-
- dictInfo* dInfo = NULL;
-
- /* Checks */
- if (!dictBuffer)
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-
- { size_t dictSize;
- if(randomParams) {
- dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *randomParams);
- }else if(coverParams) {
- /* Run the optimize version if either k or d is not provided */
- if (!coverParams->d || !coverParams->k){
- dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, coverParams);
- } else {
- dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *coverParams);
- }
- } else if(legacyParams) {
- dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *legacyParams);
- } else if(fastParams) {
- /* Run the optimize version if either k or d is not provided */
- if (!fastParams->d || !fastParams->k) {
- dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, fastParams);
- } else {
- dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *fastParams);
- }
- } else {
- dictSize = 0;
- }
- if (ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
- free(dictBuffer);
- return dInfo;
- }
- dInfo = (dictInfo *)malloc(sizeof(dictInfo));
- dInfo->dictBuffer = dictBuffer;
- dInfo->dictSize = dictSize;
- }
- return dInfo;
-}
-
-
-/** compressWithDict() :
- * Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
- * @return compression ratio
- */
-double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
- /* Local variables */
- size_t totalCompressedSize = 0;
- size_t totalOriginalSize = 0;
- const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
- double cRatio;
- size_t dstCapacity;
- int i;
-
- /* Pointers */
- ZSTD_CDict *cdict = NULL;
- ZSTD_CCtx* cctx = NULL;
- size_t *offsets = NULL;
- void* dst = NULL;
-
- /* Allocate dst with enough space to compress the maximum sized sample */
- {
- size_t maxSampleSize = 0;
- for (i = 0; i < srcInfo->nbSamples; i++) {
- maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
- }
- dstCapacity = ZSTD_compressBound(maxSampleSize);
- dst = malloc(dstCapacity);
- }
-
- /* Calculate offset for each sample */
- offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
- offsets[0] = 0;
- for (i = 1; i <= srcInfo->nbSamples; i++) {
- offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
- }
-
- /* Create the cctx */
- cctx = ZSTD_createCCtx();
- if(!cctx || !dst) {
- cRatio = -1;
- goto _cleanup;
- }
-
- /* Create CDict if there's a dictionary stored on buffer */
- if (hasDict) {
- cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
- if(!cdict) {
- cRatio = -1;
- goto _cleanup;
- }
- }
-
- /* Compress each sample and sum their sizes*/
- const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
- for (i = 0; i < srcInfo->nbSamples; i++) {
- size_t compressedSize;
- if(hasDict) {
- compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
- } else {
- compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
- }
- if (ZSTD_isError(compressedSize)) {
- cRatio = -1;
- goto _cleanup;
- }
- totalCompressedSize += compressedSize;
- }
-
- /* Sum original sizes */
- for (i = 0; i<srcInfo->nbSamples; i++) {
- totalOriginalSize += srcInfo->samplesSizes[i];
- }
-
- /* Calculate compression ratio */
- DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
- DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
- cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
-
-_cleanup:
- free(dst);
- free(offsets);
- ZSTD_freeCCtx(cctx);
- ZSTD_freeCDict(cdict);
- return cRatio;
-}
-
-
-/** FreeDictInfo() :
- * Free memory allocated for dictInfo
- */
-void freeDictInfo(dictInfo* info) {
- if (!info) return;
- if (info->dictBuffer) free((void*)(info->dictBuffer));
- free(info);
-}
-
-
-
-/*-********************************************************
- * Benchmarking functions
-**********************************************************/
-/** benchmarkDictBuilder() :
- * Measure how long a dictionary builder takes and compression ratio with the dictionary built
- * @return 0 if benchmark successfully, 1 otherwise
- */
-int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
- ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
- ZDICT_fastCover_params_t *fastParam) {
- /* Local variables */
- const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
- coverParam ? coverParam->zParams.notificationLevel :
- legacyParam ? legacyParam->zParams.notificationLevel :
- fastParam ? fastParam->zParams.notificationLevel:
- DEFAULT_DISPLAYLEVEL; /* no dict */
- const char* name = randomParam ? "RANDOM" :
- coverParam ? "COVER" :
- legacyParam ? "LEGACY" :
- fastParam ? "FAST":
- "NODICT"; /* no dict */
- const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
- coverParam ? coverParam->zParams.compressionLevel :
- legacyParam ? legacyParam->zParams.compressionLevel :
- fastParam ? fastParam->zParams.compressionLevel:
- DEFAULT_CLEVEL; /* no dict */
- int result = 0;
-
- /* Calculate speed */
- const UTIL_time_t begin = UTIL_getTime();
- dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
- const U64 timeMicro = UTIL_clockSpanMicro(begin);
- const double timeSec = timeMicro / (double)SEC_TO_MICRO;
- if (!dInfo) {
- DISPLAYLEVEL(1, "%s does not train successfully\n", name);
- result = 1;
- goto _cleanup;
- }
- DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
-
- /* Calculate compression ratio */
- const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
- if (cRatio < 0) {
- DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
- result = 1;
- goto _cleanup;
-
- }
- DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
-
-_cleanup:
- freeDictInfo(dInfo);
- return result;
-}
-
-
-
-int main(int argCount, const char* argv[])
-{
- const int displayLevel = DEFAULT_DISPLAYLEVEL;
- const char* programName = argv[0];
- int result = 0;
-
- /* Initialize arguments to default values */
- unsigned k = 200;
- unsigned d = 8;
- unsigned f;
- unsigned accel;
- unsigned i;
- const unsigned cLevel = DEFAULT_CLEVEL;
- const unsigned dictID = 0;
- const unsigned maxDictSize = g_defaultMaxDictSize;
-
- /* Initialize table to store input files */
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
- unsigned filenameIdx = 0;
-
- char* fileNamesBuf = NULL;
- unsigned fileNamesNb = filenameIdx;
- const int followLinks = 0;
- const char** extendedFileList = NULL;
-
- /* Parse arguments */
- for (i = 1; i < argCount; i++) {
- const char* argument = argv[i];
- if (longCommandWArg(&argument, "in=")) {
- filenameTable[filenameIdx] = argument;
- filenameIdx++;
- continue;
- }
- DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
- return 1;
- }
-
- /* Get the list of all files recursively (because followLinks==0)*/
- extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
- &fileNamesNb, followLinks);
- if (extendedFileList) {
- unsigned u;
- for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
- free((void*)filenameTable);
- filenameTable = extendedFileList;
- filenameIdx = fileNamesNb;
- }
-
- /* get sampleInfo */
- size_t blockSize = 0;
- sampleInfo* srcInfo= getSampleInfo(filenameTable,
- filenameIdx, blockSize, maxDictSize, displayLevel);
-
- /* set up zParams */
- ZDICT_params_t zParams;
- zParams.compressionLevel = cLevel;
- zParams.notificationLevel = displayLevel;
- zParams.dictID = dictID;
-
- /* with no dict */
- {
- const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
- if(noDictResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for random */
- {
- ZDICT_random_params_t randomParam;
- randomParam.zParams = zParams;
- randomParam.k = k;
- const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
- if(randomResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for legacy */
- {
- ZDICT_legacy_params_t legacyParam;
- legacyParam.zParams = zParams;
- legacyParam.selectivityLevel = 9;
- const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
- DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
- if(legacyResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for cover */
- {
- /* for cover (optimizing k and d) */
- ZDICT_cover_params_t coverParam;
- memset(&coverParam, 0, sizeof(coverParam));
- coverParam.zParams = zParams;
- coverParam.splitPoint = 1.0;
- coverParam.steps = 40;
- coverParam.nbThreads = 1;
- const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
- if(coverOptResult) {
- result = 1;
- goto _cleanup;
- }
-
- /* for cover (with k and d provided) */
- const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
- if(coverResult) {
- result = 1;
- goto _cleanup;
- }
-
- }
-
- /* for fastCover */
- for (f = 15; f < 25; f++){
- DISPLAYLEVEL(2, "current f is %u\n", f);
- for (accel = 1; accel < 11; accel++) {
- DISPLAYLEVEL(2, "current accel is %u\n", accel);
- /* for fastCover (optimizing k and d) */
- ZDICT_fastCover_params_t fastParam;
- memset(&fastParam, 0, sizeof(fastParam));
- fastParam.zParams = zParams;
- fastParam.f = f;
- fastParam.steps = 40;
- fastParam.nbThreads = 1;
- fastParam.accel = accel;
- const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
- if(fastOptResult) {
- result = 1;
- goto _cleanup;
- }
-
- /* for fastCover (with k and d provided) */
- for (i = 0; i < 5; i++) {
- const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
- if(fastResult) {
- result = 1;
- goto _cleanup;
- }
- }
- }
- }
-
-
- /* Free allocated memory */
-_cleanup:
- UTIL_freeFileList(extendedFileList, fileNamesBuf);
- freeSampleInfo(srcInfo);
- return result;
-}