summaryrefslogtreecommitdiff
path: root/contrib/experimental_dict_builders/randomDictBuilder/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/experimental_dict_builders/randomDictBuilder/main.c')
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/main.c161
1 files changed, 0 insertions, 161 deletions
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/main.c b/contrib/experimental_dict_builders/randomDictBuilder/main.c
deleted file mode 100644
index 3ad885746090..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/main.c
+++ /dev/null
@@ -1,161 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "random.h"
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-#define DEFAULT_k 200
-#define DEFAULT_OUTPUTFILE "defaultDict"
-#define DEFAULT_DICTID 0
-
-
-
-/*-*************************************
-* RANDOM
-***************************************/
-int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
- unsigned maxDictSize,
- ZDICT_random_params_t *params) {
- unsigned const displayLevel = params->zParams.notificationLevel;
- void* const dictBuffer = malloc(maxDictSize);
-
- int result = 0;
-
- /* Checks */
- if (!dictBuffer)
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-
- { size_t dictSize;
- dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *params);
- DISPLAYLEVEL(2, "k=%u\n", params->k);
- if (ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
- result = 1;
- goto _done;
- }
- /* save dict */
- DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
- saveDict(dictFileName, dictBuffer, dictSize);
- }
-
- /* clean up */
-_done:
- free(dictBuffer);
- return result;
-}
-
-
-
-int main(int argCount, const char* argv[])
-{
- int displayLevel = 2;
- const char* programName = argv[0];
- int operationResult = 0;
-
- /* Initialize arguments to default values */
- unsigned k = DEFAULT_k;
- const char* outputFile = DEFAULT_OUTPUTFILE;
- unsigned dictID = DEFAULT_DICTID;
- unsigned maxDictSize = g_defaultMaxDictSize;
-
- /* Initialize table to store input files */
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
- unsigned filenameIdx = 0;
-
- /* Parse arguments */
- for (int i = 1; i < argCount; i++) {
- const char* argument = argv[i];
- if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "in=")) {
- filenameTable[filenameIdx] = argument;
- filenameIdx++;
- continue;
- }
- if (longCommandWArg(&argument, "out=")) {
- outputFile = argument;
- continue;
- }
- DISPLAYLEVEL(1, "Incorrect parameters\n");
- operationResult = 1;
- return operationResult;
- }
-
- char* fileNamesBuf = NULL;
- unsigned fileNamesNb = filenameIdx;
- int followLinks = 0; /* follow directory recursively */
- const char** extendedFileList = NULL;
- extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
- &fileNamesNb, followLinks);
- if (extendedFileList) {
- unsigned u;
- for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
- free((void*)filenameTable);
- filenameTable = extendedFileList;
- filenameIdx = fileNamesNb;
- }
-
- size_t blockSize = 0;
-
- ZDICT_random_params_t params;
- ZDICT_params_t zParams;
- zParams.compressionLevel = DEFAULT_CLEVEL;
- zParams.notificationLevel = displayLevel;
- zParams.dictID = dictID;
- params.zParams = zParams;
- params.k = k;
-
- sampleInfo* info = getSampleInfo(filenameTable,
- filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
- operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, &params);
-
- /* Free allocated memory */
- UTIL_freeFileList(extendedFileList, fileNamesBuf);
- freeSampleInfo(info);
-
- return operationResult;
-}