summaryrefslogtreecommitdiff
path: root/tests/decodecorpus.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/decodecorpus.c')
-rw-r--r--tests/decodecorpus.c1932
1 files changed, 0 insertions, 1932 deletions
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
deleted file mode 100644
index 91873ba46031..000000000000
--- a/tests/decodecorpus.c
+++ /dev/null
@@ -1,1932 +0,0 @@
-/*
- * Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "util.h"
-#include "timefn.h" /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */
-#include "zstd.h"
-#include "zstd_internal.h"
-#include "mem.h"
-#define ZDICT_STATIC_LINKING_ONLY
-#include "zdict.h"
-
-/* Direct access to internal compression functions is required */
-#include "zstd_compress.c"
-
-#define XXH_STATIC_LINKING_ONLY
-#include "xxhash.h" /* XXH64 */
-
-#ifndef MIN
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
-#endif
-
-#ifndef MAX_PATH
- #ifdef PATH_MAX
- #define MAX_PATH PATH_MAX
- #else
- #define MAX_PATH 256
- #endif
-#endif
-
-/*-************************************
-* DISPLAY Macros
-**************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static U32 g_displayLevel = 2;
-
-#define DISPLAYUPDATE(...) \
- do { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \
- (g_displayLevel >= 4)) { \
- g_displayClock = UTIL_getTime(); \
- DISPLAY(__VA_ARGS__); \
- if (g_displayLevel >= 4) fflush(stderr); \
- } \
- } while (0)
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define CHECKERR(code) \
- do { \
- if (ZSTD_isError(code)) { \
- DISPLAY("Error occurred while generating data: %s\n", \
- ZSTD_getErrorName(code)); \
- exit(1); \
- } \
- } while (0)
-
-/*-*******************************************************
-* Random function
-*********************************************************/
-static U32 RAND(U32* src)
-{
-#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
- static const U32 prime1 = 2654435761U;
- static const U32 prime2 = 2246822519U;
- U32 rand32 = *src;
- rand32 *= prime1;
- rand32 += prime2;
- rand32 = RAND_rotl32(rand32, 13);
- *src = rand32;
- return RAND_rotl32(rand32, 27);
-#undef RAND_rotl32
-}
-
-#define DISTSIZE (8192)
-
-/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
-static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
-{
- size_t i;
- BYTE* op = ptr;
-
- for (i = 0; i < size; i++) {
- op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
- }
-}
-
-/* Write `size` random bytes into `ptr` */
-static void RAND_buffer(U32* seed, void* ptr, size_t size)
-{
- size_t i;
- BYTE* op = ptr;
-
- for (i = 0; i + 4 <= size; i += 4) {
- MEM_writeLE32(op + i, RAND(seed));
- }
- for (; i < size; i++) {
- op[i] = RAND(seed) & 0xff;
- }
-}
-
-/* Write `size` bytes into `ptr` following the distribution `dist` */
-static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
-{
- size_t i;
- BYTE* op = ptr;
-
- for (i = 0; i < size; i++) {
- op[i] = dist[RAND(seed) % DISTSIZE];
- }
-}
-
-/* Generate a random distribution where the frequency of each symbol follows a
- * geometric distribution defined by `weight`
- * `dist` should have size at least `DISTSIZE` */
-static void RAND_genDist(U32* seed, BYTE* dist, double weight)
-{
- size_t i = 0;
- size_t statesLeft = DISTSIZE;
- BYTE symb = (BYTE) (RAND(seed) % 256);
- BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
-
- while (i < DISTSIZE) {
- size_t states = ((size_t)(weight * statesLeft)) + 1;
- size_t j;
- for (j = 0; j < states && i < DISTSIZE; j++, i++) {
- dist[i] = symb;
- }
-
- symb += step;
- statesLeft -= states;
- }
-}
-
-/* Generates a random number in the range [min, max) */
-static inline U32 RAND_range(U32* seed, U32 min, U32 max)
-{
- return (RAND(seed) % (max-min)) + min;
-}
-
-#define ROUND(x) ((U32)(x + 0.5))
-
-/* Generates a random number in an exponential distribution with mean `mean` */
-static double RAND_exp(U32* seed, double mean)
-{
- double const u = RAND(seed) / (double) UINT_MAX;
- return log(1-u) * (-mean);
-}
-
-/*-*******************************************************
-* Constants and Structs
-*********************************************************/
-const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"};
-
-#define MAX_DECOMPRESSED_SIZE_LOG 20
-#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
-
-#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
-
-#define MIN_SEQ_LEN (3)
-#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
-
-BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
-BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
-BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];
-
-seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
-BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
-BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
-BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
-BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
-
-unsigned WKSP[1024];
-
-typedef struct {
- size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
- unsigned windowSize; /* contentSize >= windowSize means single segment */
-} frameHeader_t;
-
-/* For repeat modes */
-typedef struct {
- U32 rep[ZSTD_REP_NUM];
-
- int hufInit;
- /* the distribution used in the previous block for repeat mode */
- BYTE hufDist[DISTSIZE];
- U32 hufTable [256]; /* HUF_CElt is an incomplete type */
-
- int fseInit;
- FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
- FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
- FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-
- /* Symbols that were present in the previous distribution, for use with
- * set_repeat */
- BYTE litlengthSymbolSet[36];
- BYTE offsetSymbolSet[29];
- BYTE matchlengthSymbolSet[53];
-} cblockStats_t;
-
-typedef struct {
- void* data;
- void* dataStart;
- void* dataEnd;
-
- void* src;
- void* srcStart;
- void* srcEnd;
-
- frameHeader_t header;
-
- cblockStats_t stats;
- cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
-} frame_t;
-
-typedef struct {
- int useDict;
- U32 dictID;
- size_t dictContentSize;
- BYTE* dictContent;
-} dictInfo;
-
-typedef enum {
- gt_frame = 0, /* generate frames */
- gt_block, /* generate compressed blocks without block/frame headers */
-} genType_e;
-
-/*-*******************************************************
-* Global variables (set from command line)
-*********************************************************/
-U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */
-U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */
-
-/*-*******************************************************
-* Generator Functions
-*********************************************************/
-
-struct {
- int contentSize; /* force the content size to be present */
-} opts; /* advanced options on generation */
-
-/* Generate and write a random frame header */
-static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
-{
- BYTE* const op = frame->data;
- size_t pos = 0;
- frameHeader_t fh;
-
- BYTE windowByte = 0;
-
- int singleSegment = 0;
- int contentSizeFlag = 0;
- int fcsCode = 0;
-
- memset(&fh, 0, sizeof(fh));
-
- /* generate window size */
- {
- /* Follow window algorithm from specification */
- int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
- int const mantissa = RAND(seed) % 8;
- windowByte = (BYTE) ((exponent << 3) | mantissa);
- fh.windowSize = (1U << (exponent + 10));
- fh.windowSize += fh.windowSize / 8 * mantissa;
- }
-
- {
- /* Generate random content size */
- size_t highBit;
- if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
- /* do content of at least 128 bytes */
- highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
- } else if (RAND(seed) & 3) {
- /* do small content */
- highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
- } else {
- /* 0 size frame */
- highBit = 0;
- }
- fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
-
- /* provide size sometimes */
- contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
-
- if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
- /* do single segment sometimes */
- fh.windowSize = (U32) fh.contentSize;
- singleSegment = 1;
- }
- }
-
- if (contentSizeFlag) {
- /* Determine how large fcs field has to be */
- int minFcsCode = (fh.contentSize >= 256) +
- (fh.contentSize >= 65536 + 256) +
- (fh.contentSize > 0xFFFFFFFFU);
- if (!singleSegment && !minFcsCode) {
- minFcsCode = 1;
- }
- fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
- if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
- }
-
- /* write out the header */
- MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
- pos += 4;
-
- {
- /*
- * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
- * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
- * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
- * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
- * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
- */
- int const dictBits = info.useDict ? 3 : 0;
- BYTE const frameHeaderDescriptor =
- (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
- op[pos++] = frameHeaderDescriptor;
- }
-
- if (!singleSegment) {
- op[pos++] = windowByte;
- }
- if (info.useDict) {
- MEM_writeLE32(op + pos, (U32) info.dictID);
- pos += 4;
- }
- if (contentSizeFlag) {
- switch (fcsCode) {
- default: /* Impossible */
- case 0: op[pos++] = (BYTE) fh.contentSize; break;
- case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
- case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
- case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
- }
- }
-
- DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize);
- DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
- DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
- DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);
-
- frame->data = op + pos;
- frame->header = fh;
-}
-
-/* Write a literal block in either raw or RLE form, return the literals size */
-static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
-{
- BYTE* op = (BYTE*)frame->data;
- int const type = RAND(seed) % 2;
- int const sizeFormatDesc = RAND(seed) % 8;
- size_t litSize;
- size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
-
- if (sizeFormatDesc == 0) {
- /* Size_FormatDesc = ?0 */
- maxLitSize = MIN(maxLitSize, 31);
- } else if (sizeFormatDesc <= 4) {
- /* Size_FormatDesc = 01 */
- maxLitSize = MIN(maxLitSize, 4095);
- } else {
- /* Size_Format = 11 */
- maxLitSize = MIN(maxLitSize, 1048575);
- }
-
- litSize = RAND(seed) % (maxLitSize + 1);
- if (frame->src == frame->srcStart && litSize == 0) {
- litSize = 1; /* no empty literals if there's nothing preceding this block */
- }
- if (litSize + 3 > contentSize) {
- litSize = contentSize; /* no matches shorter than 3 are allowed */
- }
- /* use smallest size format that fits */
- if (litSize < 32) {
- op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
- op += 1;
- } else if (litSize < 4096) {
- op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
- op[1] = (litSize >> 4) & 0xff;
- op += 2;
- } else {
- op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
- op[1] = (litSize >> 4) & 0xff;
- op[2] = (litSize >> 12) & 0xff;
- op += 3;
- }
-
- if (type == 0) {
- /* Raw literals */
- DISPLAYLEVEL(4, " raw literals\n");
-
- RAND_buffer(seed, LITERAL_BUFFER, litSize);
- memcpy(op, LITERAL_BUFFER, litSize);
- op += litSize;
- } else {
- /* RLE literals */
- BYTE const symb = (BYTE) (RAND(seed) % 256);
-
- DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (unsigned)symb);
-
- memset(LITERAL_BUFFER, symb, litSize);
- op[0] = symb;
- op++;
- }
-
- frame->data = op;
-
- return litSize;
-}
-
-/* Generate a Huffman header for the given source */
-static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
- const void* src, size_t srcSize)
-{
- BYTE* const ostart = (BYTE*)dst;
- BYTE* op = ostart;
-
- unsigned huffLog = 11;
- unsigned maxSymbolValue = 255;
-
- unsigned count[HUF_SYMBOLVALUE_MAX+1];
-
- /* Scan input and build symbol stats */
- { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP));
- assert(!HIST_isError(largest));
- if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */
- if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
- }
-
- /* Build Huffman Tree */
- /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
- huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
- DISPLAYLEVEL(6, " huffman log: %u\n", huffLog);
- { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
- CHECKERR(maxBits);
- huffLog = (U32)maxBits;
- }
-
- /* Write table description header */
- { size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog);
- if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */
- op += hSize;
- }
-
- return op - ostart;
-}
-
-/* Write a Huffman coded literals block and return the literals size */
-static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
-{
- BYTE* origop = (BYTE*)frame->data;
- BYTE* opend = (BYTE*)frame->dataEnd;
- BYTE* op;
- BYTE* const ostart = origop;
- int const sizeFormat = RAND(seed) % 4;
- size_t litSize;
- size_t hufHeaderSize = 0;
- size_t compressedSize = 0;
- size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
-
- symbolEncodingType_e hType;
-
- if (contentSize < 64) {
- /* make sure we get reasonably-sized literals for compression */
- return ERROR(GENERIC);
- }
-
- DISPLAYLEVEL(4, " compressed literals\n");
-
- switch (sizeFormat) {
- case 0: /* fall through, size is the same as case 1 */
- case 1:
- maxLitSize = MIN(maxLitSize, 1023);
- origop += 3;
- break;
- case 2:
- maxLitSize = MIN(maxLitSize, 16383);
- origop += 4;
- break;
- case 3:
- maxLitSize = MIN(maxLitSize, 262143);
- origop += 5;
- break;
- default:; /* impossible */
- }
-
- do {
- op = origop;
- do {
- litSize = RAND(seed) % (maxLitSize + 1);
- } while (litSize < 32); /* avoid small literal sizes */
- if (litSize + 3 > contentSize) {
- litSize = contentSize; /* no matches shorter than 3 are allowed */
- }
-
- /* most of the time generate a new distribution */
- if ((RAND(seed) & 3) || !frame->stats.hufInit) {
- do {
- if (RAND(seed) & 3) {
- /* add 10 to ensure some compressibility */
- double const weight = ((RAND(seed) % 90) + 10) / 100.0;
-
- DISPLAYLEVEL(5, " distribution weight: %d%%\n",
- (int)(weight * 100));
-
- RAND_genDist(seed, frame->stats.hufDist, weight);
- } else {
- /* sometimes do restricted range literals to force
- * non-huffman headers */
- DISPLAYLEVEL(5, " small range literals\n");
- RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
- 15);
- }
- RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
- litSize);
-
- /* generate the header from the distribution instead of the
- * actual data to avoid bugs with symbols that were in the
- * distribution but never showed up in the output */
- hufHeaderSize = writeHufHeader(
- seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op,
- frame->stats.hufDist, DISTSIZE);
- CHECKERR(hufHeaderSize);
- /* repeat until a valid header is written */
- } while (hufHeaderSize == 0);
- op += hufHeaderSize;
- hType = set_compressed;
-
- frame->stats.hufInit = 1;
- } else {
- /* repeat the distribution/table from last time */
- DISPLAYLEVEL(5, " huffman repeat stats\n");
- RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
- litSize);
- hufHeaderSize = 0;
- hType = set_repeat;
- }
-
- do {
- compressedSize =
- sizeFormat == 0
- ? HUF_compress1X_usingCTable(
- op, opend - op, LITERAL_BUFFER, litSize,
- (HUF_CElt*)frame->stats.hufTable)
- : HUF_compress4X_usingCTable(
- op, opend - op, LITERAL_BUFFER, litSize,
- (HUF_CElt*)frame->stats.hufTable);
- CHECKERR(compressedSize);
- /* this only occurs when it could not compress or similar */
- } while (compressedSize <= 0);
-
- op += compressedSize;
-
- compressedSize += hufHeaderSize;
- DISPLAYLEVEL(5, " regenerated size: %u\n", (unsigned)litSize);
- DISPLAYLEVEL(5, " compressed size: %u\n", (unsigned)compressedSize);
- if (compressedSize >= litSize) {
- DISPLAYLEVEL(5, " trying again\n");
- /* if we have to try again, reset the stats so we don't accidentally
- * try to repeat a distribution we just made */
- frame->stats = frame->oldStats;
- } else {
- break;
- }
- } while (1);
-
- /* write header */
- switch (sizeFormat) {
- case 0: /* fall through, size is the same as case 1 */
- case 1: {
- U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
- ((U32)compressedSize << 14);
- MEM_writeLE24(ostart, header);
- break;
- }
- case 2: {
- U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
- ((U32)compressedSize << 18);
- MEM_writeLE32(ostart, header);
- break;
- }
- case 3: {
- U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
- ((U32)compressedSize << 22);
- MEM_writeLE32(ostart, header);
- ostart[4] = (BYTE)(compressedSize >> 10);
- break;
- }
- default:; /* impossible */
- }
-
- frame->data = op;
- return litSize;
-}
-
-static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
-{
- /* only do compressed for larger segments to avoid compressibility issues */
- if (RAND(seed) & 7 && contentSize >= 64) {
- return writeLiteralsBlockCompressed(seed, frame, contentSize);
- } else {
- return writeLiteralsBlockSimple(seed, frame, contentSize);
- }
-}
-
-static inline void initSeqStore(seqStore_t *seqStore) {
- seqStore->maxNbSeq = MAX_NB_SEQ;
- seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX;
- seqStore->sequencesStart = SEQUENCE_BUFFER;
- seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
- seqStore->llCode = SEQUENCE_LLCODE;
- seqStore->mlCode = SEQUENCE_MLCODE;
- seqStore->ofCode = SEQUENCE_OFCODE;
-
- ZSTD_resetSeqStore(seqStore);
-}
-
-/* Randomly generate sequence commands */
-static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
- size_t contentSize, size_t literalsSize, dictInfo info)
-{
- /* The total length of all the matches */
- size_t const remainingMatch = contentSize - literalsSize;
- size_t excessMatch = 0;
- U32 numSequences = 0;
-
- U32 i;
-
-
- const BYTE* literals = LITERAL_BUFFER;
- BYTE* srcPtr = frame->src;
-
- if (literalsSize != contentSize) {
- /* each match must be at least MIN_SEQ_LEN, so this is the maximum
- * number of sequences we can have */
- U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
- numSequences = (RAND(seed) % maxSequences) + 1;
-
- /* the extra match lengths we have to allocate to each sequence */
- excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
- }
-
- DISPLAYLEVEL(5, " total match lengths: %u\n", (unsigned)remainingMatch);
- for (i = 0; i < numSequences; i++) {
- /* Generate match and literal lengths by exponential distribution to
- * ensure nice numbers */
- U32 matchLen =
- MIN_SEQ_LEN +
- ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i)));
- U32 literalLen =
- (RAND(seed) & 7)
- ? ROUND(RAND_exp(seed,
- literalsSize /
- (double)(numSequences - i)))
- : 0;
- /* actual offset, code to send, and point to copy up to when shifting
- * codes in the repeat offsets history */
- U32 offset, offsetCode, repIndex;
-
- /* bounds checks */
- matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
- literalLen = MIN(literalLen, (U32) literalsSize);
- if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
- if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
-
- memcpy(srcPtr, literals, literalLen);
- srcPtr += literalLen;
- do {
- if (RAND(seed) & 7) {
- /* do a normal offset */
- U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
- offset = (RAND(seed) %
- MIN(frame->header.windowSize,
- (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
- 1;
- if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
- /* need to occasionally generate offsets that go past the start */
- /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
- U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
- offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
- if (offset > frame->header.windowSize) {
- if (lenPastStart < MIN_SEQ_LEN) {
- /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
- /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
- /* make sure lenPastStart does not go past dictionary start though */
- lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
- offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
- }
- {
- U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
- matchLen = MIN(matchLen, matchLenBound);
- }
- }
- }
- offsetCode = offset + ZSTD_REP_MOVE;
- repIndex = 2;
- } else {
- /* do a repeat offset */
- offsetCode = RAND(seed) % 3;
- if (literalLen > 0) {
- offset = frame->stats.rep[offsetCode];
- repIndex = offsetCode;
- } else {
- /* special case */
- offset = offsetCode == 2 ? frame->stats.rep[0] - 1
- : frame->stats.rep[offsetCode + 1];
- repIndex = MIN(2, offsetCode + 1);
- }
- }
- } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
-
- {
- size_t j;
- BYTE* const dictEnd = info.dictContent + info.dictContentSize;
- for (j = 0; j < matchLen; j++) {
- if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
- /* copy from dictionary instead of literals */
- size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
- *srcPtr = *(dictEnd - dictOffset);
- }
- else {
- *srcPtr = *(srcPtr-offset);
- }
- srcPtr++;
- }
- }
-
- { int r;
- for (r = repIndex; r > 0; r--) {
- frame->stats.rep[r] = frame->stats.rep[r - 1];
- }
- frame->stats.rep[0] = offset;
- }
-
- DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u",
- (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen);
- DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
- (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
- DISPLAYLEVEL(6, "\n");
- if (offsetCode < 3) {
- DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex);
- }
- /* use libzstd sequence handling */
- ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
- offsetCode, matchLen - MINMATCH);
-
- literalsSize -= literalLen;
- excessMatch -= (matchLen - MIN_SEQ_LEN);
- literals += literalLen;
- }
-
- memcpy(srcPtr, literals, literalsSize);
- srcPtr += literalsSize;
- DISPLAYLEVEL(6, " excess literals: %5u", (unsigned)literalsSize);
- DISPLAYLEVEL(7, " srcPos: %8u", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
- DISPLAYLEVEL(6, "\n");
-
- return numSequences;
-}
-
-static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
-{
- size_t i;
-
- memset(set, 0, (size_t)maxSymbolValue+1);
-
- for (i = 0; i < len; i++) {
- set[symbols[i]] = 1;
- }
-}
-
-static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (symbols[i] > maxSymbolValue || !set[symbols[i]]) {
- return 0;
- }
- }
- return 1;
-}
-
-static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
- size_t nbSeq)
-{
- /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
- unsigned count[MaxSeq+1];
- S16 norm[MaxSeq+1];
- FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
- FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
- FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
- U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
- const seqDef* const sequences = seqStorePtr->sequencesStart;
- const BYTE* const ofCodeTable = seqStorePtr->ofCode;
- const BYTE* const llCodeTable = seqStorePtr->llCode;
- const BYTE* const mlCodeTable = seqStorePtr->mlCode;
- BYTE* const oend = (BYTE*)frame->dataEnd;
- BYTE* op = (BYTE*)frame->data;
- BYTE* seqHead;
- BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
-
- /* literals compressing block removed so that can be done separately */
-
- /* Sequences Header */
- if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
- if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
- else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
- else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
-
- if (nbSeq==0) {
- frame->data = op;
- return 0;
- }
-
- /* seqHead : flags for FSE encoding type */
- seqHead = op++;
-
- /* convert length/distances into codes */
- ZSTD_seqToCodes(seqStorePtr);
-
- /* CTable for Literal Lengths */
- { unsigned max = MaxLL;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
- assert(!HIST_isError(mostFrequent));
- if (frame->stats.fseInit && !(RAND(seed) & 3) &&
- isSymbolSubset(llCodeTable, nbSeq,
- frame->stats.litlengthSymbolSet, 35)) {
- /* maybe do repeat mode if we're allowed to */
- LLtype = set_repeat;
- } else if (mostFrequent == nbSeq) {
- /* do RLE if we have the chance */
- *op++ = llCodeTable[0];
- FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
- LLtype = set_rle;
- } else if (!(RAND(seed) & 3)) {
- /* maybe use the default distribution */
- FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
- LLtype = set_basic;
- } else {
- /* fall back on a full table */
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
- if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
- FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
- { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
- op += NCountSize; }
- FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
- LLtype = set_compressed;
- } }
-
- /* CTable for Offsets */
- /* see Literal Lengths for descriptions of mode choices */
- { unsigned max = MaxOff;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
- assert(!HIST_isError(mostFrequent));
- if (frame->stats.fseInit && !(RAND(seed) & 3) &&
- isSymbolSubset(ofCodeTable, nbSeq,
- frame->stats.offsetSymbolSet, 28)) {
- Offtype = set_repeat;
- } else if (mostFrequent == nbSeq) {
- *op++ = ofCodeTable[0];
- FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
- Offtype = set_rle;
- } else if (!(RAND(seed) & 3)) {
- FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
- Offtype = set_basic;
- } else {
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
- if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
- FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
- { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
- op += NCountSize; }
- FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
- Offtype = set_compressed;
- } }
-
- /* CTable for MatchLengths */
- /* see Literal Lengths for descriptions of mode choices */
- { unsigned max = MaxML;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
- assert(!HIST_isError(mostFrequent));
- if (frame->stats.fseInit && !(RAND(seed) & 3) &&
- isSymbolSubset(mlCodeTable, nbSeq,
- frame->stats.matchlengthSymbolSet, 52)) {
- MLtype = set_repeat;
- } else if (mostFrequent == nbSeq) {
- *op++ = *mlCodeTable;
- FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
- MLtype = set_rle;
- } else if (!(RAND(seed) & 3)) {
- /* sometimes do default distribution */
- FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
- MLtype = set_basic;
- } else {
- /* fall back on table */
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
- if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
- FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
- { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
- op += NCountSize; }
- FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
- MLtype = set_compressed;
- } }
- frame->stats.fseInit = 1;
- initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
- initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
- initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
-
- DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype);
-
- *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-
- /* Encoding Sequences */
- { BIT_CStream_t blockStream;
- FSE_CState_t stateMatchLength;
- FSE_CState_t stateOffsetBits;
- FSE_CState_t stateLitLength;
-
- RETURN_ERROR_IF(
- ERR_isError(BIT_initCStream(&blockStream, op, oend-op)),
- dstSize_tooSmall, "not enough space remaining");
-
- /* first symbols */
- FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
- FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
- FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
- BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
- if (MEM_32bits()) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
- if (MEM_32bits()) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
- BIT_flushBits(&blockStream);
-
- { size_t n;
- for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
- BYTE const llCode = llCodeTable[n];
- BYTE const ofCode = ofCodeTable[n];
- BYTE const mlCode = mlCodeTable[n];
- U32 const llBits = LL_bits[llCode];
- U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
- U32 const mlBits = ML_bits[mlCode];
- /* (7)*/ /* (7)*/
- FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
- FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
- if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
- FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
- if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_flushBits(&blockStream); /* (7)*/
- BIT_addBits(&blockStream, sequences[n].litLength, llBits);
- if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
- if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
- BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
- BIT_flushBits(&blockStream); /* (7)*/
- } }
-
- FSE_flushCState(&blockStream, &stateMatchLength);
- FSE_flushCState(&blockStream, &stateOffsetBits);
- FSE_flushCState(&blockStream, &stateLitLength);
-
- { size_t const streamSize = BIT_closeCStream(&blockStream);
- if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
- op += streamSize;
- } }
-
- frame->data = op;
-
- return 0;
-}
-
-static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
- size_t literalsSize, dictInfo info)
-{
- seqStore_t seqStore;
- size_t numSequences;
-
-
- initSeqStore(&seqStore);
-
- /* randomly generate sequences */
- numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
- /* write them out to the frame data */
- CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
-
- return numSequences;
-}
-
-static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
-{
- BYTE* const blockStart = (BYTE*)frame->data;
- size_t literalsSize;
- size_t nbSeq;
-
- DISPLAYLEVEL(4, " compressed block:\n");
-
- literalsSize = writeLiteralsBlock(seed, frame, contentSize);
-
- DISPLAYLEVEL(4, " literals size: %u\n", (unsigned)literalsSize);
-
- nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
-
- DISPLAYLEVEL(4, " number of sequences: %u\n", (unsigned)nbSeq);
-
- return (BYTE*)frame->data - blockStart;
-}
-
-static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
- int lastBlock, dictInfo info)
-{
- int const blockTypeDesc = RAND(seed) % 8;
- size_t blockSize;
- int blockType;
-
- BYTE *const header = (BYTE*)frame->data;
- BYTE *op = header + 3;
-
- DISPLAYLEVEL(4, " block:\n");
- DISPLAYLEVEL(4, " block content size: %u\n", (unsigned)contentSize);
- DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no");
-
- if (blockTypeDesc == 0) {
- /* Raw data frame */
-
- RAND_buffer(seed, frame->src, contentSize);
- memcpy(op, frame->src, contentSize);
-
- op += contentSize;
- blockType = 0;
- blockSize = contentSize;
- } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) {
- /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/
- BYTE const symbol = RAND(seed) & 0xff;
-
- op[0] = symbol;
- memset(frame->src, symbol, contentSize);
-
- op++;
- blockType = 1;
- blockSize = contentSize;
- } else {
- /* compressed, most common */
- size_t compressedSize;
- blockType = 2;
-
- frame->oldStats = frame->stats;
-
- frame->data = op;
- compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
- if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */
- blockType = 0;
- memcpy(op, frame->src, contentSize);
-
- op += contentSize;
- blockSize = contentSize; /* fall back on raw block if data doesn't
- compress */
-
- frame->stats = frame->oldStats; /* don't update the stats */
- } else {
- op += compressedSize;
- blockSize = compressedSize;
- }
- }
- frame->src = (BYTE*)frame->src + contentSize;
-
- DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]);
- DISPLAYLEVEL(4, " block size field: %u\n", (unsigned)blockSize);
-
- header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
- MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
-
- frame->data = op;
-}
-
-static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
-{
- size_t contentLeft = frame->header.contentSize;
- size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
- while (1) {
- /* 1 in 4 chance of ending frame */
- int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
- size_t blockContentSize;
- if (lastBlock) {
- blockContentSize = contentLeft;
- } else {
- if (contentLeft > 0 && (RAND(seed) & 7)) {
- /* some variable size block */
- blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
- } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
- /* some full size block */
- blockContentSize = maxBlockSize;
- } else {
- /* some empty block */
- blockContentSize = 0;
- }
- }
-
- writeBlock(seed, frame, blockContentSize, lastBlock, info);
-
- contentLeft -= blockContentSize;
- if (lastBlock) break;
- }
-}
-
-static void writeChecksum(frame_t* frame)
-{
- /* write checksum so implementations can verify their output */
- U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
- DISPLAYLEVEL(3, " checksum: %08x\n", (unsigned)digest);
- MEM_writeLE32(frame->data, (U32)digest);
- frame->data = (BYTE*)frame->data + 4;
-}
-
-static void outputBuffer(const void* buf, size_t size, const char* const path)
-{
- /* write data out to file */
- const BYTE* ip = (const BYTE*)buf;
- FILE* out;
- if (path) {
- out = fopen(path, "wb");
- } else {
- out = stdout;
- }
- if (!out) {
- fprintf(stderr, "Failed to open file at %s: ", path);
- perror(NULL);
- exit(1);
- }
-
- { size_t fsize = size;
- size_t written = 0;
- while (written < fsize) {
- written += fwrite(ip + written, 1, fsize - written, out);
- if (ferror(out)) {
- fprintf(stderr, "Failed to write to file at %s: ", path);
- perror(NULL);
- exit(1);
- }
- }
- }
-
- if (path) {
- fclose(out);
- }
-}
-
-static void initFrame(frame_t* fr)
-{
- memset(fr, 0, sizeof(*fr));
- fr->data = fr->dataStart = FRAME_BUFFER;
- fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
- fr->src = fr->srcStart = CONTENT_BUFFER;
- fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
-
- /* init repeat codes */
- fr->stats.rep[0] = 1;
- fr->stats.rep[1] = 4;
- fr->stats.rep[2] = 8;
-}
-
-/**
- * Generated a single zstd compressed block with no block/frame header.
- * Returns the final seed.
- */
-static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
-{
- size_t blockContentSize;
- int blockWritten = 0;
- BYTE* op;
- DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed);
- initFrame(frame);
- op = (BYTE*)frame->data;
-
- while (!blockWritten) {
- size_t cSize;
- /* generate window size */
- { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
- int const mantissa = RAND(&seed) % 8;
- frame->header.windowSize = (1U << (exponent + 10));
- frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
- }
-
- /* generate content size */
- { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
- if (RAND(&seed) & 15) {
- /* some full size blocks */
- blockContentSize = maxBlockSize;
- } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
- /* some small blocks <= 128 bytes*/
- blockContentSize = RAND(&seed) % (1U << 7);
- } else {
- /* some variable size blocks */
- blockContentSize = RAND(&seed) % maxBlockSize;
- }
- }
-
- /* try generating a compressed block */
- frame->oldStats = frame->stats;
- frame->data = op;
- cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
- if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
- /* data doesn't compress -- try again */
- frame->stats = frame->oldStats; /* don't update the stats */
- DISPLAYLEVEL(5, " can't compress block : try again \n");
- } else {
- blockWritten = 1;
- DISPLAYLEVEL(4, " block size: %u \n", (unsigned)cSize);
- frame->src = (BYTE*)frame->src + blockContentSize;
- }
- }
- return seed;
-}
-
-/* Return the final seed */
-static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
-{
- /* generate a complete frame */
- DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed);
- initFrame(fr);
-
- writeFrameHeader(&seed, fr, info);
- writeBlocks(&seed, fr, info);
- writeChecksum(fr);
-
- return seed;
-}
-
-/*_*******************************************************
-* Dictionary Helper Functions
-*********************************************************/
-/* returns 0 if successful, otherwise returns 1 upon error */
-static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
-{
- /* allocate space for samples */
- int ret = 0;
- unsigned const numSamples = 4;
- size_t sampleSizes[4];
- BYTE* const samples = malloc(5000*sizeof(BYTE));
- if (samples == NULL) {
- DISPLAY("Error: could not allocate space for samples\n");
- return 1;
- }
-
- /* generate samples */
- { unsigned literalValue = 1;
- unsigned samplesPos = 0;
- size_t currSize = 1;
- while (literalValue <= 4) {
- sampleSizes[literalValue - 1] = currSize;
- { size_t k;
- for (k = 0; k < currSize; k++) {
- *(samples + (samplesPos++)) = (BYTE)literalValue;
- } }
- literalValue++;
- currSize *= 16;
- } }
-
- { size_t dictWriteSize = 0;
- ZDICT_params_t zdictParams;
- size_t const headerSize = MAX(dictSize/4, 256);
- size_t const dictContentSize = dictSize - headerSize;
- BYTE* const dictContent = fullDict + headerSize;
- if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
- DISPLAY("Error: dictionary size is too small\n");
- ret = 1;
- goto exitGenRandomDict;
- }
-
- /* init dictionary params */
- memset(&zdictParams, 0, sizeof(zdictParams));
- zdictParams.dictID = dictID;
- zdictParams.notificationLevel = 1;
-
- /* fill in dictionary content */
- RAND_buffer(&seed, (void*)dictContent, dictContentSize);
-
- /* finalize dictionary with random samples */
- dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
- dictContent, dictContentSize,
- samples, sampleSizes, numSamples,
- zdictParams);
-
- if (ZDICT_isError(dictWriteSize)) {
- DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
- ret = 1;
- }
- }
-
-exitGenRandomDict:
- free(samples);
- return ret;
-}
-
-static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
- /* allocate space statically */
- dictInfo dictOp;
- memset(&dictOp, 0, sizeof(dictOp));
- dictOp.useDict = useDict;
- dictOp.dictContentSize = dictContentSize;
- dictOp.dictContent = dictContent;
- dictOp.dictID = dictID;
- return dictOp;
-}
-
-/*-*******************************************************
-* Test Mode
-*********************************************************/
-
-BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
-
-static size_t testDecodeSimple(frame_t* fr)
-{
- /* test decoding the generated data with the simple API */
- size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
- fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
-
- if (ZSTD_isError(ret)) return ret;
-
- if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
- (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
- return ERROR(corruption_detected);
- }
-
- return ret;
-}
-
-static size_t testDecodeStreaming(frame_t* fr)
-{
- /* test decoding the generated data with the streaming API */
- ZSTD_DStream* zd = ZSTD_createDStream();
- ZSTD_inBuffer in;
- ZSTD_outBuffer out;
- size_t ret;
-
- if (!zd) return ERROR(memory_allocation);
-
- in.src = fr->dataStart;
- in.pos = 0;
- in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
-
- out.dst = DECOMPRESSED_BUFFER;
- out.pos = 0;
- out.size = ZSTD_DStreamOutSize();
-
- ZSTD_initDStream(zd);
- while (1) {
- ret = ZSTD_decompressStream(zd, &out, &in);
- if (ZSTD_isError(ret)) goto cleanup; /* error */
- if (ret == 0) break; /* frame is done */
-
- /* force decoding to be done in chunks */
- out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
- }
-
- ret = out.pos;
-
- if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
- return ERROR(corruption_detected);
- }
-
-cleanup:
- ZSTD_freeDStream(zd);
- return ret;
-}
-
-static size_t testDecodeWithDict(U32 seed, genType_e genType)
-{
- /* create variables */
- size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
- U32 const dictID = RAND(&seed);
- size_t errorDetected = 0;
- BYTE* const fullDict = malloc(dictSize);
- if (fullDict == NULL) {
- return ERROR(GENERIC);
- }
-
- /* generate random dictionary */
- if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */
- errorDetected = ERROR(GENERIC);
- goto dictTestCleanup;
- }
-
-
- { frame_t fr;
- dictInfo info;
- ZSTD_DCtx* const dctx = ZSTD_createDCtx();
- size_t ret;
-
- /* get dict info */
- { size_t const headerSize = MAX(dictSize/4, 256);
- size_t const dictContentSize = dictSize-headerSize;
- BYTE* const dictContent = fullDict+headerSize;
- info = initDictInfo(1, dictContentSize, dictContent, dictID);
- }
-
- /* manually decompress and check difference */
- if (genType == gt_frame) {
- /* Test frame */
- generateFrame(seed, &fr, info);
- ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
- fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
- fullDict, dictSize);
- } else {
- /* Test block */
- generateCompressedBlock(seed, &fr, info);
- ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
- if (ZSTD_isError(ret)) {
- errorDetected = ret;
- ZSTD_freeDCtx(dctx);
- goto dictTestCleanup;
- }
- ret = ZSTD_decompressBlock(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
- fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
- }
- ZSTD_freeDCtx(dctx);
-
- if (ZSTD_isError(ret)) {
- errorDetected = ret;
- goto dictTestCleanup;
- }
-
- if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
- errorDetected = ERROR(corruption_detected);
- goto dictTestCleanup;
- }
- }
-
-dictTestCleanup:
- free(fullDict);
- return errorDetected;
-}
-
-static size_t testDecodeRawBlock(frame_t* fr)
-{
- ZSTD_DCtx* dctx = ZSTD_createDCtx();
- size_t ret = ZSTD_decompressBegin(dctx);
- if (ZSTD_isError(ret)) return ret;
-
- ret = ZSTD_decompressBlock(
- dctx,
- DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
- fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
- ZSTD_freeDCtx(dctx);
- if (ZSTD_isError(ret)) return ret;
-
- if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
- (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
- return ERROR(corruption_detected);
- }
-
- return ret;
-}
-
-static int runBlockTest(U32* seed)
-{
- frame_t fr;
- U32 const seedCopy = *seed;
- { dictInfo const info = initDictInfo(0, 0, NULL, 0);
- *seed = generateCompressedBlock(*seed, &fr, info);
- }
-
- { size_t const r = testDecodeRawBlock(&fr);
- if (ZSTD_isError(r)) {
- DISPLAY("Error in block mode on test seed %u: %s\n",
- (unsigned)seedCopy, ZSTD_getErrorName(r));
- return 1;
- }
- }
-
- { size_t const r = testDecodeWithDict(*seed, gt_block);
- if (ZSTD_isError(r)) {
- DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
- (unsigned)seedCopy, ZSTD_getErrorName(r));
- return 1;
- }
- }
- return 0;
-}
-
-static int runFrameTest(U32* seed)
-{
- frame_t fr;
- U32 const seedCopy = *seed;
- { dictInfo const info = initDictInfo(0, 0, NULL, 0);
- *seed = generateFrame(*seed, &fr, info);
- }
-
- { size_t const r = testDecodeSimple(&fr);
- if (ZSTD_isError(r)) {
- DISPLAY("Error in simple mode on test seed %u: %s\n",
- (unsigned)seedCopy, ZSTD_getErrorName(r));
- return 1;
- }
- }
- { size_t const r = testDecodeStreaming(&fr);
- if (ZSTD_isError(r)) {
- DISPLAY("Error in streaming mode on test seed %u: %s\n",
- (unsigned)seedCopy, ZSTD_getErrorName(r));
- return 1;
- }
- }
- { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */
- if (ZSTD_isError(r)) {
- DISPLAY("Error in dictionary mode on test seed %u: %s\n",
- (unsigned)seedCopy, ZSTD_getErrorName(r));
- return 1;
- }
- }
- return 0;
-}
-
-static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
- genType_e genType)
-{
- unsigned fnum;
-
- UTIL_time_t const startClock = UTIL_getTime();
- U64 const maxClockSpan = testDurationS * SEC_TO_MICRO;
-
- if (numFiles == 0 && !testDurationS) numFiles = 1;
-
- DISPLAY("seed: %u\n", (unsigned)seed);
-
- for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) {
- if (fnum < numFiles)
- DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
- else
- DISPLAYUPDATE("\r%u ", fnum);
-
- { int const ret = (genType == gt_frame) ?
- runFrameTest(&seed) :
- runBlockTest(&seed);
- if (ret) return ret;
- }
- }
-
- DISPLAY("\r%u tests completed: ", fnum);
- DISPLAY("OK\n");
-
- return 0;
-}
-
-/*-*******************************************************
-* File I/O
-*********************************************************/
-
-static int generateFile(U32 seed, const char* const path,
- const char* const origPath, genType_e genType)
-{
- frame_t fr;
-
- DISPLAY("seed: %u\n", (unsigned)seed);
-
- { dictInfo const info = initDictInfo(0, 0, NULL, 0);
- if (genType == gt_frame) {
- generateFrame(seed, &fr, info);
- } else {
- generateCompressedBlock(seed, &fr, info);
- }
- }
- outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
- if (origPath) {
- outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
- }
- return 0;
-}
-
-static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
- const char* const origPath, genType_e genType)
-{
- char outPath[MAX_PATH];
- unsigned fnum;
-
- DISPLAY("seed: %u\n", (unsigned)seed);
-
- for (fnum = 0; fnum < numFiles; fnum++) {
- frame_t fr;
-
- DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
-
- { dictInfo const info = initDictInfo(0, 0, NULL, 0);
- if (genType == gt_frame) {
- seed = generateFrame(seed, &fr, info);
- } else {
- seed = generateCompressedBlock(seed, &fr, info);
- }
- }
-
- if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
- DISPLAY("Error: path too long\n");
- return 1;
- }
- outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
-
- if (origPath) {
- if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
- DISPLAY("Error: path too long\n");
- return 1;
- }
- outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
- }
- }
-
- DISPLAY("\r%u/%u \n", fnum, numFiles);
-
- return 0;
-}
-
-static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
- const char* const origPath, const size_t dictSize,
- genType_e genType)
-{
- char outPath[MAX_PATH];
- BYTE* fullDict;
- U32 const dictID = RAND(&seed);
- int errorDetected = 0;
-
- if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
- DISPLAY("Error: path too long\n");
- return 1;
- }
-
- /* allocate space for the dictionary */
- fullDict = malloc(dictSize);
- if (fullDict == NULL) {
- DISPLAY("Error: could not allocate space for full dictionary.\n");
- return 1;
- }
-
- /* randomly generate the dictionary */
- { int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
- if (ret != 0) {
- errorDetected = ret;
- goto dictCleanup;
- }
- }
-
- /* write out dictionary */
- if (numFiles != 0) {
- if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
- DISPLAY("Error: dictionary path too long\n");
- errorDetected = 1;
- goto dictCleanup;
- }
- outputBuffer(fullDict, dictSize, outPath);
- }
- else {
- outputBuffer(fullDict, dictSize, "dictionary");
- }
-
- /* generate random compressed/decompressed files */
- { unsigned fnum;
- for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
- frame_t fr;
- DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
- {
- size_t const headerSize = MAX(dictSize/4, 256);
- size_t const dictContentSize = dictSize-headerSize;
- BYTE* const dictContent = fullDict+headerSize;
- dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
- if (genType == gt_frame) {
- seed = generateFrame(seed, &fr, info);
- } else {
- seed = generateCompressedBlock(seed, &fr, info);
- }
- }
-
- if (numFiles != 0) {
- if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
- DISPLAY("Error: path too long\n");
- errorDetected = 1;
- goto dictCleanup;
- }
- outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
-
- if (origPath) {
- if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
- DISPLAY("Error: path too long\n");
- errorDetected = 1;
- goto dictCleanup;
- }
- outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
- }
- }
- else {
- outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
- if (origPath) {
- outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
- }
- }
- }
- }
-
-dictCleanup:
- free(fullDict);
- return errorDetected;
-}
-
-
-/*_*******************************************************
-* Command line
-*********************************************************/
-static U32 makeSeed(void)
-{
- U32 t = (U32) time(NULL);
- return XXH32(&t, sizeof(t), 0) % 65536;
-}
-
-static unsigned readInt(const char** argument)
-{
- unsigned val = 0;
- while ((**argument>='0') && (**argument<='9')) {
- val *= 10;
- val += **argument - '0';
- (*argument)++;
- }
- return val;
-}
-
-static void usage(const char* programName)
-{
- DISPLAY( "Usage :\n");
- DISPLAY( " %s [args]\n", programName);
- DISPLAY( "\n");
- DISPLAY( "Arguments :\n");
- DISPLAY( " -p<path> : select output path (default:stdout)\n");
- DISPLAY( " in multiple files mode this should be a directory\n");
- DISPLAY( " -o<path> : select path to output original file (default:no output)\n");
- DISPLAY( " in multiple files mode this should be a directory\n");
- DISPLAY( " -s# : select seed (default:random based on time)\n");
- DISPLAY( " -n# : number of files to generate (default:1)\n");
- DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n");
- DISPLAY( " -T# : length of time to run tests for\n");
- DISPLAY( " -v : increase verbosity level (default:0, max:7)\n");
- DISPLAY( " -h/H : display help/long help and exit\n");
-}
-
-static void advancedUsage(const char* programName)
-{
- usage(programName);
- DISPLAY( "\n");
- DISPLAY( "Advanced arguments :\n");
- DISPLAY( " --content-size : always include the content size in the frame header\n");
- DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n");
- DISPLAY( " --gen-blocks : generate raw compressed blocks without block/frame headers\n");
- DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n");
- DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n");
- DISPLAY( " (this is ignored with gen-blocks)\n");
-}
-
-/*! readU32FromChar() :
- @return : unsigned integer value read from input in `char` format
- allows and interprets K, KB, KiB, M, MB and MiB suffix.
- Will also modify `*stringPtr`, advancing it to position where it stopped reading.
- Note : function result can overflow if digit string > MAX_UINT */
-static unsigned readU32FromChar(const char** stringPtr)
-{
- unsigned result = 0;
- while ((**stringPtr >='0') && (**stringPtr <='9'))
- result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
- if ((**stringPtr=='K') || (**stringPtr=='M')) {
- result <<= 10;
- if (**stringPtr=='M') result <<= 10;
- (*stringPtr)++ ;
- if (**stringPtr=='i') (*stringPtr)++;
- if (**stringPtr=='B') (*stringPtr)++;
- }
- return result;
-}
-
-/** longCommandWArg() :
- * check if *stringPtr is the same as longCommand.
- * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- * @return 0 and doesn't modify *stringPtr otherwise.
- */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
-{
- size_t const comSize = strlen(longCommand);
- int const result = !strncmp(*stringPtr, longCommand, comSize);
- if (result) *stringPtr += comSize;
- return result;
-}
-
-int main(int argc, char** argv)
-{
- U32 seed = 0;
- int seedset = 0;
- unsigned numFiles = 0;
- unsigned testDuration = 0;
- int testMode = 0;
- const char* path = NULL;
- const char* origPath = NULL;
- int useDict = 0;
- unsigned dictSize = (10 << 10); /* 10 kB default */
- genType_e genType = gt_frame;
-
- int argNb;
-
- /* Check command line */
- for (argNb=1; argNb<argc; argNb++) {
- const char* argument = argv[argNb];
- if(!argument) continue; /* Protection if argument empty */
-
- /* Handle commands. Aggregated commands are allowed */
- if (argument[0]=='-') {
- argument++;
- while (*argument!=0) {
- switch(*argument)
- {
- case 'h':
- usage(argv[0]);
- return 0;
- case 'H':
- advancedUsage(argv[0]);
- return 0;
- case 'v':
- argument++;
- g_displayLevel++;
- break;
- case 's':
- argument++;
- seedset=1;
- seed = readInt(&argument);
- break;
- case 'n':
- argument++;
- numFiles = readInt(&argument);
- break;
- case 'T':
- argument++;
- testDuration = readInt(&argument);
- if (*argument == 'm') {
- testDuration *= 60;
- argument++;
- if (*argument == 'n') argument++;
- }
- break;
- case 'o':
- argument++;
- origPath = argument;
- argument += strlen(argument);
- break;
- case 'p':
- argument++;
- path = argument;
- argument += strlen(argument);
- break;
- case 't':
- argument++;
- testMode = 1;
- break;
- case '-':
- argument++;
- if (strcmp(argument, "content-size") == 0) {
- opts.contentSize = 1;
- } else if (longCommandWArg(&argument, "use-dict=")) {
- dictSize = readU32FromChar(&argument);
- useDict = 1;
- } else if (strcmp(argument, "gen-blocks") == 0) {
- genType = gt_block;
- } else if (longCommandWArg(&argument, "max-block-size-log=")) {
- U32 value = readU32FromChar(&argument);
- if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) {
- g_maxBlockSize = 1U << value;
- }
- } else if (longCommandWArg(&argument, "max-content-size-log=")) {
- U32 value = readU32FromChar(&argument);
- g_maxDecompressedSizeLog =
- MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
- } else {
- advancedUsage(argv[0]);
- return 1;
- }
- argument += strlen(argument);
- break;
- default:
- usage(argv[0]);
- return 1;
- } } } } /* for (argNb=1; argNb<argc; argNb++) */
-
- if (!seedset) {
- seed = makeSeed();
- }
-
- if (testMode) {
- return runTestMode(seed, numFiles, testDuration, genType);
- } else {
- if (testDuration) {
- DISPLAY("Error: -T requires test mode (-t)\n\n");
- usage(argv[0]);
- return 1;
- }
- }
-
- if (!path) {
- DISPLAY("Error: path is required in file generation mode\n");
- usage(argv[0]);
- return 1;
- }
-
- if (numFiles == 0 && useDict == 0) {
- return generateFile(seed, path, origPath, genType);
- } else if (useDict == 0){
- return generateCorpus(seed, numFiles, path, origPath, genType);
- } else {
- /* should generate files with a dictionary */
- return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
- }
-
-}