From ffcbc2d7ba03067492045e4cbead519a3b3c27ef Mon Sep 17 00:00:00 2001
From: Baptiste Daroussin Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
- Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
- @return : compressed size written into `dst` (<= `dstCapacity),
- or an error code if it fails (which can be tested using ZSTD_isError()).
+ Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ @return : compressed size written into `dst` (<= `dstCapacity),
+ or an error code if it fails (which can be tested using ZSTD_isError()).
`compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
- `dstCapacity` is an upper bound of originalSize.
- If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
- @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
- or an errorCode if it fails (which can be tested using ZSTD_isError()).
+ `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ `dstCapacity` is an upper bound of originalSize.
+ If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ or an errorCode if it fails (which can be tested using ZSTD_isError()).
NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
- ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
- frame, but distinguishes empty frames from frames with an unknown size, or errors.
-
- Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
- concatenated frames in one buffer, and so is more general.
- As a result however, it requires more computation and entire frames to be passed to it,
- as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
-
- 'src' is the start of a zstd compressed frame.
- @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
- note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
- When `return==0`, data to decompress could be any size.
- In which case, it's necessary to use streaming mode to decompress data.
- Optionally, application can still use ZSTD_decompress() while relying on implied limits.
- (For example, data may be necessarily cut into blocks <= 16 KB).
- note 2 : decompressed size is always present when compression is done with ZSTD_compress()
- note 3 : decompressed size can be very large (64-bits value),
- potentially larger than what local system can handle as a single memory segment.
- In which case, it's necessary to use streaming mode to decompress data.
- note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
- Always ensure result fits within application's authorized limits.
- Each application can set its own limits.
- note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.
+ NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
+ ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
+ frame, but distinguishes empty frames from frames with an unknown size, or errors.
+
+ Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
+ concatenated frames in one buffer, and so is more general.
+ As a result however, it requires more computation and entire frames to be passed to it,
+ as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
+
+ 'src' is the start of a zstd compressed frame.
+ @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
+ note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ When `return==0`, data to decompress could be any size.
+ In which case, it's necessary to use streaming mode to decompress data.
+ Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+ (For example, data may be necessarily cut into blocks <= 16 KB).
+ note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ note 3 : decompressed size can be very large (64-bits value),
+ potentially larger than what local system can handle as a single memory segment.
+ In which case, it's necessary to use streaming mode to decompress data.
+ note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ Always ensure result fits within application's authorized limits.
+ Each application can set its own limits.
+ note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.
Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
+ Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
+ Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
Compression using a digested Dictionary.
- Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
- Note that compression level is decided during dictionary creation.
+ Compression using a digested Dictionary.
+ Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ Note that compression level is decided during dictionary creation.
+ Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)
zstd 1.1.4 Manual
+zstd 1.2.0 Manual
Contents
@@ -57,46 +57,46 @@
size_t ZSTD_compress( void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
-
size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
-
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-
Helper functions
int ZSTD_maxCLevel(void);
/*!< maximum compression level available */
@@ -106,28 +106,28 @@ const char* ZSTD_getErrorName(size_t code); /*!< provides readable strin
Explicit memory management
-Compression context
When compressing many times,
- it is recommended to allocate a context just once, and re-use it for each successive compression operation.
- This will make workload friendlier for system's memory.
- Use one context per thread for parallel execution in multi-threaded environments.
+
Compression context
When compressing many times,
+ it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTD_CCtx* ZSTD_createCCtx(void);
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
-
-Decompression context
When decompressing many times,
- it is recommended to allocate a context just once, and re-use it for each successive compression operation.
- This will make workload friendlier for system's memory.
- Use one context per thread for parallel execution in multi-threaded environments.
+
Decompression context
When decompressing many times,
+ it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTD_DCtx* ZSTD_createDCtx(void);
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
Simple dictionary API
@@ -169,9 +169,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict);
-
ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
@@ -399,7 +400,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, - ZSTD_parameters params, ZSTD_customMem customMem); + ZSTD_compressionParameters cParams, ZSTD_customMem customMem);Create a ZSTD_CDict using external alloc and free, and customized compression parameters
size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); -Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter +
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); +Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter +
+ +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams); +Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
Advanced decompression functions
@@ -491,20 +499,29 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v Note : this use case also happens when using a non-conformant dictionary. - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - This is not a Zstandard frame. - When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. + When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code.
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);/**< size of CStream is variable, depending primarily on compression level */ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); +start a new compression job, using same parameters from previous job. + This is typically useful to skip dictionary loading stage, since it will re-use it in-place.. + Note that zcs must be init at least once before using ZSTD_resetCStream(). + pledgedSrcSize==0 means "srcSize unknown". + If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + @return : 0, or an error code (which can be tested using ZSTD_isError()) +
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
@@ -552,10 +569,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ -size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
A ZSTD_DCtx object is required to track streaming operations.
@@ -640,19 +656,20 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
- Compressing and decompressing require a context structure
+ Use ZSTD_createCCtx() and ZSTD_createDCtx()
- It is necessary to init context before starting
- + compression : ZSTD_compressBegin()
- + decompression : ZSTD_decompressBegin()
- + variants _usingDict() are also allowed
- + copyCCtx() and copyDCtx() work too
- - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- + If you need to compress more, cut data into multiple blocks
- + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+ + compression : any ZSTD_compressBegin*() variant, including with dictionary
+ + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+ + copyCCtx() and copyDCtx() can be used too
+ - Block size is limited, it must be <= ZSTD_getBlockSizeMax() <= ZSTD_BLOCKSIZE_ABSOLUTEMAX
+ + If input is larger than a block size, it's necessary to split input data into multiple blocks
+ + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
In which case, nothing is produced into `dst`.
+ User must test for such outcome and deal directly with uncompressed data
+ ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
- + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
- Use ZSTD_insertBlock() in such a case.
+ + In case of multiple successive blocks, should some of them be uncompressed,
+ decoder must be informed of their existence in order to follow proper history.
+ Use ZSTD_insertBlock() for such a case.
size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
diff --git a/examples/simple_compression.c b/examples/simple_compression.c
index 9d448712ec34..ab1131475575 100644
--- a/examples/simple_compression.c
+++ b/examples/simple_compression.c
@@ -116,7 +116,6 @@ static char* createOutFilename_orDie(const char* filename)
int main(int argc, const char** argv)
{
const char* const exeName = argv[0];
- const char* const inFilename = argv[1];
if (argc!=2) {
printf("wrong arguments\n");
@@ -125,6 +124,8 @@ int main(int argc, const char** argv)
return 1;
}
+ const char* const inFilename = argv[1];
+
char* const outFilename = createOutFilename_orDie(inFilename);
compress_orDie(inFilename, outFilename);
free(outFilename);
diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c
index 4c2c1a1d8bc7..24ad15bd614c 100644
--- a/examples/streaming_compression.c
+++ b/examples/streaming_compression.c
@@ -112,7 +112,6 @@ static const char* createOutFilename_orDie(const char* filename)
int main(int argc, const char** argv)
{
const char* const exeName = argv[0];
- const char* const inFilename = argv[1];
if (argc!=2) {
printf("wrong arguments\n");
@@ -121,6 +120,8 @@ int main(int argc, const char** argv)
return 1;
}
+ const char* const inFilename = argv[1];
+
const char* const outFilename = createOutFilename_orDie(inFilename);
compressFile_orDie(inFilename, outFilename, 1);
diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c
index 400aa673d64b..bb2d80987081 100644
--- a/examples/streaming_decompression.c
+++ b/examples/streaming_decompression.c
@@ -99,7 +99,6 @@ static void decompressFile_orDie(const char* fname)
int main(int argc, const char** argv)
{
const char* const exeName = argv[0];
- const char* const inFilename = argv[1];
if (argc!=2) {
fprintf(stderr, "wrong arguments\n");
@@ -108,6 +107,8 @@ int main(int argc, const char** argv)
return 1;
}
+ const char* const inFilename = argv[1];
+
decompressFile_orDie(inFilename);
return 0;
}
diff --git a/lib/Makefile b/lib/Makefile
index 197fdeeea033..d8d8e179d205 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -71,6 +71,9 @@ libzstd.a: $(ZSTD_OBJ)
@echo compiling static library
@$(AR) $(ARFLAGS) $@ $^
+libzstd.a-mt: CPPFLAGS += -DZSTD_MULTHREAD
+libzstd.a-mt: libzstd.a
+
$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden
$(LIBZSTD): $(ZSTD_FILES)
@echo compiling dynamic library $(LIBVER)
@@ -86,10 +89,17 @@ endif
libzstd : $(LIBZSTD)
+libzstd-mt : CPPFLAGS += -DZSTD_MULTITHREAD
+libzstd-mt : libzstd
+
lib: libzstd.a libzstd
-lib-release: DEBUGFLAGS :=
+lib-mt: CPPFLAGS += -DZSTD_MULTITHREAD
+lib-mt: lib
+
+lib-release lib-release-mt: DEBUGFLAGS :=
lib-release: lib
+lib-release-mt: lib-mt
clean:
@$(RM) -r *.dSYM # Mac OS-X specific
diff --git a/lib/README.md b/lib/README.md
index 3357e3d87096..79b6fd50014d 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -22,6 +22,14 @@ Some additional API may be useful if you're looking into advanced features :
They are not "stable", their definition may change in the future.
Only static linking is allowed.
+#### ZSTDMT API
+
+To enable multithreaded compression within the library, invoke `make lib-mt` target.
+Prototypes are defined in header file `compress/zstdmt_compress.h`.
+When linking a program that uses ZSTDMT API against libzstd.a on a POSIX system,
+`-pthread` flag must be provided to the compiler and linker.
+Note : ZSTDMT prototypes can still be used with a library built without multithread support,
+but in this case, they will be single threaded only.
#### Modular build
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index d3873002ebd1..ca42850df324 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -2,7 +2,7 @@
bitstream
Part of FSE library
header file (to include)
- Copyright (C) 2013-2016, Yann Collet.
+ Copyright (C) 2013-2017, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -53,6 +53,16 @@ extern "C" {
#include "error_private.h" /* error codes and messages */
+/*-*************************************
+* Debug
+***************************************/
+#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
+# include
+#else
+# define assert(condition) ((void)0)
+#endif
+
+
/*=========================================
* Target specific
=========================================*/
@@ -74,7 +84,7 @@ extern "C" {
typedef struct
{
size_t bitContainer;
- int bitPos;
+ unsigned bitPos;
char* startPtr;
char* ptr;
char* endPtr;
@@ -112,6 +122,7 @@ typedef struct
unsigned bitsConsumed;
const char* ptr;
const char* start;
+ const char* limitPtr;
} BIT_DStream_t;
typedef enum { BIT_DStream_unfinished = 0,
@@ -163,7 +174,10 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
+ 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7,
+ 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
@@ -175,31 +189,36 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
}
/*===== Local Constants =====*/
-static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
+static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
+ 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
+ 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
+ 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
/*-**************************************************************
* bitStream encoding
****************************************************************/
/*! BIT_initCStream() :
- * `dstCapacity` must be > sizeof(void*)
+ * `dstCapacity` must be > sizeof(size_t)
* @return : 0 if success,
otherwise an error code (can be tested using ERR_isError() ) */
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+ void* startPtr, size_t dstCapacity)
{
bitC->bitContainer = 0;
bitC->bitPos = 0;
bitC->startPtr = (char*)startPtr;
bitC->ptr = bitC->startPtr;
- bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
- if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+ if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
return 0;
}
/*! BIT_addBits() :
can add up to 26 bits into `bitC`.
Does not check for register overflow ! */
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+ size_t value, unsigned nbBits)
{
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
bitC->bitPos += nbBits;
@@ -207,34 +226,42 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
/*! BIT_addBitsFast() :
* works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+ size_t value, unsigned nbBits)
{
+ assert((value>>nbBits) == 0);
bitC->bitContainer |= value << bitC->bitPos;
bitC->bitPos += nbBits;
}
/*! BIT_flushBitsFast() :
+ * assumption : bitContainer has not overflowed
* unsafe version; does not check buffer overflow */
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
+ assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
+ assert(bitC->ptr <= bitC->endPtr);
bitC->bitPos &= 7;
- bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+ bitC->bitContainer >>= nbBytes*8;
}
/*! BIT_flushBits() :
+ * assumption : bitContainer has not overflowed
* safe version; check for buffer overflow, and prevents it.
- * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
+ * note : does not signal buffer overflow.
+ * overflow will be revealed later on using BIT_closeCStream() */
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
+ assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
bitC->bitPos &= 7;
- bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+ bitC->bitContainer >>= nbBytes*8;
}
/*! BIT_closeCStream() :
@@ -244,9 +271,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
{
BIT_addBitsFast(bitC, 1, 1); /* endMark */
BIT_flushBits(bitC);
-
- if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
-
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
}
@@ -264,15 +289,16 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
{
if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+ bitD->start = (const char*)srcBuffer;
+ bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
- bitD->start = (const char*)srcBuffer;
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
bitD->bitContainer = MEM_readLEST(bitD->ptr);
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
} else {
- bitD->start = (const char*)srcBuffer;
bitD->ptr = bitD->start;
bitD->bitContainer = *(const BYTE*)(bitD->start);
switch(srcSize)
@@ -330,17 +356,18 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
#else
- U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
#endif
}
/*! BIT_lookBitsFast() :
-* unsafe version; only works only if nbBits >= 1 */
+ * unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
{
- U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+ assert(nbBits >= 1);
+ return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
@@ -365,6 +392,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
{
size_t const value = BIT_lookBitsFast(bitD, nbBits);
+ assert(nbBits >= 1);
BIT_skipBits(bitD, nbBits);
return value;
}
@@ -376,10 +404,10 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */
- return BIT_DStream_overflow;
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
+ return BIT_DStream_overflow;
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+ if (bitD->ptr >= bitD->limitPtr) {
bitD->ptr -= bitD->bitsConsumed >> 3;
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
@@ -389,6 +417,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
return BIT_DStream_completed;
}
+ /* start < ptr < limitPtr */
{ U32 nbBytes = bitD->bitsConsumed >> 3;
BIT_DStream_status result = BIT_DStream_unfinished;
if (bitD->ptr - nbBytes < bitD->start) {
@@ -397,7 +426,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
}
bitD->ptr -= nbBytes;
bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
return result;
}
}
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
index a0fa1724aee8..b3287245f1ee 100644
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -29,7 +29,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
- case PREFIX(srcSize_wrong): return "Src size incorrect";
+ case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
@@ -37,6 +37,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+ case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
case PREFIX(maxCode):
default: return notErrorCode;
}
diff --git a/lib/common/fse.h b/lib/common/fse.h
index baac39032675..6d5d41def19b 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -316,6 +316,10 @@ If there is an error, the function will return an error code, which can be teste
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<2)?(maxTableLog-2):0)) )
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -550,9 +554,9 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
{
- const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable);
- U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+ U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
BIT_addBits(bitC, statePtr->value, nbBitsOut);
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
}
diff --git a/lib/common/huf.h b/lib/common/huf.h
index e5572760a548..7873ca3d42a5 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -43,6 +43,21 @@ extern "C" {
#include /* size_t */
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ * HUF symbols remain "private" (internal symbols for library only).
+ * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
+# define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+# define HUF_PUBLIC_API
+#endif
+
+
/* *** simple functions *** */
/**
HUF_compress() :
@@ -55,8 +70,8 @@ HUF_compress() :
if return == 1, srcData is a single repeated byte symbol (RLE compression).
if HUF_isError(return), compression failed (more details using HUF_getErrorName())
*/
-size_t HUF_compress(void* dst, size_t dstCapacity,
- const void* src, size_t srcSize);
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
/**
HUF_decompress() :
@@ -69,32 +84,42 @@ HUF_decompress() :
@return : size of regenerated data (== originalSize),
or an error code, which can be tested using HUF_isError()
*/
-size_t HUF_decompress(void* dst, size_t originalSize,
- const void* cSrc, size_t cSrcSize);
+HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize);
/* *** Tool functions *** */
-#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
-size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
+#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
/* Error Management */
-unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
-const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
+HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
/* *** Advanced function *** */
/** HUF_compress2() :
- * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` .
- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
-size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`.
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
-* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
-size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ * `workspace` must have minimum alignment of 4, and be at least as large as following macro */
+#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/* ******************************************************************
+ * WARNING !!
+ * The following section contains advanced and experimental definitions
+ * which shall never be used in the context of dll
+ * because they are not guaranteed to remain stable in the future.
+ * Only consider them in association with static linking.
+ *******************************************************************/
#ifdef HUF_STATIC_LINKING_ONLY
/* *** Dependencies *** */
@@ -117,12 +142,14 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t s
******************************************/
/* HUF buffer bounds */
#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* static allocation of HUF's Compression Table */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
- U32 name##hb[maxSymbolValue+1]; \
+ U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
void* name##hv = &(name##hb); \
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
@@ -134,10 +161,6 @@ typedef U32 HUF_DTable;
#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_WORKSPACE_SIZE (6 << 10)
-#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
-
/* ****************************************
* Advanced decompression functions
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 3cacd216aa02..4773a8b9309e 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -89,8 +89,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
@@ -122,7 +121,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
- __pragma( pack(push, 1) )
+ __pragma( pack(push, 1) )
typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
__pragma( pack(pop) )
#else
diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h
index 949dbd0fffac..3d579d969363 100644
--- a/lib/common/zstd_errors.h
+++ b/lib/common/zstd_errors.h
@@ -57,6 +57,7 @@ typedef enum {
ZSTD_error_maxSymbolValue_tooSmall,
ZSTD_error_dictionary_corrupted,
ZSTD_error_dictionary_wrong,
+ ZSTD_error_dictionaryCreation_failed,
ZSTD_error_maxCode
} ZSTD_ErrorCode;
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 5c5b28732975..2533333ba83c 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -16,9 +16,9 @@
#ifdef _MSC_VER /* Visual Studio */
# define FORCE_INLINE static __forceinline
# include /* For Visual 2005 */
+# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
#else
# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# ifdef __GNUC__
@@ -58,6 +58,8 @@
/*-*************************************
* shared macros
***************************************/
+#undef MIN
+#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
@@ -104,7 +106,6 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define LONGNBSEQ 0x7F00
#define MINMATCH 3
-#define EQUAL_READ32 4
#define Litbits 8
#define MaxLit ((1< FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
@@ -476,20 +476,20 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
- U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
- U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
- return minBits;
+ U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
+ U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+ U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+ return minBits;
}
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+ U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
- U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+ U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
- if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
- if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
+ if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
+ if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
return tableLog;
@@ -808,7 +808,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
- { CHECK_V_F(maxCount, FSE_count(count, &maxSymbolValue, src, srcSize) );
+ { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 450e5970a4a5..c08b315dab93 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -20,6 +20,26 @@
#include "zstd_internal.h" /* includes zstd.h */
+/*-*************************************
+* Debug
+***************************************/
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+# include
+#else
+# define assert(condition) ((void)0)
+#endif
+
+#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
+
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+# include
+ static unsigned g_debugLevel = ZSTD_DEBUG;
+# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+
/*-*************************************
* Constants
***************************************/
@@ -27,12 +47,22 @@ static const U32 g_searchStrength = 8; /* control skip over incompressible dat
#define HASH_READ_SIZE 8
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
+/* entropy tables always have same size */
+static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
+static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
+static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
+static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
+static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
+
/*-*************************************
* Helper functions
***************************************/
-#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
+size_t ZSTD_compressBound(size_t srcSize) {
+ size_t const lowLimit = 256 KB;
+ size_t const margin = (srcSize < lowLimit) ? (lowLimit-srcSize) >> 12 : 0; /* from 64 to 0 */
+ return srcSize + (srcSize >> 8) + margin;
+}
/*-*************************************
@@ -70,6 +100,7 @@ struct ZSTD_CCtx_s {
size_t workSpaceSize;
size_t blockSize;
U64 frameContentSize;
+ U64 consumedSrcSize;
XXH64_state_t xxhState;
ZSTD_customMem customMem;
@@ -77,13 +108,13 @@ struct ZSTD_CCtx_s {
U32* hashTable;
U32* hashTable3;
U32* chainTable;
- HUF_CElt* hufTable;
- U32 flagStaticTables;
- HUF_repeat flagStaticHufTable;
- FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
- FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
- FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
- unsigned tmpCounters[HUF_WORKSPACE_SIZE_U32];
+ HUF_repeat hufCTable_repeatMode;
+ HUF_CElt* hufCTable;
+ U32 fseCTables_ready;
+ FSE_CTable* offcodeCTable;
+ FSE_CTable* matchlengthCTable;
+ FSE_CTable* litlengthCTable;
+ unsigned* entropyScratchSpace;
};
ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -150,9 +181,7 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
- { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
- U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
- CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
+ CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported);
return 0;
@@ -206,11 +235,14 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
size_t const hSize = ((size_t)1) << cParams.hashLog;
U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
size_t const h3Size = ((size_t)1) << hashLog3;
+ size_t const entropySpace = hufCTable_size + litlengthCTable_size
+ + offcodeCTable_size + matchlengthCTable_size
+ + entropyScratchSpace_size;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<nextSrc - cctx->base);
cctx->params = params;
cctx->frameContentSize = frameContentSize;
+ cctx->consumedSrcSize = 0;
cctx->lowLimit = end;
cctx->dictLimit = end;
cctx->nextToUpdate = end+1;
@@ -246,16 +279,16 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
-/*! ZSTD_resetCCtx_advanced() :
+/*! ZSTD_resetCCtx_internal() :
note : `params` must be validated */
-static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
+static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc,
ZSTD_parameters params, U64 frameContentSize,
ZSTD_compResetPolicy_e const crp)
{
if (crp == ZSTDcrp_continue)
if (ZSTD_equivalentParams(params, zc->params)) {
- zc->flagStaticTables = 0;
- zc->flagStaticHufTable = HUF_repeat_none;
+ zc->fseCTables_ready = 0;
+ zc->hufCTable_repeatMode = HUF_repeat_none;
return ZSTD_continueCCtx(zc, params, frameContentSize);
}
@@ -271,41 +304,67 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
void* ptr;
/* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) {
+ zc->workSpaceSize = 0;
ZSTD_free(zc->workSpace, zc->customMem);
zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
if (zc->workSpace == NULL) return ERROR(memory_allocation);
zc->workSpaceSize = neededSpace;
+ ptr = zc->workSpace;
+
+ /* entropy space */
+ zc->hufCTable = (HUF_CElt*)ptr;
+ ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */
+ zc->offcodeCTable = (FSE_CTable*) ptr;
+ ptr = (char*)ptr + offcodeCTable_size;
+ zc->matchlengthCTable = (FSE_CTable*) ptr;
+ ptr = (char*)ptr + matchlengthCTable_size;
+ zc->litlengthCTable = (FSE_CTable*) ptr;
+ ptr = (char*)ptr + litlengthCTable_size;
+ assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */
+ zc->entropyScratchSpace = (unsigned*) ptr;
} }
- if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace); /* reset tables only */
- XXH64_reset(&zc->xxhState, 0);
- zc->hashLog3 = hashLog3;
- zc->hashTable = (U32*)(zc->workSpace);
- zc->chainTable = zc->hashTable + hSize;
- zc->hashTable3 = zc->chainTable + chainSize;
- ptr = zc->hashTable3 + h3Size;
- zc->hufTable = (HUF_CElt*)ptr;
- zc->flagStaticTables = 0;
- zc->flagStaticHufTable = HUF_repeat_none;
- ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
+ /* init params */
+ zc->params = params;
+ zc->blockSize = blockSize;
+ zc->frameContentSize = frameContentSize;
+ zc->consumedSrcSize = 0;
+ XXH64_reset(&zc->xxhState, 0);
+ zc->stage = ZSTDcs_init;
+ zc->dictID = 0;
+ zc->loadedDictEnd = 0;
+ zc->fseCTables_ready = 0;
+ zc->hufCTable_repeatMode = HUF_repeat_none;
zc->nextToUpdate = 1;
zc->nextSrc = NULL;
zc->base = NULL;
zc->dictBase = NULL;
zc->dictLimit = 0;
zc->lowLimit = 0;
- zc->params = params;
- zc->blockSize = blockSize;
- zc->frameContentSize = frameContentSize;
{ int i; for (i=0; irep[i] = repStartValue[i]; }
+ zc->hashLog3 = hashLog3;
+ zc->seqStore.litLengthSum = 0;
+
+ /* ensure entropy tables are close together at the beginning */
+ assert((void*)zc->hufCTable == zc->workSpace);
+ assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
+ assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
+ assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
+ assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
+ ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
+ /* opt parser space */
if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
+ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->seqStore.litFreq = (U32*)ptr;
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
@@ -315,8 +374,17 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
- zc->seqStore.litLengthSum = 0;
}
+
+ /* table Space */
+ if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
+ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
+ zc->hashTable = (U32*)(ptr);
+ zc->chainTable = zc->hashTable + hSize;
+ zc->hashTable3 = zc->chainTable + chainSize;
+ ptr = zc->hashTable3 + h3Size;
+
+ /* sequences storage */
zc->seqStore.sequencesStart = (seqDef*)ptr;
ptr = zc->seqStore.sequencesStart + maxNbSeq;
zc->seqStore.llCode = (BYTE*) ptr;
@@ -324,10 +392,6 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
- zc->stage = ZSTDcs_init;
- zc->dictID = 0;
- zc->loadedDictEnd = 0;
-
return 0;
}
}
@@ -341,27 +405,32 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
for (i=0; irep[i] = 0;
}
-/*! ZSTD_copyCCtx() :
-* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
-* @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+
+/*! ZSTD_copyCCtx_internal() :
+ * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ * pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
+ * @return : 0, or an error code */
+size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx,
+ ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize)
{
if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
-
memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
{ ZSTD_parameters params = srcCCtx->params;
- params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
- ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
+ params.fParams = fParams;
+ DEBUGLOG(5, "ZSTD_resetCCtx_internal : dictIDFlag : %u \n", !fParams.noDictIDFlag);
+ ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
}
/* copy tables */
{ size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
- size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
+ size_t const hSize = (size_t)1 << srcCCtx->params.cParams.hashLog;
size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
+ assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
+ memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */
}
/* copy dictionary offsets */
@@ -376,23 +445,36 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
dstCCtx->dictID = srcCCtx->dictID;
/* copy entropy tables */
- dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
- dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
- if (srcCCtx->flagStaticTables) {
- memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
- memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
- memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
+ dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
+ if (srcCCtx->fseCTables_ready) {
+ memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
+ memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
+ memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
}
- if (srcCCtx->flagStaticHufTable) {
- memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
+ dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
+ if (srcCCtx->hufCTable_repeatMode) {
+ memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
}
return 0;
}
+/*! ZSTD_copyCCtx() :
+ * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ * pledgedSrcSize==0 means "unknown".
+* @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+ ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+ fParams.contentSizeFlag = pledgedSrcSize>0;
+
+ return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize);
+}
+
/*! ZSTD_reduceTable() :
-* reduce table indexes by `reducerValue` */
+ * reduce table indexes by `reducerValue` */
static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
{
U32 u;
@@ -499,26 +581,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
/* small ? don't even attempt compression (speed opt) */
# define LITERAL_NOENTROPY 63
- { size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
+ { size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
- { HUF_repeat repeat = zc->flagStaticHufTable;
+ { HUF_repeat repeat = zc->hufCTable_repeatMode;
int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
- cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
- : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
+ cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
+ zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
+ : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
+ zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
- else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
+ else { zc->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
- zc->flagStaticHufTable = HUF_repeat_none;
+ zc->hufCTable_repeatMode = HUF_repeat_none;
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
- zc->flagStaticHufTable = HUF_repeat_none;
+ zc->hufCTable_repeatMode = HUF_repeat_none;
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
@@ -637,12 +721,12 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
/* CTable for Literal Lengths */
{ U32 max = MaxLL;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
- } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+ } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
LLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@@ -653,7 +737,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ if (FSE_isError(NCountSize)) return NCountSize;
op += NCountSize; }
FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
LLtype = set_compressed;
@@ -661,12 +745,12 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
/* CTable for Offsets */
{ U32 max = MaxOff;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
- } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+ } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Offtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@@ -677,7 +761,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ if (FSE_isError(NCountSize)) return NCountSize;
op += NCountSize; }
FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_compressed;
@@ -685,12 +769,12 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
/* CTable for MatchLengths */
{ U32 max = MaxML;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
- } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+ } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
MLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@@ -701,14 +785,14 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ if (FSE_isError(NCountSize)) return NCountSize;
op += NCountSize; }
FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
MLtype = set_compressed;
} }
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
- zc->flagStaticTables = 0;
+ zc->fseCTables_ready = 0;
/* Encoding Sequences */
{ BIT_CStream_t blockStream;
@@ -787,7 +871,7 @@ _check_compressibility:
{ size_t const minGain = ZSTD_minGain(srcSize);
size_t const maxCSize = srcSize - minGain;
if ((size_t)(op-ostart) >= maxCSize) {
- zc->flagStaticHufTable = HUF_repeat_none;
+ zc->hufCTable_repeatMode = HUF_repeat_none;
return 0;
} }
@@ -816,7 +900,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
const U32 pos = (U32)((const BYTE*)literals - g_start);
if (g_start==NULL) g_start = (const BYTE*)literals;
if ((pos > 1895000) && (pos < 1895300))
- fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+ DEBUGLOG(5, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
}
#endif
@@ -825,14 +909,20 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
seqStorePtr->lit += litLength;
/* literal Length */
- if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
+ if (litLength>0xFFFF) {
+ seqStorePtr->longLengthID = 1;
+ seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+ }
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
seqStorePtr->sequences[0].offset = offsetCode + 1;
/* match Length */
- if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
+ if (matchCode>0xFFFF) {
+ seqStorePtr->longLengthID = 2;
+ seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+ }
seqStorePtr->sequences[0].matchLength = (U16)matchCode;
seqStorePtr->sequences++;
@@ -853,7 +943,14 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctzll((U64)val) >> 3);
# else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+ 0, 3, 1, 3, 1, 4, 2, 7,
+ 0, 2, 3, 6, 1, 5, 3, 5,
+ 1, 3, 4, 4, 2, 5, 6, 7,
+ 7, 0, 1, 2, 3, 3, 4, 6,
+ 2, 6, 5, 5, 3, 4, 5, 6,
+ 7, 1, 2, 4, 6, 4, 4, 5,
+ 7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
@@ -864,7 +961,10 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+ 3, 2, 2, 1, 3, 2, 0, 1,
+ 3, 3, 1, 2, 2, 2, 2, 0,
+ 3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
@@ -936,7 +1036,7 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
***************************************/
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
@@ -1085,7 +1185,7 @@ static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
const U32 mls = ctx->params.cParams.searchLength;
switch(mls)
{
- default:
+ default: /* includes case 3 */
case 4 :
ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
case 5 :
@@ -1135,7 +1235,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
- mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
+ mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
@@ -1147,7 +1247,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
U32 offset;
- mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
+ mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset = current - matchIndex;
offset_2 = offset_1;
@@ -1171,7 +1271,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
- size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
@@ -1199,7 +1299,7 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
U32 const mls = ctx->params.cParams.searchLength;
switch(mls)
{
- default:
+ default: /* includes case 3 */
case 4 :
ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
case 5 :
@@ -1274,7 +1374,9 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
const BYTE* match = base + matchIndexS;
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
- if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
+ assert(offset_1 <= current); /* supposed guaranteed by construction */
+ if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+ /* favor repcode */
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
@@ -1285,15 +1387,15 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
} else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
- size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
- U32 const matchIndex3 = hashLong[h3];
- const BYTE* match3 = base + matchIndex3;
- hashLong[h3] = current + 1;
- if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
- mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
+ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+ U32 const matchIndexL3 = hashLong[hl3];
+ const BYTE* matchL3 = base + matchIndexL3;
+ hashLong[hl3] = current + 1;
+ if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
+ mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
- offset = (U32)(ip-match3);
- while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+ offset = (U32)(ip-matchL3);
+ while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
} else {
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
offset = (U32)(ip-match);
@@ -1353,7 +1455,7 @@ static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_
const U32 mls = ctx->params.cParams.searchLength;
switch(mls)
{
- default:
+ default: /* includes case 3 */
case 4 :
ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
case 5 :
@@ -1462,8 +1564,8 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
if (ip <= ilimit) {
/* Fill Table */
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
+ hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
+ hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
/* check immediate repcode */
@@ -1474,7 +1576,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
- size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
@@ -1503,7 +1605,7 @@ static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
U32 const mls = ctx->params.cParams.searchLength;
switch(mls)
{
- default:
+ default: /* includes case 3 */
case 4 :
ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
case 5 :
@@ -1588,7 +1690,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
if (matchLength > bestLength) {
@@ -1667,7 +1769,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
if (matchLength > bestLength) {
@@ -1733,9 +1835,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS (
{
switch(matchLengthSearch)
{
- default :
+ default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 7 :
case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
}
}
@@ -1772,9 +1875,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
{
switch(matchLengthSearch)
{
- default :
+ default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 7 :
case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
}
}
@@ -1831,7 +1935,7 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 current = (U32)(ip-base);
const U32 minChain = current > chainSize ? current - chainSize : 0;
int nbAttempts=maxNbAttempts;
- size_t ml=EQUAL_READ32-1;
+ size_t ml=4-1;
/* HC4 match finder */
U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
@@ -1846,11 +1950,15 @@ size_t ZSTD_HcFindBestMatch_generic (
} else {
match = dictBase + matchIndex;
if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
- currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
}
/* save best solution */
- if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+ }
if (matchIndex <= minChain) break;
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
@@ -1868,9 +1976,10 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
{
switch(matchLengthSearch)
{
- default :
+ default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+ case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
}
}
@@ -1884,9 +1993,10 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
{
switch(matchLengthSearch)
{
- default :
+ default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+ case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
}
}
@@ -1934,7 +2044,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* check repCode */
if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
/* repcode : we take it */
- matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+ matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
if (depth==0) goto _storeSequence;
}
@@ -1945,7 +2055,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
matchLength = ml2, start = ip, offset=offsetFound;
}
- if (matchLength < EQUAL_READ32) {
+ if (matchLength < 4) {
ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
@@ -1955,17 +2065,17 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
- size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+ size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
int const gain2 = (int)(mlRep * 3);
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
- if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
+ if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ if ((ml2 >= 4) && (gain2 > gain1)) {
matchLength = ml2, offset = offset2, start = ip;
continue; /* search a better one */
} }
@@ -1974,17 +2084,17 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
- size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+ size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
int const gain2 = (int)(ml2 * 4);
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
+ if ((ml2 >= 4) && (gain2 > gain1))
matchLength = ml2, offset = 0, start = ip;
}
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ if ((ml2 >= 4) && (gain2 > gain1)) {
matchLength = ml2, offset = offset2, start = ip;
continue;
} } }
@@ -1993,7 +2103,9 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* catch up */
if (offset) {
- while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
+ while ( (start > anchor)
+ && (start > base+offset-ZSTD_REP_MOVE)
+ && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]) ) /* only search for offset within prefix */
{ start--; matchLength++; }
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
}
@@ -2010,7 +2122,7 @@ _storeSequence:
&& ((offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
- matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
@@ -2099,7 +2211,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
if (depth==0) goto _storeSequence;
} }
@@ -2110,7 +2222,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
matchLength = ml2, start = ip, offset=offsetFound;
}
- if (matchLength < EQUAL_READ32) {
+ if (matchLength < 4) {
ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
@@ -2129,10 +2241,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
int const gain2 = (int)(repLength * 3);
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
- if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+ if ((repLength >= 4) && (gain2 > gain1))
matchLength = repLength, offset = 0, start = ip;
} }
@@ -2141,7 +2253,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ if ((ml2 >= 4) && (gain2 > gain1)) {
matchLength = ml2, offset = offset2, start = ip;
continue; /* search a better one */
} }
@@ -2159,10 +2271,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
- int gain2 = (int)(repLength * 4);
- int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
- if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+ size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+ int const gain2 = (int)(repLength * 4);
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+ if ((repLength >= 4) && (gain2 > gain1))
matchLength = repLength, offset = 0, start = ip;
} }
@@ -2171,7 +2283,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ if ((ml2 >= 4) && (gain2 > gain1)) {
matchLength = ml2, offset = offset2, start = ip;
continue;
} } }
@@ -2203,7 +2315,7 @@ _storeSequence:
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
@@ -2294,8 +2406,12 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr
static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
{
static const ZSTD_blockCompressor blockCompressor[2][8] = {
- { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
- { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
+ { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
+ ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
+ ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
+ { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
+ ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
+ ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
};
return blockCompressor[extDict][(U32)strat];
@@ -2311,7 +2427,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
ZSTD_resetSeqStore(&(zc->seqStore));
if (current > zc->nextToUpdate + 384)
- zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */
+ zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */
blockCompressor(zc, src, srcSize);
return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
}
@@ -2343,7 +2459,8 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
size_t cSize;
- if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
+ if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
+ return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
if (remaining < blockSize) blockSize = remaining;
/* preemptive overflow correction */
@@ -2398,7 +2515,8 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
{ BYTE* const op = (BYTE*)dst;
- U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
+ U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
+ U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
U32 const checksumFlag = params.fParams.checksumFlag>0;
U32 const windowSize = 1U << params.cParams.windowLog;
U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
@@ -2410,6 +2528,9 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
size_t pos;
if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
+ DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u \n", !params.fParams.noDictIDFlag);
+ DEBUGLOG(5, "ZSTD_writeFrameHeader : dictID : %u \n", dictID);
+ DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDSizeCode : %u \n", dictIDSizeCode);
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
op[4] = frameHeaderDecriptionByte; pos=5;
@@ -2478,6 +2599,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
if (ZSTD_isError(cSize)) return cSize;
+ cctx->consumedSrcSize += srcSize;
return cSize + fhSize;
} else
return fhSize;
@@ -2488,7 +2610,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
- return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
+ return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
@@ -2501,10 +2623,12 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
{
size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
- return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
+ return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
-
+/*! ZSTD_loadDictionaryContent() :
+ * @return : 0, or an error code
+ */
static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*) src;
@@ -2534,13 +2658,15 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
- ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
+ if (srcSize >= HASH_READ_SIZE)
+ ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
break;
case ZSTD_btlazy2:
case ZSTD_btopt:
case ZSTD_btopt2:
- ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
+ if (srcSize >= HASH_READ_SIZE)
+ ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
break;
default:
@@ -2567,18 +2693,15 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
/* Dictionary format :
- Magic == ZSTD_DICT_MAGIC (4 bytes)
- HUF_writeCTable(256)
- FSE_writeNCount(off)
- FSE_writeNCount(ml)
- FSE_writeNCount(ll)
- RepOffsets
- Dictionary content
-*/
-/*! ZSTD_loadDictEntropyStats() :
- @return : size read from dictionary
- note : magic number supposed already checked */
-static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : 0, or an error code
+ * assumptions : magic number supposed already checked
+ * dictSize supposed > 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
@@ -2586,7 +2709,11 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
unsigned offcodeMaxValue = MaxOff;
BYTE scratchBuffer[1<hufTable, 255, dict, dictSize);
+ dictPtr += 4; /* skip magic number */
+ cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
+ dictPtr += 4;
+
+ { size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
}
@@ -2596,7 +2723,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
- CHECK_E (FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
+ CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
+ dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@@ -2606,8 +2734,9 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
- CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
- CHECK_E (FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
+ CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+ CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+ dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
@@ -2617,49 +2746,51 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
- CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
- CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
+ CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+ CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+ dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
- cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
- cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
- cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+ cctx->rep[0] = MEM_readLE32(dictPtr+0);
+ cctx->rep[1] = MEM_readLE32(dictPtr+4);
+ cctx->rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
- { U32 offcodeMax = MaxOff;
- if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) {
- U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */
- /* Calculate minimum offset code required to represent maxOffset */
- offcodeMax = ZSTD_highbit32(maxOffset);
+ { size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+ U32 offcodeMax = MaxOff;
+ if (dictContentSize <= ((U32)-1) - 128 KB) {
+ U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+ offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
}
- /* Every possible supported offset <= dictContentSize + 128 KB must be representable */
+ /* All offset values <= dictContentSize + 128 KB must be representable */
CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
- }
+ /* All repCodes must be <= dictContentSize and != 0*/
+ { U32 u;
+ for (u=0; u<3; u++) {
+ if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
+ if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+ } }
- cctx->flagStaticTables = 1;
- cctx->flagStaticHufTable = HUF_repeat_valid;
- return dictPtr - (const BYTE*)dict;
+ cctx->fseCTables_ready = 1;
+ cctx->hufCTable_repeatMode = HUF_repeat_valid;
+ return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
+ }
}
/** ZSTD_compress_insertDictionary() :
* @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
+static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
if ((dict==NULL) || (dictSize<=8)) return 0;
/* dict as pure content */
- if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
- return ZSTD_loadDictionaryContent(zc, dict, dictSize);
- zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
+ if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
+ return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
- /* known magic number : dict is parsed for entropy stats and content */
- { size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8);
- size_t const eSize = loadError + 8;
- if (ZSTD_isError(loadError)) return loadError;
- return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
- }
+ /* dict as zstd dictionary */
+ return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
}
/*! ZSTD_compressBegin_internal() :
@@ -2669,7 +2800,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
ZSTD_parameters params, U64 pledgedSrcSize)
{
ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
- CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ CHECK_F(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, crp));
return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
}
@@ -2745,10 +2877,13 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
const void* src, size_t srcSize)
{
size_t endResult;
- size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
+ size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 1 /* last chunk */);
if (ZSTD_isError(cSize)) return cSize;
endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
if (ZSTD_isError(endResult)) return endResult;
+ if (cctx->params.fParams.contentSizeFlag) { /* control src size */
+ if (cctx->frameContentSize != cctx->consumedSrcSize) return ERROR(srcSize_wrong);
+ }
return cSize + endResult;
}
@@ -2773,7 +2908,8 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
}
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+ const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
params.fParams.contentSizeFlag = 1;
@@ -2812,8 +2948,16 @@ size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
}
+static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
+{
+ ZSTD_parameters params;
+ params.cParams = cParams;
+ params.fParams = fParams;
+ return params;
+}
+
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
- ZSTD_parameters params, ZSTD_customMem customMem)
+ ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
{
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree) return NULL;
@@ -2838,7 +2982,9 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
cdict->dictContent = internalBuffer;
}
- { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
+ { ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksumFlag */, 0 /* noDictIDFlag */ }; /* dummy */
+ ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
+ size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
if (ZSTD_isError(errorCode)) {
ZSTD_free(cdict->dictBuffer, customMem);
ZSTD_free(cdict, customMem);
@@ -2855,17 +3001,15 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
- params.fParams.contentSizeFlag = 1;
- return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
+ ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+ return ZSTD_createCDict_advanced(dict, dictSize, 0, cParams, allocator);
}
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
- params.fParams.contentSizeFlag = 1;
- return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
+ ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+ return ZSTD_createCDict_advanced(dict, dictSize, 1, cParams, allocator);
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2883,34 +3027,55 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
return ZSTD_getParamsFromCCtx(cdict->refContext);
}
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+ ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+ ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
- if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
+ if (cdict==NULL) return ERROR(GENERIC); /* does not support NULL cdict */
+ DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced : dictIDFlag == %u \n", !fParams.noDictIDFlag);
+ if (cdict->dictContentSize)
+ CHECK_F( ZSTD_copyCCtx_internal(cctx, cdict->refContext, fParams, pledgedSrcSize) )
else {
ZSTD_parameters params = cdict->refContext->params;
- params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
- CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
+ params.fParams = fParams;
+ CHECK_F(ZSTD_compressBegin_internal(cctx, NULL, 0, params, pledgedSrcSize));
}
return 0;
}
+/* ZSTD_compressBegin_usingCDict() :
+ * pledgedSrcSize=0 means "unknown"
+ * if pledgedSrcSize>0, it will enable contentSizeFlag */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+ ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+ DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u \n", !fParams.noDictIDFlag);
+ return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+ CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */
+ return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
/*! ZSTD_compress_usingCDict() :
-* Compression using a digested Dictionary.
-* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-* Note that compression level is decided during dictionary creation */
+ * Compression using a digested Dictionary.
+ * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ * Note that compression parameters are decided at CDict creation time
+ * while frame parameters are hardcoded */
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict)
{
- CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
-
- if (cdict->refContext->params.fParams.contentSizeFlag==1) {
- cctx->params.fParams.contentSizeFlag = 1;
- cctx->frameContentSize = srcSize;
- }
-
- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+ return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}
@@ -2939,7 +3104,6 @@ struct ZSTD_CStream_s {
U32 checksum;
U32 frameEnded;
U64 pledgedSrcSize;
- U64 inputProcessed;
ZSTD_parameters params;
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CStream within "zstd.h" */
@@ -2970,9 +3134,13 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
if (zcs==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = zcs->customMem;
ZSTD_freeCCtx(zcs->cctx);
+ zcs->cctx = NULL;
ZSTD_freeCDict(zcs->cdictLocal);
+ zcs->cdictLocal = NULL;
ZSTD_free(zcs->inBuff, cMem);
+ zcs->inBuff = NULL;
ZSTD_free(zcs->outBuff, cMem);
+ zcs->outBuff = NULL;
ZSTD_free(zcs, cMem);
return 0;
}
@@ -2982,14 +3150,20 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
/*====== Initialization ======*/
size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
+
+size_t ZSTD_CStreamOutSize(void)
+{
+ return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
+}
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
{
if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
- if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
- else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
+ DEBUGLOG(5, "ZSTD_resetCStream_internal : dictIDFlag == %u \n", !zcs->params.fParams.noDictIDFlag);
+
+ if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict_advanced(zcs->cctx, zcs->cdict, zcs->params.fParams, pledgedSrcSize))
+ else CHECK_F(ZSTD_compressBegin_internal(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
zcs->inToCompress = 0;
zcs->inBuffPos = 0;
@@ -2998,7 +3172,6 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long p
zcs->stage = zcss_load;
zcs->frameEnded = 0;
zcs->pledgedSrcSize = pledgedSrcSize;
- zcs->inputProcessed = 0;
return 0; /* ready to go */
}
@@ -3006,70 +3179,109 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
{
zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-
+ DEBUGLOG(5, "ZSTD_resetCStream : dictIDFlag == %u \n", !zcs->params.fParams.noDictIDFlag);
return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
}
-size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
- const void* dict, size_t dictSize,
- ZSTD_parameters params, unsigned long long pledgedSrcSize)
+/* ZSTD_initCStream_internal() :
+ * params are supposed validated at this stage
+ * and zcs->cdict is supposed to be correct */
+static size_t ZSTD_initCStream_stage2(ZSTD_CStream* zcs,
+ const ZSTD_parameters params,
+ unsigned long long pledgedSrcSize)
{
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+
/* allocate buffers */
{ size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
if (zcs->inBuffSize < neededInBuffSize) {
- zcs->inBuffSize = neededInBuffSize;
+ zcs->inBuffSize = 0;
ZSTD_free(zcs->inBuff, zcs->customMem);
zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
if (zcs->inBuff == NULL) return ERROR(memory_allocation);
+ zcs->inBuffSize = neededInBuffSize;
}
zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
}
if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
- zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
+ size_t const outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
+ zcs->outBuffSize = 0;
ZSTD_free(zcs->outBuff, zcs->customMem);
- zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
+ zcs->outBuff = (char*) ZSTD_malloc(outBuffSize, zcs->customMem);
if (zcs->outBuff == NULL) return ERROR(memory_allocation);
+ zcs->outBuffSize = outBuffSize;
}
- if (dict && dictSize >= 8) {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
- if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
- zcs->cdict = zcs->cdictLocal;
- } else zcs->cdict = NULL;
-
zcs->checksum = params.fParams.checksumFlag > 0;
zcs->params = params;
+ DEBUGLOG(5, "ZSTD_initCStream_stage2 : dictIDFlag == %u \n", !params.fParams.noDictIDFlag);
return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
}
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams)
+{
+ if (!cdict) return ERROR(GENERIC); /* cannot handle NULL cdict (does not know what to do) */
+ { ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
+ params.fParams = fParams;
+ zcs->cdict = cdict;
+ return ZSTD_initCStream_stage2(zcs, params, pledgedSrcSize);
+ }
+}
+
/* note : cdict must outlive compression session */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
- ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
- size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
- zcs->cdict = cdict;
- zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
- return initError;
+ ZSTD_frameParameters const fParams = { 0 /* content */, 0 /* checksum */, 0 /* noDictID */ };
+ return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, 0, fParams);
+}
+
+static size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ zcs->cdict = NULL;
+
+ if (dict && dictSize >= 8) {
+ ZSTD_freeCDict(zcs->cdictLocal);
+ zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* copy */, params.cParams, zcs->customMem);
+ if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+ zcs->cdict = zcs->cdictLocal;
+ }
+
+ DEBUGLOG(5, "ZSTD_initCStream_internal : dictIDFlag == %u \n", !params.fParams.noDictIDFlag);
+ return ZSTD_initCStream_stage2(zcs, params, pledgedSrcSize);
+}
+
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+ CHECK_F( ZSTD_checkCParams(params.cParams) );
+ DEBUGLOG(5, "ZSTD_initCStream_advanced : dictIDFlag == %u \n", !params.fParams.noDictIDFlag);
+ return ZSTD_initCStream_internal(zcs, dict, dictSize, params, pledgedSrcSize);
}
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
- return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
+ return ZSTD_initCStream_internal(zcs, dict, dictSize, params, 0);
}
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
{
ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
- if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
- return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
+ params.fParams.contentSizeFlag = (pledgedSrcSize>0);
+ return ZSTD_initCStream_internal(zcs, NULL, 0, params, pledgedSrcSize);
}
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
- return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+ return ZSTD_initCStream_internal(zcs, NULL, 0, params, 0);
}
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
@@ -3163,7 +3375,6 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
*srcSizePtr = ip - istart;
*dstCapacityPtr = op - ostart;
- zcs->inputProcessed += *srcSizePtr;
if (zcs->frameEnded) return 0;
{ size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
if (hintInSize==0) hintInSize = zcs->blockSize;
@@ -3208,14 +3419,12 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
BYTE* const oend = (BYTE*)(output->dst) + output->size;
BYTE* op = ostart;
- if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
- return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
-
if (zcs->stage != zcss_final) {
/* flush whatever remains */
size_t srcSize = 0;
size_t sizeWritten = output->size - output->pos;
- size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
+ size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten,
+ &srcSize /* use a valid src address instead of NULL */, &srcSize, zsf_end);
size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
op += sizeWritten;
if (remainingToFlush) {
@@ -3225,7 +3434,9 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
/* create epilogue */
zcs->stage = zcss_final;
zcs->outBuffContentSize = !notEnded ? 0 :
- ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0); /* write epilogue, including final empty block, into outBuff */
+ /* write epilogue, including final empty block, into outBuff */
+ ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0);
+ if (ZSTD_isError(zcs->outBuffContentSize)) return zcs->outBuffContentSize;
}
/* flush epilogue */
@@ -3268,7 +3479,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
{ 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
{ 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
- { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */
+ { 23, 22, 22, 5, 4, 32, ZSTD_btopt }, /* level 18 */
{ 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
{ 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */
{ 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index ac418b61c834..54376119121d 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -175,10 +175,10 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
}
/* match offset */
- { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
- seqStorePtr->offCodeSum++;
- seqStorePtr->offCodeFreq[offCode]++;
- }
+ { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
+ seqStorePtr->offCodeSum++;
+ seqStorePtr->offCodeFreq[offCode]++;
+ }
/* match Length */
{ const BYTE ML_deltaCode = 36;
@@ -360,6 +360,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
default :
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+ case 7 :
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
}
}
@@ -387,6 +388,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
default :
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+ case 7 :
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
}
}
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 45514a81a3af..fc7f52a2902d 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -33,7 +33,7 @@
# include
# include
# include
- static unsigned g_debugLevel = 3;
+ static unsigned g_debugLevel = 5;
# define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
@@ -44,26 +44,26 @@
DEBUGLOGRAW(l, " \n"); \
}
-static unsigned long long GetCurrentClockTimeMicroseconds()
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
{
static clock_t _ticksPerSecond = 0;
if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
- struct tms junk; clock_t newTicks = (clock_t) times(&junk);
- return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
+ { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+ return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
}
#define MUTEX_WAIT_TIME_DLEVEL 5
#define PTHREAD_MUTEX_LOCK(mutex) \
if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
- unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
- pthread_mutex_lock(mutex); \
- unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
- unsigned long long elapsedTime = (afterTime-beforeTime); \
- if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
- DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+ pthread_mutex_lock(mutex); \
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
+ if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
elapsedTime, #mutex); \
- } \
+ } } \
} else pthread_mutex_lock(mutex);
#else
@@ -228,17 +228,19 @@ void ZSTDMT_compressChunk(void* jobDescription)
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
const void* const src = (const char*)job->srcStart + job->dictSize;
buffer_t const dstBuff = job->dstBuff;
- DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
+ DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
+ job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
if (job->cdict) { /* should only happen for first segment */
- size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
+ size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
if (job->cdict) DEBUGLOG(3, "using CDict ");
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
} else { /* srcStart points at reloaded section */
- size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
- size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
- if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
- ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
- }
+ if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
+ { size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
+ size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
+ if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
+ ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
+ } }
if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
@@ -250,7 +252,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
job->cSize = (job->lastChunk) ?
ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
- DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
+ DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
+ (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
+ DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
_endJob:
PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
@@ -388,14 +392,17 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
int compressionLevel)
{
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
+ U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
+ size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog);
size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
- unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
+ unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1;
unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
size_t remainingSrcSize = srcSize;
const char* const srcStart = (const char*)src;
- size_t frameStartPos = 0;
+ unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
+ size_t frameStartPos = 0, dstBufferPos = 0;
DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
@@ -409,10 +416,11 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
{ unsigned u;
for (u=0; ubuffPool, dstBufferCapacity) : dstAsBuffer;
+ size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
+ buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
+ buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
+ size_t dictSize = u ? overlapSize : 0;
if ((cctx==NULL) || (dstBuffer.start==NULL)) {
mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
@@ -421,7 +429,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
break; /* let's wait for previous jobs to complete, but don't start new ones */
}
- mtctx->jobs[u].srcStart = srcStart + frameStartPos;
+ mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
+ mtctx->jobs[u].dictSize = dictSize;
mtctx->jobs[u].srcSize = chunkSize;
mtctx->jobs[u].fullFrameSize = srcSize;
mtctx->jobs[u].params = params;
@@ -438,6 +447,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
frameStartPos += chunkSize;
+ dstBufferPos += dstBufferCapacity;
remainingSrcSize -= chunkSize;
} }
/* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
@@ -461,8 +471,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
if (ZSTD_isError(cSize)) error = cSize;
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
if (chunkID) { /* note : chunk 0 is already written directly into dst */
- if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
+ if (!error)
+ memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap if chunk decompressed within dst */
+ if (chunkID >= compressWithinDst) /* otherwise, it decompresses within dst */
+ ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
}
dstPos += cSize ;
@@ -509,7 +521,7 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
if (updateDict) {
ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
if (dict && dictSize) {
- zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
+ zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem);
if (zcs->cdict == NULL) return ERROR(memory_allocation);
} }
zcs->frameContentSize = pledgedSrcSize;
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 2aaa4a3df3c5..910f9ab783c3 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -177,30 +177,6 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
}
-#if 0
-/* deprecated */
-static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
-{
- ZSTD_decompressBegin(dstDCtx); /* init */
- if (srcDCtx) { /* support refDCtx on NULL */
- dstDCtx->dictEnd = srcDCtx->dictEnd;
- dstDCtx->vBase = srcDCtx->vBase;
- dstDCtx->base = srcDCtx->base;
- dstDCtx->previousDstEnd = srcDCtx->previousDstEnd;
- dstDCtx->dictID = srcDCtx->dictID;
- dstDCtx->litEntropy = srcDCtx->litEntropy;
- dstDCtx->fseEntropy = srcDCtx->fseEntropy;
- dstDCtx->LLTptr = srcDCtx->entropy.LLTable;
- dstDCtx->MLTptr = srcDCtx->entropy.MLTable;
- dstDCtx->OFTptr = srcDCtx->entropy.OFTable;
- dstDCtx->HUFptr = srcDCtx->entropy.hufTable;
- dstDCtx->entropy.rep[0] = srcDCtx->entropy.rep[0];
- dstDCtx->entropy.rep[1] = srcDCtx->entropy.rep[1];
- dstDCtx->entropy.rep[2] = srcDCtx->entropy.rep[2];
- }
-}
-#endif
-
static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict);
@@ -431,7 +407,8 @@ typedef struct
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+ blockProperties_t* bpPtr)
{
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
{ U32 const cBlockHeader = MEM_readLE24(src);
@@ -446,7 +423,8 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
}
-static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
@@ -454,7 +432,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src,
}
-static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize)
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ size_t regenSize)
{
if (srcSize != 1) return ERROR(srcSize_wrong);
if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
@@ -595,176 +575,70 @@ typedef union {
U32 alignedBy4;
} FSE_decode_t4;
+/* Default FSE distribution table for Literal Lengths */
static const FSE_decode_t4 LL_defaultDTable[(1< (size_t)(oLitEnd - base)) {
- /* offset beyond prefix */
+ /* offset beyond prefix -> go into extDict */
if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
match = dictEnd + (match - base);
if (match + sequence.matchLength <= dictEnd) {
@@ -1156,21 +1033,26 @@ FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int con
U32 const totalBits = llBits+mlBits+ofBits;
static const U32 LL_base[MaxLL+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 18, 20, 22, 24, 28, 32, 40,
+ 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
0x2000, 0x4000, 0x8000, 0x10000 };
static const U32 ML_base[MaxML+1] = {
- 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+ 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 39, 41, 43, 47, 51, 59,
+ 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
static const U32 OF_base[MaxOff+1] = {
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+ 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
+ 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
+ 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+ 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
/* sequence */
{ size_t offset;
@@ -1476,7 +1358,7 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
#endif
if (srcSize >= ZSTD_skippableHeaderSize &&
- (MEM_readLE32(src) & 0xFFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+ (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4);
} else {
const BYTE* ip = (const BYTE*)src;
@@ -2115,15 +1997,18 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
}
/*! ZSTD_getDictID_fromFrame() :
- * Provides the dictID required to decompressed the frame stored within `src`.
+ * Provides the dictID required to decompresse frame stored within `src`.
* If @return == 0, the dictID could not be decoded.
* This could for one of the following reasons :
- * - The frame does not require a dictionary to be decoded (most common case).
- * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ * - The frame does not require a dictionary (most common case).
+ * - The frame was built with dictID intentionally removed.
+ * Needed dictionary is a hidden information.
* Note : this use case also happens when using a non-conformant dictionary.
- * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ * - `srcSize` is too small, and as a result, frame header could not be decoded.
+ * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
* - This is not a Zstandard frame.
- * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
+ * When identifying the exact failure cause, it's possible to use
+ * ZSTD_getFrameParams(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{
ZSTD_frameParams zfp = { 0 , 0 , 0 , 0 };
@@ -2209,9 +2094,13 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds)
if (zds==NULL) return 0; /* support free on null */
{ ZSTD_customMem const cMem = zds->customMem;
ZSTD_freeDCtx(zds->dctx);
+ zds->dctx = NULL;
ZSTD_freeDDict(zds->ddictLocal);
+ zds->ddictLocal = NULL;
ZSTD_free(zds->inBuff, cMem);
+ zds->inBuff = NULL;
ZSTD_free(zds->outBuff, cMem);
+ zds->outBuff = NULL;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (zds->legacyContext)
ZSTD_freeLegacyStreamContext(zds->legacyContext, zds->previousLegacyVersion);
@@ -2247,7 +2136,9 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
return ZSTD_initDStream_usingDict(zds, NULL, 0);
}
-size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict) /**< note : ddict will just be referenced, and must outlive decompression session */
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
{
size_t const initResult = ZSTD_initDStream(zds);
zds->ddict = ddict;
@@ -2277,8 +2168,11 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
{
- if (zds==NULL) return 0; /* support sizeof on NULL */
- return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddictLocal) + zds->inBuffSize + zds->outBuffSize;
+ if (zds==NULL) return 0; /* support sizeof NULL */
+ return sizeof(*zds)
+ + ZSTD_sizeof_DCtx(zds->dctx)
+ + ZSTD_sizeof_DDict(zds->ddictLocal)
+ + zds->inBuffSize + zds->outBuffSize;
}
@@ -2376,15 +2270,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->blockSize = blockSize;
if (zds->inBuffSize < blockSize) {
ZSTD_free(zds->inBuff, zds->customMem);
- zds->inBuffSize = blockSize;
+ zds->inBuffSize = 0;
zds->inBuff = (char*)ZSTD_malloc(blockSize, zds->customMem);
if (zds->inBuff == NULL) return ERROR(memory_allocation);
+ zds->inBuffSize = blockSize;
}
if (zds->outBuffSize < neededOutSize) {
ZSTD_free(zds->outBuff, zds->customMem);
- zds->outBuffSize = neededOutSize;
+ zds->outBuffSize = 0;
zds->outBuff = (char*)ZSTD_malloc(neededOutSize, zds->customMem);
if (zds->outBuff == NULL) return ERROR(memory_allocation);
+ zds->outBuffSize = neededOutSize;
} }
zds->stage = zdss_read;
/* pass-through */
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 3a7b9f39f9fd..1863c8f34542 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -59,8 +59,6 @@ static int g_displayLevel = 2;
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
- if (displayLevel >= 4) \
- fflush(stdout); \
} \
}
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
@@ -236,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
* Returns 1 if the dmer at lp is greater than the dmer at rp.
*/
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
- const U32 lhs = *(const U32 *)lp;
- const U32 rhs = *(const U32 *)rp;
+ U32 const lhs = *(U32 const *)lp;
+ U32 const rhs = *(U32 const *)rp;
return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
}
+/**
+ * Faster version for d <= 8.
+ */
+static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+ U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
+ U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
+ U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
+ if (lhs < rhs) {
+ return -1;
+ }
+ return (lhs > rhs);
+}
/**
* Same as COVER_cmp() except ties are broken by pointer value
@@ -253,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
}
return result;
}
+/**
+ * Faster version for d <= 8.
+ */
+static int COVER_strict_cmp8(const void *lp, const void *rp) {
+ int result = COVER_cmp8(g_ctx, lp, rp);
+ if (result == 0) {
+ result = lp < rp ? -1 : 1;
+ }
+ return result;
+}
/**
* Returns the first pointer in [first, last) whose element does not compare
@@ -508,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const BYTE *const samples = (const BYTE *)samplesBuffer;
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
/* Checks */
- if (totalSamplesSize < d ||
+ if (totalSamplesSize < MAX(d, sizeof(U64)) ||
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
(COVER_MAX_SAMPLES_SIZE >> 20));
@@ -522,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
ctx->samplesSizes = samplesSizes;
ctx->nbSamples = nbSamples;
/* Partial suffix array */
- ctx->suffixSize = totalSamplesSize - d + 1;
+ ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
/* Maps index to the dmerID */
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@@ -556,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
}
/* qsort doesn't take an opaque pointer, so pass as a global */
g_ctx = ctx;
- qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
+ qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
+ (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
}
DISPLAYLEVEL(2, "Computing frequencies\n");
/* For each dmer group (group of positions with the same first d bytes):
@@ -566,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
* 2. We calculate how many samples the dmer occurs in and save it in
* freqs[dmerId].
*/
- COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
- &COVER_group);
+ COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
+ (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
ctx->freqs = ctx->suffix;
ctx->suffix = NULL;
return 1;
@@ -918,10 +939,10 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
- const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
- const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
- const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
- const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
+ const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
+ const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
+ const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
+ const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 0757dbbbb643..179e02effa4d 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -11,8 +11,9 @@
/*-**************************************
* Tuning parameters
****************************************/
+#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
-#define ZDICT_MIN_SAMPLES_SIZE 512
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
/*-**************************************
@@ -59,11 +60,8 @@
#define NOISELENGTH 32
-#define MINRATIO 4
static const int g_compressionLevel_default = 6;
static const U32 g_selectivity_default = 9;
-static const size_t g_provision_entropySize = 200;
-static const size_t g_min_fast_dictContent = 192;
/*-*************************************
@@ -308,10 +306,10 @@ static dictItem ZDICT_analyzePos(
/* look backward */
length = MINMATCHLENGTH;
while ((length >= MINMATCHLENGTH) & (start > 0)) {
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
- if (length >= LLIMIT) length = LLIMIT - 1;
- lengthList[length]++;
- if (length >= MINMATCHLENGTH) start--;
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
+ if (length >= LLIMIT) length = LLIMIT - 1;
+ lengthList[length]++;
+ if (length >= MINMATCHLENGTH) start--;
}
/* largest useful length */
@@ -363,21 +361,35 @@ static dictItem ZDICT_analyzePos(
}
+static int isIncluded(const void* in, const void* container, size_t length)
+{
+ const char* const ip = (const char*) in;
+ const char* const into = (const char*) container;
+ size_t u;
+
+ for (u=0; upos;
const U32 eltEnd = elt.pos + elt.length;
+ const char* const buf = (const char*) buffer;
/* tail overlap */
U32 u; for (u=1; u elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
/* append */
- U32 addedLength = table[u].pos - elt.pos;
+ U32 const addedLength = table[u].pos - elt.pos;
table[u].length += addedLength;
table[u].pos = elt.pos;
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
@@ -393,9 +405,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
/* front overlap */
for (u=1; u= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
/* append */
- int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+ int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
table[u].savings += elt.length / 8; /* rough approx bonus */
if (addedLength > 0) { /* otherwise, elt fully included into existing */
table[u].length += addedLength;
@@ -407,7 +420,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
table[u] = table[u-1], u--;
table[u] = elt;
return u;
- } }
+ }
+
+ if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
+ if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
+ size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
+ table[u].pos = elt.pos;
+ table[u].savings += (U32)(elt.savings * addedLength / elt.length);
+ table[u].length = MIN(elt.length, table[u].length + 1);
+ return u;
+ }
+ }
+ }
return 0;
}
@@ -425,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
}
-static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
{
/* merge if possible */
- U32 mergeId = ZDICT_checkMerge(table, elt, 0);
+ U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
if (mergeId) {
U32 newMerge = 1;
while (newMerge) {
- newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
+ newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
if (newMerge) ZDICT_removeDictItem(table, mergeId);
mergeId = newMerge;
}
@@ -480,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
if (ZDICT_clockSpan(displayClock) > refreshRate) \
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
- if (notificationLevel>=4) fflush(stdout); } }
+ if (notificationLevel>=4) fflush(stderr); } }
/* init */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -521,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
if (doneMarks[cursor]) { cursor++; continue; }
solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
if (solution.length==0) { cursor++; continue; }
- ZDICT_insertDictItem(dictList, dictListSize, solution);
+ ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
cursor += solution.length;
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
} }
@@ -683,19 +707,19 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
goto _cleanup;
}
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
- for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
- for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
- for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
- for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+ for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
+ for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
+ for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
+ for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
memset(repOffset, 0, sizeof(repOffset));
repOffset[1] = repOffset[4] = repOffset[8] = 1;
memset(bestRepOffset, 0, sizeof(bestRepOffset));
- if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
+ if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
- if (ZSTD_isError(beginResult)) {
+ if (ZSTD_isError(beginResult)) {
+ DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
goto _cleanup;
} }
@@ -812,7 +836,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
MEM_writeLE32(dstPtr+4, repStartValue[1]);
MEM_writeLE32(dstPtr+8, repStartValue[2]);
#endif
- //dstPtr += 12;
eSize += 12;
_cleanup:
@@ -831,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
ZDICT_params_t params)
{
size_t hSize;
-#define HBUFFSIZE 256
+#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
BYTE header[HBUFFSIZE];
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
@@ -877,20 +900,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
{
- size_t hSize;
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
+ size_t hSize = 8;
- /* dictionary header */
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
- { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
- U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
- U32 const dictID = params.dictID ? params.dictID : compliantID;
- MEM_writeLE32((char*)dictBuffer+4, dictID);
- }
- hSize = 8;
-
- /* entropy tables */
+ /* calculate entropy tables */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
DISPLAYLEVEL(2, "statistics ... \n");
{ size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
@@ -902,6 +916,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
hSize += eSize;
}
+ /* add dictionary header (after entropy tables) */
+ MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
+ { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+ U32 const dictID = params.dictID ? params.dictID : compliantID;
+ MEM_writeLE32((char*)dictBuffer+4, dictID);
+ }
if (hSize + dictContentSize < dictBufferCapacity)
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -929,8 +950,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
/* checks */
if (!dictList) return ERROR(memory_allocation);
- if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
- if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
+ if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
+ if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
/* init */
ZDICT_initDictItem(dictList);
@@ -963,14 +984,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
/* create dictionary */
{ U32 dictContentSize = ZDICT_dictSize(dictList);
- if (dictContentSize < targetDictSize/3) {
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
+ if (dictContentSize < targetDictSize/4) {
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+ if (samplesBuffSize < 10 * targetDictSize)
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
if (minRep > MINRATIO) {
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
}
- if (samplesBuffSize < 10 * targetDictSize)
- DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
}
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
@@ -978,7 +1000,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
- DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
}
/* limit dictionary size */
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index 4ead4474fa9b..9b53de346527 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -88,7 +88,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
/*! COVER_params_t :
For all values 0 means default.
- kMin and d are the only required parameters.
+ k and d are the only required parameters.
*/
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
@@ -147,18 +147,18 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictB
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
- dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
- maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
+ maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
or an error code, which can be tested by ZDICT_isError().
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
- note 2 : dictBuffer and customDictContent can overlap
+ note 2 : dictBuffer and dictContent can overlap
*/
-#define ZDICT_CONTENTSIZE_MIN 256
-#define ZDICT_DICTSIZE_MIN 512
+#define ZDICT_CONTENTSIZE_MIN 128
+#define ZDICT_DICTSIZE_MIN 256
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
- const void* customDictContent, size_t dictContentSize,
+ const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index bcacb8d5d7a2..cf5354d6a9b6 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1432,7 +1432,7 @@ typedef struct ZSTD_Cctx_s
#else
U32 hashTable[HASH_TABLESIZE];
#endif
- BYTE buffer[WORKPLACESIZE];
+ BYTE buffer[WORKPLACESIZE];
} cctxi_t;
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 2297b28c8b2e..3cf8f4778250 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -475,8 +475,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
{
@@ -1334,8 +1334,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
else
{
bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
+ ip = iend - 4;
+ }
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
}
}
@@ -2040,7 +2040,7 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
}
- /* Build rankVal */
+ /* Build rankVal */
{
const U32 minBits = tableLog+1 - maxW;
U32 nextRankVal = 0;
@@ -2374,7 +2374,7 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
}
- /* Build rankVal */
+ /* Build rankVal */
{
const U32 minBits = tableLog+1 - maxW;
U32 nextRankVal = 0;
@@ -2948,14 +2948,14 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
- if (litSize > srcSize-3) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litSize = litSize;
- memset(dctx->litBuffer + dctx->litSize, 0, 8);
- return litSize+3;
- }
- /* direct reference into compressed stream */
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litSize = litSize;
+ memset(dctx->litBuffer + dctx->litSize, 0, 8);
+ return litSize+3;
+ }
+ /* direct reference into compressed stream */
dctx->litPtr = istart+3;
dctx->litSize = litSize;
return litSize+3;
@@ -3515,13 +3515,13 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
unsigned ZSTDv02_isError(size_t code)
{
- return ZSTD_isError(code);
+ return ZSTD_isError(code);
}
size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize)
{
- return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+ return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
}
size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
@@ -3531,25 +3531,25 @@ size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
{
- return (ZSTDv02_Dctx*)ZSTD_createDCtx();
+ return (ZSTDv02_Dctx*)ZSTD_createDCtx();
}
size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
{
- return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+ return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
}
size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
{
- return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+ return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
}
size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
{
- return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+ return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
}
size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
- return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+ return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
}
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index ef654931f528..f438330a4692 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -477,8 +477,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
{
@@ -1335,8 +1335,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
else
{
bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
+ ip = iend - 4;
+ }
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
}
}
@@ -2037,7 +2037,7 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
}
- /* Build rankVal */
+ /* Build rankVal */
{
const U32 minBits = tableLog+1 - maxW;
U32 nextRankVal = 0;
@@ -2589,14 +2589,14 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
- if (litSize > srcSize-3) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litSize = litSize;
- memset(dctx->litBuffer + dctx->litSize, 0, 8);
- return litSize+3;
- }
- /* direct reference into compressed stream */
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litSize = litSize;
+ memset(dctx->litBuffer + dctx->litSize, 0, 8);
+ return litSize+3;
+ }
+ /* direct reference into compressed stream */
dctx->litPtr = istart+3;
dctx->litSize = litSize;
return litSize+3;
@@ -3156,13 +3156,13 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
unsigned ZSTDv03_isError(size_t code)
{
- return ZSTD_isError(code);
+ return ZSTD_isError(code);
}
size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize)
{
- return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+ return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
}
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
@@ -3172,25 +3172,25 @@ size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
{
- return (ZSTDv03_Dctx*)ZSTD_createDCtx();
+ return (ZSTDv03_Dctx*)ZSTD_createDCtx();
}
size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx)
{
- return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+ return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
}
size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx)
{
- return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+ return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
}
size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx)
{
- return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+ return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
}
size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
- return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+ return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
}
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 09040e68ec56..1a29da92d1e8 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -882,8 +882,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
{
@@ -1451,8 +1451,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
else
{
bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
+ ip = iend - 4;
+ }
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
}
}
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index a6f5f5dbbd16..674f5b0e4a8e 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -884,8 +884,8 @@ MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BITv05_DStream_overflow;
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BITv05_DStream_overflow;
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
bitD->ptr -= bitD->bitsConsumed >> 3;
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index a4258b67a618..ad8c4cd3186d 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -982,8 +982,8 @@ MEM_STATIC size_t BITv06_readBitsFast(BITv06_DStream_t* bitD, U32 nbBits)
if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
MEM_STATIC BITv06_DStream_status BITv06_reloadDStream(BITv06_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BITv06_DStream_overflow;
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BITv06_DStream_overflow;
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
bitD->ptr -= bitD->bitsConsumed >> 3;
diff --git a/lib/zstd.h b/lib/zstd.h
index a3237c77eebd..f8050c136104 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -55,8 +55,8 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
-#define ZSTD_VERSION_MINOR 1
-#define ZSTD_VERSION_RELEASE 4
+#define ZSTD_VERSION_MINOR 2
+#define ZSTD_VERSION_RELEASE 0
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
#define ZSTD_QUOTE(str) #str
@@ -71,48 +71,48 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< library version number; to
* Simple API
***************************************/
/*! ZSTD_compress() :
- Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
- Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
- @return : compressed size written into `dst` (<= `dstCapacity),
- or an error code if it fails (which can be tested using ZSTD_isError()). */
+ * Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity),
+ * or an error code if it fails (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
/*! ZSTD_decompress() :
- `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
- `dstCapacity` is an upper bound of originalSize.
- If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
- @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
- or an errorCode if it fails (which can be tested using ZSTD_isError()). */
+ * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ * `dstCapacity` is an upper bound of originalSize.
+ * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ * or an errorCode if it fails (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/*! ZSTD_getDecompressedSize() :
-* NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
-* ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
-* frame, but distinguishes empty frames from frames with an unknown size, or errors.
-*
-* Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
-* concatenated frames in one buffer, and so is more general.
-* As a result however, it requires more computation and entire frames to be passed to it,
-* as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
-*
-* 'src' is the start of a zstd compressed frame.
-* @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
-* note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
-* When `return==0`, data to decompress could be any size.
-* In which case, it's necessary to use streaming mode to decompress data.
-* Optionally, application can still use ZSTD_decompress() while relying on implied limits.
-* (For example, data may be necessarily cut into blocks <= 16 KB).
-* note 2 : decompressed size is always present when compression is done with ZSTD_compress()
-* note 3 : decompressed size can be very large (64-bits value),
-* potentially larger than what local system can handle as a single memory segment.
-* In which case, it's necessary to use streaming mode to decompress data.
-* note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
-* Always ensure result fits within application's authorized limits.
-* Each application can set its own limits.
-* note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */
+ * NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
+ * ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
+ * frame, but distinguishes empty frames from frames with an unknown size, or errors.
+ *
+ * Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
+ * concatenated frames in one buffer, and so is more general.
+ * As a result however, it requires more computation and entire frames to be passed to it,
+ * as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
+ *
+ * 'src' is the start of a zstd compressed frame.
+ * @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
+ * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ * When `return==0`, data to decompress could be any size.
+ * In which case, it's necessary to use streaming mode to decompress data.
+ * Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+ * (For example, data may be necessarily cut into blocks <= 16 KB).
+ * note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ * note 3 : decompressed size can be very large (64-bits value),
+ * potentially larger than what local system can handle as a single memory segment.
+ * In which case, it's necessary to use streaming mode to decompress data.
+ * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ * Always ensure result fits within application's authorized limits.
+ * Each application can set its own limits.
+ * note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */
ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
@@ -127,29 +127,29 @@ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readab
* Explicit memory management
***************************************/
/*= Compression context
-* When compressing many times,
-* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
-* This will make workload friendlier for system's memory.
-* Use one context per thread for parallel execution in multi-threaded environments. */
+ * When compressing many times,
+ * it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ * This will make workload friendlier for system's memory.
+ * Use one context per thread for parallel execution in multi-threaded environments. */
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
/*! ZSTD_compressCCtx() :
- Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */
+ * Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
/*= Decompression context
-* When decompressing many times,
-* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
-* This will make workload friendlier for system's memory.
-* Use one context per thread for parallel execution in multi-threaded environments. */
+ * When decompressing many times,
+ * it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ * This will make workload friendlier for system's memory.
+ * Use one context per thread for parallel execution in multi-threaded environments. */
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
/*! ZSTD_decompressDCtx() :
-* Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */
+ * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */
ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -194,9 +194,10 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize
ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
/*! ZSTD_compress_usingCDict() :
-* Compression using a digested Dictionary.
-* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-* Note that compression level is decided during dictionary creation. */
+ * Compression using a digested Dictionary.
+ * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ * Note that compression level is decided during dictionary creation.
+ * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -487,7 +488,7 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, siz
/*! ZSTD_createCDict_advanced() :
* Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
- ZSTD_parameters params, ZSTD_customMem customMem);
+ ZSTD_compressionParameters cParams, ZSTD_customMem customMem);
/*! ZSTD_sizeof_CDict() :
* Gives the amount of memory used by a given ZSTD_sizeof_CDict */
@@ -513,12 +514,19 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
/*! ZSTD_compress_advanced() :
-* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
-ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- ZSTD_parameters params);
+* Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter */
+ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+* Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
/*--- Advanced decompression functions ---*/
@@ -578,7 +586,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
* Note : this use case also happens when using a non-conformant dictionary.
* - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
* - This is not a Zstandard frame.
- * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
@@ -588,13 +596,22 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
/*===== Advanced Streaming compression functions =====*/
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); /**< size of CStream is variable, depending primarily on compression level */
ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */
-ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+
+/*! ZSTD_resetCStream() :
+ * start a new compression job, using same parameters from previous job.
+ * This is typically useful to skip dictionary loading stage, since it will re-use it in-place..
+ * Note that zcs must be init at least once before using ZSTD_resetCStream().
+ * pledgedSrcSize==0 means "srcSize unknown".
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
/*===== Advanced Streaming decompression functions =====*/
@@ -650,8 +667,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */
-ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */
+
ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -745,19 +764,20 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
- Compressing and decompressing require a context structure
+ Use ZSTD_createCCtx() and ZSTD_createDCtx()
- It is necessary to init context before starting
- + compression : ZSTD_compressBegin()
- + decompression : ZSTD_decompressBegin()
- + variants _usingDict() are also allowed
- + copyCCtx() and copyDCtx() work too
- - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- + If you need to compress more, cut data into multiple blocks
- + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+ + compression : any ZSTD_compressBegin*() variant, including with dictionary
+ + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+ + copyCCtx() and copyDCtx() can be used too
+ - Block size is limited, it must be <= ZSTD_getBlockSizeMax() <= ZSTD_BLOCKSIZE_ABSOLUTEMAX
+ + If input is larger than a block size, it's necessary to split input data into multiple blocks
+ + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
In which case, nothing is produced into `dst`.
+ User must test for such outcome and deal directly with uncompressed data
+ ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
- + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
- Use ZSTD_insertBlock() in such a case.
+ + In case of multiple successive blocks, should some of them be uncompressed,
+ decoder must be informed of their existence in order to follow proper history.
+ Use ZSTD_insertBlock() for such a case.
*/
#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */
diff --git a/programs/Makefile b/programs/Makefile
index 1475cb610916..0c920a87bcbd 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -18,6 +18,19 @@
ZSTDDIR = ../lib
+# Version numbers
+LIBVER_SRC := $(ZSTDDIR)/zstd.h
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
+LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
+LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
+LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
+LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
+LIBVER := $(shell echo $(LIBVER_SCRIPT))
+
+ZSTD_VERSION=$(LIBVER)
+
ifeq ($(shell $(CC) -v 2>&1 | grep -c "gcc version "), 1)
ALIGN_LOOP = -falign-loops=32
else
@@ -69,10 +82,23 @@ else
EXT =
endif
+VOID = /dev/null
+
+# thread detection
+NO_THREAD_MSG := ==> no threads, building without multithreading support
+HAVE_PTHREAD := $(shell printf '\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_pthread$(EXT) -x c - -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0)
+HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0)
+ifeq ($(HAVE_THREAD), 1)
+THREAD_MSG := ==> building with threading support
+THREAD_CPP := -DZSTD_MULTITHREAD
+THREAD_LD := -pthread
+else
+THREAD_MSG := $(NO_THREAD_MSG)
+endif
+
# zlib detection
NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support
-VOID = /dev/null
-HAVE_ZLIB := $(shell printf '\#include \nint main(){}' | $(CC) -o have_zlib -x c - -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0)
+HAVE_ZLIB := $(shell printf '\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_zlib$(EXT) -x c - -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0)
ifeq ($(HAVE_ZLIB), 1)
ZLIB_MSG := ==> building zstd with .gz compression support
ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS
@@ -80,9 +106,10 @@ ZLIBLD = -lz
else
ZLIB_MSG := $(NO_ZLIB_MSG)
endif
+
# lzma detection
NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support
-HAVE_LZMA := $(shell printf '\#include \nint main(){}' | $(CC) -o have_lzma -x c - -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0)
+HAVE_LZMA := $(shell printf '\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lzma$(EXT) -x c - -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0)
ifeq ($(HAVE_LZMA), 1)
LZMA_MSG := ==> building zstd with .xz/.lzma compression support
LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS
@@ -91,6 +118,16 @@ else
LZMA_MSG := $(NO_LZMA_MSG)
endif
+# lz4 detection
+NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support
+HAVE_LZ4 := $(shell printf '\#include \n\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lz4$(EXT) -x c - -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0)
+ifeq ($(HAVE_LZ4), 1)
+LZ4_MSG := ==> building zstd with .lz4 compression support
+LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS
+LZ4LD = -llz4
+else
+LZ4_MSG := $(NO_LZ4_MSG)
+endif
.PHONY: default all clean clean_decomp_o install uninstall generate_res
@@ -100,17 +137,20 @@ all: zstd
$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
-zstd : CPPFLAGS += $(ZLIBCPP)
-zstd : LDFLAGS += $(ZLIBLD)
-zstd : LZMA_MSG := $(NO_LZMA_MSG)
-zstd-nogz : ZLIB_MSG := $(NO_ZLIB_MSG)
-zstd-nogz : LZMA_MSG := $(NO_LZMA_MSG)
-xzstd : CPPFLAGS += $(ZLIBCPP) $(LZMACPP)
-xzstd : LDFLAGS += $(ZLIBLD) $(LZMALD)
-zstd zstd-nogz xzstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd zstd-nogz xzstd : $(ZSTDLIB_OBJ) zstdcli.o fileio.o bench.o datagen.o dibio.o
+zstd xzstd zstd4 xzstd4 : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP)
+zstd xzstd zstd4 xzstd4 : LDFLAGS += $(THREAD_LD) $(ZLIBLD)
+xzstd xzstd4 : CPPFLAGS += $(LZMACPP)
+xzstd xzstd4 : LDFLAGS += $(LZMALD)
+zstd4 xzstd4 : CPPFLAGS += $(LZ4CPP)
+zstd4 xzstd4 : LDFLAGS += $(LZ4LD)
+zstd zstd4 : LZMA_MSG := - xz/lzma support is disabled
+zstd xzstd : LZ4_MSG := - lz4 support is disabled
+zstd xzstd zstd4 xzstd4 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
+zstd xzstd zstd4 xzstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
+ @echo "$(THREAD_MSG)"
@echo "$(ZLIB_MSG)"
@echo "$(LZMA_MSG)"
+ @echo "$(LZ4_MSG)"
ifneq (,$(filter Windows%,$(OS)))
windres/generate_res.bat
endif
@@ -126,10 +166,20 @@ ifneq (,$(filter Windows%,$(OS)))
endif
$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
-
zstd-nolegacy : clean_decomp_o
$(MAKE) zstd ZSTD_LEGACY_SUPPORT=0
+zstd-nomt : THREAD_CPP :=
+zstd-nomt : THREAD_LD :=
+zstd-nomt : THREAD_MSG := - multi-threading disabled
+zstd-nomt : zstd
+
+zstd-nogz : ZLIBCPP :=
+zstd-nogz : ZLIBLD :=
+zstd-nogz : ZLIB_MSG := - gzip support is disabled
+zstd-nogz : zstd
+
+
zstd-pgo : MOREFLAGS = -fprofile-generate
zstd-pgo : clean zstd
./zstd -b19i1 $(PROFILE_WITH)
@@ -142,22 +192,18 @@ zstd-pgo : clean zstd
$(RM) $(ZSTDDECOMP_O)
$(MAKE) zstd MOREFLAGS=-fprofile-use
-zstd-frugal: $(ZSTD_FILES) zstdcli.c fileio.c
+# minimal target, with only zstd compression and decompression. no bench. no legacy.
+zstd-small: CFLAGS = "-Os -s"
+zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o zstd$(EXT)
-zstd-small:
- CFLAGS="-Os -s" $(MAKE) zstd-frugal
-
zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
-zstdmt: CPPFLAGS += -DZSTD_MULTITHREAD
-ifeq (,$(filter Windows%,$(OS)))
-zstdmt: LDFLAGS += -lpthread
-endif
+# zstd is now built with Multi-threading by default
zstdmt: zstd
generate_res:
@@ -174,6 +220,19 @@ clean:
clean_decomp_o:
@$(RM) $(ZSTDDECOMP_O)
+MD2ROFF = ronn
+MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)"
+
+zstd.1: zstd.1.md
+ cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
+
+man: zstd.1
+
+clean-man:
+ rm zstd.1
+
+preview-man: clean-man man
+ man ./zstd.1
#-----------------------------------------------------------------------------
# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
@@ -206,6 +265,7 @@ install: zstd
@$(INSTALL_PROGRAM) zstd $(DESTDIR)$(BINDIR)/zstd
@ln -sf zstd $(DESTDIR)$(BINDIR)/zstdcat
@ln -sf zstd $(DESTDIR)$(BINDIR)/unzstd
+ @ln -sf zstd $(DESTDIR)$(BINDIR)/zstdmt
@$(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless
@$(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep
@echo Installing man pages
diff --git a/programs/README.md b/programs/README.md
index 203fd7b49bce..d7922a0969e4 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -11,8 +11,29 @@ There are however other Makefile targets that create different variations of CLI
- `zstd-decompress` : decompressor-only version of CLI; without dictionary builder, benchmark, and support for decompression of legacy zstd versions
+#### Compilation variables
+`zstd` tries to detect and use the following features automatically :
+
+- __HAVE_THREAD__ : multithreading is automatically enabled when `pthread` is detected.
+ It's possible to disable multithread support, by either compiling `zstd-nomt` target or using HAVE_THREAD=0 variable.
+ Example : make zstd HAVE_THREAD=0
+ It's also possible to force compilation with multithread support, using HAVE_THREAD=1.
+ In which case, linking stage will fail if `pthread` library cannot be found.
+ This might be useful to prevent silent feature disabling.
+
+- __HAVE_ZLIB__ : `zstd` can compress and decompress files in `.gz` format.
+ This is done through command `--format=gzip`.
+ Alternatively, symlinks named `gzip` or `gunzip` will mimic intended behavior.
+ .gz support is automatically enabled when `zlib` library is detected at build time.
+ It's possible to disable .gz support, by either compiling `zstd-nogz` target or using HAVE_ZLIB=0 variable.
+ Example : make zstd HAVE_ZLIB=0
+ It's also possible to force compilation with zlib support, using HAVE_ZLIB=1.
+ In which case, linking stage will fail if `zlib` library cannot be found.
+ This might be useful to prevent silent feature disabling.
+
+
#### Aggregation of parameters
-CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
+CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
#### Dictionary builder in Command Line Interface
@@ -23,7 +44,7 @@ which can be loaded before compression and decompression.
Using a dictionary, the compression ratio achievable on small data improves dramatically.
These compression gains are achieved while simultaneously providing faster compression and decompression speeds.
-Dictionary work if there is some correlation in a family of small data (there is no universal dictionary).
+Dictionary work if there is some correlation in a family of small data (there is no universal dictionary).
Hence, deploying one dictionary per type of data will provide the greater benefits.
Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm
will rely more and more on previously decoded content to compress the rest of the file.
@@ -35,7 +56,6 @@ Usage of the dictionary builder and created dictionaries with CLI:
3. Decompress with the dictionary: `zstd --decompress FILE.zst -D dictionaryName`
-
#### Benchmark in Command Line Interface
CLI includes in-memory compression benchmark module for zstd.
The benchmark is conducted using given filenames. The files are read into memory and joined together.
@@ -48,7 +68,6 @@ One can select compression levels starting from `-b` and ending with `-e`.
The `-i` parameter selects minimal time used for each of tested levels.
-
#### Usage of Command Line Interface
The full list of options can be obtained with `-h` or `-H` parameter:
```
@@ -62,33 +81,40 @@ Arguments :
-d : decompression
-D file: use `file` as Dictionary
-o file: result stored into `file` (only if 1 input file)
- -f : overwrite output without prompting
+ -f : overwrite output without prompting and (de)compress links
--rm : remove source file(s) after successful de/compression
-k : preserve source file(s) (default)
-h/-H : display help/long help and exit
Advanced arguments :
-V : display Version number and exit
- -v : verbose mode; specify multiple times to increase log level (default:2)
+ -v : verbose mode; specify multiple times to increase verbosity
-q : suppress warnings; specify twice to suppress errors too
-c : force write to standard output, even if it is the console
- -r : operate recursively on directories
--ultra : enable levels beyond 19, up to 22 (requires more memory)
+ -T# : use # threads for compression (default:1)
+ -B# : select size of each job (default:0==automatic)
--no-dictID : don't write dictID into header (dictionary compression)
--[no-]check : integrity check (default:enabled)
+ -r : operate recursively on directories
+--format=gzip : compress files to the .gz format
--test : test compressed file integrity
---[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)
+--[no-]sparse : sparse mode (default:disabled)
+ -M# : Set a memory usage limit for decompression
+-- : All arguments after "--" are treated as files
Dictionary builder :
--train ## : create a dictionary from a training set of files
+--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args
+--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9)
-o file : `file` is dictionary name (default: dictionary)
---maxdict ## : limit dictionary to specified size (default : 112640)
- -s# : dictionary selectivity level (default: 9)
---dictID ## : force dictionary ID to specified value (default: random)
+--maxdict=# : limit dictionary to specified size (default : 112640)
+--dictID=# : force dictionary ID to specified value (default: random)
Benchmark arguments :
-b# : benchmark file(s), using # compression level (default : 1)
-e# : test all compression levels from -bX to # (default: 1)
-i# : minimum evaluation time in seconds (default : 3s)
-B# : cut file into independent blocks of size # (default: no block)
- ```
\ No newline at end of file
+--priority=rt : set process priority to real-time
+```
diff --git a/programs/bench.c b/programs/bench.c
index 2dd1cfb0fab1..22b871952b8e 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -70,12 +70,12 @@ static U32 g_compressibilityDefault = 50;
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
+static int g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
{ g_time = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
+ if (g_displayLevel>=4) fflush(stderr); } }
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
@@ -89,7 +89,7 @@ static clock_t g_time = 0;
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", error); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, " \n"); \
@@ -146,17 +146,20 @@ typedef struct {
} blockParam_t;
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
const char* displayName, int cLevel,
const size_t* fileSizes, U32 nbFiles,
const void* dictBuffer, size_t dictBufferSize,
- ZSTD_compressionParameters *comprParams)
+ const ZSTD_compressionParameters* comprParams)
{
size_t const blockSize = ((g_blockSize>=32 && !g_decodeOnly) ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
- size_t const avgSize = MIN(g_blockSize, (srcSize / nbFiles));
+ size_t const avgSize = MIN(blockSize, (srcSize / nbFiles));
U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
@@ -176,22 +179,21 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
EXM_THROW(31, "allocation error : not enough memory");
/* init */
- if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */
+ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */
UTIL_initTimer(&ticksPerSecond);
- if (g_decodeOnly) {
- const char* srcPtr = (const char*) srcBuffer;
- U64 dSize64 = 0;
+ if (g_decodeOnly) { /* benchmark only decompression : source must be already compressed */
+ const char* srcPtr = (const char*)srcBuffer;
+ U64 totalDSize64 = 0;
U32 fileNb;
for (fileNb=0; fileNb decodedSize) EXM_THROW(32, "original size is too large");
- if (decodedSize==0) EXM_THROW(32, "Impossible to determine original size ");
+ { size_t const decodedSize = (size_t)totalDSize64;
+ if (totalDSize64 > decodedSize) EXM_THROW(32, "original size is too large"); /* size_t overflow */
free(resultBuffer);
resultBuffer = malloc(decodedSize);
if (!resultBuffer) EXM_THROW(33, "not enough memory");
@@ -260,12 +262,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
UTIL_getTime(&clockStart);
if (!cCompleted) { /* still some time to do compression tests */
- ZSTD_parameters zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize);
ZSTD_customMem const cmem = { NULL, NULL, NULL };
- U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
+ U64 const clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
U32 nbLoops = 0;
- ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams, cmem);
- if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
+ ZSTD_parameters zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize);
+ ZSTD_CDict* cdict;
if (comprParams->windowLog) zparams.cParams.windowLog = comprParams->windowLog;
if (comprParams->chainLog) zparams.cParams.chainLog = comprParams->chainLog;
if (comprParams->hashLog) zparams.cParams.hashLog = comprParams->hashLog;
@@ -273,6 +274,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
if (comprParams->searchLength) zparams.cParams.searchLength = comprParams->searchLength;
if (comprParams->targetLength) zparams.cParams.targetLength = comprParams->targetLength;
if (comprParams->strategy) zparams.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1);
+ cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams.cParams, cmem);
+ if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
do {
U32 blockNb;
size_t rSize;
diff --git a/programs/dibio.c b/programs/dibio.c
index 5ef202c8abad..aac36425cf75 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -53,12 +53,12 @@ static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
+static int g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((DIB_clockSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
{ g_time = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
+ if (g_displayLevel>=4) fflush(stderr); } }
static const clock_t refreshRate = CLOCKS_PER_SEC * 2 / 10;
static clock_t g_time = 0;
@@ -89,7 +89,8 @@ unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }
const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
+#undef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
/* ********************************************************
diff --git a/programs/fileio.c b/programs/fileio.c
index e6481f1fa726..e188936b21f5 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -31,6 +31,11 @@
#include /* clock */
#include /* errno */
+#if defined (_MSC_VER)
+# include
+# include
+#endif
+
#include "mem.h"
#include "fileio.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
@@ -48,6 +53,12 @@
# include
#endif
+#define LZ4_MAGICNUMBER 0x184D2204
+#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
+# include
+# include
+#endif
+
/*-*************************************
* Constants
@@ -71,7 +82,7 @@
#define CACHELINE 64
-#define MAX_DICT_SIZE (8 MB) /* protection against large input (attack scenario) */
+#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
#define FNSPACE 30
@@ -81,18 +92,20 @@
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
-static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
+static int g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
#define DISPLAYUPDATE(l, ...) { if (g_displayLevel>=l) { \
if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
{ g_time = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } } }
+ if (g_displayLevel>=4) fflush(stderr); } } }
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
+#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
/* ************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
***************************************************************/
@@ -188,6 +201,18 @@ void FIO_setOverlapLog(unsigned overlapLog){
/*-*************************************
* Functions
***************************************/
+/** FIO_remove() :
+ * @result : Unlink `fileName`, even if it's read-only */
+static int FIO_remove(const char* path)
+{
+#if defined(_WIN32) || defined(WIN32)
+ /* windows doesn't allow remove read-only files, so try to make it
+ * writable first */
+ chmod(path, _S_IWRITE);
+#endif
+ return remove(path);
+}
+
/** FIO_openSrcFile() :
* condition : `dstFileName` must be non-NULL.
* @result : FILE* to `dstFileName`, or NULL if it fails */
@@ -227,23 +252,32 @@ static FILE* FIO_openDstFile(const char* dstFileName)
DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
}
} else {
- if (!g_overwrite && strcmp (dstFileName, nulmark)) { /* Check if destination file already exists */
+ if (g_sparseFileSupport == 1) {
+ g_sparseFileSupport = ZSTD_SPARSE_DEFAULT;
+ }
+ if (strcmp (dstFileName, nulmark)) { /* Check if destination file already exists */
f = fopen( dstFileName, "rb" );
if (f != 0) { /* dest file exists, prompt for overwrite authorization */
fclose(f);
- if (g_displayLevel <= 1) {
- /* No interaction possible */
- DISPLAY("zstd: %s already exists; not overwritten \n", dstFileName);
- return NULL;
- }
- DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
- { int ch = getchar();
- if ((ch!='Y') && (ch!='y')) {
- DISPLAY(" not overwritten \n");
+ if (!g_overwrite) {
+ if (g_displayLevel <= 1) {
+ /* No interaction possible */
+ DISPLAY("zstd: %s already exists; not overwritten \n", dstFileName);
return NULL;
}
- while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */
- } } }
+ DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
+ { int ch = getchar();
+ if ((ch!='Y') && (ch!='y')) {
+ DISPLAY(" not overwritten \n");
+ return NULL;
+ }
+ while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */
+ }
+ }
+
+ /* need to unlink */
+ FIO_remove(dstFileName);
+ } }
f = fopen( dstFileName, "wb" );
if (f==NULL) DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
}
@@ -252,13 +286,13 @@ static FILE* FIO_openDstFile(const char* dstFileName)
}
-/*! FIO_loadFile() :
-* creates a buffer, pointed by `*bufferPtr`,
-* loads `filename` content into it,
-* up to MAX_DICT_SIZE bytes.
-* @return : loaded size
-*/
-static size_t FIO_loadFile(void** bufferPtr, const char* fileName)
+/*! FIO_createDictBuffer() :
+ * creates a buffer, pointed by `*bufferPtr`,
+ * loads `filename` content into it, up to DICTSIZE_MAX bytes.
+ * @return : loaded size
+ * if fileName==NULL, returns 0 and a NULL pointer
+ */
+static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
{
FILE* fileHandle;
U64 fileSize;
@@ -270,14 +304,7 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName)
fileHandle = fopen(fileName, "rb");
if (fileHandle==0) EXM_THROW(31, "zstd: %s: %s", fileName, strerror(errno));
fileSize = UTIL_getFileSize(fileName);
- if (fileSize > MAX_DICT_SIZE) {
- int seekResult;
- if (fileSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", fileName); /* avoid extreme cases */
- DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", fileName, (U32)MAX_DICT_SIZE);
- seekResult = fseek(fileHandle, (long int)(fileSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */
- if (seekResult != 0) EXM_THROW(33, "zstd: %s: %s", fileName, strerror(errno));
- fileSize = MAX_DICT_SIZE;
- }
+ if (fileSize > DICTSIZE_MAX) EXM_THROW(32, "Dictionary file %s is too large (> %u MB)", fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */
*bufferPtr = malloc((size_t)fileSize);
if (*bufferPtr==NULL) EXM_THROW(34, "zstd: %s", strerror(errno));
{ size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
@@ -330,7 +357,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
/* dictionary */
{ void* dictBuffer;
- size_t const dictBuffSize = FIO_loadFile(&dictBuffer, dictFileName);
+ size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */
if (dictFileName && (dictBuffer==NULL)) EXM_THROW(32, "zstd: allocation error : can't create dictBuffer");
{ ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
params.fParams.contentSizeFlag = srcRegFile;
@@ -342,7 +369,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
if (comprParams->searchLog) params.cParams.searchLog = comprParams->searchLog;
if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength;
if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength;
- if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1);
+ if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1); /* 0 means : do not change */
#ifdef ZSTD_MULTITHREAD
{ size_t const errorCode = ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize);
if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode));
@@ -494,6 +521,84 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, const char* srcFi
}
#endif
+#ifdef ZSTD_LZ4COMPRESS
+static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
+static unsigned long long FIO_compressLz4Frame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize)
+{
+ unsigned long long inFileSize = 0, outFileSize = 0;
+
+ LZ4F_preferences_t prefs;
+ LZ4F_compressionContext_t ctx;
+
+ LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+ if (LZ4F_isError(errorCode)) EXM_THROW(31, "zstd: failed to create lz4 compression context");
+
+ memset(&prefs, 0, sizeof(prefs));
+
+#if LZ4_VERSION_NUMBER <= 10600
+#define LZ4F_blockIndependent blockIndependent
+#define LZ4F_max4MB max4MB
+#endif
+
+ prefs.autoFlush = 1;
+ prefs.compressionLevel = compressionLevel;
+ prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* stick to defaults for lz4 cli */
+ prefs.frameInfo.blockSizeID = LZ4F_max4MB;
+ prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag;
+#if LZ4_VERSION_NUMBER >= 10600
+ prefs.frameInfo.contentSize = srcFileSize;
+#endif
+
+ {
+ size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
+ size_t readSize;
+ size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
+ if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
+ { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
+ if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
+ outFileSize += headerSize;
+
+ /* Read first block */
+ readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
+ inFileSize += readSize;
+
+ /* Main Loop */
+ while (readSize>0) {
+ size_t outSize;
+
+ /* Compress Block */
+ outSize = LZ4F_compressUpdate(ctx, ress->dstBuffer, ress->dstBufferSize, ress->srcBuffer, readSize, NULL);
+ if (LZ4F_isError(outSize)) EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", srcFileName, LZ4F_getErrorName(outSize));
+ outFileSize += outSize;
+ if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100)
+ else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100);
+
+ /* Write Block */
+ { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
+ if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); }
+
+ /* Read next block */
+ readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
+ inFileSize += readSize;
+ }
+ if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
+
+ /* End of Stream mark */
+ headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
+ if (LZ4F_isError(headerSize)) EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", srcFileName, LZ4F_getErrorName(headerSize));
+
+ { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
+ if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); }
+ outFileSize += headerSize;
+ }
+
+ *readsize = inFileSize;
+ LZ4F_freeCompressionContext(ctx);
+
+ return outFileSize;
+}
+#endif
+
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
@@ -512,6 +617,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
switch (g_compressionType) {
case FIO_zstdCompression:
break;
+
case FIO_gzipCompression:
#ifdef ZSTD_GZCOMPRESS
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
@@ -520,6 +626,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
+
case FIO_xzCompression:
case FIO_lzmaCompression:
#ifdef ZSTD_LZMACOMPRESS
@@ -527,6 +634,15 @@ static int FIO_compressFilename_internal(cRess_t ress,
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName);
+#endif
+ goto finish;
+
+ case FIO_lz4Compression:
+#ifdef ZSTD_LZ4COMPRESS
+ compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
+#else
+ (void)compressionLevel;
+ EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
}
@@ -548,8 +664,8 @@ static int FIO_compressFilename_internal(cRess_t ress,
readsize += inSize;
{ ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
- while (inBuff.pos != inBuff.size) { /* note : is there any possibility of endless loop ? for example, if outBuff is not large enough ? */
- ZSTD_outBuffer outBuff= { ress.dstBuffer, ress.dstBufferSize, 0 };
+ while (inBuff.pos != inBuff.size) {
+ ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
#ifdef ZSTD_MULTITHREAD
size_t const result = ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff);
#else
@@ -563,13 +679,13 @@ static int FIO_compressFilename_internal(cRess_t ress,
if (sizeCheck!=outBuff.pos) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
compressedfilesize += outBuff.pos;
} } }
-#ifdef ZSTD_MULTITHREAD
- if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20))
- else DISPLAYUPDATE(2, "\rRead : %u / %u MB", (U32)(readsize>>20), (U32)(fileSize>>20));
-#else
- if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(readsize>>20), (double)compressedfilesize/readsize*100)
- else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(readsize>>20), (U32)(fileSize>>20), (double)compressedfilesize/readsize*100);
-#endif
+ if (g_nbThreads > 1) {
+ if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20))
+ else DISPLAYUPDATE(2, "\rRead : %u / %u MB", (U32)(readsize>>20), (U32)(fileSize>>20));
+ } else {
+ if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(readsize>>20), (double)compressedfilesize/readsize*100)
+ else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(readsize>>20), (U32)(fileSize>>20), (double)compressedfilesize/readsize*100);
+ }
}
/* End of Frame */
@@ -750,7 +866,7 @@ static dRess_t FIO_createDResources(const char* dictFileName)
/* dictionary */
{ void* dictBuffer;
- size_t const dictBufferSize = FIO_loadFile(&dictBuffer, dictFileName);
+ size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName);
size_t const initError = ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize);
if (ZSTD_isError(initError)) EXM_THROW(61, "ZSTD_initDStream_usingDict error : %s", ZSTD_getErrorName(initError));
free(dictBuffer);
@@ -1019,6 +1135,66 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
}
#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+static unsigned long long FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
+{
+ unsigned long long filesize = 0;
+ LZ4F_errorCode_t nextToLoad;
+ LZ4F_decompressionContext_t dCtx;
+ LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
+
+ if (LZ4F_isError(errorCode)) EXM_THROW(61, "zstd: failed to create lz4 decompression context");
+
+ /* Init feed with magic number (already consumed from FILE* sFile) */
+ { size_t inSize = 4;
+ size_t outSize= 0;
+ MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
+ nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
+ if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "zstd: %s: lz4 header error : %s", srcFileName, LZ4F_getErrorName(nextToLoad));
+ }
+
+ /* Main Loop */
+ for (;nextToLoad;) {
+ size_t readSize;
+ size_t pos = 0;
+ size_t decodedBytes = ress->dstBufferSize;
+
+ /* Read input */
+ if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
+ readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
+ if (!readSize) break; /* reached end of file or stream */
+
+ while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
+ /* Decode Input (at least partially) */
+ size_t remaining = readSize - pos;
+ decodedBytes = ress->dstBufferSize;
+ nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
+ if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "zstd: %s: decompression error : %s", srcFileName, LZ4F_getErrorName(nextToLoad));
+ pos += remaining;
+
+ /* Write Block */
+ if (decodedBytes) {
+ if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) EXM_THROW(63, "Write error : cannot write to output file");
+ filesize += decodedBytes;
+ DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20));
+ }
+
+ if (!nextToLoad) break;
+ }
+ }
+ /* can be out because readSize == 0, which could be an fread() error */
+ if (ferror(srcFile)) EXM_THROW(67, "zstd: %s: read error", srcFileName);
+
+ if (nextToLoad!=0) EXM_THROW(68, "zstd: %s: unfinished stream", srcFileName);
+
+ LZ4F_freeDecompressionContext(dCtx);
+ ress->srcBufferLoaded = 0; /* LZ4F will go to the frame boundary */
+
+ return filesize;
+}
+#endif
+
+
/** FIO_decompressSrcFile() :
Decompression `srcFileName` into `ress.dstFile`
@@ -1070,6 +1246,15 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
#else
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName);
return 1;
+#endif
+ } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
+#ifdef ZSTD_LZ4DECOMPRESS
+ unsigned long long const result = FIO_decompressLz4Frame(&ress, srcFile, srcFileName);
+ if (result == 0) return 1;
+ filesize += result;
+#else
+ DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without ZSTD_LZ4DECOMPRESS) -- ignored \n", srcFileName);
+ return 1;
#endif
} else {
if (!ZSTD_isFrame(ress.srcBuffer, toRead)) {
@@ -1179,7 +1364,7 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
dstFileName = (char*)malloc(dfnSize);
if (dstFileName==NULL) EXM_THROW(74, "not enough memory for dstFileName");
}
- if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION))) {
+ if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION) && strcmp(suffixPtr, LZ4_EXTENSION))) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION);
skippedFiles++;
continue;
diff --git a/programs/fileio.h b/programs/fileio.h
index 0dd58d625d44..65da98d7fa88 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -33,12 +33,13 @@ extern "C" {
#define XZ_EXTENSION ".xz"
#define GZ_EXTENSION ".gz"
#define ZSTD_EXTENSION ".zst"
+#define LZ4_EXTENSION ".lz4"
/*-*************************************
* Types
***************************************/
-typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression } FIO_compressionType_t;
+typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t;
/*-*************************************
diff --git a/programs/platform.h b/programs/platform.h
index 89a9f6cd42a0..74412cde332e 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -100,9 +100,18 @@ extern "C" {
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
# include /* isatty */
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
-#elif defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
# include /* _isatty */
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#elif defined(WIN32) || defined(_WIN32)
+# include /* _isatty */
+# include /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
+# include /* FILE */
+static __inline int IS_CONSOLE(FILE* stdStream)
+{
+ DWORD dummy;
+ return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
+}
#else
# define IS_CONSOLE(stdStream) 0
#endif
@@ -129,6 +138,14 @@ extern "C" {
#endif
+#ifndef ZSTD_SPARSE_DEFAULT
+# if (defined(__APPLE__) && defined(__MACH__))
+# define ZSTD_SPARSE_DEFAULT 0
+# else
+# define ZSTD_SPARSE_DEFAULT 1
+# endif
+#endif
+
#if defined (__cplusplus)
}
diff --git a/programs/util.h b/programs/util.h
index 59e19d027ccd..5f437b2b268c 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -1,6 +1,6 @@
/**
* util.h - utility functions
- *
+ *
* Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
@@ -25,6 +25,7 @@ extern "C" {
#include /* malloc */
#include /* size_t, ptrdiff_t */
#include /* fprintf */
+#include /* strncmp */
#include /* stat, utime */
#include /* stat */
#if defined(_MSC_VER)
@@ -166,8 +167,8 @@ UTIL_STATIC void UTIL_waitForNextTick(UTIL_freq_t ticksPerSecond)
* File functions
******************************************/
#if defined(_MSC_VER)
- #define chmod _chmod
- typedef struct __stat64 stat_t;
+ #define chmod _chmod
+ typedef struct __stat64 stat_t;
#else
typedef struct stat stat_t;
#endif
@@ -178,9 +179,9 @@ UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf)
int res = 0;
struct utimbuf timebuf;
- timebuf.actime = time(NULL);
- timebuf.modtime = statbuf->st_mtime;
- res += utime(filename, &timebuf); /* set access and modification times */
+ timebuf.actime = time(NULL);
+ timebuf.modtime = statbuf->st_mtime;
+ res += utime(filename, &timebuf); /* set access and modification times */
#if !defined(_WIN32)
res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */
@@ -228,6 +229,20 @@ UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
return 0;
}
+UTIL_STATIC U32 UTIL_isLink(const char* infilename)
+{
+#if defined(_WIN32)
+ /* no symlinks on windows */
+ (void)infilename;
+#else
+ int r;
+ stat_t statbuf;
+ r = lstat(infilename, &statbuf);
+ if (!r && S_ISLNK(statbuf.st_mode)) return 1;
+#endif
+ return 0;
+}
+
UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
{
@@ -271,11 +286,14 @@ UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
return NULL;
}
+static int g_utilDisplayLevel;
+#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
#ifdef _WIN32
# define UTIL_HAS_CREATEFILELIST
-UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
char* path;
int dirLength, fnameLength, pathLength, nbFiles = 0;
@@ -311,7 +329,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
if (strcmp (cFile.cFileName, "..") == 0 ||
strcmp (cFile.cFileName, ".") == 0) continue;
- nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd); /* Recursively call "UTIL_prepareFileList" with the new path. */
+ nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */
if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
}
else if ((cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED)) {
@@ -339,7 +357,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
# include /* opendir, readdir */
# include /* strerror, memcpy */
-UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
DIR *dir;
struct dirent *entry;
@@ -360,13 +378,19 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { closedir(dir); return 0; }
memcpy(path, dirName, dirLength);
+
path[dirLength] = '/';
memcpy(path+dirLength+1, entry->d_name, fnameLength);
pathLength = dirLength+1+fnameLength;
path[pathLength] = 0;
+ if (!followLinks && UTIL_isLink(path)) {
+ UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path);
+ continue;
+ }
+
if (UTIL_isDirectory(path)) {
- nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd); /* Recursively call "UTIL_prepareFileList" with the new path. */
+ nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */
if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
} else {
if (*bufStart + *pos + pathLength >= *bufEnd) {
@@ -396,7 +420,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
#else
-UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
(void)bufStart; (void)bufEnd; (void)pos;
fprintf(stderr, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
@@ -411,7 +435,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
*/
-UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb)
+UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks)
{
size_t pos;
unsigned i, nbFiles;
@@ -436,7 +460,7 @@ UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned i
nbFiles++;
}
} else {
- nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend);
+ nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
if (buf == NULL) return NULL;
} }
@@ -465,6 +489,201 @@ UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBu
if (filenameTable) free((void*)filenameTable);
}
+/* count the number of physical cores */
+#if defined(_WIN32) || defined(WIN32)
+
+#include
+
+typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
+
+UTIL_STATIC int UTIL_countPhysicalCores(void)
+{
+ static int numPhysicalCores = 0;
+ if (numPhysicalCores != 0) return numPhysicalCores;
+
+ { LPFN_GLPI glpi;
+ BOOL done = FALSE;
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL;
+ DWORD returnLength = 0;
+ size_t byteOffset = 0;
+
+ glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
+ "GetLogicalProcessorInformation");
+
+ if (glpi == NULL) {
+ goto failed;
+ }
+
+ while(!done) {
+ DWORD rc = glpi(buffer, &returnLength);
+ if (FALSE == rc) {
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+ if (buffer)
+ free(buffer);
+ buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength);
+
+ if (buffer == NULL) {
+ perror("zstd");
+ exit(1);
+ }
+ } else {
+ /* some other error */
+ goto failed;
+ }
+ } else {
+ done = TRUE;
+ }
+ }
+
+ ptr = buffer;
+
+ while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {
+
+ if (ptr->Relationship == RelationProcessorCore) {
+ numPhysicalCores++;
+ }
+
+ ptr++;
+ byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+ }
+
+ free(buffer);
+
+ return numPhysicalCores;
+ }
+
+failed:
+ /* try to fall back on GetSystemInfo */
+ { SYSTEM_INFO sysinfo;
+ GetSystemInfo(&sysinfo);
+ numPhysicalCores = sysinfo.dwNumberOfProcessors;
+ if (numPhysicalCores == 0) numPhysicalCores = 1; /* just in case */
+ }
+ return numPhysicalCores;
+}
+
+#elif defined(__APPLE__)
+
+#include
+
+/* Use apple-provided syscall
+ * see: man 3 sysctl */
+UTIL_STATIC int UTIL_countPhysicalCores(void)
+{
+ static S32 numPhysicalCores = 0; /* apple specifies int32_t */
+ if (numPhysicalCores != 0) return numPhysicalCores;
+
+ { size_t size = sizeof(S32);
+ int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0);
+ if (ret != 0) {
+ if (errno == ENOENT) {
+ /* entry not present, fall back on 1 */
+ numPhysicalCores = 1;
+ } else {
+ perror("zstd: can't get number of physical cpus");
+ exit(1);
+ }
+ }
+
+ return numPhysicalCores;
+ }
+}
+
+#elif defined(__linux__)
+
+/* parse /proc/cpuinfo
+ * siblings / cpu cores should give hyperthreading ratio
+ * otherwise fall back on sysconf */
+UTIL_STATIC int UTIL_countPhysicalCores(void)
+{
+ static int numPhysicalCores = 0;
+
+ if (numPhysicalCores != 0) return numPhysicalCores;
+
+ numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
+ if (numPhysicalCores == -1) {
+ /* value not queryable, fall back on 1 */
+ return numPhysicalCores = 1;
+ }
+
+ /* try to determine if there's hyperthreading */
+ { FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
+ size_t const BUF_SIZE = 80;
+ char buff[BUF_SIZE];
+
+ int siblings = 0;
+ int cpu_cores = 0;
+ int ratio = 1;
+
+ if (cpuinfo == NULL) {
+ /* fall back on the sysconf value */
+ return numPhysicalCores;
+ }
+
+ /* assume the cpu cores/siblings values will be constant across all
+ * present processors */
+ while (!feof(cpuinfo)) {
+ if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) {
+ if (strncmp(buff, "siblings", 8) == 0) {
+ const char* const sep = strchr(buff, ':');
+ if (*sep == '\0') {
+ /* formatting was broken? */
+ goto failed;
+ }
+
+ siblings = atoi(sep + 1);
+ }
+ if (strncmp(buff, "cpu cores", 9) == 0) {
+ const char* const sep = strchr(buff, ':');
+ if (*sep == '\0') {
+ /* formatting was broken? */
+ goto failed;
+ }
+
+ cpu_cores = atoi(sep + 1);
+ }
+ } else if (ferror(cpuinfo)) {
+ /* fall back on the sysconf value */
+ goto failed;
+ }
+ }
+ if (siblings && cpu_cores) {
+ ratio = siblings / cpu_cores;
+ }
+failed:
+ fclose(cpuinfo);
+ return numPhysicalCores = numPhysicalCores / ratio;
+ }
+}
+
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
+
+/* Use apple-provided syscall
+ * see: man 3 sysctl */
+UTIL_STATIC int UTIL_countPhysicalCores(void)
+{
+ static int numPhysicalCores = 0;
+
+ if (numPhysicalCores != 0) return numPhysicalCores;
+
+ numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
+ if (numPhysicalCores == -1) {
+ /* value not queryable, fall back on 1 */
+ return numPhysicalCores = 1;
+ }
+ return numPhysicalCores;
+}
+
+#else
+
+UTIL_STATIC int UTIL_countPhysicalCores(void)
+{
+ /* assume 1 */
+ return 1;
+}
+
+#endif
#if defined (__cplusplus)
}
diff --git a/programs/zstd.1 b/programs/zstd.1
index 684fb868aa30..6cc5f7e9d2f6 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,408 +1,334 @@
-\"
-\" zstd.1: This is a manual page for 'zstd' program. This file is part of the
-\" zstd project.
-\" Author: Yann Collet
-\"
-
-\" No hyphenation
-.hy 0
-.nr HY 0
-
-.TH zstd "1" "2015-08-22" "zstd" "User Commands"
-.SH NAME
-\fBzstd, unzstd, zstdcat\fR - Compress or decompress .zst files
-
-.SH SYNOPSIS
-.TP 5
-\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] [-o ]
-.PP
-.B unzstd
-is equivalent to
-.BR "zstd \-d"
-.br
-.B zstdcat
-is equivalent to
-.BR "zstd \-dcf"
-.br
-
-.SH DESCRIPTION
-.PP
-\fBzstd\fR is a fast lossless compression algorithm
-and data compression tool,
-with command line syntax similar to \fB gzip (1) \fR and \fB xz (1) \fR .
-It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages.
-\fBzstd\fR offers highly configurable compression speed,
-with fast modes at > 200 MB/s per core,
-and strong modes nearing lzma compression ratios.
-It also features a very fast decoder, with speeds > 500 MB/s per core.
-
-\fBzstd\fR command line syntax is generally similar to gzip,
-but features the following differences :
- - Source files are preserved by default.
- It's possible to remove them automatically by using \fB--rm\fR command.
- - When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default.
- Use \fB-q\fR to turn them off
-
-.PP
-.B zstd
-compresses or decompresses each
-.I file
-according to the selected operation mode.
-If no
-.I files
-are given or
-.I file
-is
-.BR \- ,
-.B zstd
-reads from standard input and writes the processed data
-to standard output.
-.B zstd
-will refuse (display an error and skip the
-.IR file )
-to write compressed data to standard output if it is a terminal.
-Similarly,
-.B zstd
-will refuse to read compressed data
-from standard input if it is a terminal.
-
-.PP
-Unless
-.B \-\-stdout
-or
-.B \-o
-is specified,
-.I files
-are written to a new file whose name is derived from the source
-.I file
-name:
-.IP \(bu 3
-When compressing, the suffix
-.B .zst
-is appended to the source filename to get the target filename.
-.IP \(bu 3
-When decompressing, the
-.B .zst
-suffix is removed from the filename to get the target filename.
-
-.SS "Concatenation with .zst files"
-It is possible to concatenate
-.B .zst
-files as is.
-.B zstd
-will decompress such files as if they were a single
-.B .zst
-file.
-
-
-
-.SH OPTIONS
-
+.
+.TH "ZSTD" "1" "May 2017" "zstd 1.2.0" "User Commands"
+.
+.SH "NAME"
+\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
+.
+.SH "SYNOPSIS"
+\fBzstd\fR [\fIOPTIONS\fR] [\-|\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
+.
+.P
+\fBzstdmt\fR is equivalent to \fBzstd \-T0\fR
+.
+.P
+\fBunzstd\fR is equivalent to \fBzstd \-d\fR
+.
+.P
+\fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
+.
+.SH "DESCRIPTION"
+\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, with fast modes at > 200 MB/s per code, and strong modes nearing lzma compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
+.
+.P
+\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences :
+.
+.IP "\(bu" 4
+Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\.
+.
+.IP "\(bu" 4
+When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default\. Use \fB\-q\fR to turn them off\.
+.
+.IP "\(bu" 4
+\fBzstd\fR does not accept input from console, but it properly accepts \fBstdin\fR when it\'s not the console\.
+.
+.IP "\(bu" 4
+\fBzstd\fR displays a short help page when command line is an error\. Use \fB\-q\fR to turn it off\.
+.
+.IP "" 0
+.
+.P
+\fBzstd\fR compresses or decompresses each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal : it will display an error message and skip the \fIfile\fR\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
+.
+.P
+Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name:
+.
+.IP "\(bu" 4
+When compressing, the suffix \fB\.zst\fR is appended to the source filename to get the target filename\.
+.
+.IP "\(bu" 4
+When decompressing, the \fB\.zst\fR suffix is removed from the source filename to get the target filename
+.
+.IP "" 0
+.
+.SS "Concatenation with \.zst files"
+It is possible to concatenate \fB\.zst\fR files as is\. \fBzstd\fR will decompress such files as if they were a single \fB\.zst\fR file\.
+.
+.SH "OPTIONS"
.
.SS "Integer suffixes and special values"
-In most places where an integer argument is expected,
-an optional suffix is supported to easily indicate large integers.
-There must be no space between the integer and the suffix.
-.TP
-.B KiB
-Multiply the integer by 1,024 (2^10).
-.BR Ki ,
-.BR K ,
-and
-.B KB
-are accepted as synonyms for
-.BR KiB .
-.TP
-.B MiB
-Multiply the integer by 1,048,576 (2^20).
-.BR Mi ,
-.BR M ,
-and
-.B MB
-are accepted as synonyms for
-.BR MiB .
-
+In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\.
+.
+.TP
+\fBKiB\fR
+Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+.
+.TP
+\fBMiB\fR
+Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
.
.SS "Operation mode"
-If multiple operation mode options are given,
-the last one takes effect.
-.TP
-.BR \-z ", " \-\-compress
-Compress.
-This is the default operation mode when no operation mode option
-is specified and no other operation mode is implied from
-the command name (for example,
-.B unzstd
-implies
-.BR \-\-decompress ).
-.TP
-.BR \-d ", " \-\-decompress ", " \-\-uncompress
-Decompress.
-.TP
-.BR \-t ", " \-\-test
-Test the integrity of compressed
-.IR files .
-This option is equivalent to
-.B "\-\-decompress \-\-stdout"
-except that the decompressed data is discarded instead of being
-written to standard output.
-No files are created or removed.
-.TP
-.B \-b#
- benchmark file(s) using compression level #
-.TP
-.B \--train FILEs
- use FILEs as training set to create a dictionary. The training set should contain a lot of small files (> 100).
-
+If multiple operation mode options are given, the last one takes effect\.
+.
+.TP
+\fB\-z\fR, \fB\-\-compress\fR
+Compress\. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, \fBunzstd\fR implies \fB\-\-decompress\fR)\.
+.
+.TP
+\fB\-d\fR, \fB\-\-decompress\fR, \fB\-\-uncompress\fR
+Decompress\.
+.
+.TP
+\fB\-t\fR, \fB\-\-test\fR
+Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\-\-decompress \-\-stdout\fR except that the decompressed data is discarded instead of being written to standard output\. No files are created or removed\.
+.
+.TP
+\fB\-b#\fR
+Benchmark file(s) using compression level #
+.
+.TP
+\fB\-\-train FILEs\fR
+Use FILEs as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\.
.
.SS "Operation modifiers"
+.
.TP
-.B \-#
- # compression level [1-19] (default:3)
-.TP
-.BR \--ultra
- unlocks high compression levels 20+ (maximum 22), using a lot more memory.
-Note that decompression will also require more memory when using these levels.
-.TP
-.B \-D file
- use `file` as Dictionary to compress or decompress FILE(s)
-.TP
-.BR \--no-dictID
- do not store dictionary ID within frame header (dictionary compression).
- The decoder will have to rely on implicit knowledge about which dictionary to use,
-it won't be able to check if it's correct.
-.TP
-.B \-o file
- save result into `file` (only possible with a single INPUT-FILE)
-.TP
-.BR \-f ", " --force
- overwrite output without prompting
-.TP
-.BR \-c ", " --stdout
- force write to standard output, even if it is the console
-.TP
-.BR \--[no-]sparse
- enable / disable sparse FS support, to make files with many zeroes smaller on disk.
- Creating sparse files may save disk space and speed up the decompression
-by reducing the amount of disk I/O.
- default : enabled when output is into a file, and disabled when output is stdout.
- This setting overrides default and can force sparse mode over stdout.
-.TP
-.BR \--rm
- remove source file(s) after successful compression or decompression
-.TP
-.BR \-k ", " --keep
- keep source file(s) after successful compression or decompression.
- This is the default behavior.
-.TP
-.BR \-r
- operate recursively on directories
-.TP
-.BR \-h/\-H ", " --help
- display help/long help and exit
-.TP
-.BR \-V ", " --version
- display Version number and exit
-.TP
-.BR \-v ", " --verbose
- verbose mode
-.TP
-.BR \-q ", " --quiet
- suppress warnings, interactivity and notifications.
- specify twice to suppress errors too.
-.TP
-.BR \-C ", " --[no-]check
- add integrity check computed from uncompressed data (default : enabled)
-.TP
-.BR \-t ", " --test
- Test the integrity of compressed files. This option is equivalent to \fB--decompress --stdout > /dev/null\fR.
- No files are created or removed.
-.TP
-.BR --
- All arguments after -- are treated as files
-
-
-.SH DICTIONARY BUILDER
-.PP
-\fBzstd\fR offers \fIdictionary\fR compression, useful for very small files and messages.
-It's possible to train \fBzstd\fR with some samples, the result of which is saved into a file called `dictionary`.
-Then during compression and decompression, make reference to the same dictionary.
-It will improve compression ratio of small files.
-Typical gains range from ~10% (at 64KB) to x5 better (at <1KB).
-.TP
-.B \--train FILEs
- use FILEs as training set to create a dictionary. The training set should contain a lot of small files (> 100),
-and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)
-.TP
-.B \-o file
- dictionary saved into `file` (default: dictionary)
-.TP
-.B \--maxdict #
- limit dictionary to specified size (default : 112640)
-.TP
-.B \--dictID #
- A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary.
- By default, zstd will create a 4-bytes random number ID.
- It's possible to give a precise number instead.
- Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header,
- and an ID < 65536 will only need 2 bytes. This compares favorably to 4 bytes default.
- However, it's up to the dictionary manager to not assign twice the same ID to 2 different dictionaries.
-.TP
-.B \-s#
- dictionary selectivity level (default: 9)
- the smaller the value, the denser the dictionary, improving its efficiency but reducing its possible maximum size.
-.TP
-.B \--cover=k=#,d=#
- Use alternate dictionary builder algorithm named cover with parameters \fIk\fR and \fId\fR with \fId\fR <= \fIk\fR.
- Selects segments of size \fIk\fR with the highest score to put in the dictionary.
- The score of a segment is computed by the sum of the frequencies of all the subsegments of of size \fId\fR.
- Generally \fId\fR should be in the range [6, 24].
- Good values for \fIk\fR vary widely based on the input data, but a safe range is [32, 2048].
- Example: \fB--train --cover=k=64,d=8 FILEs\fR.
-.TP
-.B \--optimize-cover[=steps=#,k=#,d=#]
- If \fIsteps\fR is not specified, the default value of 32 is used.
- If \fIk\fR is not specified, \fIsteps\fR values in [16, 2048] are checked for each value of \fId\fR.
- If \fId\fR is not specified, the values checked are [6, 8, ..., 16].
-
- Runs the cover dictionary builder for each parameter set saves the optimal parameters and dictionary.
- Prints the optimal parameters and writes the optimal dictionary to the output file.
- Supports multithreading if \fBzstd\fR is compiled with threading support.
-
- The parameter \fIk\fR is more sensitve than \fId\fR, and is faster to optimize over.
- Suggested use is to run with a \fIsteps\fR <= 32 with neither \fIk\fR nor \fId\fR set.
- Once it completes, use the value of \fId\fR it selects with a higher \fIsteps\fR (in the range [256, 1024]).
- \fBzstd --train --optimize-cover FILEs
- \fBzstd --train --optimize-cover=d=d,steps=512 FILEs
-.TP
-
-.SH BENCHMARK
-.TP
-.B \-b#
- benchmark file(s) using compression level #
-.TP
-.B \-e#
- benchmark file(s) using multiple compression levels, from -b# to -e# (included).
-.TP
-.B \-i#
- minimum evaluation time, in seconds (default : 3s), benchmark mode only
-.TP
-.B \-B#
- cut file into independent blocks of size # (default: no block)
-.B \--priority=rt
- set process priority to real-time
-
-.SH ADVANCED COMPRESSION OPTIONS
-.TP
-.B \--zstd[=\fIoptions\fR]
-.PD
-\fBzstd\fR provides 22 predefined compression levels. The selected or default predefined compression level can be changed with advanced compression options.
-The \fIoptions\fR are provided as a comma-separated list. You may specify only the \fIoptions\fR you want to change and the rest will be taken from the selected or default compression level.
-The list of available \fIoptions\fR:
-.RS
-
-.TP
-.BI strategy= strat
-.PD 0
-.TP
-.BI strat= strat
-.PD
-Specify a strategy used by a match finder.
-.IP ""
-There are 8 strategies numbered from 0 to 7, from faster to stronger:
-0=ZSTD_fast, 1=ZSTD_dfast, 2=ZSTD_greedy, 3=ZSTD_lazy, 4=ZSTD_lazy2, 5=ZSTD_btlazy2, 6=ZSTD_btopt, 7=ZSTD_btopt2.
-.IP ""
-
-.TP
-.BI windowLog= wlog
-.PD 0
-.TP
-.BI wlog= wlog
-.PD
-Specify the maximum number of bits for a match distance.
-.IP ""
-The higher number of bits increases the chance to find a match what usually improves compression ratio.
-It also increases memory requirements for compressor and decompressor.
-.IP ""
-The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 25 (32 MiB) for 32-bit compilation and 27 (128 MiB) for 64-bit compilation.
-.IP ""
-
-.TP
-.BI hashLog= hlog
-.PD 0
-.TP
-.BI hlog= hlog
-.PD
-Specify the maximum number of bits for a hash table.
-.IP ""
-The bigger hash table causes less collisions what usually make compression faster but requires more memory during compression.
-.IP ""
-The minimum \fIhlog\fR is 6 (64 B) and the maximum is 25 (32 MiB) for 32-bit compilation and 27 (128 MiB) for 64-bit compilation.
-
-.TP
-.BI chainLog= clog
-.PD 0
-.TP
-.BI clog= clog
-.PD
-Specify the maximum number of bits for a hash chain or a binary tree.
-.IP ""
-The higher number of bits increases the chance to find a match what usually improves compression ratio.
-It also slows down compression speed and increases memory requirements for compression.
-This option is ignored for the ZSTD_fast strategy.
-.IP ""
-The minimum \fIclog\fR is 6 (64 B) and the maximum is 26 (64 MiB) for 32-bit compilation and 28 (256 MiB) for 64-bit compilation.
-.IP ""
-
-.TP
-.BI searchLog= slog
-.PD 0
-.TP
-.BI slog= slog
-.PD
-Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale.
-.IP ""
-The bigger number of searches increases the chance to find a match what usually improves compression ratio but decreases compression speed.
-.IP ""
-The minimum \fIslog\fR is 1 and the maximum is 24 for 32-bit compilation and 26 for 64-bit compilation.
-.IP ""
-
-.TP
-.BI searchLength= slen
-.PD 0
-.TP
-.BI slen= slen
-.PD
-Specify the minimum searched length of a match in a hash table.
-.IP ""
-The bigger search length usually decreases compression ratio but improves decompression speed.
-.IP ""
-The minimum \fIslen\fR is 3 and the maximum is 7.
-.IP ""
-
-.TP
-.BI targetLength= tlen
-.PD 0
-.TP
-.BI tlen= tlen
-.PD
-Specify the minimum match length that causes a match finder to interrupt searching of better matches.
-.IP ""
-The bigger minimum match length usually improves compression ratio but decreases compression speed.
-This option is used only with ZSTD_btopt and ZSTD_btopt2 strategies.
-.IP ""
-The minimum \fItlen\fR is 4 and the maximum is 999.
-.IP ""
-
-.PP
-.B An example
-.br
-The following parameters sets advanced compression options to predefined level 19 for files bigger than 256 KB:
-.IP ""
-\fB--zstd=\fRwindowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6
-
-.SH BUGS
-Report bugs at:- https://github.com/facebook/zstd/issues
-
-.SH AUTHOR
+\fB\-#\fR
+\fB#\fR compression level [1\-19] (default: 3)
+.
+.TP
+\fB\-\-ultra\fR
+unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
+.
+.TP
+\fB\-T#\fR, \fB\-\-threads=#\fR
+Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
+.
+.TP
+\fB\-D file\fR
+use \fBfile\fR as Dictionary to compress or decompress FILE(s)
+.
+.TP
+\fB\-\-nodictID\fR
+do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
+.
+.TP
+\fB\-o file\fR
+save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR)
+.
+.TP
+\fB\-f\fR, \fB\-\-force\fR
+overwrite output without prompting, and (de)compress symbolic links
+.
+.TP
+\fB\-c\fR, \fB\-\-stdout\fR
+force write to standard output, even if it is the console
+.
+.TP
+\fB\-\-[no\-]sparse\fR
+enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default : enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
+.
+.TP
+\fB\-\-rm\fR
+remove source file(s) after successful compression or decompression
+.
+.TP
+\fB\-k\fR, \fB\-\-keep\fR
+keep source file(s) after successful compression or decompression\. This is the default behavior\.
+.
+.TP
+\fB\-r\fR
+operate recursively on dictionaries
+.
+.TP
+\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR
+display help/long help and exit
+.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+display version number and exit
+.
+.TP
+\fB\-v\fR
+verbose mode
+.
+.TP
+\fB\-q\fR, \fB\-\-quiet\fR
+suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
+.
+.TP
+\fB\-C\fR, \fB\-\-[no\-]check\fR
+add integrity check computed from uncompressed data (default : enabled)
+.
+.TP
+\fB\-\-\fR
+All arguments after \fB\-\-\fR are treated as files
+.
+.SH "DICTIONARY BUILDER"
+\fBzstd\fR offers \fIdictionary\fR compression, useful for very small files and messages\. It\'s possible to train \fBzstd\fR with some samples, the result of which is saved into a file called a \fBdictionary\fR\. Then during compression and decompression, reference the same dictionary\. It will improve compression ratio of small files\. Typical gains range from 10% (at 64KB) to x5 better (at <1KB)\.
+.
+.TP
+\fB\-\-train FILEs\fR
+Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\.
+.
+.IP
+Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-cover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. Equivalent to \fB\-\-train\-cover=d=8,steps=4\fR\.
+.
+.TP
+\fB\-o file\fR
+Dictionary saved into \fBfile\fR (default name: dictionary)\.
+.
+.TP
+\fB\-\-maxdict=#\fR
+Limit dictionary to specified size (default: 112640)\.
+.
+.TP
+\fB\-\-dictID=#\fR
+A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
+.
+.TP
+\fB\-\-train\-cover[=k#,d=#,steps=#]\fR
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. Requires that \fId\fR <= \fIk\fR\.
+.
+.IP
+Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-cover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50 FILEs\fR
+.
+.TP
+\fB\-\-train\-legacy[=selectivity=#]\fR
+Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-legacy FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
+.
+.SH "BENCHMARK"
+.
+.TP
+\fB\-b#\fR
+benchmark file(s) using compression level #
+.
+.TP
+\fB\-e#\fR
+benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
+.
+.TP
+\fB\-i#\fR
+minimum evaluation time, in seconds (default : 3s), benchmark mode only
+.
+.TP
+\fB\-B#\fR
+cut file into independent blocks of size # (default: no block)
+.
+.TP
+\fB\-\-priority=rt\fR
+set process priority to real\-time
+.
+.SH "ADVANCED COMPRESSION OPTIONS"
+.
+.SS "\-\-zstd[=options]:"
+\fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+.
+.TP
+\fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
+Specify a strategy used by a match finder\.
+.
+.IP
+There are 8 strategies numbered from 0 to 7, from faster to stronger: 0=ZSTD_fast, 1=ZSTD_dfast, 2=ZSTD_greedy, 3=ZSTD_lazy, 4=ZSTD_lazy2, 5=ZSTD_btlazy2, 6=ZSTD_btopt, 7=ZSTD_btopt2\.
+.
+.TP
+\fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR
+Specify the maximum number of bits for a match distance\.
+.
+.IP
+The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 27 (128 MiB)\.
+.
+.TP
+\fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR
+Specify the maximum number of bits for a hash table\.
+.
+.IP
+Bigger hash tables cause less collisions which usually makes compression faster, but requires more memory during compression\.
+.
+.IP
+The minimum \fIhlog\fR is 6 (64 B) and the maximum is 26 (128 MiB)\.
+.
+.TP
+\fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR
+Specify the maximum number of bits for a hash chain or a binary tree\.
+.
+.IP
+Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the ZSTD_fast strategy\.
+.
+.IP
+The minimum \fIclog\fR is 6 (64 B) and the maximum is 28 (256 MiB)\.
+.
+.TP
+\fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR
+Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale\.
+.
+.IP
+More searches increases the chance to find a match which usually increases compression ratio but decreases compression speed\.
+.
+.IP
+The minimum \fIslog\fR is 1 and the maximum is 26\.
+.
+.TP
+\fBsearchLength\fR=\fIslen\fR, \fBslen\fR=\fIslen\fR
+Specify the minimum searched length of a match in a hash table\.
+.
+.IP
+Larger search lengths usually decrease compression ratio but improve decompression speed\.
+.
+.IP
+The minimum \fIslen\fR is 3 and the maximum is 7\.
+.
+.TP
+\fBtargetLen\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR
+Specify the minimum match length that causes a match finder to stop searching for better matches\.
+.
+.IP
+A larger minimum match length usually improves compression ratio but decreases compression speed\. This option is only used with strategies ZSTD_btopt and ZSTD_btopt2\.
+.
+.IP
+The minimum \fItlen\fR is 4 and the maximum is 999\.
+.
+.TP
+\fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
+Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
+.
+.IP
+The minimum \fIovlog\fR is 0, and the maximum is 9\. 0 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the amount of reload by a factor 2\. Default \fIovlog\fR is 6, which means "reload \fBwindowSize / 8\fR"\. Exception : the maximum compression level (22) has a default \fIovlog\fR of 9\.
+.
+.SS "\-B#:"
+Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
+.
+.SS "Example"
+The following parameters sets advanced compression options to those of predefined level 19 for files bigger than 256 KB:
+.
+.P
+\fB\-\-zstd\fR=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6
+.
+.SH "BUGS"
+Report bugs at: https://github\.com/facebook/zstd/issues
+.
+.SH "AUTHOR"
Yann Collet
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
new file mode 100644
index 000000000000..118c9f2f8ee5
--- /dev/null
+++ b/programs/zstd.1.md
@@ -0,0 +1,343 @@
+zstd(1) -- zstd, zstdmt, unzstd, zstdcat - Compress or decompress .zst files
+============================================================================
+
+SYNOPSIS
+--------
+
+`zstd` [*OPTIONS*] [-|_INPUT-FILE_] [-o _OUTPUT-FILE_]
+
+`zstdmt` is equivalent to `zstd -T0`
+
+`unzstd` is equivalent to `zstd -d`
+
+`zstdcat` is equivalent to `zstd -dcf`
+
+
+DESCRIPTION
+-----------
+`zstd` is a fast lossless compression algorithm and data compression tool,
+with command line syntax similar to `gzip (1)` and `xz (1)`.
+It is based on the **LZ77** family, with further FSE & huff0 entropy stages.
+`zstd` offers highly configurable compression speed,
+with fast modes at > 200 MB/s per code,
+and strong modes nearing lzma compression ratios.
+It also features a very fast decoder, with speeds > 500 MB/s per core.
+
+`zstd` command line syntax is generally similar to gzip,
+but features the following differences :
+
+ - Source files are preserved by default.
+ It's possible to remove them automatically by using the `--rm` command.
+ - When compressing a single file, `zstd` displays progress notifications
+ and result summary by default.
+ Use `-q` to turn them off.
+ - `zstd` does not accept input from console,
+ but it properly accepts `stdin` when it's not the console.
+ - `zstd` displays a short help page when command line is an error.
+ Use `-q` to turn it off.
+
+`zstd` compresses or decompresses each _file_ according to the selected
+operation mode.
+If no _files_ are given or _file_ is `-`, `zstd` reads from standard input
+and writes the processed data to standard output.
+`zstd` will refuse to write compressed data to standard output
+if it is a terminal : it will display an error message and skip the _file_.
+Similarly, `zstd` will refuse to read compressed data from standard input
+if it is a terminal.
+
+Unless `--stdout` or `-o` is specified, _files_ are written to a new file
+whose name is derived from the source _file_ name:
+
+* When compressing, the suffix `.zst` is appended to the source filename to
+ get the target filename.
+* When decompressing, the `.zst` suffix is removed from the source filename to
+ get the target filename
+
+### Concatenation with .zst files
+It is possible to concatenate `.zst` files as is.
+`zstd` will decompress such files as if they were a single `.zst` file.
+
+OPTIONS
+-------
+
+### Integer suffixes and special values
+In most places where an integer argument is expected,
+an optional suffix is supported to easily indicate large integers.
+There must be no space between the integer and the suffix.
+
+* `KiB`:
+ Multiply the integer by 1,024 (2\^10).
+ `Ki`, `K`, and `KB` are accepted as synonyms for `KiB`.
+* `MiB`:
+ Multiply the integer by 1,048,576 (2\^20).
+ `Mi`, `M`, and `MB` are accepted as synonyms for `MiB`.
+
+### Operation mode
+If multiple operation mode options are given,
+the last one takes effect.
+
+* `-z`, `--compress`:
+ Compress.
+ This is the default operation mode when no operation mode option is specified
+ and no other operation mode is implied from the command name
+ (for example, `unzstd` implies `--decompress`).
+* `-d`, `--decompress`, `--uncompress`:
+ Decompress.
+* `-t`, `--test`:
+ Test the integrity of compressed _files_.
+ This option is equivalent to `--decompress --stdout` except that the
+ decompressed data is discarded instead of being written to standard output.
+ No files are created or removed.
+* `-b#`:
+ Benchmark file(s) using compression level #
+* `--train FILEs`:
+ Use FILEs as a training set to create a dictionary.
+ The training set should contain a lot of small files (> 100).
+
+### Operation modifiers
+
+* `-#`:
+ `#` compression level \[1-19] (default: 3)
+* `--ultra`:
+ unlocks high compression levels 20+ (maximum 22), using a lot more memory.
+ Note that decompression will also require more memory when using these levels.
+* `-T#`, `--threads=#`:
+ Compress using `#` threads (default: 1).
+ If `#` is 0, attempt to detect and use the number of physical CPU cores.
+ This modifier does nothing if `zstd` is compiled without multithread support.
+* `-D file`:
+ use `file` as Dictionary to compress or decompress FILE(s)
+* `--nodictID`:
+ do not store dictionary ID within frame header (dictionary compression).
+ The decoder will have to rely on implicit knowledge about which dictionary to use,
+ it won't be able to check if it's correct.
+* `-o file`:
+ save result into `file` (only possible with a single _INPUT-FILE_)
+* `-f`, `--force`:
+ overwrite output without prompting, and (de)compress symbolic links
+* `-c`, `--stdout`:
+ force write to standard output, even if it is the console
+* `--[no-]sparse`:
+ enable / disable sparse FS support,
+ to make files with many zeroes smaller on disk.
+ Creating sparse files may save disk space and speed up decompression by
+ reducing the amount of disk I/O.
+ default : enabled when output is into a file,
+ and disabled when output is stdout.
+ This setting overrides default and can force sparse mode over stdout.
+* `--rm`:
+ remove source file(s) after successful compression or decompression
+* `-k`, `--keep`:
+ keep source file(s) after successful compression or decompression.
+ This is the default behavior.
+* `-r`:
+ operate recursively on dictionaries
+* `-h`/`-H`, `--help`:
+ display help/long help and exit
+* `-V`, `--version`:
+ display version number and exit
+* `-v`:
+ verbose mode
+* `-q`, `--quiet`:
+ suppress warnings, interactivity, and notifications.
+ specify twice to suppress errors too.
+* `-C`, `--[no-]check`:
+ add integrity check computed from uncompressed data (default : enabled)
+* `--`:
+ All arguments after `--` are treated as files
+
+
+DICTIONARY BUILDER
+------------------
+`zstd` offers _dictionary_ compression,
+useful for very small files and messages.
+It's possible to train `zstd` with some samples,
+the result of which is saved into a file called a `dictionary`.
+Then during compression and decompression, reference the same dictionary.
+It will improve compression ratio of small files.
+Typical gains range from 10% (at 64KB) to x5 better (at <1KB).
+
+* `--train FILEs`:
+ Use FILEs as training set to create a dictionary.
+ The training set should contain a lot of small files (> 100),
+ and weight typically 100x the target dictionary size
+ (for example, 10 MB for a 100 KB dictionary).
+
+ Supports multithreading if `zstd` is compiled with threading support.
+ Additional parameters can be specified with `--train-cover`.
+ The legacy dictionary builder can be accessed with `--train-legacy`.
+ Equivalent to `--train-cover=d=8,steps=4`.
+* `-o file`:
+ Dictionary saved into `file` (default name: dictionary).
+* `--maxdict=#`:
+ Limit dictionary to specified size (default: 112640).
+* `--dictID=#`:
+ A dictionary ID is a locally unique ID that a decoder can use to verify it is
+ using the right dictionary.
+ By default, zstd will create a 4-bytes random number ID.
+ It's possible to give a precise number instead.
+ Short numbers have an advantage : an ID < 256 will only need 1 byte in the
+ compressed frame header, and an ID < 65536 will only need 2 bytes.
+ This compares favorably to 4 bytes default.
+ However, it's up to the dictionary manager to not assign twice the same ID to
+ 2 different dictionaries.
+* `--train-cover[=k#,d=#,steps=#]`:
+ Select parameters for the default dictionary builder algorithm named cover.
+ If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
+ If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
+ If _steps_ is not specified, then the default value of 40 is used.
+ Requires that _d_ <= _k_.
+
+ Selects segments of size _k_ with highest score to put in the dictionary.
+ The score of a segment is computed by the sum of the frequencies of all the
+ subsegments of size _d_.
+ Generally _d_ should be in the range [6, 8], occasionally up to 16, but the
+ algorithm will run faster with d <= _8_.
+ Good values for _k_ vary widely based on the input data, but a safe range is
+ [2 * _d_, 2000].
+ Supports multithreading if `zstd` is compiled with threading support.
+
+ Examples:
+
+ `zstd --train-cover FILEs`
+
+ `zstd --train-cover=k=50,d=8 FILEs`
+
+ `zstd --train-cover=d=8,steps=500 FILEs`
+
+ `zstd --train-cover=k=50 FILEs`
+
+* `--train-legacy[=selectivity=#]`:
+ Use legacy dictionary builder algorithm with the given dictionary
+ _selectivity_ (default: 9).
+ The smaller the _selectivity_ value, the denser the dictionary,
+ improving its efficiency but reducing its possible maximum size.
+ `--train-legacy=s=#` is also accepted.
+
+ Examples:
+
+ `zstd --train-legacy FILEs`
+
+ `zstd --train-legacy=selectivity=8 FILEs`
+
+
+BENCHMARK
+---------
+
+* `-b#`:
+ benchmark file(s) using compression level #
+* `-e#`:
+ benchmark file(s) using multiple compression levels, from `-b#` to `-e#` (inclusive)
+* `-i#`:
+ minimum evaluation time, in seconds (default : 3s), benchmark mode only
+* `-B#`:
+ cut file into independent blocks of size # (default: no block)
+* `--priority=rt`:
+ set process priority to real-time
+
+
+ADVANCED COMPRESSION OPTIONS
+----------------------------
+### --zstd[=options]:
+`zstd` provides 22 predefined compression levels.
+The selected or default predefined compression level can be changed with
+advanced compression options.
+The _options_ are provided as a comma-separated list.
+You may specify only the options you want to change and the rest will be
+taken from the selected or default compression level.
+The list of available _options_:
+
+- `strategy`=_strat_, `strat`=_strat_:
+ Specify a strategy used by a match finder.
+
+ There are 8 strategies numbered from 0 to 7, from faster to stronger:
+ 0=ZSTD\_fast, 1=ZSTD\_dfast, 2=ZSTD\_greedy, 3=ZSTD\_lazy,
+ 4=ZSTD\_lazy2, 5=ZSTD\_btlazy2, 6=ZSTD\_btopt, 7=ZSTD\_btopt2.
+
+- `windowLog`=_wlog_, `wlog`=_wlog_:
+ Specify the maximum number of bits for a match distance.
+
+ The higher number of increases the chance to find a match which usually
+ improves compression ratio.
+ It also increases memory requirements for the compressor and decompressor.
+ The minimum _wlog_ is 10 (1 KiB) and the maximum is 27 (128 MiB).
+
+- `hashLog`=_hlog_, `hlog`=_hlog_:
+ Specify the maximum number of bits for a hash table.
+
+ Bigger hash tables cause less collisions which usually makes compression
+ faster, but requires more memory during compression.
+
+ The minimum _hlog_ is 6 (64 B) and the maximum is 26 (128 MiB).
+
+- `chainLog`=_clog_, `clog`=_clog_:
+ Specify the maximum number of bits for a hash chain or a binary tree.
+
+ Higher numbers of bits increases the chance to find a match which usually
+ improves compression ratio.
+ It also slows down compression speed and increases memory requirements for
+ compression.
+ This option is ignored for the ZSTD_fast strategy.
+
+ The minimum _clog_ is 6 (64 B) and the maximum is 28 (256 MiB).
+
+- `searchLog`=_slog_, `slog`=_slog_:
+ Specify the maximum number of searches in a hash chain or a binary tree
+ using logarithmic scale.
+
+ More searches increases the chance to find a match which usually increases
+ compression ratio but decreases compression speed.
+
+ The minimum _slog_ is 1 and the maximum is 26.
+
+- `searchLength`=_slen_, `slen`=_slen_:
+ Specify the minimum searched length of a match in a hash table.
+
+ Larger search lengths usually decrease compression ratio but improve
+ decompression speed.
+
+ The minimum _slen_ is 3 and the maximum is 7.
+
+- `targetLen`=_tlen_, `tlen`=_tlen_:
+ Specify the minimum match length that causes a match finder to stop
+ searching for better matches.
+
+ A larger minimum match length usually improves compression ratio but
+ decreases compression speed.
+ This option is only used with strategies ZSTD_btopt and ZSTD_btopt2.
+
+ The minimum _tlen_ is 4 and the maximum is 999.
+
+- `overlapLog`=_ovlog_, `ovlog`=_ovlog_:
+ Determine `overlapSize`, amount of data reloaded from previous job.
+ This parameter is only available when multithreading is enabled.
+ Reloading more data improves compression ratio, but decreases speed.
+
+ The minimum _ovlog_ is 0, and the maximum is 9.
+ 0 means "no overlap", hence completely independent jobs.
+ 9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
+ Reducing _ovlog_ by 1 reduces the amount of reload by a factor 2.
+ Default _ovlog_ is 6, which means "reload `windowSize / 8`".
+ Exception : the maximum compression level (22) has a default _ovlog_ of 9.
+
+### -B#:
+Select the size of each compression job.
+This parameter is available only when multi-threading is enabled.
+Default value is `4 * windowSize`, which means it varies depending on compression level.
+`-B#` makes it possible to select a custom value.
+Note that job size must respect a minimum value which is enforced transparently.
+This minimum is either 1 MB, or `overlapSize`, whichever is largest.
+
+### Example
+The following parameters sets advanced compression options to those of
+predefined level 19 for files bigger than 256 KB:
+
+`--zstd`=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6
+
+BUGS
+----
+Report bugs at: https://github.com/facebook/zstd/issues
+
+AUTHOR
+------
+Yann Collet
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index ae49da7b1e72..32fef99930e9 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -49,6 +49,7 @@
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR
+#define ZSTD_ZSTDMT "zstdmt"
#define ZSTD_UNZSTD "unzstd"
#define ZSTD_CAT "zstdcat"
#define ZSTD_GZ "gzip"
@@ -74,10 +75,10 @@ static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
/*-************************************
* Display Macros
**************************************/
-#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static FILE* displayOut;
-static unsigned displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
+#define DISPLAY(...) fprintf(g_displayOut, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
+static int g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
+static FILE* g_displayOut;
/*-************************************
@@ -99,7 +100,7 @@ static int usage(const char* programName)
#endif
DISPLAY( " -D file: use `file` as Dictionary \n");
DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
- DISPLAY( " -f : overwrite output without prompting \n");
+ DISPLAY( " -f : overwrite output without prompting and (de)compress links \n");
DISPLAY( "--rm : remove source file(s) after successful de/compression \n");
DISPLAY( " -k : preserve source file(s) (default) \n");
DISPLAY( " -h/-H : display help/long help and exit\n");
@@ -113,19 +114,20 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Advanced arguments :\n");
DISPLAY( " -V : display Version number and exit\n");
- DISPLAY( " -v : verbose mode; specify multiple times to increase log level (default:%d)\n", DEFAULT_DISPLAY_LEVEL);
+ DISPLAY( " -v : verbose mode; specify multiple times to increase verbosity\n");
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
-#ifdef UTIL_HAS_CREATEFILELIST
- DISPLAY( " -r : operate recursively on directories \n");
-#endif
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
- DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
- DISPLAY( "--[no-]check : integrity check (default:enabled) \n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : use # threads for compression (default:1) \n");
- DISPLAY( " -B# : select size of independent sections (default:0==automatic) \n");
+ DISPLAY( " -B# : select size of each job (default:0==automatic) \n");
+#endif
+ DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
+ DISPLAY( "--[no-]check : integrity check (default:enabled) \n");
+#endif
+#ifdef UTIL_HAS_CREATEFILELIST
+ DISPLAY( " -r : operate recursively on directories \n");
#endif
#ifdef ZSTD_GZCOMPRESS
DISPLAY( "--format=gzip : compress files to the .gz format \n");
@@ -134,10 +136,16 @@ static int usage_advanced(const char* programName)
DISPLAY( "--format=xz : compress files to the .xz format \n");
DISPLAY( "--format=lzma : compress files to the .lzma format \n");
#endif
+#ifdef ZSTD_LZ4COMPRESS
+ DISPLAY( "--format=lz4 : compress files to the .lz4 format \n");
#endif
#ifndef ZSTD_NODECOMPRESS
DISPLAY( "--test : test compressed file integrity \n");
+#if ZSTD_SPARSE_DEFAULT
DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n");
+#else
+ DISPLAY( "--[no-]sparse : sparse mode (default:disabled)\n");
+#endif
#endif
DISPLAY( " -M# : Set a memory usage limit for decompression \n");
DISPLAY( "-- : All arguments after \"--\" are treated as files \n");
@@ -145,12 +153,11 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Dictionary builder :\n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
- DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
- DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
+ DISPLAY( "--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args\n");
+ DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
- DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
- DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
- DISPLAY( "--dictID ## : force dictionary ID to specified value (default: random)\n");
+ DISPLAY( "--maxdict=# : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
+ DISPLAY( "--dictID=# : force dictionary ID to specified value (default: random)\n");
#endif
#ifndef ZSTD_NOBENCH
DISPLAY( "\n");
@@ -167,7 +174,7 @@ static int usage_advanced(const char* programName)
static int badusage(const char* programName)
{
DISPLAYLEVEL(1, "Incorrect parameters\n");
- if (displayLevel >= 1) usage(programName);
+ if (g_displayLevel >= 2) usage(programName);
return 1;
}
@@ -179,6 +186,23 @@ static void waitEnter(void)
(void)unused;
}
+static const char* lastNameFromPath(const char* path)
+{
+ const char* name = path;
+ if (strrchr(name, '/')) name = strrchr(name, '/') + 1;
+ if (strrchr(name, '\\')) name = strrchr(name, '\\') + 1; /* windows */
+ return name;
+}
+
+/*! exeNameMatch() :
+ @return : a non-zero value if exeName matches test, excluding the extension
+ */
+static int exeNameMatch(const char* exeName, const char* test)
+{
+ return !strncmp(exeName, test, strlen(test)) &&
+ (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.');
+}
+
/*! readU32FromChar() :
@return : unsigned integer value read from input in `char` format
allows and interprets K, KB, KiB, M, MB and MiB suffix.
@@ -216,11 +240,11 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
#ifndef ZSTD_NODICT
/**
* parseCoverParameters() :
- * reads cover parameters from *stringPtr (e.g. "--cover=smoothing=100,kmin=48,kstep=4,kmax=64,d=8") into *params
+ * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params
* @return 1 means that cover parameters were correct
* @return 0 in case of malformed parameters
*/
-static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
+static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t* params)
{
memset(params, 0, sizeof(*params));
for (; ;) {
@@ -230,9 +254,33 @@ static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *para
return 0;
}
if (stringPtr[0] != 0) return 0;
- DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
+ DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
+ return 1;
+}
+
+/**
+ * parseLegacyParameters() :
+ * reads legacy dictioanry builter parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity
+ * @return 1 means that legacy dictionary builder parameters were correct
+ * @return 0 in case of malformed parameters
+ */
+static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity)
+{
+ if (!longCommandWArg(&stringPtr, "s=") && !longCommandWArg(&stringPtr, "selectivity=")) { return 0; }
+ *selectivity = readU32FromChar(&stringPtr);
+ if (stringPtr[0] != 0) return 0;
+ DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity);
return 1;
}
+
+static COVER_params_t defaultCoverParams(void)
+{
+ COVER_params_t params;
+ memset(¶ms, 0, sizeof(params));
+ params.d = 8;
+ params.steps = 4;
+ return params;
+}
#endif
@@ -270,6 +318,7 @@ int main(int argCount, const char* argv[])
{
int argNb,
forceStdout=0,
+ followLinks=0,
main_pause=0,
nextEntryIsDictionary=0,
operationResult=0,
@@ -305,8 +354,8 @@ int main(int argCount, const char* argv[])
unsigned fileNamesNb;
#endif
#ifndef ZSTD_NODICT
- COVER_params_t coverParams;
- int cover = 0;
+ COVER_params_t coverParams = defaultCoverParams();
+ int cover = 1;
#endif
/* init */
@@ -316,21 +365,19 @@ int main(int argCount, const char* argv[])
(void)memLimit; /* not used when ZSTD_NODECOMPRESS set */
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
filenameTable[0] = stdinmark;
- displayOut = stderr;
- /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
- { size_t pos;
- for (pos = (int)strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } }
- programName += pos;
- }
+ g_displayOut = stderr;
+
+ programName = lastNameFromPath(programName);
/* preset behaviors */
- if (!strcmp(programName, ZSTD_UNZSTD)) operation=zom_decompress;
- if (!strcmp(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; displayLevel=1; }
- if (!strcmp(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); FIO_setRemoveSrcFile(1); } /* behave like gzip */
- if (!strcmp(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */
- if (!strcmp(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; displayLevel=1; } /* behave like gzcat */
- if (!strcmp(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */
- if (!strcmp(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */
+ if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbThreads=0;
+ if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
+ if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; }
+ if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); FIO_setRemoveSrcFile(1); } /* behave like gzip */
+ if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */
+ if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat */
+ if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */
+ if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */
memset(&compressionParams, 0, sizeof(compressionParams));
/* command switches */
@@ -344,7 +391,7 @@ int main(int argCount, const char* argv[])
if (!filenameIdx) {
filenameIdx=1, filenameTable[0]=stdinmark;
outFileName=stdoutmark;
- displayLevel-=(displayLevel==2);
+ g_displayLevel-=(g_displayLevel==2);
continue;
} }
@@ -357,12 +404,12 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
- if (!strcmp(argument, "--force")) { FIO_overwriteMode(); continue; }
- if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
- if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
- if (!strcmp(argument, "--verbose")) { displayLevel++; continue; }
- if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
- if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); continue; }
+ if (!strcmp(argument, "--force")) { FIO_overwriteMode(); forceStdout=1; followLinks=1; continue; }
+ if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
+ if (!strcmp(argument, "--help")) { g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
+ if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
+ if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
+ if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
@@ -370,8 +417,8 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; }
- if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; }
- if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; }
+ if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
+ if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
@@ -383,26 +430,40 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); continue; }
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); continue; }
#endif
+#ifdef ZSTD_LZ4COMPRESS
+ if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); continue; }
+#endif
/* long commands with arguments */
#ifndef ZSTD_NODICT
- if (longCommandWArg(&argument, "--cover=")) {
- cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
- continue;
- }
- if (longCommandWArg(&argument, "--optimize-cover")) {
- cover=2;
+ if (longCommandWArg(&argument, "--train-cover")) {
+ operation = zom_train;
+ outFileName = g_defaultDictName;
+ cover = 1;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
+ if (longCommandWArg(&argument, "--train-legacy")) {
+ operation = zom_train;
+ outFileName = g_defaultDictName;
+ cover = 0;
+ /* Allow optional arguments following an = */
+ if (*argument == 0) { continue; }
+ else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
+ else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); }
+ continue;
+ }
#endif
+ if (longCommandWArg(&argument, "--threads=")) { nbThreads = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; }
+ if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
+ if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
/* fall-through, will trigger bad_usage() later on */
}
@@ -424,9 +485,9 @@ int main(int argCount, const char* argv[])
switch(argument[0])
{
/* Display help */
- case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); /* Version Only */
+ case 'V': g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); /* Version Only */
case 'H':
- case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
+ case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
/* Compress */
case 'z': operation=zom_compress; argument++; break;
@@ -445,19 +506,19 @@ int main(int argCount, const char* argv[])
case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break;
/* Overwrite */
- case 'f': FIO_overwriteMode(); forceStdout=1; argument++; break;
+ case 'f': FIO_overwriteMode(); forceStdout=1; followLinks=1; argument++; break;
/* Verbose mode */
- case 'v': displayLevel++; argument++; break;
+ case 'v': g_displayLevel++; argument++; break;
/* Quiet mode */
- case 'q': displayLevel--; argument++; break;
+ case 'q': g_displayLevel--; argument++; break;
- /* keep source file (default); for gzip/xz compatibility */
+ /* keep source file (default) */
case 'k': FIO_setRemoveSrcFile(0); argument++; break;
/* Checksum */
- case 'C': argument++; FIO_setChecksumFlag(2); break;
+ case 'C': FIO_setChecksumFlag(2); argument++; break;
/* test compressed file */
case 't': operation=zom_test; argument++; break;
@@ -532,14 +593,14 @@ int main(int argCount, const char* argv[])
continue;
} /* if (argument[0]=='-') */
- if (nextArgumentIsMaxDict) {
+ if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */
nextArgumentIsMaxDict = 0;
lastCommand = 0;
maxDictSize = readU32FromChar(&argument);
continue;
}
- if (nextArgumentIsDictID) {
+ if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */
nextArgumentIsDictID = 0;
lastCommand = 0;
dictID = readU32FromChar(&argument);
@@ -581,9 +642,27 @@ int main(int argCount, const char* argv[])
DISPLAYLEVEL(4, "PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);
#endif
+ if (nbThreads == 0) {
+ /* try to guess */
+ nbThreads = UTIL_countPhysicalCores();
+ DISPLAYLEVEL(3, "Note: %d physical core(s) detected\n", nbThreads);
+ }
+
+ g_utilDisplayLevel = g_displayLevel;
+ if (!followLinks) {
+ unsigned u;
+ for (u=0, fileNamesNb=0; u1) & (displayLevel==2)) displayLevel=1;
+ if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
+ if ((filenameIdx>1) & (g_displayLevel==2)) g_displayLevel=1;
/* IO Stream/File */
- FIO_setNotificationLevel(displayLevel);
+ FIO_setNotificationLevel(g_displayLevel);
if (operation==zom_compress) {
#ifndef ZSTD_NOCOMPRESS
FIO_setNbThreads(nbThreads);
diff --git a/tests/Makefile b/tests/Makefile
index 9382fe80ddc2..ea58c0fe5119 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -26,9 +26,12 @@ PYTHON ?= python3
TESTARTEFACT := versionsTest namespaceTest
-CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
+DEBUGFLAGS=-g -DZSTD_DEBUG=1
+CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
+ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
+ $(DEBUGFLAG)
CFLAGS ?= -O3
-CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wformat-security
CFLAGS += $(MOREFLAGS)
@@ -65,14 +68,16 @@ FUZZERTEST ?= -T5mn
ZSTDRTTEST = --test-large-data
DECODECORPUS_TESTTIME ?= -T30
-.PHONY: default all all32 dll clean test test32 test-all namespaceTest versionsTest
+.PHONY: default all all32 allnothread dll clean test test32 test-all namespaceTest versionsTest
default: fullbench
-all: fullbench fuzzer zstreamtest paramgrill datagen zbufftest
+all: fullbench fuzzer zstreamtest paramgrill datagen zbufftest decodecorpus
all32: fullbench32 fuzzer32 zstreamtest32 zbufftest32
+allnothread: fullbench fuzzer paramgrill datagen zbufftest decodecorpus
+
dll: fuzzer-dll zstreamtest-dll zbufftest-dll
zstd:
@@ -152,6 +157,7 @@ zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zstreamtest.c
$(MAKE) -C $(ZSTDDIR) libzstd
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
+paramgrill : DEBUGFLAG =
paramgrill : $(ZSTD_FILES) $(PRGDIR)/datagen.c paramgrill.c
$(CC) $(FLAGS) $^ -lm -o $@$(EXT)
@@ -300,10 +306,10 @@ test-fullbench32: fullbench32 datagen
$(QEMU_SYS) ./fullbench32 -i1 -P0
test-fuzzer: fuzzer
- $(QEMU_SYS) ./fuzzer $(FUZZERTEST)
+ $(QEMU_SYS) ./fuzzer $(FUZZERTEST) $(FUZZER_FLAGS)
test-fuzzer32: fuzzer32
- $(QEMU_SYS) ./fuzzer32 $(FUZZERTEST)
+ $(QEMU_SYS) ./fuzzer32 $(FUZZERTEST) $(FUZZER_FLAGS)
test-zbuff: zbufftest
$(QEMU_SYS) ./zbufftest $(ZSTREAM_TESTTIME)
@@ -312,10 +318,10 @@ test-zbuff32: zbufftest32
$(QEMU_SYS) ./zbufftest32 $(ZSTREAM_TESTTIME)
test-zstream: zstreamtest
- $(QEMU_SYS) ./zstreamtest $(ZSTREAM_TESTTIME)
+ $(QEMU_SYS) ./zstreamtest $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
test-zstream32: zstreamtest32
- $(QEMU_SYS) ./zstreamtest32 $(ZSTREAM_TESTTIME)
+ $(QEMU_SYS) ./zstreamtest32 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
test-longmatch: longmatch
$(QEMU_SYS) ./longmatch
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 183d20f749e6..f7b3c854fdb2 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -98,7 +98,7 @@ static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
BYTE* op = ptr;
for (i = 0; i < size; i++) {
- op[i] = RAND(seed) % (maxSymb + 1);
+ op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
}
}
@@ -134,8 +134,8 @@ static void RAND_genDist(U32* seed, BYTE* dist, double weight)
{
size_t i = 0;
size_t statesLeft = DISTSIZE;
- BYTE symb = RAND(seed) % 256;
- BYTE step = (RAND(seed) % 256) | 1; /* force it to be odd so it's relatively prime to 256 */
+ BYTE symb = (BYTE) (RAND(seed) % 256);
+ BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
while (i < DISTSIZE) {
size_t states = ((size_t)(weight * statesLeft)) + 1;
@@ -259,7 +259,7 @@ static void writeFrameHeader(U32* seed, frame_t* frame)
/* Follow window algorithm from specification */
int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
int const mantissa = RAND(seed) % 8;
- windowByte = (exponent << 3) | mantissa;
+ windowByte = (BYTE) ((exponent << 3) | mantissa);
fh.windowSize = (1U << (exponent + 10));
fh.windowSize += fh.windowSize / 8 * mantissa;
}
@@ -284,7 +284,7 @@ static void writeFrameHeader(U32* seed, frame_t* frame)
if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
/* do single segment sometimes */
- fh.windowSize = fh.contentSize;
+ fh.windowSize = (U32) fh.contentSize;
singleSegment = 1;
}
}
@@ -307,7 +307,7 @@ static void writeFrameHeader(U32* seed, frame_t* frame)
{
BYTE const frameHeaderDescriptor =
- (fcsCode << 6) | (singleSegment << 5) | (1 << 2);
+ (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2));
op[pos++] = frameHeaderDescriptor;
}
@@ -318,14 +318,14 @@ static void writeFrameHeader(U32* seed, frame_t* frame)
if (contentSizeFlag) {
switch (fcsCode) {
default: /* Impossible */
- case 0: op[pos++] = fh.contentSize; break;
- case 1: MEM_writeLE16(op + pos, fh.contentSize - 256); pos += 2; break;
- case 2: MEM_writeLE32(op + pos, fh.contentSize); pos += 4; break;
- case 3: MEM_writeLE64(op + pos, fh.contentSize); pos += 8; break;
+ case 0: op[pos++] = (BYTE) fh.contentSize; break;
+ case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
+ case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
+ case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
}
}
- DISPLAYLEVEL(2, " frame content size:\t%zu\n", fh.contentSize);
+ DISPLAYLEVEL(2, " frame content size:\t%u\n", (U32)fh.contentSize);
DISPLAYLEVEL(2, " frame window size:\t%u\n", fh.windowSize);
DISPLAYLEVEL(2, " content size flag:\t%d\n", contentSizeFlag);
DISPLAYLEVEL(2, " single segment flag:\t%d\n", singleSegment);
@@ -385,7 +385,7 @@ static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t content
op += litSize;
} else {
/* RLE literals */
- BYTE const symb = RAND(seed) % 256;
+ BYTE const symb = (BYTE) (RAND(seed) % 256);
DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (U32)symb);
@@ -542,8 +542,8 @@ static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t con
op += compressedSize;
compressedSize += hufHeaderSize;
- DISPLAYLEVEL(5, " regenerated size: %zu\n", litSize);
- DISPLAYLEVEL(5, " compressed size: %zu\n", compressedSize);
+ DISPLAYLEVEL(5, " regenerated size: %u\n", (U32)litSize);
+ DISPLAYLEVEL(5, " compressed size: %u\n", (U32)compressedSize);
if (compressedSize >= litSize) {
DISPLAYLEVEL(5, " trying again\n");
/* if we have to try again, reset the stats so we don't accidentally
@@ -611,7 +611,7 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
size_t const remainingMatch = contentSize - literalsSize;
size_t excessMatch = 0;
U32 numSequences = 0;
-
+
U32 i;
@@ -628,7 +628,7 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
}
- DISPLAYLEVEL(5, " total match lengths: %zu\n", remainingMatch);
+ DISPLAYLEVEL(5, " total match lengths: %u\n", (U32)remainingMatch);
for (i = 0; i < numSequences; i++) {
/* Generate match and literal lengths by exponential distribution to
@@ -647,10 +647,10 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
U32 offset, offsetCode, repIndex;
/* bounds checks */
- matchLen = MIN(matchLen, excessMatch + MIN_SEQ_LEN);
- literalLen = MIN(literalLen, literalsSize);
+ matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
+ literalLen = MIN(literalLen, (U32) literalsSize);
if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
- if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + excessMatch;
+ if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
memcpy(srcPtr, literals, literalLen);
srcPtr += literalLen;
@@ -694,8 +694,8 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
}
DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen);
- DISPLAYLEVEL(7, " srcPos: %8tu seqNb: %3u",
- (BYTE*)srcPtr - (BYTE*)frame->srcStart, i);
+ DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
+ (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i);
DISPLAYLEVEL(6, "\n");
if (offsetCode < 3) {
DISPLAYLEVEL(7, " repeat offset: %d\n", repIndex);
@@ -711,8 +711,8 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
memcpy(srcPtr, literals, literalsSize);
srcPtr += literalsSize;
- DISPLAYLEVEL(6, " excess literals: %5zu", literalsSize);
- DISPLAYLEVEL(7, " srcPos: %8tu", (BYTE*)srcPtr - (BYTE*)frame->srcStart);
+ DISPLAYLEVEL(6, " excess literals: %5u", (U32)literalsSize);
+ DISPLAYLEVEL(7, " srcPos: %8u", (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
DISPLAYLEVEL(6, "\n");
return numSequences;
@@ -957,11 +957,11 @@ static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize
literalsSize = writeLiteralsBlock(seed, frame, contentSize);
- DISPLAYLEVEL(4, " literals size: %zu\n", literalsSize);
+ DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize);
nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize);
- DISPLAYLEVEL(4, " number of sequences: %zu\n", nbSeq);
+ DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq);
return (BYTE*)frame->data - blockStart;
}
@@ -977,7 +977,7 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
BYTE *op = header + 3;
DISPLAYLEVEL(3, " block:\n");
- DISPLAYLEVEL(3, " block content size: %zu\n", contentSize);
+ DISPLAYLEVEL(3, " block content size: %u\n", (U32)contentSize);
DISPLAYLEVEL(3, " last block: %s\n", lastBlock ? "yes" : "no");
if (blockTypeDesc == 0) {
@@ -1025,10 +1025,10 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
frame->src = (BYTE*)frame->src + contentSize;
DISPLAYLEVEL(3, " block type: %s\n", BLOCK_TYPES[blockType]);
- DISPLAYLEVEL(3, " block size field: %zu\n", blockSize);
+ DISPLAYLEVEL(3, " block size field: %u\n", (U32)blockSize);
- header[0] = (lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff;
- MEM_writeLE16(header + 1, blockSize >> 5);
+ header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
+ MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
frame->data = op;
}
@@ -1300,7 +1300,7 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
*********************************************************/
static U32 makeSeed(void)
{
- U32 t = time(NULL);
+ U32 t = (U32) time(NULL);
return XXH32(&t, sizeof(t), 0) % 65536;
}
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 940d315a7cd4..38cf22fa7ebc 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -251,14 +251,14 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
case 13:
benchFunction = local_ZSTD_decompressContinue; benchName = "ZSTD_decompressContinue";
break;
- case 31:
+ case 31:
benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "ZSTD_decodeLiteralsBlock";
break;
case 32:
benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders";
break;
#endif
- case 41:
+ case 41:
benchFunction = local_ZSTD_compressStream; benchName = "ZSTD_compressStream";
break;
case 42:
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index def7542b5f0b..a9dcf12e0702 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -28,6 +28,7 @@
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
#include "zstd.h" /* ZSTD_VERSION_STRING */
#include "zstd_errors.h" /* ZSTD_getErrorCode */
+#include "zstdmt_compress.h"
#define ZDICT_STATIC_LINKING_ONLY
#include "zdict.h" /* ZDICT_trainFromBuffer */
#include "datagen.h" /* RDG_genBuffer */
@@ -57,7 +58,7 @@ static U32 g_displayLevel = 2;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((FUZ_clockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
+ if (g_displayLevel>=4) fflush(stderr); } }
static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6;
static clock_t g_displayClock = 0;
@@ -133,13 +134,21 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "OK : %s \n", errorString);
}
+
DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, (U32)CNBuffSize);
CHECKPLUS(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(CNBuffSize),
CNBuffer, CNBuffSize, 1),
cSize=r );
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
- DISPLAYLEVEL(4, "test%3i : decompressed size test : ", testNb++);
+
+ DISPLAYLEVEL(4, "test%3i : ZSTD_getFrameContentSize test : ", testNb++);
+ { unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize);
+ if (rSize != CNBuffSize) goto _output_error;
+ }
+ DISPLAYLEVEL(4, "OK \n");
+
+ DISPLAYLEVEL(4, "test%3i : ZSTD_findDecompressedSize test : ", testNb++);
{ unsigned long long const rSize = ZSTD_findDecompressedSize(compressedBuffer, cSize);
if (rSize != CNBuffSize) goto _output_error;
}
@@ -157,6 +166,7 @@ static int basicUnitTests(U32 seed, double compressibility)
} }
DISPLAYLEVEL(4, "OK \n");
+
DISPLAYLEVEL(4, "test%3i : decompress with null dict : ", testNb++);
{ size_t const r = ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, NULL, 0);
if (r != CNBuffSize) goto _output_error; }
@@ -179,6 +189,49 @@ static int basicUnitTests(U32 seed, double compressibility)
if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(4, "OK \n");
+
+ /* ZSTDMT simple MT compression test */
+ DISPLAYLEVEL(4, "test%3i : create ZSTDMT CCtx : ", testNb++);
+ { ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
+ if (mtctx==NULL) {
+ DISPLAY("mtctx : mot enough memory, aborting \n");
+ testResult = 1;
+ goto _end;
+ }
+ DISPLAYLEVEL(4, "OK \n");
+
+ DISPLAYLEVEL(4, "test%3i : compress %u bytes with 2 threads : ", testNb++, (U32)CNBuffSize);
+ CHECKPLUS(r, ZSTDMT_compressCCtx(mtctx,
+ compressedBuffer, ZSTD_compressBound(CNBuffSize),
+ CNBuffer, CNBuffSize,
+ 1),
+ cSize=r );
+ DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
+
+ DISPLAYLEVEL(4, "test%3i : decompressed size test : ", testNb++);
+ { unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize);
+ if (rSize != CNBuffSize) {
+ DISPLAY("ZSTD_getFrameContentSize incorrect : %u != %u \n", (U32)rSize, (U32)CNBuffSize);
+ goto _output_error;
+ } }
+ DISPLAYLEVEL(4, "OK \n");
+
+ DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize);
+ { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+ if (r != CNBuffSize) goto _output_error; }
+ DISPLAYLEVEL(4, "OK \n");
+
+ DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
+ { size_t u;
+ for (u=0; u "); DISPLAY(__VA_ARGS__); \
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; }
-static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxDurationS, double compressibility)
+static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxDurationS, double compressibility, int bigTests)
{
static const U32 maxSrcLog = 23;
static const U32 maxSampleLog = 22;
@@ -636,6 +766,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
U32 coreSeed = seed, lseed = 0;
clock_t const startClock = clock();
clock_t const maxClockSpan = maxDurationS * CLOCKS_PER_SEC;
+ int const cLevelLimiter = bigTests ? 3 : 2;
/* allocation */
cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
@@ -662,7 +793,6 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
for ( ; (testNb <= nbTests) || (FUZ_clockSpan(startClock) < maxClockSpan); testNb++ ) {
size_t sampleSize, maxTestSize, totalTestSize;
size_t cSize, totalCSize, totalGenSize;
- XXH64_state_t xxhState;
U64 crcOrig;
BYTE* sampleBuffer;
const BYTE* dict;
@@ -701,7 +831,10 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
crcOrig = XXH64(sampleBuffer, sampleSize, 0);
/* compression tests */
- { unsigned const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize)/3))) + 1;
+ { unsigned const cLevel =
+ ( FUZ_rand(&lseed) %
+ (ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / cLevelLimiter)) )
+ + 1;
cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize));
@@ -801,7 +934,10 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
{ U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
- int const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (MAX(testLog, dictLog)/3))) + 1;
+ int const cLevel = (FUZ_rand(&lseed) %
+ (ZSTD_maxCLevel() -
+ (MAX(testLog, dictLog) / cLevelLimiter))) +
+ 1;
maxTestSize = FUZ_rLogLength(&lseed, testLog);
if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1;
@@ -813,22 +949,22 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode));
} else {
ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
- ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */,
+ ZSTD_frameParameters const fPar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */,
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
- ZSTD_parameters p;
- size_t errorCode;
- p.cParams = cPar; p.fParams = fpar;
- errorCode = ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0);
+ ZSTD_parameters const p = FUZ_makeParams(cPar, fPar);
+ size_t const errorCode = ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0);
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_advanced error : %s", ZSTD_getErrorName(errorCode));
}
{ size_t const errorCode = ZSTD_copyCCtx(ctx, refCtx, 0);
CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode));
} }
- XXH64_reset(&xxhState, 0);
ZSTD_setCCtxParameter(ctx, ZSTD_p_forceWindow, FUZ_rand(&lseed) & 1);
+
{ U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2;
U32 n;
+ XXH64_state_t xxhState;
+ XXH64_reset(&xxhState, 0);
for (totalTestSize=0, cSize=0, n=0 ; n MAX_UINT */
+static unsigned readU32FromChar(const char** stringPtr)
+{
+ unsigned result = 0;
+ while ((**stringPtr >='0') && (**stringPtr <='9'))
+ result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+ if ((**stringPtr=='K') || (**stringPtr=='M')) {
+ result <<= 10;
+ if (**stringPtr=='M') result <<= 10;
+ (*stringPtr)++ ;
+ if (**stringPtr=='i') (*stringPtr)++;
+ if (**stringPtr=='B') (*stringPtr)++;
+ }
+ return result;
+}
int main(int argc, const char** argv)
{
- U32 seed=0;
- int seedset=0;
+ U32 seed = 0;
+ int seedset = 0;
int argNb;
int nbTests = nbTestsDefault;
int testNb = 0;
U32 proba = FUZ_compressibility_default;
- int result=0;
+ int result = 0;
U32 mainPause = 0;
U32 maxDuration = 0;
- const char* programName = argv[0];
+ int bigTests = 1;
+ const char* const programName = argv[0];
/* Check command line */
for (argNb=1; argNb='0') && (*argument<='9')) {
- nbTests *= 10;
- nbTests += *argument - '0';
- argument++;
- }
+ argument++; maxDuration = 0;
+ nbTests = readU32FromChar(&argument);
break;
case 'T':
argument++;
- nbTests=0; maxDuration=0;
- while ((*argument>='0') && (*argument<='9')) {
- maxDuration *= 10;
- maxDuration += *argument - '0';
- argument++;
- }
- if (*argument=='m') maxDuration *=60, argument++;
+ nbTests = 0;
+ maxDuration = readU32FromChar(&argument);
+ if (*argument=='s') argument++; /* seconds */
+ if (*argument=='m') maxDuration *= 60, argument++; /* minutes */
if (*argument=='n') argument++;
break;
case 's':
argument++;
- seed=0;
- seedset=1;
- while ((*argument>='0') && (*argument<='9')) {
- seed *= 10;
- seed += *argument - '0';
- argument++;
- }
+ seedset = 1;
+ seed = readU32FromChar(&argument);
break;
case 't':
argument++;
- testNb=0;
- while ((*argument>='0') && (*argument<='9')) {
- testNb *= 10;
- testNb += *argument - '0';
- argument++;
- }
+ testNb = readU32FromChar(&argument);
break;
case 'P': /* compressibility % */
argument++;
- proba=0;
- while ((*argument>='0') && (*argument<='9')) {
- proba *= 10;
- proba += *argument - '0';
- argument++;
- }
- if (proba>100) proba=100;
+ proba = readU32FromChar(&argument);
+ if (proba>100) proba = 100;
break;
default:
- return FUZ_usage(programName);
+ return (FUZ_usage(programName), 1);
} } } } /* for (argNb=1; argNb (b) ? (a) : (b) )
+
/*-************************************
* Benchmark Parameters
@@ -106,7 +111,11 @@ static size_t BMK_findMaxMem(U64 requiredMem)
}
-# define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+static U32 FUZ_rotl32(U32 x, U32 r)
+{
+ return ((x << r) | (x >> (32 - r)));
+}
+
U32 FUZ_rand(U32* src)
{
const U32 prime1 = 2654435761U;
@@ -125,7 +134,7 @@ U32 FUZ_rand(U32* src)
*********************************************************/
typedef struct {
size_t cSize;
- double cSpeed;
+ double cSpeed; /* bytes / sec */
double dSpeed;
} BMK_result_t;
@@ -141,8 +150,6 @@ typedef struct
} blockParam_t;
-#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
-
static size_t BMK_benchParam(BMK_result_t* resultPtr,
const void* srcBuffer, size_t srcSize,
ZSTD_CCtx* ctx,
@@ -165,6 +172,11 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
char name[30] = { 0 };
U64 crcOrig;
+ /* init result for early exit */
+ resultPtr->cSize = srcSize;
+ resultPtr->cSpeed = 0.;
+ resultPtr->dSpeed = 0.;
+
/* Memory allocation & restrictions */
snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%03uS%1u", Wlog, Clog, Hlog, Slog, Slength, Tlength, strat);
if (!compressedBuffer || !resultBuffer || !blockTable) {
@@ -206,7 +218,6 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
size_t cSize = 0;
double fastestC = 100000000., fastestD = 100000000.;
double ratio = 0.;
- U64 crcCheck = 0;
clock_t const benchStart = clock();
DISPLAY("\r%79s\r", "");
@@ -242,8 +253,8 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
cSize = 0;
for (blockNb=0; blockNb", loopNb, name, (U32)srcSize);
DISPLAY(" %9u (%4.3f),%7.1f MB/s", (U32)cSize, ratio, (double)srcSize / fastestC / 1000000.);
@@ -273,18 +284,18 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
resultPtr->dSpeed = (double)srcSize / fastestD;
/* CRC Checking */
- crcCheck = XXH64(resultBuffer, srcSize, 0);
- if (crcOrig!=crcCheck) {
- unsigned u;
- unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize));
- DISPLAY("\n!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcOrig, (unsigned)crcCheck);
- for (u=0; u> 3) & PARAMTABLEMASK]
-#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
-
static void playAround(FILE* f, winnerInfo_t* winners,
ZSTD_compressionParameters params,
const void* srcBuffer, size_t srcSize,
@@ -711,6 +720,14 @@ int benchFiles(const char** fileNamesTable, int nbFiles)
}
+static void BMK_translateAdvancedParams(ZSTD_compressionParameters params)
+{
+ DISPLAY("--zstd=windowLog=%u,chainLog=%u,hashLog=%u,searchLog=%u,searchLength=%u,targetLength=%u,strategy=%u \n",
+ params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, params.targetLength, (U32)(params.strategy));
+}
+
+/* optimizeForSize():
+ * targetSpeed : expressed in MB/s */
int optimizeForSize(const char* inFileName, U32 targetSpeed)
{
FILE* const inFile = fopen( inFileName, "rb" );
@@ -723,8 +740,11 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed)
/* Memory allocation & restrictions */
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
- if (benchedSize < inFileSize)
- DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
+ if (benchedSize < inFileSize) {
+ DISPLAY("Not enough memory for '%s' \n", inFileName);
+ fclose(inFile);
+ return 11;
+ }
/* Alloc */
origBuff = malloc(benchedSize);
@@ -747,10 +767,9 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed)
/* bench */
DISPLAY("\r%79s\r", "");
DISPLAY("optimizing for %s - limit speed %u MB/s \n", inFileName, targetSpeed);
- targetSpeed *= 1000;
+ targetSpeed *= 1000000;
{ ZSTD_CCtx* const ctx = ZSTD_createCCtx();
- ZSTD_compressionParameters params;
winnerInfo_t winner;
BMK_result_t candidate;
const size_t blockSize = g_blockSize ? g_blockSize : benchedSize;
@@ -764,26 +783,28 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed)
{ const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
int i;
for (i=1; i<=maxSeeds; i++) {
- params = ZSTD_getCParams(i, blockSize, 0);
- BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
+ ZSTD_compressionParameters const CParams = ZSTD_getCParams(i, blockSize, 0);
+ BMK_benchParam(&candidate, origBuff, benchedSize, ctx, CParams);
if (candidate.cSpeed < targetSpeed)
break;
if ( (candidate.cSize < winner.result.cSize)
| ((candidate.cSize == winner.result.cSize) & (candidate.cSpeed > winner.result.cSpeed)) )
{
- winner.params = params;
+ winner.params = CParams;
winner.result = candidate;
BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize);
} }
}
BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
+ BMK_translateAdvancedParams(winner.params);
/* start tests */
{ time_t const grillStart = time(NULL);
do {
- params = winner.params;
+ ZSTD_compressionParameters params = winner.params;
paramVariation(¶ms);
- if ((FUZ_rand(&g_rand) & 15) == 3) params = randomParams();
+ if ((FUZ_rand(&g_rand) & 31) == 3) params = randomParams(); /* totally random config to improve search space */
+ params = ZSTD_adjustCParams(params, blockSize, 0);
/* exclude faster if already played set of params */
if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(params))-1)) continue;
@@ -800,6 +821,7 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed)
winner.params = params;
winner.result = candidate;
BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
+ BMK_translateAdvancedParams(winner.params);
}
} while (BMK_timeSpan(grillStart) < g_grillDuration_s);
}
@@ -833,7 +855,7 @@ static int usage_advanced(void)
DISPLAY( " -T# : set level 1 speed objective \n");
DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n");
DISPLAY( " -i# : iteration loops [1-9](default : %i) \n", NBLOOPS);
- DISPLAY( " -O# : find Optimized parameters for # target speed (default : 0) \n");
+ DISPLAY( " -O# : find Optimized parameters for # MB/s compression speed (default : 0) \n");
DISPLAY( " -S : Single run \n");
DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100);
return 0;
diff --git a/tests/playTests.sh b/tests/playTests.sh
index c493fed55358..021fd59fe2af 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -11,6 +11,7 @@ roundTripTest() {
local_p="$2"
else
local_c="$2"
+ local_p=""
fi
rm -f tmp1 tmp2
@@ -20,13 +21,36 @@ roundTripTest() {
$DIFF -q tmp1 tmp2
}
+fileRoundTripTest() {
+ if [ -n "$3" ]; then
+ local_c="$3"
+ local_p="$2"
+ else
+ local_c="$2"
+ local_p=""
+ fi
+
+ rm -f tmp.zstd tmp.md5.1 tmp.md5.2
+ $ECHO "fileRoundTripTest: ./datagen $1 $local_p > tmp && $ZSTD -v$local_c -c tmp | $ZSTD -d"
+ ./datagen $1 $local_p > tmp
+ cat tmp | $MD5SUM > tmp.md5.1
+ $ZSTD --ultra -v$local_c -c tmp | $ZSTD -d | $MD5SUM > tmp.md5.2
+ $DIFF -q tmp.md5.1 tmp.md5.2
+}
+
+isTerminal=false
+if [ -t 0 ] && [ -t 1 ]
+then
+ isTerminal=true
+fi
+
isWindows=false
-ECHO="echo"
+ECHO="echo -e"
INTOVOID="/dev/null"
case "$OS" in
Windows*)
isWindows=true
- ECHO="echo -e"
+ INTOVOID="NUL"
;;
esac
@@ -42,11 +66,17 @@ case "$UNAME" in
SunOS) DIFF="gdiff" ;;
esac
-
$ECHO "\nStarting playTests.sh isWindows=$isWindows ZSTD='$ZSTD'"
[ -n "$ZSTD" ] || die "ZSTD variable must be defined!"
+if [ -n "$(echo hello | $ZSTD -v -T2 2>&1 > $INTOVOID | grep 'multi-threading is disabled')" ]
+then
+ hasMT=""
+else
+ hasMT="true"
+fi
+
$ECHO "\n**** simple tests **** "
./datagen > tmp
@@ -72,6 +102,12 @@ cp tmp tmp2
$ZSTD tmp2 -fo && die "-o must be followed by filename "
$ECHO "test : implied stdout when input is stdin"
$ECHO bob | $ZSTD | $ZSTD -d
+if [ "$isTerminal" = true ]; then
+$ECHO "test : compressed data to terminal"
+$ECHO bob | $ZSTD && die "should have refused : compressed data to terminal"
+$ECHO "test : compressed data from terminal (a hang here is a test fail, zstd is wrongly waiting on data from terminal)"
+$ZSTD -d > $INTOVOID && die "should have refused : compressed data from terminal"
+fi
$ECHO "test : null-length file roundtrip"
$ECHO -n '' | $ZSTD - --stdout | $ZSTD -d --stdout
$ECHO "test : decompress file with wrong suffix (must fail)"
@@ -96,6 +132,14 @@ $ZSTD -q tmp && die "overwrite check failed!"
$ECHO "test : force overwrite"
$ZSTD -q -f tmp
$ZSTD -q --force tmp
+$ECHO "test : overwrite readonly file"
+rm -f tmpro tmpro.zst
+$ECHO foo > tmpro.zst
+$ECHO foo > tmpro
+chmod 400 tmpro.zst
+$ZSTD -q tmpro && die "should have refused to overwrite read-only file"
+$ZSTD -q -f tmpro
+rm -f tmpro tmpro.zst
$ECHO "test : file removal"
$ZSTD -f --rm tmp
ls tmp && die "tmp should no longer be present"
@@ -156,6 +200,19 @@ $ECHO "$ECHO foo | $ZSTD > /dev/full"
$ECHO foo | $ZSTD > /dev/full && die "write error not detected!"
$ECHO "$ECHO foo | $ZSTD | $ZSTD -d > /dev/full"
$ECHO foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
+
+$ECHO "\n**** symbolic link test **** "
+
+rm -f hello.tmp world.tmp hello.tmp.zst world.tmp.zst
+$ECHO "hello world" > hello.tmp
+ln -s hello.tmp world.tmp
+$ZSTD world.tmp hello.tmp
+ls hello.tmp.zst || die "regular file should have been compressed!"
+ls world.tmp.zst && die "symbolic link should not have been compressed!"
+$ZSTD world.tmp hello.tmp -f
+ls world.tmp.zst || die "symbol link should have been compressed with --force"
+rm -f hello.tmp world.tmp hello.tmp.zst world.tmp.zst
+
fi
@@ -227,12 +284,12 @@ $ECHO "- Create second (different) dictionary "
$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
$ECHO "- Create dictionary with short dictID"
-$ZSTD --train *.c ../programs/*.c --dictID 1 -o tmpDict1
+$ZSTD --train *.c ../programs/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with wrong dictID parameter order (must fail)"
$ZSTD --train *.c ../programs/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument "
$ECHO "- Create dictionary with size limit"
-$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict 4K -v
+$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict=4K -v
$ECHO "- Create dictionary with wrong parameter order (must fail)"
$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument "
$ECHO "- Compress without dictID"
@@ -240,7 +297,7 @@ $ZSTD -f tmp -D tmpDict1 --no-dictID
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF $TESTFILE result
$ECHO "- Compress with wrong argument order (must fail)"
-$ZSTD tmp -Df tmpDict1 -c > /dev/null && die "-D must be followed by dictionary name "
+$ZSTD tmp -Df tmpDict1 -c > $INTOVOID && die "-D must be followed by dictionary name "
$ECHO "- Compress multiple files with dictionary"
rm -rf dirTestDict
mkdir dirTestDict
@@ -255,6 +312,11 @@ case "$UNAME" in
*) $MD5SUM -c tmph1 ;;
esac
rm -rf dirTestDict
+$ECHO "- dictionary builder on bogus input"
+$ECHO "Hello World" > tmp
+$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
+./datagen -P0 -g10M > tmp
+$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
rm tmp*
@@ -263,19 +325,39 @@ $ECHO "\n**** cover dictionary tests **** "
TESTFILE=../programs/zstdcli.c
./datagen > tmpDict
$ECHO "- Create first dictionary"
-$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
+$ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
cp $TESTFILE tmp
$ZSTD -f tmp -D tmpDict
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF $TESTFILE result
$ECHO "- Create second (different) dictionary"
-$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$ZSTD --train-cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
$ECHO "- Create dictionary with short dictID"
-$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
+$ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit"
-$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
+$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
+rm tmp*
+
+$ECHO "\n**** legacy dictionary tests **** "
+
+TESTFILE=../programs/zstdcli.c
+./datagen > tmpDict
+$ECHO "- Create first dictionary"
+$ZSTD --train-legacy=selectivity=8 *.c ../programs/*.c -o tmpDict
+cp $TESTFILE tmp
+$ZSTD -f tmp -D tmpDict
+$ZSTD -d tmp.zst -D tmpDict -fo result
+$DIFF $TESTFILE result
+$ECHO "- Create second (different) dictionary"
+$ZSTD --train-legacy=s=5 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
+$ECHO "- Create dictionary with short dictID"
+$ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1
+cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
+$ECHO "- Create dictionary with size limit"
+$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
rm tmp*
@@ -341,7 +423,7 @@ if [ $GZIPMODE -eq 1 ]; then
$ZSTD -f --format=gzip tmp
$ZSTD -f tmp
cat tmp.gz tmp.zst tmp.gz tmp.zst | $ZSTD -d -f -o tmp
- head -c -1 tmp.gz | $ZSTD -t && die "incomplete frame not detected !"
+ head -c -1 tmp.gz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "gzip mode not supported"
@@ -383,13 +465,48 @@ if [ $LZMAMODE -eq 1 ]; then
$ZSTD -f --format=lzma tmp
$ZSTD -f tmp
cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | $ZSTD -d -f -o tmp
- head -c -1 tmp.xz | $ZSTD -t && die "incomplete frame not detected !"
- head -c -1 tmp.lzma | $ZSTD -t && die "incomplete frame not detected !"
+ head -c -1 tmp.xz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
+ head -c -1 tmp.lzma | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "xz mode not supported"
fi
+$ECHO "\n**** lz4 compatibility tests **** "
+
+LZ4MODE=1
+$ZSTD --format=lz4 -V || LZ4MODE=0
+if [ $LZ4MODE -eq 1 ]; then
+ $ECHO "lz4 support detected"
+ LZ4EXE=1
+ lz4 -V || LZ4EXE=0
+ if [ $LZ4EXE -eq 1 ]; then
+ ./datagen > tmp
+ $ZSTD --format=lz4 -f tmp
+ lz4 -t -v tmp.lz4
+ lz4 -f tmp
+ $ZSTD -d -f -v tmp.lz4
+ rm tmp*
+ else
+ $ECHO "lz4 binary not detected"
+ fi
+else
+ $ECHO "lz4 mode not supported"
+fi
+
+
+$ECHO "\n**** lz4 frame tests **** "
+
+if [ $LZ4MODE -eq 1 ]; then
+ ./datagen > tmp
+ $ZSTD -f --format=lz4 tmp
+ $ZSTD -f tmp
+ cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | $ZSTD -d -f -o tmp
+ head -c -1 tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
+ rm tmp*
+else
+ $ECHO "lz4 mode not supported"
+fi
$ECHO "\n**** zstd round-trip tests **** "
@@ -402,6 +519,19 @@ roundTripTest -g519K 6 # greedy, hash chain
roundTripTest -g517K 16 # btlazy2
roundTripTest -g516K 19 # btopt
+fileRoundTripTest -g500K
+
+if [ -n "$hasMT" ]
+then
+ $ECHO "\n**** zstdmt round-trip tests **** "
+ roundTripTest -g4M "1 -T0"
+ roundTripTest -g8M "3 -T2"
+ roundTripTest -g8000K "2 --threads=2"
+ fileRoundTripTest -g4M "19 -T2 -B1M"
+else
+ $ECHO "\n**** no multithreading, skipping zstdmt tests **** "
+fi
+
rm tmp*
if [ "$1" != "--test-large-data" ]; then
@@ -437,4 +567,16 @@ roundTripTest -g50000000 -P94 19
roundTripTest -g99000000 -P99 20
roundTripTest -g6000000000 -P99 1
+fileRoundTripTest -g4193M -P99 1
+
+if [ -n "$hasMT" ]
+then
+ $ECHO "\n**** zstdmt long round-trip tests **** "
+ roundTripTest -g99000000 -P99 "20 -T2"
+ roundTripTest -g6000000000 -P99 "1 -T2"
+ fileRoundTripTest -g4193M -P98 " -T0"
+else
+ $ECHO "\n**** no multithreading, skipping zstdmt tests **** "
+fi
+
rm tmp*
diff --git a/tests/test-zstd-speed.py b/tests/test-zstd-speed.py
index 23d4f477c7f3..56108a5cae4c 100755
--- a/tests/test-zstd-speed.py
+++ b/tests/test-zstd-speed.py
@@ -14,14 +14,15 @@
# - dir1/zstd and dir2/zstd will be merged in a single results file
import argparse
-import os
+import os # getloadavg
import string
import subprocess
-import time
+import time # strftime
import traceback
import hashlib
+import platform # system
-script_version = 'v1.1.1 (2016-10-28)'
+script_version = 'v1.1.2 (2017-03-26)'
default_repo_url = 'https://github.com/facebook/zstd.git'
working_dir_name = 'speedTest'
working_path = os.getcwd() + '/' + working_dir_name # /path/to/zstd/tests/speedTest
@@ -152,10 +153,15 @@ def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5
% (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
time.sleep(sleepTime)
start_load = str(os.getloadavg())
+ osType = platform.system()
+ if osType == 'Linux':
+ cpuSelector = "taskset --cpu-list 0"
+ else:
+ cpuSelector = ""
if args.dictionary:
- result = execute('programs/%s -rqi5b1e%s -D %s %s' % (executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True)
+ result = execute('%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True)
else:
- result = execute('programs/%s -rqi5b1e%s %s' % (executableName, args.lastCLevel, testFilePath), print_output=True)
+ result = execute('%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath), print_output=True)
end_load = str(os.getloadavg())
linesExpected = args.lastCLevel + 1
if len(result) != linesExpected:
@@ -291,7 +297,7 @@ if __name__ == '__main__':
log("ERROR: e-mail senders 'mail' or 'mutt' not found")
exit(1)
- clang_version = execute("clang -v 2>&1 | grep 'clang version' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0];
+ clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0];
gcc_version = execute("gcc -dumpversion", verbose)[0];
if verbose:
diff --git a/tests/zbufftest.c b/tests/zbufftest.c
index 14b73923311d..601aa808d027 100644
--- a/tests/zbufftest.c
+++ b/tests/zbufftest.c
@@ -60,7 +60,7 @@ static U32 g_displayLevel = 2;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((FUZ_GetClockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
+ if (g_displayLevel>=4) fflush(stderr); } }
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_displayClock = 0;
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index aa7367bcfd31..0e09e185642b 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -59,7 +59,7 @@ static U32 g_displayLevel = 2;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((FUZ_GetClockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
+ if (g_displayLevel>=4) fflush(stderr); } }
static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6;
static clock_t g_displayClock = 0;
@@ -131,7 +131,7 @@ static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blo
}
{ size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks);
free(blockSizes);
- if (ZDICT_isError(dictSize)) { free(dict.start); return (buffer_t){ NULL, 0, 0 }; }
+ if (ZDICT_isError(dictSize)) { free(dict.start); return g_nullBuffer; }
dict.size = requestedDictSize;
dict.filled = dictSize;
return dict; /* how to return dictSize ? */
@@ -207,6 +207,16 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s);
}
+ /* Attempt bad compression parameters */
+ DISPLAYLEVEL(3, "test%3i : use bad compression parameters : ", testNb++);
+ { size_t r;
+ ZSTD_parameters params = ZSTD_getParams(1, 0, 0);
+ params.cParams.searchLength = 2;
+ r = ZSTD_initCStream_advanced(zc, NULL, 0, params, 0);
+ if (!ZSTD_isError(r)) goto _output_error;
+ DISPLAYLEVEL(3, "init error : %s \n", ZSTD_getErrorName(r));
+ }
+
/* skippable frame test */
DISPLAYLEVEL(3, "test%3i : decompress skippable frame : ", testNb++);
ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB);
@@ -438,11 +448,64 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
if (!ZSTD_isError(r)) goto _output_error; /* must fail : frame requires > 100 bytes */
DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); }
- /* Unknown srcSize */
+ DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_usingCDict_advanced with masked dictID : ", testNb++);
+ { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictionary.filled);
+ ZSTD_frameParameters const fParams = { 1 /* contentSize */, 1 /* checksum */, 1 /* noDictID */};
+ ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, 1 /* byReference */, cParams, customMem);
+ size_t const initError = ZSTD_initCStream_usingCDict_advanced(zc, cdict, CNBufferSize, fParams);
+ if (ZSTD_isError(initError)) goto _output_error;
+ cSize = 0;
+ outBuff.dst = compressedBuffer;
+ outBuff.size = compressedBufferSize;
+ outBuff.pos = 0;
+ inBuff.src = CNBuffer;
+ inBuff.size = CNBufferSize;
+ inBuff.pos = 0;
+ { size_t const r = ZSTD_compressStream(zc, &outBuff, &inBuff);
+ if (ZSTD_isError(r)) goto _output_error; }
+ if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */
+ { size_t const r = ZSTD_endStream(zc, &outBuff);
+ if (r != 0) goto _output_error; } /* error, or some data not flushed */
+ cSize = outBuff.pos;
+ ZSTD_freeCDict(cdict);
+ DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100);
+ }
+
+ DISPLAYLEVEL(3, "test%3i : try retrieving dictID from frame : ", testNb++);
+ { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
+ if (did != 0) goto _output_error;
+ }
+ DISPLAYLEVEL(3, "OK (not detected) \n");
+
+ DISPLAYLEVEL(3, "test%3i : decompress without dictionary : ", testNb++);
+ { size_t const r = ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize);
+ if (!ZSTD_isError(r)) goto _output_error; /* must fail : dictionary not used */
+ DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r));
+ }
+
+ /* Empty srcSize */
+ DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_advanced with pledgedSrcSize=0 and dict : ", testNb++);
+ { ZSTD_parameters params = ZSTD_getParams(5, 0, 0);
+ params.fParams.contentSizeFlag = 1;
+ ZSTD_initCStream_advanced(zc, dictionary.start, dictionary.filled, params, 0);
+ } /* cstream advanced shall write content size = 0 */
+ inBuff.src = CNBuffer;
+ inBuff.size = 0;
+ inBuff.pos = 0;
+ outBuff.dst = compressedBuffer;
+ outBuff.size = compressedBufferSize;
+ outBuff.pos = 0;
+ if (ZSTD_isError(ZSTD_compressStream(zc, &outBuff, &inBuff))) goto _output_error;
+ if (ZSTD_endStream(zc, &outBuff) != 0) goto _output_error;
+ cSize = outBuff.pos;
+ if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error;
+ DISPLAYLEVEL(3, "OK \n");
+
DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly : ", testNb++);
{ ZSTD_parameters params = ZSTD_getParams(5, 0, 0);
params.fParams.contentSizeFlag = 1;
- ZSTD_initCStream_advanced(zc, NULL, 0, params, 0); } /* cstream advanced should write the 0 size field */
+ ZSTD_initCStream_advanced(zc, NULL, 0, params, 0);
+ } /* cstream advanced shall write content size = 0 */
inBuff.src = CNBuffer;
inBuff.size = 0;
inBuff.pos = 0;
@@ -552,7 +615,7 @@ static size_t FUZ_randomLength(U32* seed, U32 maxLog)
#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; }
-static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests)
{
static const U32 maxSrcLog = 24;
static const U32 maxSampleLog = 19;
@@ -574,6 +637,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
const BYTE* dict=NULL; /* can keep same dict on 2 consecutive tests */
size_t dictSize = 0;
U32 oldTestLog = 0;
+ int const cLevelLimiter = bigTests ? 3 : 2;
/* allocations */
cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
@@ -638,7 +702,8 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */
&& oldTestLog /* at least one test happened */ && resetAllowed) {
maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2);
- if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1;
+ if (maxTestSize >= srcBufferSize)
+ maxTestSize = srcBufferSize-1;
{ U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize;
size_t const resetError = ZSTD_resetCStream(zc, pledgedSrcSize);
CHECK(ZSTD_isError(resetError), "ZSTD_resetCStream error : %s", ZSTD_getErrorName(resetError));
@@ -646,11 +711,14 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
} else {
U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
- U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (MAX(testLog, dictLog)/3))) + 1;
+ U32 const cLevel = ( FUZ_rand(&lseed) %
+ (ZSTD_maxCLevel() -
+ (MAX(testLog, dictLog) / cLevelLimiter)))
+ + 1;
maxTestSize = FUZ_rLogLength(&lseed, testLog);
oldTestLog = testLog;
/* random dictionary selection */
- dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0;
+ dictSize = ((FUZ_rand(&lseed)&1)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0;
{ size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
dict = srcBuffer + dictStart;
}
@@ -785,7 +853,7 @@ _output_error:
/* Multi-threading version of fuzzer Tests */
-static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests)
{
static const U32 maxSrcLog = 24;
static const U32 maxSampleLog = 19;
@@ -807,6 +875,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
const BYTE* dict=NULL; /* can keep same dict on 2 consecutive tests */
size_t dictSize = 0;
U32 oldTestLog = 0;
+ int const cLevelLimiter = bigTests ? 3 : 2;
/* allocations */
cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
@@ -851,6 +920,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
/* some issues can only happen when reusing states */
if ((FUZ_rand(&lseed) & 0xFF) == 131) {
U32 const nbThreads = (FUZ_rand(&lseed) % 6) + 1;
+ DISPLAYLEVEL(5, "Creating new context with %u threads \n", nbThreads);
ZSTDMT_freeCCtx(zc);
zc = ZSTDMT_createCCtx(nbThreads);
resetAllowed=0;
@@ -888,7 +958,10 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
} else {
U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
- U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (MAX(testLog, dictLog)/3))) + 1;
+ U32 const cLevel = (FUZ_rand(&lseed) %
+ (ZSTD_maxCLevel() -
+ (MAX(testLog, dictLog) / cLevelLimiter))) +
+ 1;
maxTestSize = FUZ_rLogLength(&lseed, testLog);
oldTestLog = testLog;
/* random dictionary selection */
@@ -898,9 +971,12 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
}
{ U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize;
ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize);
- DISPLAYLEVEL(5, "Init with windowLog = %u \n", params.cParams.windowLog);
+ DISPLAYLEVEL(5, "Init with windowLog = %u and pledgedSrcSize = %u \n",
+ params.cParams.windowLog, (U32)pledgedSrcSize);
params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
+ params.fParams.contentSizeFlag = pledgedSrcSize>0;
+ DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag);
{ size_t const initError = ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize);
CHECK (ZSTD_isError(initError),"ZSTDMT_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); }
ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapSectionLog, FUZ_rand(&lseed) % 12);
@@ -938,7 +1014,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
outBuff.size = outBuff.pos + adjustedDstSize;
DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (U32)adjustedDstSize);
{ size_t const flushError = ZSTDMT_flushStream(zc, &outBuff);
- CHECK (ZSTD_isError(flushError), "flush error : %s", ZSTD_getErrorName(flushError));
+ CHECK (ZSTD_isError(flushError), "ZSTDMT_flushStream error : %s", ZSTD_getErrorName(flushError));
} } }
/* final frame epilogue */
@@ -949,12 +1025,12 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
outBuff.size = outBuff.pos + adjustedDstSize;
DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (U32)adjustedDstSize);
remainingToFlush = ZSTDMT_endStream(zc, &outBuff);
- CHECK (ZSTD_isError(remainingToFlush), "flush error : %s", ZSTD_getErrorName(remainingToFlush));
+ CHECK (ZSTD_isError(remainingToFlush), "ZSTDMT_endStream error : %s", ZSTD_getErrorName(remainingToFlush));
DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (U32)remainingToFlush);
} }
- DISPLAYLEVEL(5, "Frame completed \n");
crcOrig = XXH64_digest(&xxhState);
cSize = outBuff.pos;
+ DISPLAYLEVEL(5, "Frame completed : %u bytes \n", (U32)cSize);
}
/* multi - fragments decompression test */
@@ -972,8 +1048,10 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
inBuff.size = inBuff.pos + readCSrcSize;
outBuff.size = inBuff.pos + dstBuffSize;
+ DISPLAYLEVEL(5, "ZSTD_decompressStream input %u bytes \n", (U32)readCSrcSize);
decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));
+ DISPLAYLEVEL(5, "inBuff.pos = %u \n", (U32)readCSrcSize);
}
CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize);
CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize);
@@ -1063,6 +1141,7 @@ int main(int argc, const char** argv)
int result=0;
int mainPause = 0;
int mtOnly = 0;
+ int bigTests = 1;
const char* const programName = argv[0];
ZSTD_customMem const customMem = { allocFunction, freeFunction, NULL };
ZSTD_customMem const customNULL = { NULL, NULL, NULL };
@@ -1076,6 +1155,7 @@ int main(int argc, const char** argv)
if (argument[0]=='-') {
if (!strcmp(argument, "--mt")) { mtOnly=1; continue; }
+ if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; }
argument++;
while (*argument!=0) {
@@ -1181,8 +1261,8 @@ int main(int argc, const char** argv)
result = basicUnitTests(0, ((double)proba) / 100, customMem); /* use custom memory allocation functions */
} }
- if (!result && !mtOnly) result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
- if (!result) result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100);
+ if (!result && !mtOnly) result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100, bigTests);
+ if (!result) result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100, bigTests);
if (mainPause) {
int unused;
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index 23c3ca4dab8f..a57ed51ec141 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -73,13 +73,13 @@ static U32 g_compressibilityDefault = 50;
#define DEFAULT_DISPLAY_LEVEL 2
#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static U32 g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
+static int g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
static FILE* displayOut;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
{ g_time = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
+ if (g_displayLevel>=4) fflush(displayOut); } }
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
@@ -128,6 +128,11 @@ void BMK_SetBlockSize(size_t blockSize)
/* ********************************************************
* Bench functions
**********************************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
typedef struct
{
z_const char* srcPtr;
@@ -142,9 +147,6 @@ typedef struct
typedef enum { BMK_ZSTD, BMK_ZSTD_STREAM, BMK_ZLIB, BMK_ZWRAP_ZLIB, BMK_ZWRAP_ZSTD, BMK_ZLIB_REUSE, BMK_ZWRAP_ZLIB_REUSE, BMK_ZWRAP_ZSTD_REUSE } BMK_compressor;
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
-
static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
const char* displayName, int cLevel,
const size_t* fileSizes, U32 nbFiles,
@@ -234,7 +236,7 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
if (compressor == BMK_ZSTD) {
ZSTD_parameters const zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize);
ZSTD_customMem const cmem = { NULL, NULL, NULL };
- ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams, cmem);
+ ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams.cParams, cmem);
if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
do {
@@ -982,7 +984,7 @@ int main(int argCount, char** argv)
#ifdef UTIL_HAS_CREATEFILELIST
if (recursive) {
- fileNamesTable = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb);
+ fileNamesTable = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, 1);
if (fileNamesTable) {
unsigned u;
for (u=0; u