diff options
author | Conrad Meyer <cem@FreeBSD.org> | 2018-10-22 20:00:30 +0000 |
---|---|---|
committer | Conrad Meyer <cem@FreeBSD.org> | 2018-10-22 20:00:30 +0000 |
commit | 706cfae467a217cc786fd96a72cc2e33c61987e4 (patch) | |
tree | e7673904660df47b5abd9a1c33cf982a514dac66 /lib/zstd.h | |
parent | 42239e68a5cfba3b37b054425eace8d14e0844e3 (diff) |
Notes
Diffstat (limited to 'lib/zstd.h')
-rw-r--r-- | lib/zstd.h | 355 |
1 files changed, 241 insertions, 114 deletions
diff --git a/lib/zstd.h b/lib/zstd.h index 6405da602e8f..f2af4ac8c429 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -35,31 +35,43 @@ extern "C" { #endif -/******************************************************************************************************* +/******************************************************************************* Introduction - zstd, short for Zstandard, is a fast lossless compression algorithm, - targeting real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression functions. - The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22. - Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + Compression can be done in: - a single step (described as Simple API) - a single step, reusing a context (described as Explicit context) - unbounded multiple steps (described as Streaming compression) - The compression ratio achievable on small data can be highly improved using a dictionary in: + + The compression ratio achievable on small data can be highly improved using + a dictionary. 
Dictionary compression can be performed in: - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) - Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h. - Advanced experimental APIs shall never be used with a dynamic library. - They are not "stable", their definition may change in the future. Only static linking is allowed. -*********************************************************************************************************/ + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 3 -#define ZSTD_VERSION_RELEASE 4 +#define ZSTD_VERSION_RELEASE 7 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */ @@ -68,8 +80,14 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll versio #define ZSTD_QUOTE(str) #str #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* added in v1.3.0 */ +ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0+ */ +/*************************************** +* Default constant +***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif 
/*************************************** * Simple API @@ -96,7 +114,7 @@ ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, * `src` should point to the start of a ZSTD encoded frame. * `srcSize` must be at least as large as the frame header. * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of the frame in `src`, if known + * @return : - decompressed size of `src` frame content, if known * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) * note 1 : a 0 return value means the frame is valid but "empty". @@ -106,7 +124,8 @@ ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, * Optionally, application can rely on some implicit limit, * as ZSTD_decompress() only needs an upper bound of decompressed size. * (For example, data could be necessarily cut into blocks <= 16 KB). - * note 3 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). * note 4 : decompressed size can be very large (64-bits value), * potentially larger than what local system can handle as a single memory segment. * In which case, it's necessary to use streaming mode to decompress data. @@ -123,8 +142,7 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t * Both functions work the same way, but ZSTD_getDecompressedSize() blends * "empty", "unknown" and "error" results to the same return value (0), * while ZSTD_getFrameContentSize() gives them separate return values. - * `src` is the start of a zstd compressed frame. - * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. 
*/ + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); @@ -205,7 +223,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict; * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict */ + * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict + * Note : A ZSTD_CDict can be created with an empty dictionary, but it is inefficient for small data. */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); @@ -217,7 +236,9 @@ ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); * Compression using a digested Dictionary. * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. * Note that compression level is decided during dictionary creation. - * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ + * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) + * Note : ZSTD_compress_usingCDict() can be used with a ZSTD_CDict created from an empty dictionary. + * But it is inefficient for small data, and it is recommended to use ZSTD_compressCCtx(). */ ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -272,39 +293,44 @@ typedef struct ZSTD_outBuffer_s { * since it will play nicer with system's memory, by re-using already allocated memory. * Use one separate ZSTD_CStream per thread for parallel execution. 
* -* Start a new compression by initializing ZSTD_CStream. +* Start a new compression by initializing ZSTD_CStream context. * Use ZSTD_initCStream() to start a new compression operation. -* Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section) +* Use variants ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for streaming with dictionary (experimental section) * -* Use ZSTD_compressStream() repetitively to consume input stream. -* The function will automatically update both `pos` fields. -* Note that it may not consume the entire input, in which case `pos < size`, -* and it's up to the caller to present again remaining data. +* Use ZSTD_compressStream() as many times as necessary to consume input stream. +* The function will automatically update both `pos` fields within `input` and `output`. +* Note that the function may not consume the entire input, +* for example, because the output buffer is already full, +* in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* typically by emptying output buffer, or allocating a new output buffer, +* and then present again remaining input data. * @return : a size hint, preferred nb of bytes to use as input for next function call * or an error code, which can be tested using ZSTD_isError(). * Note 1 : it's just a hint, to help latency a little, any other value will work fine. * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() * -* At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). -* `output->pos` will be updated. -* Note that some content might still be left within internal buffer if `output->size` is too small. 
-* @return : nb of bytes still present within internal buffer (0 if it's empty) +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_flushStream(). `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_flushStream(). +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), * or an error code, which can be tested using ZSTD_isError(). * * ZSTD_endStream() instructs to finish a frame. * It will perform a flush and write frame epilogue. * The epilogue is required for decoders to consider a frame completed. -* ZSTD_endStream() may not be able to flush full data if `output->size` is too small. -* In which case, call again ZSTD_endStream() to complete the flush. +* flush() operation is the same, and follows same rules as ZSTD_flushStream(). * @return : 0 if frame fully completed and fully flushed, - or >0 if some data is still present within internal buffer - (value is minimum size estimation for remaining data to flush, but it could be more) +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), * or an error code, which can be tested using ZSTD_isError(). 
* * *******************************************************************/ typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with versions <= v1.2.0 */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ /*===== ZSTD_CStream management functions =====*/ ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); @@ -335,15 +361,21 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output * The function will update both `pos` fields. * If `input.pos < input.size`, some input has not been consumed. * It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting buffer sizes. * If `output.pos < output.size`, decoder has flushed everything it could. -* @return : 0 when a frame is completely decoded and fully flushed, -* an error code, which can be tested using ZSTD_isError(), -* any other value > 0, which means there is still some decoding to do to complete current frame. -* The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. +* But if `output.pos == output.size`, there is no such guarantee, +* it's likely that some decoded data was not flushed and still remains within internal buffers. +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* When no additional input is provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. 
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (a hint for better latency) +* that will never load more than the current frame. * *******************************************************************************/ typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, continue to consider them separated. */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ /*===== ZSTD_DStream management functions =====*/ ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); @@ -359,21 +391,28 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + /**************************************************************************************** - * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** * The definitions in this section are considered experimental. * They should never be used with a dynamic library, as prototypes may change in the future. * They are provided for advanced scenarios. * Use them only in association with static linking. 
* ***************************************************************************************/ -#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ -/* --- Constants ---*/ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ +/* --- Constants ---*/ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8+ */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */ #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* >= v0.7.0 */ + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */ #define ZSTD_WINDOWLOG_MAX_32 30 #define ZSTD_WINDOWLOG_MAX_64 31 @@ -390,9 +429,10 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */ -#define ZSTD_LDM_MINMATCH_MIN 4 +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ #define ZSTD_LDM_MINMATCH_MAX 4096 +#define ZSTD_LDM_MINMATCH_MIN 4 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size to know frame header size */ @@ -404,7 +444,8 @@ static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ -/*--- Advanced types ---*/ + +/* --- Advanced types --- */ typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; /* from faster to stronger 
*/ @@ -480,9 +521,9 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); /*! ZSTD_frameHeaderSize() : -* `src` should point to the start of a ZSTD frame -* `srcSize` must be >= ZSTD_frameHeaderSize_prefix. -* @return : size of the Frame Header */ + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); @@ -711,29 +752,48 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const /*! ZSTD_resetCStream() : * start a new compression job, using same parameters from previous job. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. * Note that zcs must be init at least once before using ZSTD_resetCStream(). * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. 
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + */ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); typedef struct { - unsigned long long ingested; - unsigned long long consumed; - unsigned long long produced; + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ } ZSTD_frameProgression; -/* ZSTD_getFrameProgression(): +/* ZSTD_getFrameProgression() : * tells how much data has been ingested (read from input) * consumed (input actually compressed) and produced (output) for current frame. - * Therefore, (ingested - consumed) is amount of input data buffered internally, not yet compressed. - * Can report progression inside worker threads (multi-threading and non-blocking mode). + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. */ -ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_getFrameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flushing speed is currently limited by production speed of oldest job + * irrespective of the speed of concurrent newer jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); @@ -880,6 +940,11 @@ typedef struct { unsigned dictID; unsigned checksumFlag; } ZSTD_frameHeader; +/** ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ @@ -901,23 +966,15 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); /** New advanced API (experimental) */ /* ============================================ */ -/* notes on API design : - * In this proposal, parameters are pushed one by one into an existing context, - * and then applied on all subsequent compression jobs. - * When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT. +/* API design : + * In this advanced API, parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are applied to next job, and any subsequent job. + * It's possible to reset parameters to "default" using ZSTD_CCtx_resetParameters(). + * Important : "sticky" parameters only work with `ZSTD_compress_generic()` ! 
+ * For any other entry point, "sticky" parameters are ignored ! * * This API is intended to replace all others advanced / experimental API entry points. - * But it stands a reasonable chance to become "stable", after a reasonable testing period. - */ - -/* note on naming convention : - * Initially, the API favored names like ZSTD_setCCtxParameter() . - * In this proposal, convention is changed towards ZSTD_CCtx_setParameter() . - * The main driver is that it identifies more clearly the target object type. - * It feels clearer when considering multiple targets : - * ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter()) - * ZSTD_CCtxParams_setParameter() (rather than ZSTD_setCCtxParamsParameter() ) - * etc... */ /* note on enum design : @@ -947,7 +1004,7 @@ typedef enum { /* compression parameters */ ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means "do not change cLevel". + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type. * Note 2 : setting a level sets all default values of other compression parameters. * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */ @@ -956,16 +1013,19 @@ typedef enum { * Special: value 0 means "use default windowLog". * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27) * requires explicitly allowing such window size during decompression stage. */ - ZSTD_p_hashLog, /* Size of the probe table, as a power of 2. + ZSTD_p_hashLog, /* Size of the initial probe table, as a power of 2. * Resulting table size is (1 << (hashLog+2)). * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. * Larger tables improve compression ratio of strategies <= dFast, * and improve speed of strategies > dFast. 
* Special: value 0 means "use default hashLog". */ - ZSTD_p_chainLog, /* Size of the full-search table, as a power of 2. + ZSTD_p_chainLog, /* Size of the multi-probe search table, as a power of 2. * Resulting table size is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. * Larger tables result in better and slower compression. * This parameter is useless when using "fast" strategy. + * Note it's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. * Special: value 0 means "use default chainLog". */ ZSTD_p_searchLog, /* Number of search attempts, as a power of 2. * More attempts result in better and slower compression. @@ -1047,27 +1107,52 @@ typedef enum { /* experimental parameters - no stability guaranteed */ /* =================================================================== */ - ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default. - * disabling it improves speed and decreases compression ratio by a large amount. - * note : this setting is automatically updated when changing compression level. - * positive compression levels set ZSTD_p_compressLiterals to 1. - * negative compression levels set ZSTD_p_compressLiterals to 0. */ - ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize, * even when referencing into Dictionary content (default:0) */ + ZSTD_p_forceAttachDict, /* ZSTD supports usage of a CDict in-place + * (avoiding having to copy the compression tables + * from the CDict into the working context). Using + * a CDict in this way saves an initial setup step, + * but comes at the cost of more work per byte of + * input. ZSTD has a simple internal heuristic that + * guesses which strategy will be faster. You can + * use this flag to override that guess. 
+ * + * Note that the by-reference, in-place strategy is + * only used when reusing a compression context + * with compatible compression parameters. (If + * incompatible / uninitialized, the working + * context needs to be cleared anyways, which is + * about as expensive as overwriting it with the + * dictionary context, so there's no savings in + * using the CDict by-ref.) + * + * Values greater than 0 force attaching the dict. + * Values less than 0 force copying the dict. + * 0 selects the default heuristic-guided behavior. + */ } ZSTD_cParameter; /*! ZSTD_CCtx_setParameter() : * Set one compression parameter, selected by enum ZSTD_cParameter. - * Setting a parameter is generally only possible during frame initialization (before starting compression), - * except for a few exceptions which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - * Note : when `value` is an enum, cast it to unsigned for proper type checking. - * @result : informational value (typically, value being set clamped correctly), + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbThreads >= 1), + * following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active on next job, or after a flush(). + * Note : when `value` type is not unsigned (int, or enum), cast it to unsigned for proper type checking. + * @result : informational value (typically, value being set, correctly clamped), * or an error code (which can be tested with ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value); +/*! ZSTD_CCtx_getParameter() : + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. 
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value); + /*! ZSTD_CCtx_setPledgedSrcSize() : * Total input data size to be compressed as a single frame. * This value will be controlled at the end, and result in error if not respected. @@ -1114,36 +1199,55 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*! ZSTD_CCtx_refPrefix() : * Reference a prefix (single-usage dictionary) for next compression job. - * Decompression need same prefix to properly regenerate data. - * Prefix is **only used once**. Tables are discarded at end of compression job. - * Subsequent compression jobs will be done without prefix (if none is explicitly referenced). - * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead. + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome to performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * Note that prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end). * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It must outlive compression job. - * Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * Note 1 : Prefix buffer is referenced. It **must** outlive compression job. + * Its content must remain unmodified up to end of compression (ZSTD_e_end). + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. 
+ * See ZSTD_p_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. * It's a CPU consuming operation, with non-negligible impact on latency. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + * If there is a need to use same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize, + ZSTD_dictContentType_e dictContentType); /*! ZSTD_CCtx_reset() : * Return a CCtx to clean state. * Useful after an error, or to interrupt an ongoing compression job and start a new one. * Any internal data not yet flushed is cancelled. - * Dictionary (if any) is dropped. - * All parameters are back to default values. - * It's possible to modify compression parameters after a reset. + * The parameters and dictionary are kept unchanged, to reset them use ZSTD_CCtx_resetParameters(). */ ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); +/*! ZSTD_CCtx_resetParameters() : + * All parameters are back to default values (compression level is ZSTD_CLEVEL_DEFAULT). + * Dictionary (if any) is dropped. + * Resetting parameters is only possible during frame initialization (before starting compression). + * To reset the context use ZSTD_CCtx_reset(). + * @return 0 or an error code (which can be checked with ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx); + typedef enum { - ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal conditions */ - ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */ - ZSTD_e_end /* flush any remaining data and close current frame. Any additional data starts a new frame. */ + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. */ + ZSTD_e_end /* flush any remaining data and close current frame. + * any additional data starts a new frame. + * each frame is independent (does not reference any content from previous frame). */ } ZSTD_EndDirective; /*! ZSTD_compress_generic() : @@ -1235,6 +1339,13 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z */ ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value); +/*! ZSTD_CCtxParam_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned* value); + /*! ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. 
* This can be done even after compression is started, @@ -1246,10 +1357,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -/*=== Advanced parameters for decompression API ===*/ +/* ==================================== */ +/*=== Advanced decompression API ===*/ +/* ==================================== */ -/* The following parameters must be set after creating a ZSTD_DCtx* (or ZSTD_DStream*) object, - * but before starting decompression of a frame. +/* The following API works the same way as the advanced compression API : + * a context is created, parameters are pushed into it one by one, + * then the context can be used to decompress data using an interface similar to the streaming API. */ /*! ZSTD_DCtx_loadDictionary() : @@ -1286,17 +1400,25 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); /*! ZSTD_DCtx_refPrefix() : * Reference a prefix (single-usage dictionary) for next compression job. - * Prefix is **only used once**. It must be explicitly referenced before each frame. - * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompress_generic() returns 0. * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - * Note 2 : Prefix buffer is referenced. It must outlive compression job. + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression job. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompress_generic() returns 0.
* Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A fulldict prefix is more costly though. */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize, + ZSTD_dictContentType_e dictContentType); /*! ZSTD_DCtx_setMaxWindowSize() : @@ -1318,6 +1440,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, + const void* src, size_t srcSize, ZSTD_format_e format); + + /*! ZSTD_decompress_generic() : * Behave the same as ZSTD_decompressStream. * Decompression parameters cannot be changed once decompression is started. @@ -1383,8 +1512,6 @@ ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx); Use ZSTD_insertBlock() for such a case. */ -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */ /*===== Raw zstd block functions =====*/ ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); |