diff options
| author | Baptiste Daroussin <bapt@FreeBSD.org> | 2017-05-06 10:17:59 +0000 |
|---|---|---|
| committer | Baptiste Daroussin <bapt@FreeBSD.org> | 2017-05-06 10:17:59 +0000 |
| commit | ffcbc2d7ba03067492045e4cbead519a3b3c27ef (patch) | |
| tree | df436f4253158a7d5a4875e54cd7d273dd5334a6 /doc | |
| parent | ab984b3e51d32af796fe89e130f57bf58b8a14b2 (diff) | |
Notes
Diffstat (limited to 'doc')
| -rw-r--r-- | doc/educational_decoder/zstd_decompress.c | 243 | ||||
| -rw-r--r-- | doc/images/Cspeed4.png | bin | 35361 -> 71276 bytes | |||
| -rw-r--r-- | doc/images/Dspeed4.png | bin | 8984 -> 24692 bytes | |||
| -rw-r--r-- | doc/images/dict-cr.png | bin | 23323 -> 90047 bytes | |||
| -rw-r--r-- | doc/images/dict-cs.png | bin | 25052 -> 93837 bytes | |||
| -rw-r--r-- | doc/images/dict-ds.png | bin | 27053 -> 89590 bytes | |||
| -rw-r--r-- | doc/zstd_compression_format.md | 387 | ||||
| -rw-r--r-- | doc/zstd_manual.html | 157 |
8 files changed, 419 insertions, 368 deletions
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index ae4eaa81c6ae..7c8d8114d401 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -27,16 +27,19 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len, /// Get the decompressed size of an input stream so memory can be allocated in /// advance +/// Returns -1 if the size can't be determined size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len); /******* UTILITY MACROS AND TYPES *********************************************/ -// Max block size decompressed size is 128 KB and literal blocks must be smaller -// than that +// Max block size decompressed size is 128 KB and literal blocks can't be +// larger than their block #define MAX_LITERALS_SIZE ((size_t)128 * 1024) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) +/// This decoder calls exit(1) when it encounters an error, however a production +/// library should propagate error codes #define ERROR(s) \ do { \ fprintf(stderr, "Error: %s\n", s); \ @@ -67,29 +70,31 @@ typedef int64_t i64; /// decompression functions. /*** IO STREAM OPERATIONS *************/ -/// These structs are the interface for IO, and do bounds checking on all -/// operations. They should be used opaquely to ensure safety. -/// Output is always done byte-by-byte +/// ostream_t/istream_t are used to wrap the pointers/length data passed into +/// ZSTD_decompress, so that all IO operations are safely bounds checked +/// They are written/read forward, and reads are treated as little-endian +/// They should be used opaquely to ensure safety typedef struct { u8 *ptr; size_t len; } ostream_t; -/// Input often reads a few bits at a time, so maintain an internal offset typedef struct { const u8 *ptr; - int bit_offset; size_t len; + + // Input often reads a few bits at a time, so maintain an internal offset + int bit_offset; } istream_t; /// The following two functions are the only ones that allow the istream to be /// non-byte aligned /// Reads `num` bits from a bitstream, and updates the internal offset -static inline u64 IO_read_bits(istream_t *const in, const int num); -/// Rewinds the stream by `num` bits -static inline void IO_rewind_bits(istream_t *const in, const int num); +static inline u64 IO_read_bits(istream_t *const in, const int num_bits); +/// Backs-up the stream by `num` bits so they can be read again +static inline void IO_rewind_bits(istream_t *const in, const int num_bits); /// If the remaining bits in a byte will be unused, advance to the end of the /// byte static inline void IO_align_stream(istream_t *const in); @@ -101,30 +106,31 @@ static inline void IO_write_byte(ostream_t *const out, u8 symb); /// be byte aligned. static inline size_t IO_istream_len(const istream_t *const in); -/// Returns a pointer where `len` bytes can be read, and advances the internal -/// state. The stream must be byte aligned. +/// Advances the stream by `len` bytes, and returns a pointer to the chunk that +/// was skipped. The stream must be byte aligned. static inline const u8 *IO_read_bytes(istream_t *const in, size_t len); -/// Returns a pointer where `len` bytes can be written, and advances the internal -/// state. The stream must be byte aligned. +/// Advances the stream by `len` bytes, and returns a pointer to the chunk that +/// was skipped so it can be written to. static inline u8 *IO_write_bytes(ostream_t *const out, size_t len); /// Advance the inner state by `len` bytes. The stream must be byte aligned. static inline void IO_advance_input(istream_t *const in, size_t len); -/// Returns an `ostream_t` constructed from the given pointer and length +/// Returns an `ostream_t` constructed from the given pointer and length. static inline ostream_t IO_make_ostream(u8 *out, size_t len); -/// Returns an `istream_t` constructed from the given pointer and length +/// Returns an `istream_t` constructed from the given pointer and length. static inline istream_t IO_make_istream(const u8 *in, size_t len); -/// Returns an `istream_t` with the same base as `in`, and length `len` -/// Then, advance `in` to account for the consumed bytes -/// `in` must be byte aligned +/// Returns an `istream_t` with the same base as `in`, and length `len`. +/// Then, advance `in` to account for the consumed bytes. +/// `in` must be byte aligned. static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len); /*** END IO STREAM OPERATIONS *********/ /*** BITSTREAM OPERATIONS *************/ -/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits -static inline u64 read_bits_LE(const u8 *src, const int num, +/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits, +/// and return them interpreted as a little-endian unsigned integer. +static inline u64 read_bits_LE(const u8 *src, const int num_bits, const size_t offset); /// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so @@ -136,9 +142,8 @@ static inline u64 STREAM_read_bits(const u8 *src, const int bits, /*** END BITSTREAM OPERATIONS *********/ /*** BIT COUNTING OPERATIONS **********/ -/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to -/// `num`, or `-1` if `num == 0`. -static inline int log2inf(const u64 num); +/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0` +static inline int highest_set_bit(const u64 num); /*** END BIT COUNTING OPERATIONS ******/ /*** HUFFMAN PRIMITIVES ***************/ @@ -384,8 +389,8 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len, parse_dictionary(&parsed_dict, (const u8 *)dict, dict_len); } - istream_t in = {(const u8 *)src, 0, src_len}; - ostream_t out = {(u8 *)dst, dst_len}; + istream_t in = IO_make_istream(src, src_len); + ostream_t out = IO_make_ostream(dst, dst_len); // "A content compressed by Zstandard is transformed into a Zstandard frame. // Multiple frames can be appended into a single file or stream. A frame is @@ -633,6 +638,7 @@ static void frame_context_apply_dict(frame_context_t *const ctx, FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable); FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable); + // Copy the repeated offsets memcpy(ctx->previous_offsets, dict->previous_offsets, sizeof(ctx->previous_offsets)); } @@ -668,7 +674,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out, // number of bytes to read and copy." const u8 *const read_ptr = IO_read_bytes(in, block_len); u8 *const write_ptr = IO_write_bytes(out, block_len); - // + // Copy the raw data into the output memcpy(write_ptr, read_ptr, block_len); @@ -682,7 +688,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out, const u8 *const read_ptr = IO_read_bytes(in, 1); u8 *const write_ptr = IO_write_bytes(out, block_len); - // Copy `block_len` copies of `streams->src[0]` to the output + // Copy `block_len` copies of `read_ptr[0]` to the output memset(write_ptr, read_ptr[0], block_len); ctx->current_total_output += block_len; @@ -751,7 +757,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx, u8 **const literals, const int block_type, const int size_format); -static void decode_huf_table(istream_t *const in, HUF_dtable *const dtable); +static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in); static void fse_decode_hufweights(ostream_t *weights, istream_t *const in, int *const num_symbs); @@ -894,12 +900,12 @@ static size_t decode_literals_compressed(frame_context_t *const ctx, istream_t huf_stream = IO_make_sub_istream(in, compressed_size); if (block_type == 2) { - // Decode provided Huffman table + // Decode the provided Huffman table // "This section is only present when Literals_Block_Type type is // Compressed_Literals_Block (2)." HUF_free_dtable(&ctx->literals_dtable); - decode_huf_table(&huf_stream, &ctx->literals_dtable); + decode_huf_table(&ctx->literals_dtable, &huf_stream); } else { // If the previous Huffman table is being repeated, ensure it exists if (!ctx->literals_dtable.symbols) { @@ -922,13 +928,13 @@ static size_t decode_literals_compressed(frame_context_t *const ctx, } // Decode the Huffman table description -static void decode_huf_table(istream_t *const in, HUF_dtable *const dtable) { - const u8 header = IO_read_bits(in, 8); - +static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) { // "All literal values from zero (included) to last present one (excluded) // are represented by Weight with values from 0 to Max_Number_of_Bits." // "This is a single byte value (0-255), which describes how to decode the list of weights." + const u8 header = IO_read_bits(in, 8); + u8 weights[HUF_MAX_SYMBS]; memset(weights, 0, sizeof(weights)); @@ -997,7 +1003,7 @@ typedef struct { u16 ll_state; u16 of_state; u16 ml_state; -} sequence_state_t; +} sequence_states_t; /// Different modes to signal to decode_seq_tables what to do typedef enum { @@ -1052,10 +1058,10 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *const in, sequence_command_t *const sequences, const size_t num_sequences); -static sequence_command_t decode_sequence(sequence_state_t *const state, +static sequence_command_t decode_sequence(sequence_states_t *const state, const u8 *const src, i64 *const offset); -static void decode_seq_table(istream_t *const in, FSE_dtable *const table, +static void decode_seq_table(FSE_dtable *const table, istream_t *const in, const seq_part_t type, const seq_mode_t mode); static size_t decode_sequences(frame_context_t *const ctx, istream_t *in, @@ -1131,34 +1137,33 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in, // Offsets // Match Lengths" // Update the tables we have stored in the context - decode_seq_table(in, &ctx->ll_dtable, seq_literal_length, + decode_seq_table(&ctx->ll_dtable, in, seq_literal_length, (compression_modes >> 6) & 3); - decode_seq_table(in, &ctx->of_dtable, seq_offset, + decode_seq_table(&ctx->of_dtable, in, seq_offset, (compression_modes >> 4) & 3); - decode_seq_table(in, &ctx->ml_dtable, seq_match_length, + decode_seq_table(&ctx->ml_dtable, in, seq_match_length, (compression_modes >> 2) & 3); - // Check to make sure none of the tables are uninitialized - if (!ctx->ll_dtable.symbols || !ctx->of_dtable.symbols || - !ctx->ml_dtable.symbols) { - CORRUPTION(); - } - sequence_state_t state; - // Copy the context's tables into the local state - memcpy(&state.ll_table, &ctx->ll_dtable, sizeof(FSE_dtable)); - memcpy(&state.of_table, &ctx->of_dtable, sizeof(FSE_dtable)); - memcpy(&state.ml_table, &ctx->ml_dtable, sizeof(FSE_dtable)); + sequence_states_t states; - size_t len = IO_istream_len(in); + // Initialize the decoding tables + { + states.ll_table = ctx->ll_dtable; + states.of_table = ctx->of_dtable; + states.ml_table = ctx->ml_dtable; + } + + const size_t len = IO_istream_len(in); const u8 *const src = IO_read_bytes(in, len); // "After writing the last bit containing information, the compressor writes // a single 1-bit and then fills the byte with 0-7 0 bits of padding." - const int padding = 8 - log2inf(src[len - 1]); - i64 offset = len * 8 - padding; + const int padding = 8 - highest_set_bit(src[len - 1]); + // The offset starts at the end because FSE streams are read backwards + i64 bit_offset = len * 8 - padding; // "The bitstream starts with initial state values, each using the required // number of bits in their respective accuracy, decoded previously from @@ -1166,24 +1171,22 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in, // // It starts by Literals_Length_State, followed by Offset_State, and finally // Match_Length_State." - FSE_init_state(&state.ll_table, &state.ll_state, src, &offset); - FSE_init_state(&state.of_table, &state.of_state, src, &offset); - FSE_init_state(&state.ml_table, &state.ml_state, src, &offset); + FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset); + FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset); + FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset); for (size_t i = 0; i < num_sequences; i++) { // Decode sequences one by one - sequences[i] = decode_sequence(&state, src, &offset); + sequences[i] = decode_sequence(&states, src, &bit_offset); } - if (offset != 0) { + if (bit_offset != 0) { CORRUPTION(); } - - // Don't free tables so they can be used in the next block } // Decode a single sequence and update the state -static sequence_command_t decode_sequence(sequence_state_t *const state, +static sequence_command_t decode_sequence(sequence_states_t *const states, const u8 *const src, i64 *const offset) { // "Each symbol is a code in its own context, which specifies Baseline and @@ -1191,9 +1194,9 @@ static sequence_command_t decode_sequence(sequence_state_t *const state, // additional bits in the same bitstream." // Decode symbols, but don't update states - const u8 of_code = FSE_peek_symbol(&state->of_table, state->of_state); - const u8 ll_code = FSE_peek_symbol(&state->ll_table, state->ll_state); - const u8 ml_code = FSE_peek_symbol(&state->ml_table, state->ml_state); + const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state); + const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state); + const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state); // Offset doesn't need a max value as it's not decoded using a table if (ll_code > SEQ_MAX_CODES[seq_literal_length] || @@ -1221,17 +1224,18 @@ static sequence_command_t decode_sequence(sequence_state_t *const state, // then Offset_State." // If the stream is complete don't read bits to update state if (*offset != 0) { - FSE_update_state(&state->ll_table, &state->ll_state, src, offset); - FSE_update_state(&state->ml_table, &state->ml_state, src, offset); - FSE_update_state(&state->of_table, &state->of_state, src, offset); + FSE_update_state(&states->ll_table, &states->ll_state, src, offset); + FSE_update_state(&states->ml_table, &states->ml_state, src, offset); + FSE_update_state(&states->of_table, &states->of_state, src, offset); } return seq; } /// Given a sequence part and table mode, decode the FSE distribution -static void decode_seq_table(istream_t *const in, FSE_dtable *const table, - const seq_part_t type, const seq_mode_t mode) { +/// Errors if the mode is `seq_repeat` without a pre-existing table in `table` +static void decode_seq_table(FSE_dtable *const table, istream_t *const in, + const seq_part_t type, const seq_mode_t mode) { // Constant arrays indexed by seq_part_t const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST, SEQ_OFFSET_DEFAULT_DIST, @@ -1272,12 +1276,17 @@ static void decode_seq_table(istream_t *const in, FSE_dtable *const table, // "Repeat_Mode : re-use distribution table from previous compressed // block." // Nothing to do here, table will be unchanged + if (!table->symbols) { + // This mode is invalid if we don't already have a table + CORRUPTION(); + } break; default: // Impossible, as mode is from 0-3 IMPOSSIBLE(); break; } + } /******* END SEQUENCE DECODING ************************************************/ @@ -1296,6 +1305,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out, const sequence_command_t seq = sequences[i]; { + // If the sequence asks for more literals than are left, the + // sequence must be corrupted if (seq.literal_length > IO_istream_len(&litstream)) { CORRUPTION(); } @@ -1336,7 +1347,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out, // as per the exception listed above offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1; - // If idx == 1 we don't need to modify offset_hist[2] + // If idx == 1 we don't need to modify offset_hist[2], since + // we're using the second-most recent code if (idx > 1) { offset_hist[2] = offset_hist[1]; } @@ -1344,6 +1356,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out, offset_hist[0] = offset; } } else { + // When it's not a repeat offset: + // "if (Offset_Value > 3) offset = Offset_Value - 3;" offset = seq.offset - 3; // Shift back history @@ -1391,11 +1405,11 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out, total_output += seq.match_length; } + // Copy any leftover literals { size_t len = IO_istream_len(&litstream); u8 *const write_ptr = IO_write_bytes(out, len); const u8 *const read_ptr = IO_read_bytes(&litstream, len); - // Copy any leftover literals memcpy(write_ptr, read_ptr, len); total_output += len; @@ -1517,10 +1531,10 @@ static void parse_dictionary(dictionary_t *const dict, const u8 *src, // recent offsets (instead of using {1,4,8}), stored in order, 4-bytes // little-endian each, for a total of 12 bytes. Each recent offset must have // a value < dictionary size." - decode_huf_table(&in, &dict->literals_dtable); - decode_seq_table(&in, &dict->of_dtable, seq_offset, seq_fse); - decode_seq_table(&in, &dict->ml_dtable, seq_match_length, seq_fse); - decode_seq_table(&in, &dict->ll_dtable, seq_literal_length, seq_fse); + decode_huf_table(&dict->literals_dtable, &in); + decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse); + decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse); + decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse); // Read in the previous offset history dict->previous_offsets[0] = IO_read_bits(&in, 32); @@ -1571,20 +1585,20 @@ static void free_dictionary(dictionary_t *const dict) { /******* IO STREAM OPERATIONS *************************************************/ #define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream") /// Reads `num` bits from a bitstream, and updates the internal offset -static inline u64 IO_read_bits(istream_t *const in, const int num) { - if (num > 64 || num <= 0) { +static inline u64 IO_read_bits(istream_t *const in, const int num_bits) { + if (num_bits > 64 || num_bits <= 0) { ERROR("Attempt to read an invalid number of bits"); } - const size_t bytes = (num + in->bit_offset + 7) / 8; - const size_t full_bytes = (num + in->bit_offset) / 8; + const size_t bytes = (num_bits + in->bit_offset + 7) / 8; + const size_t full_bytes = (num_bits + in->bit_offset) / 8; if (bytes > in->len) { INP_SIZE(); } - const u64 result = read_bits_LE(in->ptr, num, in->bit_offset); + const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset); - in->bit_offset = (num + in->bit_offset) % 8; + in->bit_offset = (num_bits + in->bit_offset) % 8; in->ptr += full_bytes; in->len -= full_bytes; @@ -1593,16 +1607,21 @@ static inline u64 IO_read_bits(istream_t *const in, const int num) { /// If a non-zero number of bits have been read from the current byte, advance /// the offset to the next byte -static inline void IO_rewind_bits(istream_t *const in, int num) { - if (num < 0) { +static inline void IO_rewind_bits(istream_t *const in, int num_bits) { + if (num_bits < 0) { ERROR("Attempting to rewind stream by a negative number of bits"); } - const int new_offset = in->bit_offset - num; - const i64 bytes = (new_offset - 7) / 8; + // move the offset back by `num_bits` bits + const int new_offset = in->bit_offset - num_bits; + // determine the number of whole bytes we have to rewind, rounding up to an + // integer number (e.g. if `new_offset == -5`, `bytes == 1`) + const i64 bytes = -(new_offset - 7) / 8; - in->ptr += bytes; - in->len -= bytes; + in->ptr -= bytes; + in->len += bytes; + // make sure the resulting `bit_offset` is positive, as mod in C does not + // convert numbers from negative to positive (e.g. -22 % 8 == -6) in->bit_offset = ((new_offset % 8) + 8) % 8; } @@ -1683,33 +1702,26 @@ static inline ostream_t IO_make_ostream(u8 *out, size_t len) { /// Returns an `istream_t` constructed from the given pointer and length static inline istream_t IO_make_istream(const u8 *in, size_t len) { - return (istream_t) { in, 0, len }; + return (istream_t) { in, len, 0 }; } /// Returns an `istream_t` with the same base as `in`, and length `len` /// Then, advance `in` to account for the consumed bytes /// `in` must be byte aligned static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) { - if (len > in->len) { - INP_SIZE(); - } - if (in->bit_offset != 0) { - UNALIGNED(); - } - const istream_t sub = { in->ptr, in->bit_offset, len }; + // Consume `len` bytes of the parent stream + const u8 *const ptr = IO_read_bytes(in, len); - in->ptr += len; - in->len -= len; - - return sub; + // Make a substream using the pointer to those `len` bytes + return IO_make_istream(ptr, len); } /******* END IO STREAM OPERATIONS *********************************************/ /******* BITSTREAM OPERATIONS *************************************************/ /// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits -static inline u64 read_bits_LE(const u8 *src, const int num, +static inline u64 read_bits_LE(const u8 *src, const int num_bits, const size_t offset) { - if (num > 64) { + if (num_bits > 64) { ERROR("Attempt to read an invalid number of bits"); } @@ -1719,10 +1731,10 @@ static inline u64 read_bits_LE(const u8 *src, const int num, u64 res = 0; int shift = 0; - int left = num; + int left = num_bits; while (left > 0) { u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1); - // Dead the next byte, shift it to account for the offset, and then mask + // Read the next byte, shift it to account for the offset, and then mask // out the top part if we don't need all the bits res += (((u64)*src++ >> bit_offset) & mask) << shift; shift += 8 - bit_offset; @@ -1761,7 +1773,7 @@ static inline u64 STREAM_read_bits(const u8 *const src, const int bits, /******* BIT COUNTING OPERATIONS **********************************************/ /// Returns `x`, where `2^x` is the largest power of 2 less than or equal to /// `num`, or `-1` if `num == 0`. -static inline int log2inf(const u64 num) { +static inline int highest_set_bit(const u64 num) { for (int i = 63; i >= 0; i--) { if (((u64)1 << i) <= num) { return i; @@ -1813,17 +1825,18 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable, // final-bit-flag. Consequently, a last byte of 0 is not possible. And the // final-bit-flag itself is not part of the useful bitstream. Hence, the // last byte contains between 0 and 7 useful bits." - const int padding = 8 - log2inf(src[len - 1]); + const int padding = 8 - highest_set_bit(src[len - 1]); - i64 offset = len * 8 - padding; + // Offset starts at the end because HUF streams are read backwards + i64 bit_offset = len * 8 - padding; u16 state; - HUF_init_state(dtable, &state, src, &offset); + HUF_init_state(dtable, &state, src, &bit_offset); size_t symbols_written = 0; - while (offset > -dtable->max_bits) { + while (bit_offset > -dtable->max_bits) { // Iterate over the stream, decoding one symbol at a time - IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &offset)); + IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset)); symbols_written++; } // "The process continues up to reading the required number of symbols per @@ -1836,7 +1849,7 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable, // before the start of `src` // Therefore `offset`, the edge to start reading new bits at, should be // dtable->max_bits before the start of the stream - if (offset != -dtable->max_bits) { + if (bit_offset != -dtable->max_bits) { CORRUPTION(); } @@ -1960,7 +1973,7 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table, } // Find the first power of 2 larger than the sum - const int max_bits = log2inf(weight_sum) + 1; + const int max_bits = highest_set_bit(weight_sum) + 1; const u64 left_over = ((u64)1 << max_bits) - weight_sum; // If the left over isn't a power of 2, the weights are invalid if (left_over & (left_over - 1)) { @@ -1969,7 +1982,7 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table, // left_over is used to find the last weight as it's not transmitted // by inverting 2^(weight - 1) we can determine the value of last_weight - const int last_weight = log2inf(left_over) + 1; + const int last_weight = highest_set_bit(left_over) + 1; for (int i = 0; i < num_symbs; i++) { // "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0" @@ -2063,7 +2076,7 @@ static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable, // final-bit-flag. Consequently, a last byte of 0 is not possible. And the // final-bit-flag itself is not part of the useful bitstream. Hence, the // last byte contains between 0 and 7 useful bits." - const int padding = 8 - log2inf(src[len - 1]); + const int padding = 8 - highest_set_bit(src[len - 1]); i64 offset = len * 8 - padding; u16 state1, state2; @@ -2184,7 +2197,7 @@ static void FSE_init_dtable(FSE_dtable *const dtable, u16 next_state_desc = state_desc[symbol]++; // Fills in the table appropriately, next_state_desc increases by symbol // over time, decreasing number of bits - dtable->num_bits[i] = (u8)(accuracy_log - log2inf(next_state_desc)); + dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc)); // Baseline increases until the bit threshold is passed, at which point // it resets to 0 dtable->new_state_base[i] = @@ -2235,7 +2248,7 @@ static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in, int symb = 0; while (remaining > 0 && symb < FSE_MAX_SYMBS) { // Log of the number of possible values we could read - int bits = log2inf(remaining + 1) + 1; + int bits = highest_set_bit(remaining + 1) + 1; u16 val = IO_read_bits(in, bits); diff --git a/doc/images/Cspeed4.png b/doc/images/Cspeed4.png Binary files differindex f0ca0ffba9c4..318204c00e96 100644 --- a/doc/images/Cspeed4.png +++ b/doc/images/Cspeed4.png diff --git a/doc/images/Dspeed4.png b/doc/images/Dspeed4.png Binary files differindex eba485d0d1ed..b7baef1ff3f7 100644 --- a/doc/images/Dspeed4.png +++ b/doc/images/Dspeed4.png diff --git a/doc/images/dict-cr.png b/doc/images/dict-cr.png Binary files differindex f555a46c7b99..f3a9ce2bdda6 100644 --- a/doc/images/dict-cr.png +++ b/doc/images/dict-cr.png diff --git a/doc/images/dict-cs.png b/doc/images/dict-cs.png Binary files differindex ccc02b0d1713..55e5ef518fed 100644 --- a/doc/images/dict-cs.png +++ b/doc/images/dict-cs.png diff --git a/doc/images/dict-ds.png b/doc/images/dict-ds.png Binary files differindex 858cad685509..1153f1b95fd5 100644 --- a/doc/images/dict-ds.png +++ b/doc/images/dict-ds.png diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index d4b46548a3d2..1f212fea2305 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -16,7 +16,8 @@ Distribution of this document is unlimited. ### Version -0.2.4 (17/02/17) +0.2.5 (31/03/17) + Introduction ------------ @@ -109,7 +110,7 @@ The structure of a single Zstandard frame is following: __`Magic_Number`__ -4 Bytes, little-endian format. +4 Bytes, __little-endian__ format. Value : 0xFD2FB528 __`Frame_Header`__ @@ -127,7 +128,7 @@ An optional 32-bit checksum, only present if `Content_Checksum_flag` is set. The content checksum is the result of [xxh64() hash function](http://www.xxhash.org) digesting the original (decoded) data as input, and a seed of zero. -The low 4 bytes of the checksum are stored in little endian format. +The low 4 bytes of the checksum are stored in __little-endian__ format. ### `Frame_Header` @@ -154,41 +155,42 @@ Decoding this byte is enough to tell the size of `Frame_Header`. | 2 | `Content_Checksum_flag` | | 1-0 | `Dictionary_ID_flag` | -In this table, bit 7 the is highest bit, while bit 0 the is lowest. +In this table, bit 7 is the highest bit, while bit 0 is the lowest one. __`Frame_Content_Size_flag`__ This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`), -specifying if decompressed data size is provided within the header. -The `Flag_Value` can be converted into `Field_Size`, +specifying if `Frame_Content_Size` (the decompressed data size) +is provided within the header. +`Flag_Value` provides `FCS_Field_Size`, which is the number of bytes used by `Frame_Content_Size` according to the following table: -|`Flag_Value`| 0 | 1 | 2 | 3 | -| ---------- | ------ | --- | --- | --- | -|`Field_Size`| 0 or 1 | 2 | 4 | 8 | +| `Flag_Value` | 0 | 1 | 2 | 3 | +| -------------- | ------ | --- | --- | --- | +|`FCS_Field_Size`| 0 or 1 | 2 | 4 | 8 | -When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` : +When `Flag_Value` is `0`, `FCS_Field_Size` depends on `Single_Segment_flag` : if `Single_Segment_flag` is set, `Field_Size` is 1. -Otherwise, `Field_Size` is 0 (content size not provided). +Otherwise, `Field_Size` is 0 : `Frame_Content_Size` is not provided. __`Single_Segment_flag`__ If this flag is set, data must be regenerated within a single continuous memory segment. -In this case, `Frame_Content_Size` is necessarily present, -but `Window_Descriptor` byte is skipped. +In this case, `Window_Descriptor` byte is skipped, +but `Frame_Content_Size` is necessarily present. As a consequence, the decoder must allocate a memory segment of size equal or bigger than `Frame_Content_Size`. In order to preserve the decoder from unreasonable memory requirements, -a decoder can reject a compressed frame +a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB. -This is just a recommendation, +This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations. @@ -224,37 +226,38 @@ It also specifies the size of this field as `Field_Size`. #### `Window_Descriptor` -Provides guarantees on maximum back-reference distance -that will be used within compressed data. +Provides guarantees on minimum memory buffer required to decompress a frame. This information is important for decoders to allocate enough memory. -The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set. -In this case, the maximum back-reference distance is the content size itself, -which can be any value from 1 to 2^64-1 bytes (16 EB). +The `Window_Descriptor` byte is optional. +When `Single_Segment_flag` is set, `Window_Descriptor` is not present. +In this case, `Window_Size` is `Frame_Content_Size`, +which can be any value from 0 to 2^64-1 bytes (16 ExaBytes). | Bit numbers | 7-3 | 2-0 | | ----------- | ---------- | ---------- | | Field name | `Exponent` | `Mantissa` | -Maximum distance is given by the following formulas : +The minimum memory buffer size is called `Window_Size`. +It is described by the following formulas : ``` windowLog = 10 + Exponent; windowBase = 1 << windowLog; windowAdd = (windowBase / 8) * Mantissa; Window_Size = windowBase + windowAdd; ``` -The minimum window size is 1 KB. -The maximum size is `15*(1<<38)` bytes, which is 1.875 TB. +The minimum `Window_Size` is 1 KB. +The maximum `Window_Size` is `(1<<41) + 7*(1<<38)` bytes, which is 3.75 TB. To properly decode compressed data, a decoder will need to allocate a buffer of at least `Window_Size` bytes. In order to preserve decoder from unreasonable memory requirements, -a decoder can refuse a compressed frame +a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. For improved interoperability, -decoders are recommended to be compatible with window sizes of 8 MB, +decoders are recommended to be compatible with `Window_Size >= 8 MB`, and encoders are recommended to not request more than 8 MB. It's merely a recommendation though, decoders are free to support larger or lower limits, @@ -264,112 +267,118 @@ depending on local limitations. This is a variable size field, which contains the ID of the dictionary required to properly decode the frame. -Note that this field is optional. When it's not present, +`Dictionary_ID` field is optional. When it's not present, it's up to the decoder to make sure it uses the correct dictionary. -Format is little-endian. Field size depends on `Dictionary_ID_flag`. 1 byte can represent an ID 0-255. 2 bytes can represent an ID 0-65535. 4 bytes can represent an ID 0-4294967295. +Format is __little-endian__. It's allowed to represent a small ID (for example `13`) -with a large 4-bytes dictionary ID, losing some compacity in the process. +with a large 4-bytes dictionary ID, even if it is less efficient. _Reserved ranges :_ If the frame is going to be distributed in a private environment, any dictionary ID can be used. However, for public distribution of compressed frames using a dictionary, -the following ranges are reserved for future use and should not be used : -- low range : 1 - 32767 -- high range : >= (2^31) - +the following ranges are reserved and shall not be used : +- low range : `<= 32767` +- high range : `>= (1 << 31)` #### `Frame_Content_Size` This is the original (uncompressed) size. This information is optional. -The `Field_Size` is provided according to value of `Frame_Content_Size_flag`. -The `Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes. -Format is little-endian. - -| `Field_Size` | Range | -| ------------ | ---------- | -| 1 | 0 - 255 | -| 2 | 256 - 65791| -| 4 | 0 - 2^32-1 | -| 8 | 0 - 2^64-1 | - -When `Field_Size` is 1, 4 or 8 bytes, the value is read directly. -When `Field_Size` is 2, _the offset of 256 is added_. +`Frame_Content_Size` uses a variable number of bytes, provided by `FCS_Field_Size`. +`FCS_Field_Size` is provided by the value of `Frame_Content_Size_flag`. +`FCS_Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes. + +| `FCS_Field_Size` | Range | +| ---------------- | ---------- | +| 0 | unknown | +| 1 | 0 - 255 | +| 2 | 256 - 65791| +| 4 | 0 - 2^32-1 | +| 8 | 0 - 2^64-1 | + +`Frame_Content_Size` format is __little-endian__. +When `FCS_Field_Size` is 1, 4 or 8 bytes, the value is read directly. +When `FCS_Field_Size` is 2, _the offset of 256 is added_. It's allowed to represent a small size (for example `18`) using any compatible variant. + Blocks ------- -After the magic number and header of each block, -there are some number of blocks. -Each frame must have at least one block but there is no upper limit -on the number of blocks per frame. + +After `Magic_Number` and `Frame_Header`, there are some number of blocks. +Each frame must have at least one block, +but there is no upper limit on the number of blocks per frame. The structure of a block is as follows: -| `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` | -|:------------:|:------------:|:------------:|:---------------:| -| 1 bit | 2 bits | 21 bits | n bytes | +| `Block_Header` | `Block_Content` | +|:--------------:|:---------------:| +| 3 bytes | n bytes | + +`Block_Header` uses 3 bytes, written using __little-endian__ convention. +It contains 3 fields : -The block header (`Last_Block`, `Block_Type`, and `Block_Size`) uses 3-bytes. +| `Last_Block` | `Block_Type` | `Block_Size` | +|:------------:|:------------:|:------------:| +| bit 0 | bits 1-2 | bits 3-23 | __`Last_Block`__ The lowest bit signals if this block is the last one. -The frame will end after this one. +The frame will end after this last block. It may be followed by an optional `Content_Checksum` (see [Zstandard Frames](#zstandard-frames)). -__`Block_Type` and `Block_Size`__ - -The next 2 bits represent the `Block_Type`, -while the remaining 21 bits represent the `Block_Size`. -Format is __little-endian__. +__`Block_Type`__ +The next 2 bits represent the `Block_Type`. There are 4 block types : -| Value | 0 | 1 | 2 | 3 | +| Value | 0 | 1 | 2 | 3 | | ------------ | ----------- | ----------- | ------------------ | --------- | | `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`| - `Raw_Block` - this is an uncompressed block. - `Block_Content` contains `Block_Size` bytes to read and copy - as decoded data. + `Block_Content` contains `Block_Size` bytes. -- `RLE_Block` - this is a single byte, repeated N times. - `Block_Content` consists of a single byte, - and `Block_Size` is the number of times this byte should be repeated. +- `RLE_Block` - this is a single byte, repeated `Block_Size` times. + `Block_Content` consists of a single byte. + On the decompression side, this byte must be repeated `Block_Size` times. - `Compressed_Block` - this is a [Zstandard compressed block](#compressed-blocks), explained later on. `Block_Size` is the length of `Block_Content`, the compressed data. - The decompressed size is unknown, + The decompressed size is not known, but its maximum possible value is guaranteed (see below) - `Reserved` - this is not a block. This value cannot be used with current version of this specification. +__`Block_Size`__ + +The upper 21 bits of `Block_Header` represent the `Block_Size`. + Block sizes must respect a few rules : -- In compressed mode, compressed size is always strictly less than decompressed size. -- Block decompressed size is always <= maximum back-reference distance. +- For `Compressed_Block`, `Block_Size` is always strictly less than decompressed size. +- Block decompressed size is always <= `Window_Size` - Block decompressed size is always <= 128 KB. -A data block is not necessarily "full" : -since an arbitrary “flush” may happen anytime, -block decompressed content can be any size (even empty), +A block can contain any number of bytes (even empty), up to `Block_Maximum_Decompressed_Size`, which is the smallest of : -- Maximum back-reference distance +- `Window_Size` - 128 KB + Compressed Blocks ----------------- -To decompress a compressed block, the compressed size must be provided from -`Block_Size` field in the block header. +To decompress a compressed block, the compressed size must be provided +from `Block_Size` field within `Block_Header`. A compressed block consists of 2 sections : - [Literals Section](#literals-section) @@ -381,36 +390,34 @@ data in [Sequence Execution](#sequence-execution) #### Prerequisites To decode a compressed block, the following elements are necessary : - Previous decoded data, up to a distance of `Window_Size`, - or all previous data when `Single_Segment_flag` is set. -- List of "recent offsets" from the previous compressed block. -- Decoding tables of the previous compressed block for each symbol type + or all previously decoded data when `Single_Segment_flag` is set. +- List of "recent offsets" from previous `Compressed_Block`. +- Decoding tables of previous `Compressed_Block` for each symbol type (literals, literals lengths, match lengths, offsets). Literals Section ---------------- -During sequence execution, symbols from the literals section -During sequence phase, literals will be entangled with match copy operations. All literals are regrouped in the first part of the block. -They can be decoded first, and then copied during sequence operations, -or they can be decoded on the flow, as needed by sequence commands. - -| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] | -| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- | +They can be decoded first, and then copied during [Sequence Execution], +or they can be decoded on the flow during [Sequence Execution]. Literals can be stored uncompressed or compressed using Huffman prefix codes. When compressed, an optional tree description can be present, followed by 1 or 4 streams. +| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] | +| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- | + #### `Literals_Section_Header` Header is in charge of describing how literals are packed. It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, -using little-endian convention. +using __little-endian__ convention. | `Literals_Block_Type` | `Size_Format` | `Regenerated_Size` | [`Compressed_Size`] | -| --------------------- | ------------- | ------------------ | ----------------- | -| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits | +| --------------------- | ------------- | ------------------ | ------------------- | +| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits | In this representation, bits on the left are the lowest bits. @@ -418,33 +425,38 @@ __`Literals_Block_Type`__ This field uses 2 lowest bits of first byte, describing 4 different block types : -| `Literals_Block_Type` | Value | -| ----------------------------- | ----- | -| `Raw_Literals_Block` | 0 | -| `RLE_Literals_Block` | 1 | -| `Compressed_Literals_Block` | 2 | -| `Repeat_Stats_Literals_Block` | 3 | +| `Literals_Block_Type` | Value | +| --------------------------- | ----- | +| `Raw_Literals_Block` | 0 | +| `RLE_Literals_Block` | 1 | +| `Compressed_Literals_Block` | 2 | +| `Treeless_Literals_Block` | 3 | - `Raw_Literals_Block` - Literals are stored uncompressed. -- `RLE_Literals_Block` - Literals consist of a single byte value repeated N times. +- `RLE_Literals_Block` - Literals consist of a single byte value + repeated `Regenerated_Size` times. - `Compressed_Literals_Block` - This is a standard Huffman-compressed block, starting with a Huffman tree description. See details below. -- `Repeat_Stats_Literals_Block` - This is a Huffman-compressed block, +- `Treeless_Literals_Block` - This is a Huffman-compressed block, using Huffman tree _from previous Huffman-compressed literals block_. - Huffman tree description will be skipped. - Note: If this mode is used without any previous Huffman-table in the frame - (or [dictionary](#dictionary-format)), this should be treated as corruption. + `Huffman_Tree_Description` will be skipped. + Note: If this mode is triggered without any previous Huffman-table in the frame + (or [dictionary](#dictionary-format)), this should be treated as data corruption. __`Size_Format`__ `Size_Format` is divided into 2 families : -- For `Raw_Literals_Block` and `RLE_Literals_Block` it's enough to decode `Regenerated_Size`. -- For `Compressed_Block`, its required to decode both `Compressed_Size` - and `Regenerated_Size` (the decompressed size). It will also decode the number of streams. +- For `Raw_Literals_Block` and `RLE_Literals_Block`, + it's only necessary to decode `Regenerated_Size`. + There is no `Compressed_Size` field. +- For `Compressed_Block` and `Treeless_Literals_Block`, + it's required to decode both `Compressed_Size` + and `Regenerated_Size` (the decompressed size). + It's also necessary to decode the number of streams (1 or 4). -For values spanning several bytes, convention is little-endian. +For values spanning several bytes, convention is __little-endian__. __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ : @@ -463,9 +475,9 @@ __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ : Only Stream1 is present for these cases. Note : it's allowed to represent a short value (for example `13`) -using a long format, accepting the increased compressed data size. +using a long format, even if it's less efficient. -__`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block`__ : +__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ : - Value 00 : _A single stream_. Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023). @@ -480,67 +492,68 @@ __`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143). `Literals_Section_Header` has 5 bytes. -Both `Compressed_Size` and `Regenerated_Size` fields follow little-endian convention. -Note: `Compressed_Size` __includes__ the size of the Huffman Tree description if it -is present. +Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention. +Note: `Compressed_Size` __includes__ the size of the Huffman Tree description +_when_ it is present. ### Raw Literals Block -The data in Stream1 is `Regenerated_Size` bytes long, and contains the raw literals data -to be used in sequence execution. +The data in Stream1 is `Regenerated_Size` bytes long, +it contains the raw literals data to be used during [Sequence Execution]. ### RLE Literals Block Stream1 consists of a single byte which should be repeated `Regenerated_Size` times to generate the decoded literals. -### Compressed Literals Block and Repeat Stats Literals Block -Both of these modes contain Huffman encoded data +### Compressed Literals Block and Treeless Literals Block +Both of these modes contain Huffman encoded data. +`Treeless_Literals_Block` does not have a `Huffman_Tree_Description`. #### `Huffman_Tree_Description` This section is only present when `Literals_Block_Type` type is `Compressed_Literals_Block` (`2`). The format of the Huffman tree description can be found at [Huffman Tree description](#huffman-tree-description). -The size Huffman Tree description will be determined during the decoding process, -and must be used to determine where the compressed Huffman streams begin. +The size of `Huffman_Tree_Description` is determined during decoding process, +it must be used to determine where streams begin. +`Total_Streams_Size = Compressed_Size - Huffman_Tree_Description_Size`. -If repeat stats mode is used, the Huffman table used in the previous compressed block will -be used to decompress this block as well. +For `Treeless_Literals_Block`, +the Huffman table comes from previously compressed literals block. -Huffman compressed data consists either 1 or 4 Huffman-coded streams. +Huffman compressed data consists of either 1 or 4 Huffman-coded streams. If only one stream is present, it is a single bitstream occupying the entire -remaining portion of the literals block, encoded as described at +remaining portion of the literals block, encoded as described within [Huffman-Coded Streams](#huffman-coded-streams). If there are four streams, the literals section header only provides enough -information to know the regenerated and compressed sizes of all four streams combined. -The regenerated size of each stream is equal to `(totalSize+3)/4`, except for the last stream, -which may be up to 3 bytes smaller, to reach a total decompressed size match that described -in the literals header. +information to know the decompressed and compressed sizes of all four streams _combined_. +The decompressed size of each stream is equal to `(Regenerated_Size+3)/4`, +except for the last stream which may be up to 3 bytes smaller, +to reach a total decompressed size as specified in `Regenerated_Size`. -The compressed size of each stream is provided explicitly: the first 6 bytes of the compressed -data consist of three 2-byte little endian fields, describing the compressed sizes -of the first three streams. -The last streams size is computed from the total compressed size and the size of the other -three streams. +The compressed size of each stream is provided explicitly: +the first 6 bytes of the compressed data consist of three 2-byte __little-endian__ fields, +describing the compressed sizes of the first three streams. +`Stream4_Size` is computed from total `Total_Streams_Size` minus sizes of other streams. -`stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize`. +`Stream4_Size = Total_Streams_Size - 6 - Stream1_Size - Stream2_Size - Stream3_Size`. -Note: remember that totalCSize may be smaller than the `Compressed_Size` found in the literals -block header as `Compressed_Size` also contains the size of the Huffman Tree description if it -is present. +Note: remember that `Total_Streams_Size` can be smaller than `Compressed_Size` in header, +because `Compressed_Size` also contains `Huffman_Tree_Description_Size` when it is present. Each of these 4 bitstreams is then decoded independently as a Huffman-Coded stream, as described at [Huffman-Coded Streams](#huffman-coded-streams) + Sequences Section ----------------- A compressed block is a succession of _sequences_ . A sequence is a literal copy command, followed by a match copy command. A literal copy command specifies a length. -It is the number of bytes to be copied (or extracted) from the literal section. +It is the number of bytes to be copied (or extracted) from the Literals Section. A match copy command specifies an offset and a length. When all _sequences_ are decoded, -if there is are any literals left in the _literal section_, +if there are literals left in the _literal section_, these bytes are added at the end of the block. This is described in more detail in [Sequence Execution](#sequence-execution) @@ -557,7 +570,7 @@ followed by the bitstream. | -------------------------- | ------------------------- | ---------------- | ---------------------- | --------- | To decode the `Sequences_Section`, it's required to know its size. -This size is deduced from `blockSize - literalSectionSize`. +This size is deduced from `Block_Size - Literals_Section_Size`. #### `Sequences_Section_Header` @@ -572,7 +585,7 @@ This is a variable size field using between 1 and 3 bytes. Let's call its first byte `byte0`. - `if (byte0 == 0)` : there are no sequences. The sequence section stops there. - Regenerated content is defined entirely by literals section. + Decompressed content is defined entirely as Literals Section content. - `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte. - `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes. - `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes. @@ -581,14 +594,14 @@ __Symbol compression modes__ This is a single byte, defining the compression mode of each symbol type. -|Bit number| 7-6 | 5-4 | 3-2 | 1-0 | +|Bit number| 7-6 | 5-4 | 3-2 | 1-0 | | -------- | ----------------------- | -------------- | -------------------- | ---------- | |Field name| `Literals_Lengths_Mode` | `Offsets_Mode` | `Match_Lengths_Mode` | `Reserved` | The last field, `Reserved`, must be all-zeroes. `Literals_Lengths_Mode`, `Offsets_Mode` and `Match_Lengths_Mode` define the `Compression_Mode` of -literals lengths, offsets, and match lengths respectively. +literals lengths, offsets, and match lengths symbols respectively. They follow the same enumeration : @@ -598,17 +611,17 @@ They follow the same enumeration : - `Predefined_Mode` : A predefined FSE distribution table is used, defined in [default distributions](#default-distributions). - The table takes no space in the compressed data. + No distribution table will be present. - `RLE_Mode` : The table description consists of a single byte. - This code will be repeated for every sequence. + This code will be repeated for all sequences. - `Repeat_Mode` : The table used in the previous compressed block will be used again. No distribution table will be present. - Note: this includes RLE mode, so if repeat_mode follows rle_mode the same symbol will be repeated. + Note: this includes RLE mode, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated. If this mode is used without any previous sequence table in the frame (or [dictionary](#dictionary-format)) to repeat, this should be treated as corruption. - `FSE_Compressed_Mode` : standard FSE compression. A distribution table will be present. - The format of this distribution table is described in (FSE Table Description)[#fse-table-description]. + The format of this distribution table is described in [FSE Table Description](#fse-table-description). Note that the maximum allowed accuracy log for literals length and match length tables is 9, and the maximum accuracy log for the offsets table is 8. @@ -625,7 +638,7 @@ Literals length codes are values ranging from `0` to `35` included. They define lengths from 0 to 131071 bytes. The literals length is equal to the decoded `Baseline` plus the result of reading `Number_of_Bits` bits from the bitstream, -as a little-endian value. +as a __little-endian__ value. | `Literals_Length_Code` | 0-15 | | ---------------------- | ---------------------- | @@ -654,7 +667,7 @@ Match length codes are values ranging from `0` to `52` included. They define lengths from 3 to 131074 bytes. The match length is equal to the decoded `Baseline` plus the result of reading `Number_of_Bits` bits from the bitstream, -as a little-endian value. +as a __little-endian__ value. | `Match_Length_Code` | 0-31 | | ------------------- | ----------------------- | @@ -685,7 +698,7 @@ Recommendation is to support at least up to `22`. For information, at the time of this writing. the reference decoder supports a maximum `N` value of `28` in 64-bits mode. -An offset code is also the number of additional bits to read in little-endian fashion, +An offset code is also the number of additional bits to read in __little-endian__ fashion, and can be translated into an `Offset_Value` using the following formulas : ``` @@ -720,8 +733,8 @@ begins. FSE decoding requires a 'state' to be carried from symbol to symbol. For more explanation on FSE decoding, see the [FSE section](#fse). -For sequence decoding, a separate state must be kept track of for each of -literal lengths, offsets, and match lengths. +For sequence decoding, a separate state keeps track of each +literal lengths, offsets, and match lengths symbols. Some FSE primitives are also used. For more details on the operation of these primitives, see the [FSE section](#fse). @@ -753,8 +766,7 @@ See the [description of the codes] for how to determine these values. [description of the codes]: #the-codes-for-literals-lengths-match-lengths-and-offsets Decoding starts by reading the `Number_of_Bits` required to decode `Offset`. -It then does the same for `Match_Length`, -and then for `Literals_Length`. +It then does the same for `Match_Length`, and then for `Literals_Length`. This sequence is then used for [sequence execution](#sequence-execution). If it is not the last sequence in the block, @@ -807,6 +819,7 @@ short offsetCodes_defaultDistribution[29] = 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; ``` + Sequence Execution ------------------ Once literals and sequences have been decoded, @@ -826,7 +839,8 @@ in this case. The offset is defined as from the current position, so an offset of 6 and a match length of 3 means that 3 bytes should be copied from 6 bytes back. -Note that all offsets must be at most equal to the window size defined by the frame header. +Note that all offsets leading to previously decoded data +must be smaller than `Window_Size` defined in `Frame_Header_Descriptor`. #### Repeat offsets As seen in [Sequence Execution](#sequence-execution), @@ -842,11 +856,10 @@ so an `offset_value` of 1 means `Repeated_Offset2`, an `offset_value` of 2 means `Repeated_Offset3`, and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`. -In the first block, the offset history is populated with the following values : 1, 4 and 8 (in order). +For the first block, the starting offset history is populated with the following values : 1, 4 and 8 (in order). -Then each block gets its starting offset history from the ending values of the most recent compressed block. -Note that non-compressed blocks are skipped, -they do not contribute to offset history. +Then each block gets its starting offset history from the ending values of the most recent `Compressed_Block`. +Note that blocks which are not `Compressed_Block` are skipped, they do not contribute to offset history. [Offset Codes]: #offset-codes @@ -859,6 +872,7 @@ This means that when `Repeated_Offset1` (most recent) is used, history is unmodi When `Repeated_Offset2` is used, it's swapped with `Repeated_Offset1`. If any other offset is used, it becomes `Repeated_Offset1` and the rest are shift back by one. + Skippable Frames ---------------- @@ -878,7 +892,7 @@ Skippable frames defined in this specification are compatible with [LZ4] ones. __`Magic_Number`__ -4 Bytes, little-endian format. +4 Bytes, __little-endian__ format. Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F. All 16 values are valid to identify a skippable frame. @@ -886,13 +900,14 @@ __`Frame_Size`__ This is the size, in bytes, of the following `User_Data` (without including the magic number nor the size field itself). -This field is represented using 4 Bytes, little-endian format, unsigned 32-bits. +This field is represented using 4 Bytes, __little-endian__ format, unsigned 32-bits. This means `User_Data` can’t be bigger than (2^32-1) bytes. __`User_Data`__ The `User_Data` can be anything. Data will just be skipped by the decoder. + Entropy Encoding ---------------- Two types of entropy encoding are used by the Zstandard format: @@ -900,7 +915,7 @@ FSE, and Huffman coding. FSE --- -FSE, or FiniteStateEntropy is an entropy coding based on [ANS]. +FSE, short for Finite State Entropy, is an entropy codec based on [ANS]. FSE encoding/decoding involves a state that is carried over between symbols, so decoding must be done in the opposite direction as encoding. Therefore, all FSE bitstreams are read from end to beginning. @@ -909,15 +924,15 @@ For additional details on FSE, see [Finite State Entropy]. [Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/ -FSE decoding involves a decoding table which has a power of 2 size and three elements: +FSE decoding involves a decoding table which has a power of 2 size, and contain three elements: `Symbol`, `Num_Bits`, and `Baseline`. The `log2` of the table size is its `Accuracy_Log`. The FSE state represents an index in this table. -The next symbol in the stream is the symbol indicated by the table value for that state. -To obtain the next state value, -the decoder should consume `Num_Bits` bits from the stream as a little endian value and add it to baseline. -To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a little endian value. +To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a __little-endian__ value. +The next symbol in the stream is the `Symbol` indicated in the table for that state. +To obtain the next state value, +the decoder should consume `Num_Bits` bits from the stream as a __little-endian__ value and add it to `Baseline`. [ANS]: https://en.wikipedia.org/wiki/Asymmetric_Numeral_Systems @@ -929,7 +944,7 @@ An FSE distribution table describes the probabilities of all symbols from `0` to the last present one (included) on a normalized scale of `1 << Accuracy_Log` . -It's a bitstream which is read forward, in little-endian fashion. +It's a bitstream which is read forward, in __little-endian__ fashion. It's not necessary to know its exact size, since it will be discovered and reported by the decoding process. @@ -1064,7 +1079,7 @@ Huffman Coding -------------- Zstandard Huffman-coded streams are read backwards, similar to the FSE bitstreams. -Therefore, to find the start of the bitstream it is therefore necessary to +Therefore, to find the start of the bitstream, it is therefore to know the offset of the last byte of the Huffman-coded stream. After writing the last bit containing information, the compressor @@ -1077,7 +1092,7 @@ byte to read. The decompressor needs to skip 0-7 initial `0`-bits and the first `1`-bit it occurs. Afterwards, the useful part of the bitstream begins. -The bitstream contains Huffman-coded symbols in little-endian order, +The bitstream contains Huffman-coded symbols in __little-endian__ order, with the codes defined by the method below. ### Huffman Tree Description @@ -1182,14 +1197,14 @@ The Huffman header compression uses 2 states, which share the same FSE distribution table. The first state (`State1`) encodes the even indexed symbols, and the second (`State2`) encodes the odd indexes. -State1 is initialized first, and then State2, and they take turns decoding -a single symbol and updating their state. +`State1` is initialized first, and then `State2`, and they take turns +decoding a single symbol and updating their state. For more details on these FSE operations, see the [FSE section](#fse). The number of symbols to decode is determined by tracking bitStream overflow condition: If updating state after decoding a symbol would require more bits than -remain in the stream, it is assumed the extra bits are 0. Then, +remain in the stream, it is assumed that extra bits are 0. Then, the symbols for each of the final states are decoded and the process is complete. ##### Conversion from weights to Huffman prefix codes @@ -1245,7 +1260,7 @@ it would be encoded as: |Encoding|`0000`|`0001`|`01`|`1`| `10000` | Starting from the end, -it's possible to read the bitstream in a little-endian fashion, +it's possible to read the bitstream in a __little-endian__ fashion, keeping track of already used bits. Since the bitstream is encoded in reverse order, by starting at the end the symbols can be read in forward order. @@ -1258,13 +1273,14 @@ If a bitstream is not entirely and exactly consumed, hence reaching exactly its beginning position with _all_ bits consumed, the decoding process is considered faulty. + Dictionary Format ----------------- -Zstandard is compatible with "raw content" dictionaries, free of any format restriction, -except that they must be at least 8 bytes. -These dictionaries function as if they were just the `Content` block of a formatted -dictionary. +Zstandard is compatible with "raw content" dictionaries, +free of any format restriction, except that they must be at least 8 bytes. +These dictionaries function as if they were just the `Content` part +of a formatted dictionary. But dictionaries created by `zstd --train` follow a format, described here. @@ -1274,9 +1290,9 @@ __Pre-requisites__ : a dictionary has a size, | `Magic_Number` | `Dictionary_ID` | `Entropy_Tables` | `Content` | | -------------- | --------------- | ---------------- | --------- | -__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, little-endian format +__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, __little-endian__ format -__`Dictionary_ID`__ : 4 bytes, stored in little-endian format. +__`Dictionary_ID`__ : 4 bytes, stored in __little-endian__ format. `Dictionary_ID` can be any value, except 0 (which means no `Dictionary_ID`). It's used by decoders to check if they use the correct dictionary. @@ -1284,9 +1300,9 @@ _Reserved ranges :_ If the frame is going to be distributed in a private environment, any `Dictionary_ID` can be used. However, for public distribution of compressed frames, - the following ranges are reserved for future use and should not be used : + the following ranges are reserved and shall not be used : - - low range : 1 - 32767 + - low range : <= 32767 - high range : >= (2^31) __`Entropy_Tables`__ : following the same format as the tables in compressed blocks. @@ -1298,26 +1314,30 @@ __`Entropy_Tables`__ : following the same format as the tables in compressed blo These tables populate the Repeat Stats literals mode and Repeat distribution mode for sequence decoding. It's finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`), - stored in order, 4-bytes little-endian each, for a total of 12 bytes. + stored in order, 4-bytes __little-endian__ each, for a total of 12 bytes. Each recent offset must have a value < dictionary size. __`Content`__ : The rest of the dictionary is its content. The content act as a "past" in front of data to compress or decompress, so it can be referenced in sequence commands. As long as the amount of data decoded from this frame is less than or - equal to the window-size, sequence commands may specify offsets longer - than the lenght of total decoded output so far to reference back to the - dictionary. After the total output has surpassed the window size however, + equal to `Window_Size`, sequence commands may specify offsets longer + than the total length of decoded output so far to reference back to the + dictionary. After the total output has surpassed `Window_Size` however, this is no longer allowed and the dictionary is no longer accessible. [compressed blocks]: #the-format-of-compressed_block + + Appendix A - Decoding tables for predefined codes ------------------------------------------------- -This appendix contains FSE decoding tables for the predefined literal length, match length, and offset -codes. The tables have been constructed using the algorithm as given above in the -"from normalized distribution to decoding tables" chapter. The tables here can be used as examples -to crosscheck that an implementation implements the decoding table generation algorithm correctly. +This appendix contains FSE decoding tables +for the predefined literal length, match length, and offset codes. +The tables have been constructed using the algorithm as given above in chapter +"from normalized distribution to decoding tables". +The tables here can be used as examples +to crosscheck that an implementation build its decoding tables correctly. #### Literal Length Code: @@ -1496,6 +1516,7 @@ to crosscheck that an implementation implements the decoding table generation al Version changes --------------- +- 0.2.5 : minor typos and clarifications - 0.2.4 : section restructuring, by Sean Purcell - 0.2.3 : clarified several details, by Sean Purcell - 0.2.2 : added predefined codes, by Johannes Rudolph diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 204f56ea5f21..2e77e7742f60 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -1,10 +1,10 @@ <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> -<title>zstd 1.1.4 Manual</title> +<title>zstd 1.2.0 Manual</title> </head> <body> -<h1>zstd 1.1.4 Manual</h1> +<h1>zstd 1.2.0 Manual</h1> <hr> <a name="Contents"></a><h2>Contents</h2> <ol> @@ -57,46 +57,46 @@ <pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); -</b><p> Compresses `src` content as a single zstd compressed frame into already allocated `dst`. - Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). +</b><p> Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + @return : compressed size written into `dst` (<= `dstCapacity), + or an error code if it fails (which can be tested using ZSTD_isError()). </p></pre><BR> <pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); -</b><p> `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - `dstCapacity` is an upper bound of originalSize. - If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - or an errorCode if it fails (which can be tested using ZSTD_isError()). +</b><p> `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + `dstCapacity` is an upper bound of originalSize. + If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTD_isError()). </p></pre><BR> <pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); -</b><p> NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize. - ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single - frame, but distinguishes empty frames from frames with an unknown size, or errors. - - Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple - concatenated frames in one buffer, and so is more general. - As a result however, it requires more computation and entire frames to be passed to it, - as opposed to ZSTD_getFrameContentSize which requires only a single frame's header. - - 'src' is the start of a zstd compressed frame. - @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. - note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. - When `return==0`, data to decompress could be any size. - In which case, it's necessary to use streaming mode to decompress data. - Optionally, application can still use ZSTD_decompress() while relying on implied limits. - (For example, data may be necessarily cut into blocks <= 16 KB). - note 2 : decompressed size is always present when compression is done with ZSTD_compress() - note 3 : decompressed size can be very large (64-bits value), - potentially larger than what local system can handle as a single memory segment. - In which case, it's necessary to use streaming mode to decompress data. - note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. - Always ensure result fits within application's authorized limits. - Each application can set its own limits. - note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. +</b><p> NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize. + ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single + frame, but distinguishes empty frames from frames with an unknown size, or errors. + + Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple + concatenated frames in one buffer, and so is more general. + As a result however, it requires more computation and entire frames to be passed to it, + as opposed to ZSTD_getFrameContentSize which requires only a single frame's header. + + 'src' is the start of a zstd compressed frame. + @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. + note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + When `return==0`, data to decompress could be any size. + In which case, it's necessary to use streaming mode to decompress data. + Optionally, application can still use ZSTD_decompress() while relying on implied limits. + (For example, data may be necessarily cut into blocks <= 16 KB). + note 2 : decompressed size is always present when compression is done with ZSTD_compress() + note 3 : decompressed size can be very large (64-bits value), + potentially larger than what local system can handle as a single memory segment. + In which case, it's necessary to use streaming mode to decompress data. + note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + Always ensure result fits within application's authorized limits. + Each application can set its own limits. + note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. </p></pre><BR> <h3>Helper functions</h3><pre></pre><b><pre>int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b> @@ -106,28 +106,28 @@ const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable strin </pre></b><BR> <a name="Chapter4"></a><h2>Explicit memory management</h2><pre></pre> -<h3>Compression context</h3><pre> When compressing many times, - it is recommended to allocate a context just once, and re-use it for each successive compression operation. - This will make workload friendlier for system's memory. - Use one context per thread for parallel execution in multi-threaded environments. +<h3>Compression context</h3><pre> When compressing many times, + it is recommended to allocate a context just once, and re-use it for each successive compression operation. + This will make workload friendlier for system's memory. + Use one context per thread for parallel execution in multi-threaded environments. </pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTD_CCtx* ZSTD_createCCtx(void); size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); </pre></b><BR> <pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); -</b><p> Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). +</b><p> Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). </p></pre><BR> -<h3>Decompression context</h3><pre> When decompressing many times, - it is recommended to allocate a context just once, and re-use it for each successive compression operation. - This will make workload friendlier for system's memory. - Use one context per thread for parallel execution in multi-threaded environments. +<h3>Decompression context</h3><pre> When decompressing many times, + it is recommended to allocate a context just once, and re-use it for each successive compression operation. + This will make workload friendlier for system's memory. + Use one context per thread for parallel execution in multi-threaded environments. </pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx; ZSTD_DCtx* ZSTD_createDCtx(void); size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </pre></b><BR> <pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). +</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). </p></pre><BR> <a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre> @@ -169,9 +169,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict); -</b><p> Compression using a digested Dictionary. - Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. - Note that compression level is decided during dictionary creation. +</b><p> Compression using a digested Dictionary. + Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + Note that compression level is decided during dictionary creation. + Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) </p></pre><BR> <pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); @@ -399,7 +400,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v </p></pre><BR> <pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, - ZSTD_parameters params, ZSTD_customMem customMem); + ZSTD_compressionParameters cParams, ZSTD_customMem customMem); </b><p> Create a ZSTD_CDict using external alloc and free, and customized compression parameters </p></pre><BR> @@ -426,12 +427,19 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v both values are optional, select `0` if unknown. </p></pre><BR> -<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); -</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter +<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); +</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter +</p></pre><BR> + +<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams); +</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters </p></pre><BR> <a name="Chapter15"></a><h2>Advanced decompression functions</h2><pre></pre> @@ -491,20 +499,29 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v Note : this use case also happens when using a non-conformant dictionary. - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - This is not a Zstandard frame. - When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. + When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. </p></pre><BR> <a name="Chapter16"></a><h2>Advanced streaming functions</h2><pre></pre> <h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); </b>/**< size of CStream is variable, depending primarily on compression level */<b> size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */<b> size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< note: a dict will not be used if dict == NULL or dictSize < 8 */<b> size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b> size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); </b>/**< note : cdict will just be referenced, and must outlive compression session */<b> -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); </b>/**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */<b> -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams); </b>/**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */<b> </pre></b><BR> +<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); +</b><p> start a new compression job, using same parameters from previous job. + This is typically useful to skip dictionary loading stage, since it will re-use it in-place.. + Note that zcs must be init at least once before using ZSTD_resetCStream(). + pledgedSrcSize==0 means "srcSize unknown". + If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + @return : 0, or an error code (which can be tested using ZSTD_isError()) +</p></pre><BR> + <h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: a dict will not be used if dict == NULL or dictSize < 8 */<b> @@ -552,10 +569,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); <h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b> +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b> +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); </b>/* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */<b> size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */<b> -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */<b> -size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); </pre></b><BR> <a name="Chapter19"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre> A ZSTD_DCtx object is required to track streaming operations. @@ -640,19 +656,20 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - Compressing and decompressing require a context structure + Use ZSTD_createCCtx() and ZSTD_createDCtx() - It is necessary to init context before starting - + compression : ZSTD_compressBegin() - + decompression : ZSTD_decompressBegin() - + variants _usingDict() are also allowed - + copyCCtx() and copyDCtx() work too - - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - + If you need to compress more, cut data into multiple blocks - + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large. + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSizeMax() <= ZSTD_BLOCKSIZE_ABSOLUTEMAX + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. In which case, nothing is produced into `dst`. + User must test for such outcome and deal directly with uncompressed data + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! - + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history. - Use ZSTD_insertBlock() in such a case. + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. <BR></pre> <h3>Raw zstd block functions</h3><pre></pre><b><pre>size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); |
