summaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
authorBaptiste Daroussin <bapt@FreeBSD.org>2017-05-06 10:17:59 +0000
committerBaptiste Daroussin <bapt@FreeBSD.org>2017-05-06 10:17:59 +0000
commitffcbc2d7ba03067492045e4cbead519a3b3c27ef (patch)
treedf436f4253158a7d5a4875e54cd7d273dd5334a6 /doc
parentab984b3e51d32af796fe89e130f57bf58b8a14b2 (diff)
Notes
Diffstat (limited to 'doc')
-rw-r--r--doc/educational_decoder/zstd_decompress.c243
-rw-r--r--doc/images/Cspeed4.pngbin35361 -> 71276 bytes
-rw-r--r--doc/images/Dspeed4.pngbin8984 -> 24692 bytes
-rw-r--r--doc/images/dict-cr.pngbin23323 -> 90047 bytes
-rw-r--r--doc/images/dict-cs.pngbin25052 -> 93837 bytes
-rw-r--r--doc/images/dict-ds.pngbin27053 -> 89590 bytes
-rw-r--r--doc/zstd_compression_format.md387
-rw-r--r--doc/zstd_manual.html157
8 files changed, 419 insertions, 368 deletions
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index ae4eaa81c6ae..7c8d8114d401 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -27,16 +27,19 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
/// Get the decompressed size of an input stream so memory can be allocated in
/// advance
+/// Returns -1 if the size can't be determined
size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
/******* UTILITY MACROS AND TYPES *********************************************/
-// Max block size decompressed size is 128 KB and literal blocks must be smaller
-// than that
+// Max block size decompressed size is 128 KB and literal blocks can't be
+// larger than their block
#define MAX_LITERALS_SIZE ((size_t)128 * 1024)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+/// This decoder calls exit(1) when it encounters an error, however a production
+/// library should propagate error codes
#define ERROR(s) \
do { \
fprintf(stderr, "Error: %s\n", s); \
@@ -67,29 +70,31 @@ typedef int64_t i64;
/// decompression functions.
/*** IO STREAM OPERATIONS *************/
-/// These structs are the interface for IO, and do bounds checking on all
-/// operations. They should be used opaquely to ensure safety.
-/// Output is always done byte-by-byte
+/// ostream_t/istream_t are used to wrap the pointers/length data passed into
+/// ZSTD_decompress, so that all IO operations are safely bounds checked
+/// They are written/read forward, and reads are treated as little-endian
+/// They should be used opaquely to ensure safety
typedef struct {
u8 *ptr;
size_t len;
} ostream_t;
-/// Input often reads a few bits at a time, so maintain an internal offset
typedef struct {
const u8 *ptr;
- int bit_offset;
size_t len;
+
+ // Input often reads a few bits at a time, so maintain an internal offset
+ int bit_offset;
} istream_t;
/// The following two functions are the only ones that allow the istream to be
/// non-byte aligned
/// Reads `num` bits from a bitstream, and updates the internal offset
-static inline u64 IO_read_bits(istream_t *const in, const int num);
-/// Rewinds the stream by `num` bits
-static inline void IO_rewind_bits(istream_t *const in, const int num);
+static inline u64 IO_read_bits(istream_t *const in, const int num_bits);
+/// Backs-up the stream by `num` bits so they can be read again
+static inline void IO_rewind_bits(istream_t *const in, const int num_bits);
/// If the remaining bits in a byte will be unused, advance to the end of the
/// byte
static inline void IO_align_stream(istream_t *const in);
@@ -101,30 +106,31 @@ static inline void IO_write_byte(ostream_t *const out, u8 symb);
/// be byte aligned.
static inline size_t IO_istream_len(const istream_t *const in);
-/// Returns a pointer where `len` bytes can be read, and advances the internal
-/// state. The stream must be byte aligned.
+/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
+/// was skipped. The stream must be byte aligned.
static inline const u8 *IO_read_bytes(istream_t *const in, size_t len);
-/// Returns a pointer where `len` bytes can be written, and advances the internal
-/// state. The stream must be byte aligned.
+/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
+/// was skipped so it can be written to.
static inline u8 *IO_write_bytes(ostream_t *const out, size_t len);
/// Advance the inner state by `len` bytes. The stream must be byte aligned.
static inline void IO_advance_input(istream_t *const in, size_t len);
-/// Returns an `ostream_t` constructed from the given pointer and length
+/// Returns an `ostream_t` constructed from the given pointer and length.
static inline ostream_t IO_make_ostream(u8 *out, size_t len);
-/// Returns an `istream_t` constructed from the given pointer and length
+/// Returns an `istream_t` constructed from the given pointer and length.
static inline istream_t IO_make_istream(const u8 *in, size_t len);
-/// Returns an `istream_t` with the same base as `in`, and length `len`
-/// Then, advance `in` to account for the consumed bytes
-/// `in` must be byte aligned
+/// Returns an `istream_t` with the same base as `in`, and length `len`.
+/// Then, advance `in` to account for the consumed bytes.
+/// `in` must be byte aligned.
static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len);
/*** END IO STREAM OPERATIONS *********/
/*** BITSTREAM OPERATIONS *************/
-/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
-static inline u64 read_bits_LE(const u8 *src, const int num,
+/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits,
+/// and return them interpreted as a little-endian unsigned integer.
+static inline u64 read_bits_LE(const u8 *src, const int num_bits,
const size_t offset);
/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so
@@ -136,9 +142,8 @@ static inline u64 STREAM_read_bits(const u8 *src, const int bits,
/*** END BITSTREAM OPERATIONS *********/
/*** BIT COUNTING OPERATIONS **********/
-/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to
-/// `num`, or `-1` if `num == 0`.
-static inline int log2inf(const u64 num);
+/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0`
+static inline int highest_set_bit(const u64 num);
/*** END BIT COUNTING OPERATIONS ******/
/*** HUFFMAN PRIMITIVES ***************/
@@ -384,8 +389,8 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
parse_dictionary(&parsed_dict, (const u8 *)dict, dict_len);
}
- istream_t in = {(const u8 *)src, 0, src_len};
- ostream_t out = {(u8 *)dst, dst_len};
+ istream_t in = IO_make_istream(src, src_len);
+ ostream_t out = IO_make_ostream(dst, dst_len);
// "A content compressed by Zstandard is transformed into a Zstandard frame.
// Multiple frames can be appended into a single file or stream. A frame is
@@ -633,6 +638,7 @@ static void frame_context_apply_dict(frame_context_t *const ctx,
FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
+ // Copy the repeated offsets
memcpy(ctx->previous_offsets, dict->previous_offsets,
sizeof(ctx->previous_offsets));
}
@@ -668,7 +674,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
// number of bytes to read and copy."
const u8 *const read_ptr = IO_read_bytes(in, block_len);
u8 *const write_ptr = IO_write_bytes(out, block_len);
- //
+
// Copy the raw data into the output
memcpy(write_ptr, read_ptr, block_len);
@@ -682,7 +688,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
const u8 *const read_ptr = IO_read_bytes(in, 1);
u8 *const write_ptr = IO_write_bytes(out, block_len);
- // Copy `block_len` copies of `streams->src[0]` to the output
+ // Copy `block_len` copies of `read_ptr[0]` to the output
memset(write_ptr, read_ptr[0], block_len);
ctx->current_total_output += block_len;
@@ -751,7 +757,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
u8 **const literals,
const int block_type,
const int size_format);
-static void decode_huf_table(istream_t *const in, HUF_dtable *const dtable);
+static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in);
static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
int *const num_symbs);
@@ -894,12 +900,12 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
istream_t huf_stream = IO_make_sub_istream(in, compressed_size);
if (block_type == 2) {
- // Decode provided Huffman table
+ // Decode the provided Huffman table
// "This section is only present when Literals_Block_Type type is
// Compressed_Literals_Block (2)."
HUF_free_dtable(&ctx->literals_dtable);
- decode_huf_table(&huf_stream, &ctx->literals_dtable);
+ decode_huf_table(&ctx->literals_dtable, &huf_stream);
} else {
// If the previous Huffman table is being repeated, ensure it exists
if (!ctx->literals_dtable.symbols) {
@@ -922,13 +928,13 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
}
// Decode the Huffman table description
-static void decode_huf_table(istream_t *const in, HUF_dtable *const dtable) {
- const u8 header = IO_read_bits(in, 8);
-
+static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) {
// "All literal values from zero (included) to last present one (excluded)
// are represented by Weight with values from 0 to Max_Number_of_Bits."
// "This is a single byte value (0-255), which describes how to decode the list of weights."
+ const u8 header = IO_read_bits(in, 8);
+
u8 weights[HUF_MAX_SYMBS];
memset(weights, 0, sizeof(weights));
@@ -997,7 +1003,7 @@ typedef struct {
u16 ll_state;
u16 of_state;
u16 ml_state;
-} sequence_state_t;
+} sequence_states_t;
/// Different modes to signal to decode_seq_tables what to do
typedef enum {
@@ -1052,10 +1058,10 @@ static void decompress_sequences(frame_context_t *const ctx,
istream_t *const in,
sequence_command_t *const sequences,
const size_t num_sequences);
-static sequence_command_t decode_sequence(sequence_state_t *const state,
+static sequence_command_t decode_sequence(sequence_states_t *const state,
const u8 *const src,
i64 *const offset);
-static void decode_seq_table(istream_t *const in, FSE_dtable *const table,
+static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
const seq_part_t type, const seq_mode_t mode);
static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
@@ -1131,34 +1137,33 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
// Offsets
// Match Lengths"
// Update the tables we have stored in the context
- decode_seq_table(in, &ctx->ll_dtable, seq_literal_length,
+ decode_seq_table(&ctx->ll_dtable, in, seq_literal_length,
(compression_modes >> 6) & 3);
- decode_seq_table(in, &ctx->of_dtable, seq_offset,
+ decode_seq_table(&ctx->of_dtable, in, seq_offset,
(compression_modes >> 4) & 3);
- decode_seq_table(in, &ctx->ml_dtable, seq_match_length,
+ decode_seq_table(&ctx->ml_dtable, in, seq_match_length,
(compression_modes >> 2) & 3);
- // Check to make sure none of the tables are uninitialized
- if (!ctx->ll_dtable.symbols || !ctx->of_dtable.symbols ||
- !ctx->ml_dtable.symbols) {
- CORRUPTION();
- }
- sequence_state_t state;
- // Copy the context's tables into the local state
- memcpy(&state.ll_table, &ctx->ll_dtable, sizeof(FSE_dtable));
- memcpy(&state.of_table, &ctx->of_dtable, sizeof(FSE_dtable));
- memcpy(&state.ml_table, &ctx->ml_dtable, sizeof(FSE_dtable));
+ sequence_states_t states;
- size_t len = IO_istream_len(in);
+ // Initialize the decoding tables
+ {
+ states.ll_table = ctx->ll_dtable;
+ states.of_table = ctx->of_dtable;
+ states.ml_table = ctx->ml_dtable;
+ }
+
+ const size_t len = IO_istream_len(in);
const u8 *const src = IO_read_bytes(in, len);
// "After writing the last bit containing information, the compressor writes
// a single 1-bit and then fills the byte with 0-7 0 bits of padding."
- const int padding = 8 - log2inf(src[len - 1]);
- i64 offset = len * 8 - padding;
+ const int padding = 8 - highest_set_bit(src[len - 1]);
+ // The offset starts at the end because FSE streams are read backwards
+ i64 bit_offset = len * 8 - padding;
// "The bitstream starts with initial state values, each using the required
// number of bits in their respective accuracy, decoded previously from
@@ -1166,24 +1171,22 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
//
// It starts by Literals_Length_State, followed by Offset_State, and finally
// Match_Length_State."
- FSE_init_state(&state.ll_table, &state.ll_state, src, &offset);
- FSE_init_state(&state.of_table, &state.of_state, src, &offset);
- FSE_init_state(&state.ml_table, &state.ml_state, src, &offset);
+ FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset);
+ FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset);
+ FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset);
for (size_t i = 0; i < num_sequences; i++) {
// Decode sequences one by one
- sequences[i] = decode_sequence(&state, src, &offset);
+ sequences[i] = decode_sequence(&states, src, &bit_offset);
}
- if (offset != 0) {
+ if (bit_offset != 0) {
CORRUPTION();
}
-
- // Don't free tables so they can be used in the next block
}
// Decode a single sequence and update the state
-static sequence_command_t decode_sequence(sequence_state_t *const state,
+static sequence_command_t decode_sequence(sequence_states_t *const states,
const u8 *const src,
i64 *const offset) {
// "Each symbol is a code in its own context, which specifies Baseline and
@@ -1191,9 +1194,9 @@ static sequence_command_t decode_sequence(sequence_state_t *const state,
// additional bits in the same bitstream."
// Decode symbols, but don't update states
- const u8 of_code = FSE_peek_symbol(&state->of_table, state->of_state);
- const u8 ll_code = FSE_peek_symbol(&state->ll_table, state->ll_state);
- const u8 ml_code = FSE_peek_symbol(&state->ml_table, state->ml_state);
+ const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state);
+ const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state);
+ const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state);
// Offset doesn't need a max value as it's not decoded using a table
if (ll_code > SEQ_MAX_CODES[seq_literal_length] ||
@@ -1221,17 +1224,18 @@ static sequence_command_t decode_sequence(sequence_state_t *const state,
// then Offset_State."
// If the stream is complete don't read bits to update state
if (*offset != 0) {
- FSE_update_state(&state->ll_table, &state->ll_state, src, offset);
- FSE_update_state(&state->ml_table, &state->ml_state, src, offset);
- FSE_update_state(&state->of_table, &state->of_state, src, offset);
+ FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
+ FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
+ FSE_update_state(&states->of_table, &states->of_state, src, offset);
}
return seq;
}
/// Given a sequence part and table mode, decode the FSE distribution
-static void decode_seq_table(istream_t *const in, FSE_dtable *const table,
- const seq_part_t type, const seq_mode_t mode) {
+/// Errors if the mode is `seq_repeat` without a pre-existing table in `table`
+static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
+ const seq_part_t type, const seq_mode_t mode) {
// Constant arrays indexed by seq_part_t
const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST,
SEQ_OFFSET_DEFAULT_DIST,
@@ -1272,12 +1276,17 @@ static void decode_seq_table(istream_t *const in, FSE_dtable *const table,
// "Repeat_Mode : re-use distribution table from previous compressed
// block."
// Nothing to do here, table will be unchanged
+ if (!table->symbols) {
+ // This mode is invalid if we don't already have a table
+ CORRUPTION();
+ }
break;
default:
// Impossible, as mode is from 0-3
IMPOSSIBLE();
break;
}
+
}
/******* END SEQUENCE DECODING ************************************************/
@@ -1296,6 +1305,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
const sequence_command_t seq = sequences[i];
{
+ // If the sequence asks for more literals than are left, the
+ // sequence must be corrupted
if (seq.literal_length > IO_istream_len(&litstream)) {
CORRUPTION();
}
@@ -1336,7 +1347,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
// as per the exception listed above
offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
- // If idx == 1 we don't need to modify offset_hist[2]
+ // If idx == 1 we don't need to modify offset_hist[2], since
+ // we're using the second-most recent code
if (idx > 1) {
offset_hist[2] = offset_hist[1];
}
@@ -1344,6 +1356,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
offset_hist[0] = offset;
}
} else {
+ // When it's not a repeat offset:
+ // "if (Offset_Value > 3) offset = Offset_Value - 3;"
offset = seq.offset - 3;
// Shift back history
@@ -1391,11 +1405,11 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
total_output += seq.match_length;
}
+ // Copy any leftover literals
{
size_t len = IO_istream_len(&litstream);
u8 *const write_ptr = IO_write_bytes(out, len);
const u8 *const read_ptr = IO_read_bytes(&litstream, len);
- // Copy any leftover literals
memcpy(write_ptr, read_ptr, len);
total_output += len;
@@ -1517,10 +1531,10 @@ static void parse_dictionary(dictionary_t *const dict, const u8 *src,
// recent offsets (instead of using {1,4,8}), stored in order, 4-bytes
// little-endian each, for a total of 12 bytes. Each recent offset must have
// a value < dictionary size."
- decode_huf_table(&in, &dict->literals_dtable);
- decode_seq_table(&in, &dict->of_dtable, seq_offset, seq_fse);
- decode_seq_table(&in, &dict->ml_dtable, seq_match_length, seq_fse);
- decode_seq_table(&in, &dict->ll_dtable, seq_literal_length, seq_fse);
+ decode_huf_table(&dict->literals_dtable, &in);
+ decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse);
+ decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse);
+ decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse);
// Read in the previous offset history
dict->previous_offsets[0] = IO_read_bits(&in, 32);
@@ -1571,20 +1585,20 @@ static void free_dictionary(dictionary_t *const dict) {
/******* IO STREAM OPERATIONS *************************************************/
#define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream")
/// Reads `num` bits from a bitstream, and updates the internal offset
-static inline u64 IO_read_bits(istream_t *const in, const int num) {
- if (num > 64 || num <= 0) {
+static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
+ if (num_bits > 64 || num_bits <= 0) {
ERROR("Attempt to read an invalid number of bits");
}
- const size_t bytes = (num + in->bit_offset + 7) / 8;
- const size_t full_bytes = (num + in->bit_offset) / 8;
+ const size_t bytes = (num_bits + in->bit_offset + 7) / 8;
+ const size_t full_bytes = (num_bits + in->bit_offset) / 8;
if (bytes > in->len) {
INP_SIZE();
}
- const u64 result = read_bits_LE(in->ptr, num, in->bit_offset);
+ const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset);
- in->bit_offset = (num + in->bit_offset) % 8;
+ in->bit_offset = (num_bits + in->bit_offset) % 8;
in->ptr += full_bytes;
in->len -= full_bytes;
@@ -1593,16 +1607,21 @@ static inline u64 IO_read_bits(istream_t *const in, const int num) {
/// If a non-zero number of bits have been read from the current byte, advance
/// the offset to the next byte
-static inline void IO_rewind_bits(istream_t *const in, int num) {
- if (num < 0) {
+static inline void IO_rewind_bits(istream_t *const in, int num_bits) {
+ if (num_bits < 0) {
ERROR("Attempting to rewind stream by a negative number of bits");
}
- const int new_offset = in->bit_offset - num;
- const i64 bytes = (new_offset - 7) / 8;
+ // move the offset back by `num_bits` bits
+ const int new_offset = in->bit_offset - num_bits;
+ // determine the number of whole bytes we have to rewind, rounding up to an
+ // integer number (e.g. if `new_offset == -5`, `bytes == 1`)
+ const i64 bytes = -(new_offset - 7) / 8;
- in->ptr += bytes;
- in->len -= bytes;
+ in->ptr -= bytes;
+ in->len += bytes;
+ // make sure the resulting `bit_offset` is positive, as mod in C does not
+ // convert numbers from negative to positive (e.g. -22 % 8 == -6)
in->bit_offset = ((new_offset % 8) + 8) % 8;
}
@@ -1683,33 +1702,26 @@ static inline ostream_t IO_make_ostream(u8 *out, size_t len) {
/// Returns an `istream_t` constructed from the given pointer and length
static inline istream_t IO_make_istream(const u8 *in, size_t len) {
- return (istream_t) { in, 0, len };
+ return (istream_t) { in, len, 0 };
}
/// Returns an `istream_t` with the same base as `in`, and length `len`
/// Then, advance `in` to account for the consumed bytes
/// `in` must be byte aligned
static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) {
- if (len > in->len) {
- INP_SIZE();
- }
- if (in->bit_offset != 0) {
- UNALIGNED();
- }
- const istream_t sub = { in->ptr, in->bit_offset, len };
+ // Consume `len` bytes of the parent stream
+ const u8 *const ptr = IO_read_bytes(in, len);
- in->ptr += len;
- in->len -= len;
-
- return sub;
+ // Make a substream using the pointer to those `len` bytes
+ return IO_make_istream(ptr, len);
}
/******* END IO STREAM OPERATIONS *********************************************/
/******* BITSTREAM OPERATIONS *************************************************/
/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
-static inline u64 read_bits_LE(const u8 *src, const int num,
+static inline u64 read_bits_LE(const u8 *src, const int num_bits,
const size_t offset) {
- if (num > 64) {
+ if (num_bits > 64) {
ERROR("Attempt to read an invalid number of bits");
}
@@ -1719,10 +1731,10 @@ static inline u64 read_bits_LE(const u8 *src, const int num,
u64 res = 0;
int shift = 0;
- int left = num;
+ int left = num_bits;
while (left > 0) {
u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1);
- // Dead the next byte, shift it to account for the offset, and then mask
+ // Read the next byte, shift it to account for the offset, and then mask
// out the top part if we don't need all the bits
res += (((u64)*src++ >> bit_offset) & mask) << shift;
shift += 8 - bit_offset;
@@ -1761,7 +1773,7 @@ static inline u64 STREAM_read_bits(const u8 *const src, const int bits,
/******* BIT COUNTING OPERATIONS **********************************************/
/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to
/// `num`, or `-1` if `num == 0`.
-static inline int log2inf(const u64 num) {
+static inline int highest_set_bit(const u64 num) {
for (int i = 63; i >= 0; i--) {
if (((u64)1 << i) <= num) {
return i;
@@ -1813,17 +1825,18 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
// final-bit-flag. Consequently, a last byte of 0 is not possible. And the
// final-bit-flag itself is not part of the useful bitstream. Hence, the
// last byte contains between 0 and 7 useful bits."
- const int padding = 8 - log2inf(src[len - 1]);
+ const int padding = 8 - highest_set_bit(src[len - 1]);
- i64 offset = len * 8 - padding;
+ // Offset starts at the end because HUF streams are read backwards
+ i64 bit_offset = len * 8 - padding;
u16 state;
- HUF_init_state(dtable, &state, src, &offset);
+ HUF_init_state(dtable, &state, src, &bit_offset);
size_t symbols_written = 0;
- while (offset > -dtable->max_bits) {
+ while (bit_offset > -dtable->max_bits) {
// Iterate over the stream, decoding one symbol at a time
- IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &offset));
+ IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset));
symbols_written++;
}
// "The process continues up to reading the required number of symbols per
@@ -1836,7 +1849,7 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
// before the start of `src`
// Therefore `offset`, the edge to start reading new bits at, should be
// dtable->max_bits before the start of the stream
- if (offset != -dtable->max_bits) {
+ if (bit_offset != -dtable->max_bits) {
CORRUPTION();
}
@@ -1960,7 +1973,7 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table,
}
// Find the first power of 2 larger than the sum
- const int max_bits = log2inf(weight_sum) + 1;
+ const int max_bits = highest_set_bit(weight_sum) + 1;
const u64 left_over = ((u64)1 << max_bits) - weight_sum;
// If the left over isn't a power of 2, the weights are invalid
if (left_over & (left_over - 1)) {
@@ -1969,7 +1982,7 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table,
// left_over is used to find the last weight as it's not transmitted
// by inverting 2^(weight - 1) we can determine the value of last_weight
- const int last_weight = log2inf(left_over) + 1;
+ const int last_weight = highest_set_bit(left_over) + 1;
for (int i = 0; i < num_symbs; i++) {
// "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0"
@@ -2063,7 +2076,7 @@ static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
// final-bit-flag. Consequently, a last byte of 0 is not possible. And the
// final-bit-flag itself is not part of the useful bitstream. Hence, the
// last byte contains between 0 and 7 useful bits."
- const int padding = 8 - log2inf(src[len - 1]);
+ const int padding = 8 - highest_set_bit(src[len - 1]);
i64 offset = len * 8 - padding;
u16 state1, state2;
@@ -2184,7 +2197,7 @@ static void FSE_init_dtable(FSE_dtable *const dtable,
u16 next_state_desc = state_desc[symbol]++;
// Fills in the table appropriately, next_state_desc increases by symbol
// over time, decreasing number of bits
- dtable->num_bits[i] = (u8)(accuracy_log - log2inf(next_state_desc));
+ dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc));
// Baseline increases until the bit threshold is passed, at which point
// it resets to 0
dtable->new_state_base[i] =
@@ -2235,7 +2248,7 @@ static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
int symb = 0;
while (remaining > 0 && symb < FSE_MAX_SYMBS) {
// Log of the number of possible values we could read
- int bits = log2inf(remaining + 1) + 1;
+ int bits = highest_set_bit(remaining + 1) + 1;
u16 val = IO_read_bits(in, bits);
diff --git a/doc/images/Cspeed4.png b/doc/images/Cspeed4.png
index f0ca0ffba9c4..318204c00e96 100644
--- a/doc/images/Cspeed4.png
+++ b/doc/images/Cspeed4.png
Binary files differ
diff --git a/doc/images/Dspeed4.png b/doc/images/Dspeed4.png
index eba485d0d1ed..b7baef1ff3f7 100644
--- a/doc/images/Dspeed4.png
+++ b/doc/images/Dspeed4.png
Binary files differ
diff --git a/doc/images/dict-cr.png b/doc/images/dict-cr.png
index f555a46c7b99..f3a9ce2bdda6 100644
--- a/doc/images/dict-cr.png
+++ b/doc/images/dict-cr.png
Binary files differ
diff --git a/doc/images/dict-cs.png b/doc/images/dict-cs.png
index ccc02b0d1713..55e5ef518fed 100644
--- a/doc/images/dict-cs.png
+++ b/doc/images/dict-cs.png
Binary files differ
diff --git a/doc/images/dict-ds.png b/doc/images/dict-ds.png
index 858cad685509..1153f1b95fd5 100644
--- a/doc/images/dict-ds.png
+++ b/doc/images/dict-ds.png
Binary files differ
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index d4b46548a3d2..1f212fea2305 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,8 @@ Distribution of this document is unlimited.
### Version
-0.2.4 (17/02/17)
+0.2.5 (31/03/17)
+
Introduction
------------
@@ -109,7 +110,7 @@ The structure of a single Zstandard frame is following:
__`Magic_Number`__
-4 Bytes, little-endian format.
+4 Bytes, __little-endian__ format.
Value : 0xFD2FB528
__`Frame_Header`__
@@ -127,7 +128,7 @@ An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
The content checksum is the result
of [xxh64() hash function](http://www.xxhash.org)
digesting the original (decoded) data as input, and a seed of zero.
-The low 4 bytes of the checksum are stored in little endian format.
+The low 4 bytes of the checksum are stored in __little-endian__ format.
### `Frame_Header`
@@ -154,41 +155,42 @@ Decoding this byte is enough to tell the size of `Frame_Header`.
| 2 | `Content_Checksum_flag` |
| 1-0 | `Dictionary_ID_flag` |
-In this table, bit 7 the is highest bit, while bit 0 the is lowest.
+In this table, bit 7 is the highest bit, while bit 0 is the lowest one.
__`Frame_Content_Size_flag`__
This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`),
-specifying if decompressed data size is provided within the header.
-The `Flag_Value` can be converted into `Field_Size`,
+specifying if `Frame_Content_Size` (the decompressed data size)
+is provided within the header.
+`Flag_Value` provides `FCS_Field_Size`,
which is the number of bytes used by `Frame_Content_Size`
according to the following table:
-|`Flag_Value`| 0 | 1 | 2 | 3 |
-| ---------- | ------ | --- | --- | --- |
-|`Field_Size`| 0 or 1 | 2 | 4 | 8 |
+| `Flag_Value` | 0 | 1 | 2 | 3 |
+| -------------- | ------ | --- | --- | --- |
+|`FCS_Field_Size`| 0 or 1 | 2 | 4 | 8 |
-When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` :
+When `Flag_Value` is `0`, `FCS_Field_Size` depends on `Single_Segment_flag` :
if `Single_Segment_flag` is set, `Field_Size` is 1.
-Otherwise, `Field_Size` is 0 (content size not provided).
+Otherwise, `Field_Size` is 0 : `Frame_Content_Size` is not provided.
__`Single_Segment_flag`__
If this flag is set,
data must be regenerated within a single continuous memory segment.
-In this case, `Frame_Content_Size` is necessarily present,
-but `Window_Descriptor` byte is skipped.
+In this case, `Window_Descriptor` byte is skipped,
+but `Frame_Content_Size` is necessarily present.
As a consequence, the decoder must allocate a memory segment
of size equal or bigger than `Frame_Content_Size`.
In order to preserve the decoder from unreasonable memory requirements,
-a decoder can reject a compressed frame
+a decoder is allowed to reject a compressed frame
which requests a memory size beyond decoder's authorized range.
For broader compatibility, decoders are recommended to support
memory sizes of at least 8 MB.
-This is just a recommendation,
+This is only a recommendation,
each decoder is free to support higher or lower limits,
depending on local limitations.
@@ -224,37 +226,38 @@ It also specifies the size of this field as `Field_Size`.
#### `Window_Descriptor`
-Provides guarantees on maximum back-reference distance
-that will be used within compressed data.
+Provides guarantees on minimum memory buffer required to decompress a frame.
This information is important for decoders to allocate enough memory.
-The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set.
-In this case, the maximum back-reference distance is the content size itself,
-which can be any value from 1 to 2^64-1 bytes (16 EB).
+The `Window_Descriptor` byte is optional.
+When `Single_Segment_flag` is set, `Window_Descriptor` is not present.
+In this case, `Window_Size` is `Frame_Content_Size`,
+which can be any value from 0 to 2^64-1 bytes (16 ExaBytes).
| Bit numbers | 7-3 | 2-0 |
| ----------- | ---------- | ---------- |
| Field name | `Exponent` | `Mantissa` |
-Maximum distance is given by the following formulas :
+The minimum memory buffer size is called `Window_Size`.
+It is described by the following formulas :
```
windowLog = 10 + Exponent;
windowBase = 1 << windowLog;
windowAdd = (windowBase / 8) * Mantissa;
Window_Size = windowBase + windowAdd;
```
-The minimum window size is 1 KB.
-The maximum size is `15*(1<<38)` bytes, which is 1.875 TB.
+The minimum `Window_Size` is 1 KB.
+The maximum `Window_Size` is `(1<<41) + 7*(1<<38)` bytes, which is 3.75 TB.
To properly decode compressed data,
a decoder will need to allocate a buffer of at least `Window_Size` bytes.
In order to preserve decoder from unreasonable memory requirements,
-a decoder can refuse a compressed frame
+a decoder is allowed to reject a compressed frame
which requests a memory size beyond decoder's authorized range.
For improved interoperability,
-decoders are recommended to be compatible with window sizes of 8 MB,
+decoders are recommended to be compatible with `Window_Size >= 8 MB`,
and encoders are recommended to not request more than 8 MB.
It's merely a recommendation though,
decoders are free to support larger or lower limits,
@@ -264,112 +267,118 @@ depending on local limitations.
This is a variable size field, which contains
the ID of the dictionary required to properly decode the frame.
-Note that this field is optional. When it's not present,
+`Dictionary_ID` field is optional. When it's not present,
it's up to the decoder to make sure it uses the correct dictionary.
-Format is little-endian.
Field size depends on `Dictionary_ID_flag`.
1 byte can represent an ID 0-255.
2 bytes can represent an ID 0-65535.
4 bytes can represent an ID 0-4294967295.
+Format is __little-endian__.
It's allowed to represent a small ID (for example `13`)
-with a large 4-bytes dictionary ID, losing some compacity in the process.
+with a large 4-bytes dictionary ID, even if it is less efficient.
_Reserved ranges :_
If the frame is going to be distributed in a private environment,
any dictionary ID can be used.
However, for public distribution of compressed frames using a dictionary,
-the following ranges are reserved for future use and should not be used :
-- low range : 1 - 32767
-- high range : >= (2^31)
-
+the following ranges are reserved and shall not be used :
+- low range : `<= 32767`
+- high range : `>= (1 << 31)`
#### `Frame_Content_Size`
This is the original (uncompressed) size. This information is optional.
-The `Field_Size` is provided according to value of `Frame_Content_Size_flag`.
-The `Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes.
-Format is little-endian.
-
-| `Field_Size` | Range |
-| ------------ | ---------- |
-| 1 | 0 - 255 |
-| 2 | 256 - 65791|
-| 4 | 0 - 2^32-1 |
-| 8 | 0 - 2^64-1 |
-
-When `Field_Size` is 1, 4 or 8 bytes, the value is read directly.
-When `Field_Size` is 2, _the offset of 256 is added_.
+`Frame_Content_Size` uses a variable number of bytes, provided by `FCS_Field_Size`.
+`FCS_Field_Size` is provided by the value of `Frame_Content_Size_flag`.
+`FCS_Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes.
+
+| `FCS_Field_Size` | Range |
+| ---------------- | ---------- |
+| 0 | unknown |
+| 1 | 0 - 255 |
+| 2 | 256 - 65791|
+| 4 | 0 - 2^32-1 |
+| 8 | 0 - 2^64-1 |
+
+`Frame_Content_Size` format is __little-endian__.
+When `FCS_Field_Size` is 1, 4 or 8 bytes, the value is read directly.
+When `FCS_Field_Size` is 2, _the offset of 256 is added_.
It's allowed to represent a small size (for example `18`) using any compatible variant.
+
Blocks
-------
-After the magic number and header of each block,
-there are some number of blocks.
-Each frame must have at least one block but there is no upper limit
-on the number of blocks per frame.
+
+After `Magic_Number` and `Frame_Header`, there are some number of blocks.
+Each frame must have at least one block,
+but there is no upper limit on the number of blocks per frame.
The structure of a block is as follows:
-| `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` |
-|:------------:|:------------:|:------------:|:---------------:|
-| 1 bit | 2 bits | 21 bits | n bytes |
+| `Block_Header` | `Block_Content` |
+|:--------------:|:---------------:|
+| 3 bytes | n bytes |
+
+`Block_Header` uses 3 bytes, written using __little-endian__ convention.
+It contains 3 fields :
-The block header (`Last_Block`, `Block_Type`, and `Block_Size`) uses 3-bytes.
+| `Last_Block` | `Block_Type` | `Block_Size` |
+|:------------:|:------------:|:------------:|
+| bit 0 | bits 1-2 | bits 3-23 |
__`Last_Block`__
The lowest bit signals if this block is the last one.
-The frame will end after this one.
+The frame will end after this last block.
It may be followed by an optional `Content_Checksum`
(see [Zstandard Frames](#zstandard-frames)).
-__`Block_Type` and `Block_Size`__
-
-The next 2 bits represent the `Block_Type`,
-while the remaining 21 bits represent the `Block_Size`.
-Format is __little-endian__.
+__`Block_Type`__
+The next 2 bits represent the `Block_Type`.
There are 4 block types :
-| Value | 0 | 1 | 2 | 3 |
+| Value | 0 | 1 | 2 | 3 |
| ------------ | ----------- | ----------- | ------------------ | --------- |
| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`|
- `Raw_Block` - this is an uncompressed block.
- `Block_Content` contains `Block_Size` bytes to read and copy
- as decoded data.
+ `Block_Content` contains `Block_Size` bytes.
-- `RLE_Block` - this is a single byte, repeated N times.
- `Block_Content` consists of a single byte,
- and `Block_Size` is the number of times this byte should be repeated.
+- `RLE_Block` - this is a single byte, repeated `Block_Size` times.
+ `Block_Content` consists of a single byte.
+ On the decompression side, this byte must be repeated `Block_Size` times.
- `Compressed_Block` - this is a [Zstandard compressed block](#compressed-blocks),
explained later on.
`Block_Size` is the length of `Block_Content`, the compressed data.
- The decompressed size is unknown,
+ The decompressed size is not known,
but its maximum possible value is guaranteed (see below)
- `Reserved` - this is not a block.
This value cannot be used with current version of this specification.
+__`Block_Size`__
+
+The upper 21 bits of `Block_Header` represent the `Block_Size`.
+
Block sizes must respect a few rules :
-- In compressed mode, compressed size is always strictly less than decompressed size.
-- Block decompressed size is always <= maximum back-reference distance.
+- For `Compressed_Block`, `Block_Size` is always strictly less than decompressed size.
+- Block decompressed size is always <= `Window_Size`
- Block decompressed size is always <= 128 KB.
-A data block is not necessarily "full" :
-since an arbitrary “flush” may happen anytime,
-block decompressed content can be any size (even empty),
+A block can contain any number of bytes (even empty),
up to `Block_Maximum_Decompressed_Size`, which is the smallest of :
-- Maximum back-reference distance
+- `Window_Size`
- 128 KB
+
Compressed Blocks
-----------------
-To decompress a compressed block, the compressed size must be provided from
-`Block_Size` field in the block header.
+To decompress a compressed block, the compressed size must be provided
+from `Block_Size` field within `Block_Header`.
A compressed block consists of 2 sections :
- [Literals Section](#literals-section)
@@ -381,36 +390,34 @@ data in [Sequence Execution](#sequence-execution)
#### Prerequisites
To decode a compressed block, the following elements are necessary :
- Previous decoded data, up to a distance of `Window_Size`,
- or all previous data when `Single_Segment_flag` is set.
-- List of "recent offsets" from the previous compressed block.
-- Decoding tables of the previous compressed block for each symbol type
+ or all previously decoded data when `Single_Segment_flag` is set.
+- List of "recent offsets" from previous `Compressed_Block`.
+- Decoding tables of previous `Compressed_Block` for each symbol type
(literals, literals lengths, match lengths, offsets).
Literals Section
----------------
-During sequence execution, symbols from the literals section
-During sequence phase, literals will be entangled with match copy operations.
All literals are regrouped in the first part of the block.
-They can be decoded first, and then copied during sequence operations,
-or they can be decoded on the flow, as needed by sequence commands.
-
-| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
-| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- |
+They can be decoded first, and then copied during [Sequence Execution],
+or they can be decoded on the flow during [Sequence Execution].
Literals can be stored uncompressed or compressed using Huffman prefix codes.
When compressed, an optional tree description can be present,
followed by 1 or 4 streams.
+| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
+| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- |
+
#### `Literals_Section_Header`
Header is in charge of describing how literals are packed.
It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
-using little-endian convention.
+using __little-endian__ convention.
| `Literals_Block_Type` | `Size_Format` | `Regenerated_Size` | [`Compressed_Size`] |
-| --------------------- | ------------- | ------------------ | ----------------- |
-| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits |
+| --------------------- | ------------- | ------------------ | ------------------- |
+| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits |
In this representation, bits on the left are the lowest bits.
@@ -418,33 +425,38 @@ __`Literals_Block_Type`__
This field uses 2 lowest bits of first byte, describing 4 different block types :
-| `Literals_Block_Type` | Value |
-| ----------------------------- | ----- |
-| `Raw_Literals_Block` | 0 |
-| `RLE_Literals_Block` | 1 |
-| `Compressed_Literals_Block` | 2 |
-| `Repeat_Stats_Literals_Block` | 3 |
+| `Literals_Block_Type` | Value |
+| --------------------------- | ----- |
+| `Raw_Literals_Block` | 0 |
+| `RLE_Literals_Block` | 1 |
+| `Compressed_Literals_Block` | 2 |
+| `Treeless_Literals_Block` | 3 |
- `Raw_Literals_Block` - Literals are stored uncompressed.
-- `RLE_Literals_Block` - Literals consist of a single byte value repeated N times.
+- `RLE_Literals_Block` - Literals consist of a single byte value
+ repeated `Regenerated_Size` times.
- `Compressed_Literals_Block` - This is a standard Huffman-compressed block,
starting with a Huffman tree description.
See details below.
-- `Repeat_Stats_Literals_Block` - This is a Huffman-compressed block,
+- `Treeless_Literals_Block` - This is a Huffman-compressed block,
using Huffman tree _from previous Huffman-compressed literals block_.
- Huffman tree description will be skipped.
- Note: If this mode is used without any previous Huffman-table in the frame
- (or [dictionary](#dictionary-format)), this should be treated as corruption.
+ `Huffman_Tree_Description` will be skipped.
+ Note: If this mode is triggered without any previous Huffman-table in the frame
+ (or [dictionary](#dictionary-format)), this should be treated as data corruption.
__`Size_Format`__
`Size_Format` is divided into 2 families :
-- For `Raw_Literals_Block` and `RLE_Literals_Block` it's enough to decode `Regenerated_Size`.
-- For `Compressed_Block`, its required to decode both `Compressed_Size`
- and `Regenerated_Size` (the decompressed size). It will also decode the number of streams.
+- For `Raw_Literals_Block` and `RLE_Literals_Block`,
+ it's only necessary to decode `Regenerated_Size`.
+ There is no `Compressed_Size` field.
+- For `Compressed_Block` and `Treeless_Literals_Block`,
+ it's required to decode both `Compressed_Size`
+ and `Regenerated_Size` (the decompressed size).
+ It's also necessary to decode the number of streams (1 or 4).
-For values spanning several bytes, convention is little-endian.
+For values spanning several bytes, convention is __little-endian__.
__`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ :
@@ -463,9 +475,9 @@ __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ :
Only Stream1 is present for these cases.
Note : it's allowed to represent a short value (for example `13`)
-using a long format, accepting the increased compressed data size.
+using a long format, even if it's less efficient.
-__`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block`__ :
+__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :
- Value 00 : _A single stream_.
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
@@ -480,67 +492,68 @@ __`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block
Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
`Literals_Section_Header` has 5 bytes.
-Both `Compressed_Size` and `Regenerated_Size` fields follow little-endian convention.
-Note: `Compressed_Size` __includes__ the size of the Huffman Tree description if it
-is present.
+Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
+Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
+_when_ it is present.
### Raw Literals Block
-The data in Stream1 is `Regenerated_Size` bytes long, and contains the raw literals data
-to be used in sequence execution.
+The data in Stream1 is `Regenerated_Size` bytes long,
+it contains the raw literals data to be used during [Sequence Execution].
### RLE Literals Block
Stream1 consists of a single byte which should be repeated `Regenerated_Size` times
to generate the decoded literals.
-### Compressed Literals Block and Repeat Stats Literals Block
-Both of these modes contain Huffman encoded data
+### Compressed Literals Block and Treeless Literals Block
+Both of these modes contain Huffman encoded data.
+`Treeless_Literals_Block` does not have a `Huffman_Tree_Description`.
#### `Huffman_Tree_Description`
This section is only present when `Literals_Block_Type` type is `Compressed_Literals_Block` (`2`).
The format of the Huffman tree description can be found at [Huffman Tree description](#huffman-tree-description).
-The size Huffman Tree description will be determined during the decoding process,
-and must be used to determine where the compressed Huffman streams begin.
+The size of `Huffman_Tree_Description` is determined during decoding process,
+it must be used to determine where streams begin.
+`Total_Streams_Size = Compressed_Size - Huffman_Tree_Description_Size`.
-If repeat stats mode is used, the Huffman table used in the previous compressed block will
-be used to decompress this block as well.
+For `Treeless_Literals_Block`,
+the Huffman table comes from previously compressed literals block.
-Huffman compressed data consists either 1 or 4 Huffman-coded streams.
+Huffman compressed data consists of either 1 or 4 Huffman-coded streams.
If only one stream is present, it is a single bitstream occupying the entire
-remaining portion of the literals block, encoded as described at
+remaining portion of the literals block, encoded as described within
[Huffman-Coded Streams](#huffman-coded-streams).
If there are four streams, the literals section header only provides enough
-information to know the regenerated and compressed sizes of all four streams combined.
-The regenerated size of each stream is equal to `(totalSize+3)/4`, except for the last stream,
-which may be up to 3 bytes smaller, to reach a total decompressed size match that described
-in the literals header.
+information to know the decompressed and compressed sizes of all four streams _combined_.
+The decompressed size of each stream is equal to `(Regenerated_Size+3)/4`,
+except for the last stream which may be up to 3 bytes smaller,
+to reach a total decompressed size as specified in `Regenerated_Size`.
-The compressed size of each stream is provided explicitly: the first 6 bytes of the compressed
-data consist of three 2-byte little endian fields, describing the compressed sizes
-of the first three streams.
-The last streams size is computed from the total compressed size and the size of the other
-three streams.
+The compressed size of each stream is provided explicitly:
+the first 6 bytes of the compressed data consist of three 2-byte __little-endian__ fields,
+describing the compressed sizes of the first three streams.
+`Stream4_Size` is computed from `Total_Streams_Size` minus the sizes of the other streams.
-`stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize`.
+`Stream4_Size = Total_Streams_Size - 6 - Stream1_Size - Stream2_Size - Stream3_Size`.
-Note: remember that totalCSize may be smaller than the `Compressed_Size` found in the literals
-block header as `Compressed_Size` also contains the size of the Huffman Tree description if it
-is present.
+Note: remember that `Total_Streams_Size` can be smaller than `Compressed_Size` in header,
+because `Compressed_Size` also contains `Huffman_Tree_Description_Size` when it is present.
Each of these 4 bitstreams is then decoded independently as a Huffman-Coded stream,
as described at [Huffman-Coded Streams](#huffman-coded-streams)
+
Sequences Section
-----------------
A compressed block is a succession of _sequences_ .
A sequence is a literal copy command, followed by a match copy command.
A literal copy command specifies a length.
-It is the number of bytes to be copied (or extracted) from the literal section.
+It is the number of bytes to be copied (or extracted) from the Literals Section.
A match copy command specifies an offset and a length.
When all _sequences_ are decoded,
-if there is are any literals left in the _literal section_,
+if there are literals left in the _literal section_,
these bytes are added at the end of the block.
This is described in more detail in [Sequence Execution](#sequence-execution)
@@ -557,7 +570,7 @@ followed by the bitstream.
| -------------------------- | ------------------------- | ---------------- | ---------------------- | --------- |
To decode the `Sequences_Section`, it's required to know its size.
-This size is deduced from `blockSize - literalSectionSize`.
+This size is deduced from `Block_Size - Literals_Section_Size`.
#### `Sequences_Section_Header`
@@ -572,7 +585,7 @@ This is a variable size field using between 1 and 3 bytes.
Let's call its first byte `byte0`.
- `if (byte0 == 0)` : there are no sequences.
The sequence section stops there.
- Regenerated content is defined entirely by literals section.
+ Decompressed content is defined entirely as Literals Section content.
- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
@@ -581,14 +594,14 @@ __Symbol compression modes__
This is a single byte, defining the compression mode of each symbol type.
-|Bit number| 7-6 | 5-4 | 3-2 | 1-0 |
+|Bit number| 7-6 | 5-4 | 3-2 | 1-0 |
| -------- | ----------------------- | -------------- | -------------------- | ---------- |
|Field name| `Literals_Lengths_Mode` | `Offsets_Mode` | `Match_Lengths_Mode` | `Reserved` |
The last field, `Reserved`, must be all-zeroes.
`Literals_Lengths_Mode`, `Offsets_Mode` and `Match_Lengths_Mode` define the `Compression_Mode` of
-literals lengths, offsets, and match lengths respectively.
+literals lengths, offsets, and match lengths symbols respectively.
They follow the same enumeration :
@@ -598,17 +611,17 @@ They follow the same enumeration :
- `Predefined_Mode` : A predefined FSE distribution table is used, defined in
[default distributions](#default-distributions).
- The table takes no space in the compressed data.
+ No distribution table will be present.
- `RLE_Mode` : The table description consists of a single byte.
- This code will be repeated for every sequence.
+ This code will be repeated for all sequences.
- `Repeat_Mode` : The table used in the previous compressed block will be used again.
No distribution table will be present.
- Note: this includes RLE mode, so if repeat_mode follows rle_mode the same symbol will be repeated.
+ Note: this includes RLE mode, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated.
If this mode is used without any previous sequence table in the frame
(or [dictionary](#dictionary-format)) to repeat, this should be treated as corruption.
- `FSE_Compressed_Mode` : standard FSE compression.
A distribution table will be present.
- The format of this distribution table is described in (FSE Table Description)[#fse-table-description].
+ The format of this distribution table is described in [FSE Table Description](#fse-table-description).
Note that the maximum allowed accuracy log for literals length and match length tables is 9,
and the maximum accuracy log for the offsets table is 8.
@@ -625,7 +638,7 @@ Literals length codes are values ranging from `0` to `35` included.
They define lengths from 0 to 131071 bytes.
The literals length is equal to the decoded `Baseline` plus
the result of reading `Number_of_Bits` bits from the bitstream,
-as a little-endian value.
+as a __little-endian__ value.
| `Literals_Length_Code` | 0-15 |
| ---------------------- | ---------------------- |
@@ -654,7 +667,7 @@ Match length codes are values ranging from `0` to `52` included.
They define lengths from 3 to 131074 bytes.
The match length is equal to the decoded `Baseline` plus
the result of reading `Number_of_Bits` bits from the bitstream,
-as a little-endian value.
+as a __little-endian__ value.
| `Match_Length_Code` | 0-31 |
| ------------------- | ----------------------- |
@@ -685,7 +698,7 @@ Recommendation is to support at least up to `22`.
For information, at the time of this writing.
the reference decoder supports a maximum `N` value of `28` in 64-bits mode.
-An offset code is also the number of additional bits to read in little-endian fashion,
+An offset code is also the number of additional bits to read in __little-endian__ fashion,
and can be translated into an `Offset_Value` using the following formulas :
```
@@ -720,8 +733,8 @@ begins.
FSE decoding requires a 'state' to be carried from symbol to symbol.
For more explanation on FSE decoding, see the [FSE section](#fse).
-For sequence decoding, a separate state must be kept track of for each of
-literal lengths, offsets, and match lengths.
+For sequence decoding, a separate state is kept for each of the
+literals lengths, offsets, and match lengths symbols.
Some FSE primitives are also used.
For more details on the operation of these primitives, see the [FSE section](#fse).
@@ -753,8 +766,7 @@ See the [description of the codes] for how to determine these values.
[description of the codes]: #the-codes-for-literals-lengths-match-lengths-and-offsets
Decoding starts by reading the `Number_of_Bits` required to decode `Offset`.
-It then does the same for `Match_Length`,
-and then for `Literals_Length`.
+It then does the same for `Match_Length`, and then for `Literals_Length`.
This sequence is then used for [sequence execution](#sequence-execution).
If it is not the last sequence in the block,
@@ -807,6 +819,7 @@ short offsetCodes_defaultDistribution[29] =
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
```
+
Sequence Execution
------------------
Once literals and sequences have been decoded,
@@ -826,7 +839,8 @@ in this case.
The offset is defined as from the current position, so an offset of 6
and a match length of 3 means that 3 bytes should be copied from 6 bytes back.
-Note that all offsets must be at most equal to the window size defined by the frame header.
+Note that all offsets leading to previously decoded data
+must be smaller than `Window_Size` defined in `Frame_Header_Descriptor`.
#### Repeat offsets
As seen in [Sequence Execution](#sequence-execution),
@@ -842,11 +856,10 @@ so an `offset_value` of 1 means `Repeated_Offset2`,
an `offset_value` of 2 means `Repeated_Offset3`,
and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`.
-In the first block, the offset history is populated with the following values : 1, 4 and 8 (in order).
+For the first block, the starting offset history is populated with the following values : 1, 4 and 8 (in order).
-Then each block gets its starting offset history from the ending values of the most recent compressed block.
-Note that non-compressed blocks are skipped,
-they do not contribute to offset history.
+Then each block gets its starting offset history from the ending values of the most recent `Compressed_Block`.
+Note that blocks which are not `Compressed_Block` are skipped, they do not contribute to offset history.
[Offset Codes]: #offset-codes
@@ -859,6 +872,7 @@ This means that when `Repeated_Offset1` (most recent) is used, history is unmodi
When `Repeated_Offset2` is used, it's swapped with `Repeated_Offset1`.
If any other offset is used, it becomes `Repeated_Offset1` and the rest are shift back by one.
+
Skippable Frames
----------------
@@ -878,7 +892,7 @@ Skippable frames defined in this specification are compatible with [LZ4] ones.
__`Magic_Number`__
-4 Bytes, little-endian format.
+4 Bytes, __little-endian__ format.
Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F.
All 16 values are valid to identify a skippable frame.
@@ -886,13 +900,14 @@ __`Frame_Size`__
This is the size, in bytes, of the following `User_Data`
(without including the magic number nor the size field itself).
-This field is represented using 4 Bytes, little-endian format, unsigned 32-bits.
+This field is represented using 4 Bytes, __little-endian__ format, unsigned 32-bits.
This means `User_Data` can’t be bigger than (2^32-1) bytes.
__`User_Data`__
The `User_Data` can be anything. Data will just be skipped by the decoder.
+
Entropy Encoding
----------------
Two types of entropy encoding are used by the Zstandard format:
@@ -900,7 +915,7 @@ FSE, and Huffman coding.
FSE
---
-FSE, or FiniteStateEntropy is an entropy coding based on [ANS].
+FSE, short for Finite State Entropy, is an entropy codec based on [ANS].
FSE encoding/decoding involves a state that is carried over between symbols,
so decoding must be done in the opposite direction as encoding.
Therefore, all FSE bitstreams are read from end to beginning.
@@ -909,15 +924,15 @@ For additional details on FSE, see [Finite State Entropy].
[Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/
-FSE decoding involves a decoding table which has a power of 2 size and three elements:
+FSE decoding involves a decoding table which has a power of 2 size, and contains three elements:
`Symbol`, `Num_Bits`, and `Baseline`.
The `log2` of the table size is its `Accuracy_Log`.
The FSE state represents an index in this table.
-The next symbol in the stream is the symbol indicated by the table value for that state.
-To obtain the next state value,
-the decoder should consume `Num_Bits` bits from the stream as a little endian value and add it to baseline.
-To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a little endian value.
+To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a __little-endian__ value.
+The next symbol in the stream is the `Symbol` indicated in the table for that state.
+To obtain the next state value,
+the decoder should consume `Num_Bits` bits from the stream as a __little-endian__ value and add it to `Baseline`.
[ANS]: https://en.wikipedia.org/wiki/Asymmetric_Numeral_Systems
@@ -929,7 +944,7 @@ An FSE distribution table describes the probabilities of all symbols
from `0` to the last present one (included)
on a normalized scale of `1 << Accuracy_Log` .
-It's a bitstream which is read forward, in little-endian fashion.
+It's a bitstream which is read forward, in __little-endian__ fashion.
It's not necessary to know its exact size,
since it will be discovered and reported by the decoding process.
@@ -1064,7 +1079,7 @@ Huffman Coding
--------------
Zstandard Huffman-coded streams are read backwards,
similar to the FSE bitstreams.
-Therefore, to find the start of the bitstream it is therefore necessary to
+Therefore, to find the start of the bitstream, it is necessary to
know the offset of the last byte of the Huffman-coded stream.
After writing the last bit containing information, the compressor
@@ -1077,7 +1092,7 @@ byte to read. The decompressor needs to skip 0-7 initial `0`-bits and
the first `1`-bit it occurs. Afterwards, the useful part of the bitstream
begins.
-The bitstream contains Huffman-coded symbols in little-endian order,
+The bitstream contains Huffman-coded symbols in __little-endian__ order,
with the codes defined by the method below.
### Huffman Tree Description
@@ -1182,14 +1197,14 @@ The Huffman header compression uses 2 states,
which share the same FSE distribution table.
The first state (`State1`) encodes the even indexed symbols,
and the second (`State2`) encodes the odd indexes.
-State1 is initialized first, and then State2, and they take turns decoding
-a single symbol and updating their state.
+`State1` is initialized first, and then `State2`, and they take turns
+decoding a single symbol and updating their state.
For more details on these FSE operations, see the [FSE section](#fse).
The number of symbols to decode is determined
by tracking bitStream overflow condition:
If updating state after decoding a symbol would require more bits than
-remain in the stream, it is assumed the extra bits are 0. Then,
+remain in the stream, it is assumed that the extra bits are 0. Then,
the symbols for each of the final states are decoded and the process is complete.
##### Conversion from weights to Huffman prefix codes
@@ -1245,7 +1260,7 @@ it would be encoded as:
|Encoding|`0000`|`0001`|`01`|`1`| `10000` |
Starting from the end,
-it's possible to read the bitstream in a little-endian fashion,
+it's possible to read the bitstream in a __little-endian__ fashion,
keeping track of already used bits. Since the bitstream is encoded in reverse
order, by starting at the end the symbols can be read in forward order.
@@ -1258,13 +1273,14 @@ If a bitstream is not entirely and exactly consumed,
hence reaching exactly its beginning position with _all_ bits consumed,
the decoding process is considered faulty.
+
Dictionary Format
-----------------
-Zstandard is compatible with "raw content" dictionaries, free of any format restriction,
-except that they must be at least 8 bytes.
-These dictionaries function as if they were just the `Content` block of a formatted
-dictionary.
+Zstandard is compatible with "raw content" dictionaries,
+free of any format restriction, except that they must be at least 8 bytes.
+These dictionaries function as if they were just the `Content` part
+of a formatted dictionary.
But dictionaries created by `zstd --train` follow a format, described here.
@@ -1274,9 +1290,9 @@ __Pre-requisites__ : a dictionary has a size,
| `Magic_Number` | `Dictionary_ID` | `Entropy_Tables` | `Content` |
| -------------- | --------------- | ---------------- | --------- |
-__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, little-endian format
+__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, __little-endian__ format
-__`Dictionary_ID`__ : 4 bytes, stored in little-endian format.
+__`Dictionary_ID`__ : 4 bytes, stored in __little-endian__ format.
`Dictionary_ID` can be any value, except 0 (which means no `Dictionary_ID`).
It's used by decoders to check if they use the correct dictionary.
@@ -1284,9 +1300,9 @@ _Reserved ranges :_
If the frame is going to be distributed in a private environment,
any `Dictionary_ID` can be used.
However, for public distribution of compressed frames,
- the following ranges are reserved for future use and should not be used :
+ the following ranges are reserved and shall not be used :
- - low range : 1 - 32767
+ - low range : <= 32767
- high range : >= (2^31)
__`Entropy_Tables`__ : following the same format as the tables in compressed blocks.
@@ -1298,26 +1314,30 @@ __`Entropy_Tables`__ : following the same format as the tables in compressed blo
These tables populate the Repeat Stats literals mode and
Repeat distribution mode for sequence decoding.
It's finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`),
- stored in order, 4-bytes little-endian each, for a total of 12 bytes.
+ stored in order, 4-bytes __little-endian__ each, for a total of 12 bytes.
Each recent offset must have a value < dictionary size.
__`Content`__ : The rest of the dictionary is its content.
The content act as a "past" in front of data to compress or decompress,
so it can be referenced in sequence commands.
As long as the amount of data decoded from this frame is less than or
- equal to the window-size, sequence commands may specify offsets longer
- than the lenght of total decoded output so far to reference back to the
- dictionary. After the total output has surpassed the window size however,
+ equal to `Window_Size`, sequence commands may specify offsets longer
+ than the total length of decoded output so far to reference back to the
+ dictionary. After the total output has surpassed `Window_Size` however,
this is no longer allowed and the dictionary is no longer accessible.
[compressed blocks]: #the-format-of-compressed_block
+
+
Appendix A - Decoding tables for predefined codes
-------------------------------------------------
-This appendix contains FSE decoding tables for the predefined literal length, match length, and offset
-codes. The tables have been constructed using the algorithm as given above in the
-"from normalized distribution to decoding tables" chapter. The tables here can be used as examples
-to crosscheck that an implementation implements the decoding table generation algorithm correctly.
+This appendix contains FSE decoding tables
+for the predefined literal length, match length, and offset codes.
+The tables have been constructed using the algorithm as given above in chapter
+"from normalized distribution to decoding tables".
+The tables here can be used as examples
+to crosscheck that an implementation builds its decoding tables correctly.
#### Literal Length Code:
@@ -1496,6 +1516,7 @@ to crosscheck that an implementation implements the decoding table generation al
Version changes
---------------
+- 0.2.5 : minor typos and clarifications
- 0.2.4 : section restructuring, by Sean Purcell
- 0.2.3 : clarified several details, by Sean Purcell
- 0.2.2 : added predefined codes, by Johannes Rudolph
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 204f56ea5f21..2e77e7742f60 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.1.4 Manual</title>
+<title>zstd 1.2.0 Manual</title>
</head>
<body>
-<h1>zstd 1.1.4 Manual</h1>
+<h1>zstd 1.2.0 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
@@ -57,46 +57,46 @@
<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
-</b><p> Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
- Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
- @return : compressed size written into `dst` (<= `dstCapacity),
- or an error code if it fails (which can be tested using ZSTD_isError()).
+</b><p> Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+  @return : compressed size written into `dst` (<= `dstCapacity`),
+ or an error code if it fails (which can be tested using ZSTD_isError()).
</p></pre><BR>
<pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
-</b><p> `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
- `dstCapacity` is an upper bound of originalSize.
- If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
- @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
- or an errorCode if it fails (which can be tested using ZSTD_isError()).
+</b><p> `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ `dstCapacity` is an upper bound of originalSize.
+ If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ or an errorCode if it fails (which can be tested using ZSTD_isError()).
</p></pre><BR>
<pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-</b><p> NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
- ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
- frame, but distinguishes empty frames from frames with an unknown size, or errors.
-
- Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
- concatenated frames in one buffer, and so is more general.
- As a result however, it requires more computation and entire frames to be passed to it,
- as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
-
- 'src' is the start of a zstd compressed frame.
- @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
- note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
- When `return==0`, data to decompress could be any size.
- In which case, it's necessary to use streaming mode to decompress data.
- Optionally, application can still use ZSTD_decompress() while relying on implied limits.
- (For example, data may be necessarily cut into blocks <= 16 KB).
- note 2 : decompressed size is always present when compression is done with ZSTD_compress()
- note 3 : decompressed size can be very large (64-bits value),
- potentially larger than what local system can handle as a single memory segment.
- In which case, it's necessary to use streaming mode to decompress data.
- note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
- Always ensure result fits within application's authorized limits.
- Each application can set its own limits.
- note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.
+</b><p> NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
+ ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
+ frame, but distinguishes empty frames from frames with an unknown size, or errors.
+
+ Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
+ concatenated frames in one buffer, and so is more general.
+ As a result however, it requires more computation and entire frames to be passed to it,
+ as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
+
+ 'src' is the start of a zstd compressed frame.
+ @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
+ note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ When `return==0`, data to decompress could be any size.
+ In which case, it's necessary to use streaming mode to decompress data.
+ Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+ (For example, data may be necessarily cut into blocks <= 16 KB).
+ note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ note 3 : decompressed size can be very large (64-bits value),
+ potentially larger than what local system can handle as a single memory segment.
+ In which case, it's necessary to use streaming mode to decompress data.
+ note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ Always ensure result fits within application's authorized limits.
+ Each application can set its own limits.
+ note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.
</p></pre><BR>
<h3>Helper functions</h3><pre></pre><b><pre>int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b>
@@ -106,28 +106,28 @@ const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable strin
</pre></b><BR>
<a name="Chapter4"></a><h2>Explicit memory management</h2><pre></pre>
-<h3>Compression context</h3><pre> When compressing many times,
- it is recommended to allocate a context just once, and re-use it for each successive compression operation.
- This will make workload friendlier for system's memory.
- Use one context per thread for parallel execution in multi-threaded environments.
+<h3>Compression context</h3><pre> When compressing many times,
+ it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
</pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTD_CCtx* ZSTD_createCCtx(void);
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
</pre></b><BR>
<pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
-</b><p> Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
+</b><p> Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
</p></pre><BR>
-<h3>Decompression context</h3><pre> When decompressing many times,
- it is recommended to allocate a context just once, and re-use it for each successive compression operation.
- This will make workload friendlier for system's memory.
- Use one context per thread for parallel execution in multi-threaded environments.
+<h3>Decompression context</h3><pre> When decompressing many times,
+  it is recommended to allocate a context just once, and re-use it for each successive decompression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
</pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTD_DCtx* ZSTD_createDCtx(void);
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</pre></b><BR>
<pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
+</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
</p></pre><BR>
<a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
@@ -169,9 +169,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict);
-</b><p> Compression using a digested Dictionary.
- Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
- Note that compression level is decided during dictionary creation.
+</b><p> Compression using a digested Dictionary.
+ Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ Note that compression level is decided during dictionary creation.
+ Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)
</p></pre><BR>
<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
@@ -399,7 +400,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
</p></pre><BR>
<pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
- ZSTD_parameters params, ZSTD_customMem customMem);
+ ZSTD_compressionParameters cParams, ZSTD_customMem customMem);
</b><p> Create a ZSTD_CDict using external alloc and free, and customized compression parameters
</p></pre><BR>
@@ -426,12 +427,19 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
both values are optional, select `0` if unknown.
</p></pre><BR>
-<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- ZSTD_parameters params);
-</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter
+<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ ZSTD_parameters params);
+</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
+</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
</p></pre><BR>
<a name="Chapter15"></a><h2>Advanced decompression functions</h2><pre></pre>
@@ -491,20 +499,29 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
Note : this use case also happens when using a non-conformant dictionary.
- `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- This is not a Zstandard frame.
- When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code.
+ When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code.
</p></pre><BR>
<a name="Chapter16"></a><h2>Advanced streaming functions</h2><pre></pre>
<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); </b>/**< size of CStream is variable, depending primarily on compression level */<b>
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */<b>
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< note: a dict will not be used if dict == NULL or dictSize < 8 */<b>
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b>
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); </b>/**< note : cdict will just be referenced, and must outlive compression session */<b>
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); </b>/**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */<b>
-size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams); </b>/**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */<b>
</pre></b><BR>
+<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+</b><p> start a new compression job, using same parameters from previous job.
+   This is typically useful to skip the dictionary loading stage, since it will re-use it in-place.
+ Note that zcs must be init at least once before using ZSTD_resetCStream().
+ pledgedSrcSize==0 means "srcSize unknown".
+ If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ @return : 0, or an error code (which can be tested using ZSTD_isError())
+</p></pre><BR>
+
<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: a dict will not be used if dict == NULL or dictSize < 8 */<b>
@@ -552,10 +569,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
<h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b>
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
+size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); </b>/* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */<b>
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */<b>
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */<b>
-size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
</pre></b><BR>
<a name="Chapter19"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
A ZSTD_DCtx object is required to track streaming operations.
@@ -640,19 +656,20 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
- Compressing and decompressing require a context structure
+ Use ZSTD_createCCtx() and ZSTD_createDCtx()
- It is necessary to init context before starting
- + compression : ZSTD_compressBegin()
- + decompression : ZSTD_decompressBegin()
- + variants _usingDict() are also allowed
- + copyCCtx() and copyDCtx() work too
- - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- + If you need to compress more, cut data into multiple blocks
- + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+ + compression : any ZSTD_compressBegin*() variant, including with dictionary
+ + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+ + copyCCtx() and copyDCtx() can be used too
+ - Block size is limited, it must be <= ZSTD_getBlockSizeMax() <= ZSTD_BLOCKSIZE_ABSOLUTEMAX
+ + If input is larger than a block size, it's necessary to split input data into multiple blocks
+ + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
In which case, nothing is produced into `dst`.
+ User must test for such outcome and deal directly with uncompressed data
+ ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
- + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
- Use ZSTD_insertBlock() in such a case.
+ + In case of multiple successive blocks, should some of them be uncompressed,
+ decoder must be informed of their existence in order to follow proper history.
+ Use ZSTD_insertBlock() for such a case.
<BR></pre>
<h3>Raw zstd block functions</h3><pre></pre><b><pre>size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);