vendor/zstd/1.2.0

author: Baptiste Daroussin <bapt@FreeBSD.org> 2017-05-06 10:17:59 +0000
committer: Baptiste Daroussin <bapt@FreeBSD.org> 2017-05-06 10:17:59 +0000
commit: ffcbc2d7ba03067492045e4cbead519a3b3c27ef (patch)
tree: df436f4253158a7d5a4875e54cd7d273dd5334a6 /doc
parent: ab984b3e51d32af796fe89e130f57bf58b8a14b2 (diff)
8 files changed, 419 insertions, 368 deletions
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index ae4eaa81c6ae..7c8d8114d401 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -27,16 +27,19 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
 
 /// Get the decompressed size of an input stream so memory can be allocated in
 /// advance
+/// Returns -1 if the size can't be determined
 size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
 
 /******* UTILITY MACROS AND TYPES *********************************************/
-// Max block size decompressed size is 128 KB and literal blocks must be smaller
-// than that
+// Max block size decompressed size is 128 KB and literal blocks can't be
+// larger than their block
 #define MAX_LITERALS_SIZE ((size_t)128 * 1024)
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 
+/// This decoder calls exit(1) when it encounters an error, however a production
+/// library should propagate error codes
 #define ERROR(s)                                                               \
     do {                                                                       \
         fprintf(stderr, "Error: %s\n", s);                                     \
@@ -67,29 +70,31 @@ typedef int64_t i64;
 /// decompression functions.
 
 /*** IO STREAM OPERATIONS *************/
-/// These structs are the interface for IO, and do bounds checking on all
-/// operations.  They should be used opaquely to ensure safety.
 
-/// Output is always done byte-by-byte
+/// ostream_t/istream_t are used to wrap the pointers/length data passed into
+/// ZSTD_decompress, so that all IO operations are safely bounds checked
+/// They are written/read forward, and reads are treated as little-endian
+/// They should be used opaquely to ensure safety
 typedef struct {
     u8 *ptr;
     size_t len;
 } ostream_t;
 
-/// Input often reads a few bits at a time, so maintain an internal offset
 typedef struct {
     const u8 *ptr;
-    int bit_offset;
     size_t len;
+
+    // Input often reads a few bits at a time, so maintain an internal offset
+    int bit_offset;
 } istream_t;
 
 /// The following two functions are the only ones that allow the istream to be
 /// non-byte aligned
 
 /// Reads `num` bits from a bitstream, and updates the internal offset
-static inline u64 IO_read_bits(istream_t *const in, const int num);
-/// Rewinds the stream by `num` bits
-static inline void IO_rewind_bits(istream_t *const in, const int num);
+static inline u64 IO_read_bits(istream_t *const in, const int num_bits);
+/// Backs-up the stream by `num` bits so they can be read again
+static inline void IO_rewind_bits(istream_t *const in, const int num_bits);
 /// If the remaining bits in a byte will be unused, advance to the end of the
 /// byte
 static inline void IO_align_stream(istream_t *const in);
@@ -101,30 +106,31 @@ static inline void IO_write_byte(ostream_t *const out, u8 symb);
 /// be byte aligned.
 static inline size_t IO_istream_len(const istream_t *const in);
 
-/// Returns a pointer where `len` bytes can be read, and advances the internal
-/// state.  The stream must be byte aligned.
+/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
+/// was skipped.  The stream must be byte aligned.
 static inline const u8 *IO_read_bytes(istream_t *const in, size_t len);
-/// Returns a pointer where `len` bytes can be written, and advances the internal
-/// state.  The stream must be byte aligned.
+/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
+/// was skipped so it can be written to.
 static inline u8 *IO_write_bytes(ostream_t *const out, size_t len);
 
 /// Advance the inner state by `len` bytes.  The stream must be byte aligned.
 static inline void IO_advance_input(istream_t *const in, size_t len);
 
-/// Returns an `ostream_t` constructed from the given pointer and length
+/// Returns an `ostream_t` constructed from the given pointer and length.
 static inline ostream_t IO_make_ostream(u8 *out, size_t len);
-/// Returns an `istream_t` constructed from the given pointer and length
+/// Returns an `istream_t` constructed from the given pointer and length.
 static inline istream_t IO_make_istream(const u8 *in, size_t len);
 
-/// Returns an `istream_t` with the same base as `in`, and length `len`
-/// Then, advance `in` to account for the consumed bytes
-/// `in` must be byte aligned
+/// Returns an `istream_t` with the same base as `in`, and length `len`.
+/// Then, advance `in` to account for the consumed bytes.
+/// `in` must be byte aligned.
 static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len);
 /*** END IO STREAM OPERATIONS *********/
 
 /*** BITSTREAM OPERATIONS *************/
-/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
-static inline u64 read_bits_LE(const u8 *src, const int num,
+/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits,
+/// and return them interpreted as a little-endian unsigned integer.
+static inline u64 read_bits_LE(const u8 *src, const int num_bits,
                                const size_t offset);
 
 /// Read bits from the end of a HUF or FSE bitstream.  `offset` is in bits, so
@@ -136,9 +142,8 @@ static inline u64 STREAM_read_bits(const u8 *src, const int bits,
 /*** END BITSTREAM OPERATIONS *********/
 
 /*** BIT COUNTING OPERATIONS **********/
-/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to
-/// `num`, or `-1` if `num == 0`.
-static inline int log2inf(const u64 num);
+/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0`
+static inline int highest_set_bit(const u64 num);
 /*** END BIT COUNTING OPERATIONS ******/
 
 /*** HUFFMAN PRIMITIVES ***************/
@@ -384,8 +389,8 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
         parse_dictionary(&parsed_dict, (const u8 *)dict, dict_len);
     }
 
-    istream_t in = {(const u8 *)src, 0, src_len};
-    ostream_t out = {(u8 *)dst, dst_len};
+    istream_t in = IO_make_istream(src, src_len);
+    ostream_t out = IO_make_ostream(dst, dst_len);
 
     // "A content compressed by Zstandard is transformed into a Zstandard frame.
     // Multiple frames can be appended into a single file or stream. A frame is
@@ -633,6 +638,7 @@ static void frame_context_apply_dict(frame_context_t *const ctx,
         FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
         FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
 
+        // Copy the repeated offsets
         memcpy(ctx->previous_offsets, dict->previous_offsets,
                sizeof(ctx->previous_offsets));
     }
@@ -668,7 +674,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
             // number of bytes to read and copy."
             const u8 *const read_ptr = IO_read_bytes(in, block_len);
             u8 *const write_ptr = IO_write_bytes(out, block_len);
-            //
+
             // Copy the raw data into the output
             memcpy(write_ptr, read_ptr, block_len);
 
@@ -682,7 +688,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
             const u8 *const read_ptr = IO_read_bytes(in, 1);
             u8 *const write_ptr = IO_write_bytes(out, block_len);
 
-            // Copy `block_len` copies of `streams->src[0]` to the output
+            // Copy `block_len` copies of `read_ptr[0]` to the output
             memset(write_ptr, read_ptr[0], block_len);
 
             ctx->current_total_output += block_len;
@@ -751,7 +757,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
                                          u8 **const literals,
                                          const int block_type,
                                          const int size_format);
-static void decode_huf_table(istream_t *const in, HUF_dtable *const dtable);
+static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in);
 static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
                                     int *const num_symbs);
 
@@ -894,12 +900,12 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
     istream_t huf_stream = IO_make_sub_istream(in, compressed_size);
 
     if (block_type == 2) {
-        // Decode provided Huffman table
+        // Decode the provided Huffman table
         // "This section is only present when Literals_Block_Type type is
         // Compressed_Literals_Block (2)."
 
         HUF_free_dtable(&ctx->literals_dtable);
-        decode_huf_table(&huf_stream, &ctx->literals_dtable);
+        decode_huf_table(&ctx->literals_dtable, &huf_stream);
     } else {
         // If the previous Huffman table is being repeated, ensure it exists
         if (!ctx->literals_dtable.symbols) {
@@ -922,13 +928,13 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
 }
 
 // Decode the Huffman table description
-static void decode_huf_table(istream_t *const in, HUF_dtable *const dtable) {
-    const u8 header = IO_read_bits(in, 8);
-
+static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) {
     // "All literal values from zero (included) to last present one (excluded)
     // are represented by Weight with values from 0 to Max_Number_of_Bits."
 
     // "This is a single byte value (0-255), which describes how to decode the list of weights."
+    const u8 header = IO_read_bits(in, 8);
+
     u8 weights[HUF_MAX_SYMBS];
     memset(weights, 0, sizeof(weights));
 
@@ -997,7 +1003,7 @@ typedef struct {
     u16 ll_state;
     u16 of_state;
     u16 ml_state;
-} sequence_state_t;
+} sequence_states_t;
 
 /// Different modes to signal to decode_seq_tables what to do
 typedef enum {
@@ -1052,10 +1058,10 @@ static void decompress_sequences(frame_context_t *const ctx,
                                  istream_t *const in,
                                  sequence_command_t *const sequences,
                                  const size_t num_sequences);
-static sequence_command_t decode_sequence(sequence_state_t *const state,
+static sequence_command_t decode_sequence(sequence_states_t *const state,
                                           const u8 *const src,
                                           i64 *const offset);
-static void decode_seq_table(istream_t *const in, FSE_dtable *const table,
+static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
                                const seq_part_t type, const seq_mode_t mode);
 
 static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
@@ -1131,34 +1137,33 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
     // Offsets
     // Match Lengths"
     // Update the tables we have stored in the context
-    decode_seq_table(in, &ctx->ll_dtable, seq_literal_length,
+    decode_seq_table(&ctx->ll_dtable, in, seq_literal_length,
                      (compression_modes >> 6) & 3);
 
-    decode_seq_table(in, &ctx->of_dtable, seq_offset,
+    decode_seq_table(&ctx->of_dtable, in, seq_offset,
                      (compression_modes >> 4) & 3);
 
-    decode_seq_table(in, &ctx->ml_dtable, seq_match_length,
+    decode_seq_table(&ctx->ml_dtable, in, seq_match_length,
                      (compression_modes >> 2) & 3);
 
-    // Check to make sure none of the tables are uninitialized
-    if (!ctx->ll_dtable.symbols || !ctx->of_dtable.symbols ||
-        !ctx->ml_dtable.symbols) {
-        CORRUPTION();
-    }
 
-    sequence_state_t state;
-    // Copy the context's tables into the local state
-    memcpy(&state.ll_table, &ctx->ll_dtable, sizeof(FSE_dtable));
-    memcpy(&state.of_table, &ctx->of_dtable, sizeof(FSE_dtable));
-    memcpy(&state.ml_table, &ctx->ml_dtable, sizeof(FSE_dtable));
+    sequence_states_t states;
 
-    size_t len = IO_istream_len(in);
+    // Initialize the decoding tables
+    {
+        states.ll_table = ctx->ll_dtable;
+        states.of_table = ctx->of_dtable;
+        states.ml_table = ctx->ml_dtable;
+    }
+
+    const size_t len = IO_istream_len(in);
     const u8 *const src = IO_read_bytes(in, len);
 
     // "After writing the last bit containing information, the compressor writes
     // a single 1-bit and then fills the byte with 0-7 0 bits of padding."
-    const int padding = 8 - log2inf(src[len - 1]);
-    i64 offset = len * 8 - padding;
+    const int padding = 8 - highest_set_bit(src[len - 1]);
+    // The offset starts at the end because FSE streams are read backwards
+    i64 bit_offset = len * 8 - padding;
 
     // "The bitstream starts with initial state values, each using the required
     // number of bits in their respective accuracy, decoded previously from
@@ -1166,24 +1171,22 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
     //
     // It starts by Literals_Length_State, followed by Offset_State, and finally
     // Match_Length_State."
-    FSE_init_state(&state.ll_table, &state.ll_state, src, &offset);
-    FSE_init_state(&state.of_table, &state.of_state, src, &offset);
-    FSE_init_state(&state.ml_table, &state.ml_state, src, &offset);
+    FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset);
+    FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset);
+    FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset);
 
     for (size_t i = 0; i < num_sequences; i++) {
         // Decode sequences one by one
-        sequences[i] = decode_sequence(&state, src, &offset);
+        sequences[i] = decode_sequence(&states, src, &bit_offset);
     }
 
-    if (offset != 0) {
+    if (bit_offset != 0) {
         CORRUPTION();
     }
-
-    // Don't free tables so they can be used in the next block
 }
 
 // Decode a single sequence and update the state
-static sequence_command_t decode_sequence(sequence_state_t *const state,
+static sequence_command_t decode_sequence(sequence_states_t *const states,
                                           const u8 *const src,
                                           i64 *const offset) {
     // "Each symbol is a code in its own context, which specifies Baseline and
@@ -1191,9 +1194,9 @@ static sequence_command_t decode_sequence(sequence_state_t *const state,
     // additional bits in the same bitstream."
 
     // Decode symbols, but don't update states
-    const u8 of_code = FSE_peek_symbol(&state->of_table, state->of_state);
-    const u8 ll_code = FSE_peek_symbol(&state->ll_table, state->ll_state);
-    const u8 ml_code = FSE_peek_symbol(&state->ml_table, state->ml_state);
+    const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state);
+    const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state);
+    const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state);
 
     // Offset doesn't need a max value as it's not decoded using a table
     if (ll_code > SEQ_MAX_CODES[seq_literal_length] ||
@@ -1221,17 +1224,18 @@ static sequence_command_t decode_sequence(sequence_state_t *const state,
     // then Offset_State."
     // If the stream is complete don't read bits to update state
     if (*offset != 0) {
-        FSE_update_state(&state->ll_table, &state->ll_state, src, offset);
-        FSE_update_state(&state->ml_table, &state->ml_state, src, offset);
-        FSE_update_state(&state->of_table, &state->of_state, src, offset);
+        FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
+        FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
+        FSE_update_state(&states->of_table, &states->of_state, src, offset);
     }
 
     return seq;
 }
 
 /// Given a sequence part and table mode, decode the FSE distribution
-static void decode_seq_table(istream_t *const in, FSE_dtable *const table,
-                               const seq_part_t type, const seq_mode_t mode) {
+/// Errors if the mode is `seq_repeat` without a pre-existing table in `table`
+static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
+                             const seq_part_t type, const seq_mode_t mode) {
     // Constant arrays indexed by seq_part_t
     const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST,
                                                 SEQ_OFFSET_DEFAULT_DIST,
@@ -1272,12 +1276,17 @@ static void decode_seq_table(istream_t *const in, FSE_dtable *const table,
         // "Repeat_Mode : re-use distribution table from previous compressed
         // block."
         // Nothing to do here, table will be unchanged
+        if (!table->symbols) {
+            // This mode is invalid if we don't already have a table
+            CORRUPTION();
+        }
         break;
     default:
         // Impossible, as mode is from 0-3
         IMPOSSIBLE();
         break;
     }
+
 }
 /******* END SEQUENCE DECODING ************************************************/
 
@@ -1296,6 +1305,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
         const sequence_command_t seq = sequences[i];
 
         {
+            // If the sequence asks for more literals than are left, the
+            // sequence must be corrupted
             if (seq.literal_length > IO_istream_len(&litstream)) {
                 CORRUPTION();
             }
@@ -1336,7 +1347,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
                 // as per the exception listed above
                 offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
 
-                // If idx == 1 we don't need to modify offset_hist[2]
+                // If idx == 1 we don't need to modify offset_hist[2], since
+                // we're using the second-most recent code
                 if (idx > 1) {
                     offset_hist[2] = offset_hist[1];
                 }
@@ -1344,6 +1356,8 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
                 offset_hist[0] = offset;
             }
         } else {
+            // When it's not a repeat offset:
+            // "if (Offset_Value > 3) offset = Offset_Value - 3;"
             offset = seq.offset - 3;
 
             // Shift back history
@@ -1391,11 +1405,11 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
         total_output += seq.match_length;
     }
 
+    // Copy any leftover literals
     {
         size_t len = IO_istream_len(&litstream);
         u8 *const write_ptr = IO_write_bytes(out, len);
         const u8 *const read_ptr = IO_read_bytes(&litstream, len);
-        // Copy any leftover literals
         memcpy(write_ptr, read_ptr, len);
 
         total_output += len;
@@ -1517,10 +1531,10 @@ static void parse_dictionary(dictionary_t *const dict, const u8 *src,
     // recent offsets (instead of using {1,4,8}), stored in order, 4-bytes
     // little-endian each, for a total of 12 bytes. Each recent offset must have
     // a value < dictionary size."
-    decode_huf_table(&in, &dict->literals_dtable);
-    decode_seq_table(&in, &dict->of_dtable, seq_offset, seq_fse);
-    decode_seq_table(&in, &dict->ml_dtable, seq_match_length, seq_fse);
-    decode_seq_table(&in, &dict->ll_dtable, seq_literal_length, seq_fse);
+    decode_huf_table(&dict->literals_dtable, &in);
+    decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse);
+    decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse);
+    decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse);
 
     // Read in the previous offset history
     dict->previous_offsets[0] = IO_read_bits(&in, 32);
@@ -1571,20 +1585,20 @@ static void free_dictionary(dictionary_t *const dict) {
 /******* IO STREAM OPERATIONS *************************************************/
 #define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream")
 /// Reads `num` bits from a bitstream, and updates the internal offset
-static inline u64 IO_read_bits(istream_t *const in, const int num) {
-    if (num > 64 || num <= 0) {
+static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
+    if (num_bits > 64 || num_bits <= 0) {
         ERROR("Attempt to read an invalid number of bits");
     }
 
-    const size_t bytes = (num + in->bit_offset + 7) / 8;
-    const size_t full_bytes = (num + in->bit_offset) / 8;
+    const size_t bytes = (num_bits + in->bit_offset + 7) / 8;
+    const size_t full_bytes = (num_bits + in->bit_offset) / 8;
     if (bytes > in->len) {
         INP_SIZE();
     }
 
-    const u64 result = read_bits_LE(in->ptr, num, in->bit_offset);
+    const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset);
 
-    in->bit_offset = (num + in->bit_offset) % 8;
+    in->bit_offset = (num_bits + in->bit_offset) % 8;
     in->ptr += full_bytes;
     in->len -= full_bytes;
 
@@ -1593,16 +1607,21 @@ static inline u64 IO_read_bits(istream_t *const in, const int num) {
 
 /// If a non-zero number of bits have been read from the current byte, advance
 /// the offset to the next byte
-static inline void IO_rewind_bits(istream_t *const in, int num) {
-    if (num < 0) {
+static inline void IO_rewind_bits(istream_t *const in, int num_bits) {
+    if (num_bits < 0) {
         ERROR("Attempting to rewind stream by a negative number of bits");
     }
 
-    const int new_offset = in->bit_offset - num;
-    const i64 bytes = (new_offset - 7) / 8;
+    // move the offset back by `num_bits` bits
+    const int new_offset = in->bit_offset - num_bits;
+    // determine the number of whole bytes we have to rewind, rounding up to an
+    // integer number (e.g. if `new_offset == -5`, `bytes == 1`)
+    const i64 bytes = -(new_offset - 7) / 8;
 
-    in->ptr += bytes;
-    in->len -= bytes;
+    in->ptr -= bytes;
+    in->len += bytes;
+    // make sure the resulting `bit_offset` is positive, as mod in C does not
+    // convert numbers from negative to positive (e.g. -22 % 8 == -6)
     in->bit_offset = ((new_offset % 8) + 8) % 8;
 }
 
@@ -1683,33 +1702,26 @@ static inline ostream_t IO_make_ostream(u8 *out, size_t len) {
 
 /// Returns an `istream_t` constructed from the given pointer and length
 static inline istream_t IO_make_istream(const u8 *in, size_t len) {
-    return (istream_t) { in, 0, len };
+    return (istream_t) { in, len, 0 };
 }
 
 /// Returns an `istream_t` with the same base as `in`, and length `len`
 /// Then, advance `in` to account for the consumed bytes
 /// `in` must be byte aligned
 static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) {
-    if (len > in->len) {
-        INP_SIZE();
-    }
-    if (in->bit_offset != 0) {
-        UNALIGNED();
-    }
-    const istream_t sub = { in->ptr, in->bit_offset, len };
+    // Consume `len` bytes of the parent stream
+    const u8 *const ptr = IO_read_bytes(in, len);
 
-    in->ptr += len;
-    in->len -= len;
-
-    return sub;
+    // Make a substream using the pointer to those `len` bytes
+    return IO_make_istream(ptr, len);
 }
 /******* END IO STREAM OPERATIONS *********************************************/
 
 /******* BITSTREAM OPERATIONS *************************************************/
 /// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
-static inline u64 read_bits_LE(const u8 *src, const int num,
+static inline u64 read_bits_LE(const u8 *src, const int num_bits,
                                const size_t offset) {
-    if (num > 64) {
+    if (num_bits > 64) {
         ERROR("Attempt to read an invalid number of bits");
     }
 
@@ -1719,10 +1731,10 @@ static inline u64 read_bits_LE(const u8 *src, const int num,
     u64 res = 0;
 
     int shift = 0;
-    int left = num;
+    int left = num_bits;
     while (left > 0) {
         u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1);
-        // Dead the next byte, shift it to account for the offset, and then mask
+        // Read the next byte, shift it to account for the offset, and then mask
         // out the top part if we don't need all the bits
         res += (((u64)*src++ >> bit_offset) & mask) << shift;
         shift += 8 - bit_offset;
@@ -1761,7 +1773,7 @@ static inline u64 STREAM_read_bits(const u8 *const src, const int bits,
 /******* BIT COUNTING OPERATIONS **********************************************/
 /// Returns `x`, where `2^x` is the largest power of 2 less than or equal to
 /// `num`, or `-1` if `num == 0`.
-static inline int log2inf(const u64 num) {
+static inline int highest_set_bit(const u64 num) {
     for (int i = 63; i >= 0; i--) {
         if (((u64)1 << i) <= num) {
             return i;
@@ -1813,17 +1825,18 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
     // final-bit-flag. Consequently, a last byte of 0 is not possible. And the
     // final-bit-flag itself is not part of the useful bitstream. Hence, the
     // last byte contains between 0 and 7 useful bits."
-    const int padding = 8 - log2inf(src[len - 1]);
+    const int padding = 8 - highest_set_bit(src[len - 1]);
 
-    i64 offset = len * 8 - padding;
+    // Offset starts at the end because HUF streams are read backwards
+    i64 bit_offset = len * 8 - padding;
     u16 state;
 
-    HUF_init_state(dtable, &state, src, &offset);
+    HUF_init_state(dtable, &state, src, &bit_offset);
 
     size_t symbols_written = 0;
-    while (offset > -dtable->max_bits) {
+    while (bit_offset > -dtable->max_bits) {
         // Iterate over the stream, decoding one symbol at a time
-        IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &offset));
+        IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset));
         symbols_written++;
     }
     // "The process continues up to reading the required number of symbols per
@@ -1836,7 +1849,7 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
     // before the start of `src`
     // Therefore `offset`, the edge to start reading new bits at, should be
     // dtable->max_bits before the start of the stream
-    if (offset != -dtable->max_bits) {
+    if (bit_offset != -dtable->max_bits) {
         CORRUPTION();
     }
 
@@ -1960,7 +1973,7 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table,
     }
 
     // Find the first power of 2 larger than the sum
-    const int max_bits = log2inf(weight_sum) + 1;
+    const int max_bits = highest_set_bit(weight_sum) + 1;
     const u64 left_over = ((u64)1 << max_bits) - weight_sum;
     // If the left over isn't a power of 2, the weights are invalid
     if (left_over & (left_over - 1)) {
@@ -1969,7 +1982,7 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table,
 
     // left_over is used to find the last weight as it's not transmitted
     // by inverting 2^(weight - 1) we can determine the value of last_weight
-    const int last_weight = log2inf(left_over) + 1;
+    const int last_weight = highest_set_bit(left_over) + 1;
 
     for (int i = 0; i < num_symbs; i++) {
         // "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0"
@@ -2063,7 +2076,7 @@ static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
     // final-bit-flag. Consequently, a last byte of 0 is not possible. And the
     // final-bit-flag itself is not part of the useful bitstream. Hence, the
     // last byte contains between 0 and 7 useful bits."
-    const int padding = 8 - log2inf(src[len - 1]);
+    const int padding = 8 - highest_set_bit(src[len - 1]);
     i64 offset = len * 8 - padding;
 
     u16 state1, state2;
@@ -2184,7 +2197,7 @@ static void FSE_init_dtable(FSE_dtable *const dtable,
         u16 next_state_desc = state_desc[symbol]++;
         // Fills in the table appropriately, next_state_desc increases by symbol
         // over time, decreasing number of bits
-        dtable->num_bits[i] = (u8)(accuracy_log - log2inf(next_state_desc));
+        dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc));
         // Baseline increases until the bit threshold is passed, at which point
         // it resets to 0
         dtable->new_state_base[i] =
@@ -2235,7 +2248,7 @@ static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
     int symb = 0;
     while (remaining > 0 && symb < FSE_MAX_SYMBS) {
         // Log of the number of possible values we could read
-        int bits = log2inf(remaining + 1) + 1;
+        int bits = highest_set_bit(remaining + 1) + 1;
 
         u16 val = IO_read_bits(in, bits);
 
diff --git a/doc/images/Cspeed4.png b/doc/images/Cspeed4.png
index f0ca0ffba9c4..318204c00e96 100644
--- a/doc/images/Cspeed4.png
+++ b/doc/images/Cspeed4.png
diff --git a/doc/images/Dspeed4.png b/doc/images/Dspeed4.png
index eba485d0d1ed..b7baef1ff3f7 100644
--- a/doc/images/Dspeed4.png
+++ b/doc/images/Dspeed4.png
diff --git a/doc/images/dict-cr.png b/doc/images/dict-cr.png
index f555a46c7b99..f3a9ce2bdda6 100644
--- a/doc/images/dict-cr.png
+++ b/doc/images/dict-cr.png
diff --git a/doc/images/dict-cs.png b/doc/images/dict-cs.png
index ccc02b0d1713..55e5ef518fed 100644
--- a/doc/images/dict-cs.png
+++ b/doc/images/dict-cs.png
diff --git a/doc/images/dict-ds.png b/doc/images/dict-ds.png
index 858cad685509..1153f1b95fd5 100644
--- a/doc/images/dict-ds.png
+++ b/doc/images/dict-ds.png
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index d4b46548a3d2..1f212fea2305 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,8 @@ Distribution of this document is unlimited.
 
 ### Version
 
-0.2.4 (17/02/17)
+0.2.5 (31/03/17)
+
 
 Introduction
 ------------
@@ -109,7 +110,7 @@ The structure of a single Zstandard frame is following:
 
 __`Magic_Number`__
 
-4 Bytes, little-endian format.
+4 Bytes, __little-endian__ format.
 Value : 0xFD2FB528
 
 __`Frame_Header`__
@@ -127,7 +128,7 @@ An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
 The content checksum is the result
 of [xxh64() hash function](http://www.xxhash.org)
 digesting the original (decoded) data as input, and a seed of zero.
-The low 4 bytes of the checksum are stored in little endian format.
+The low 4 bytes of the checksum are stored in __little-endian__ format.
 
 ### `Frame_Header`
 
@@ -154,41 +155,42 @@ Decoding this byte is enough to tell the size of `Frame_Header`.
 | 2          | `Content_Checksum_flag`   |
 | 1-0        | `Dictionary_ID_flag`      |
 
-In this table, bit 7 the is highest bit, while bit 0 the is lowest.
+In this table, bit 7 is the highest bit, while bit 0 is the lowest one.
 
 __`Frame_Content_Size_flag`__
 
 This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`),
-specifying if decompressed data size is provided within the header.
-The `Flag_Value` can be converted into `Field_Size`,
+specifying if `Frame_Content_Size` (the decompressed data size)
+is provided within the header.
+`Flag_Value` provides `FCS_Field_Size`,
 which is the number of bytes used by `Frame_Content_Size`
 according to the following table:
 
-|`Flag_Value`|    0   |  1  |  2  |  3  |
-| ---------- | ------ | --- | --- | --- |
-|`Field_Size`| 0 or 1 |  2  |  4  |  8  |
+|  `Flag_Value`  |    0   |  1  |  2  |  3  |
+| -------------- | ------ | --- | --- | --- |
+|`FCS_Field_Size`| 0 or 1 |  2  |  4  |  8  |
 
-When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` :
+When `Flag_Value` is `0`, `FCS_Field_Size` depends on `Single_Segment_flag` :
 if `Single_Segment_flag` is set, `Field_Size` is 1.
-Otherwise, `Field_Size` is 0 (content size not provided).
+Otherwise, `Field_Size` is 0 : `Frame_Content_Size` is not provided.
 
 __`Single_Segment_flag`__
 
 If this flag is set,
 data must be regenerated within a single continuous memory segment.
 
-In this case, `Frame_Content_Size` is necessarily present,
-but `Window_Descriptor` byte is skipped.
+In this case, `Window_Descriptor` byte is skipped,
+but `Frame_Content_Size` is necessarily present.
 As a consequence, the decoder must allocate a memory segment
 of size equal or bigger than `Frame_Content_Size`.
 
 In order to preserve the decoder from unreasonable memory requirements,
-a decoder can reject a compressed frame
+a decoder is allowed to reject a compressed frame
 which requests a memory size beyond decoder's authorized range.
 
 For broader compatibility, decoders are recommended to support
 memory sizes of at least 8 MB.
-This is just a recommendation,
+This is only a recommendation,
 each decoder is free to support higher or lower limits,
 depending on local limitations.
 
@@ -224,37 +226,38 @@ It also specifies the size of this field as `Field_Size`.
 
 #### `Window_Descriptor`
 
-Provides guarantees on maximum back-reference distance
-that will be used within compressed data.
+Provides guarantees on minimum memory buffer required to decompress a frame.
 This information is important for decoders to allocate enough memory.
 
-The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set.
-In this case, the maximum back-reference distance is the content size itself,
-which can be any value from 1 to 2^64-1 bytes (16 EB).
+The `Window_Descriptor` byte is optional.
+When `Single_Segment_flag` is set, `Window_Descriptor` is not present.
+In this case, `Window_Size` is `Frame_Content_Size`,
+which can be any value from 0 to 2^64-1 bytes (16 ExaBytes).
 
 | Bit numbers |     7-3    |     2-0    |
 | ----------- | ---------- | ---------- |
 | Field name  | `Exponent` | `Mantissa` |
 
-Maximum distance is given by the following formulas :
+The minimum memory buffer size is called `Window_Size`.
+It is described by the following formulas :
 ```
 windowLog = 10 + Exponent;
 windowBase = 1 << windowLog;
 windowAdd = (windowBase / 8) * Mantissa;
 Window_Size = windowBase + windowAdd;
 ```
-The minimum window size is 1 KB.
-The maximum size is `15*(1<<38)` bytes, which is 1.875 TB.
+The minimum `Window_Size` is 1 KB.
+The maximum `Window_Size` is `(1<<41) + 7*(1<<38)` bytes, which is 3.75 TB.
 
 To properly decode compressed data,
 a decoder will need to allocate a buffer of at least `Window_Size` bytes.
 
 In order to preserve decoder from unreasonable memory requirements,
-a decoder can refuse a compressed frame
+a decoder is allowed to reject a compressed frame
 which requests a memory size beyond decoder's authorized range.
 
 For improved interoperability,
-decoders are recommended to be compatible with window sizes of 8 MB,
+decoders are recommended to be compatible with `Window_Size >= 8 MB`,
 and encoders are recommended to not request more than 8 MB.
 It's merely a recommendation though,
 decoders are free to support larger or lower limits,
@@ -264,112 +267,118 @@ depending on local limitations.
 
 This is a variable size field, which contains
 the ID of the dictionary required to properly decode the frame.
-Note that this field is optional. When it's not present,
+`Dictionary_ID` field is optional. When it's not present,
 it's up to the decoder to make sure it uses the correct dictionary.
-Format is little-endian.
 
 Field size depends on `Dictionary_ID_flag`.
 1 byte can represent an ID 0-255.
 2 bytes can represent an ID 0-65535.
 4 bytes can represent an ID 0-4294967295.
+Format is __little-endian__.
 
 It's allowed to represent a small ID (for example `13`)
-with a large 4-bytes dictionary ID, losing some compacity in the process.
+with a large 4-bytes dictionary ID, even if it is less efficient.
 
 _Reserved ranges :_
 If the frame is going to be distributed in a private environment,
 any dictionary ID can be used.
 However, for public distribution of compressed frames using a dictionary,
-the following ranges are reserved for future use and should not be used :
-- low range : 1 - 32767
-- high range : >= (2^31)
-
+the following ranges are reserved and shall not be used :
+- low range  : `<= 32767`
+- high range : `>= (1 << 31)`
 
 #### `Frame_Content_Size`
 
 This is the original (uncompressed) size. This information is optional.
-The `Field_Size` is provided according to value of `Frame_Content_Size_flag`.
-The `Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes.
-Format is little-endian.
-
-| `Field_Size` |    Range   |
-| ------------ | ---------- |
-|      1       |   0 - 255  |
-|      2       | 256 - 65791|
-|      4       | 0 - 2^32-1 |
-|      8       | 0 - 2^64-1 |
-
-When `Field_Size` is 1, 4 or 8 bytes, the value is read directly.
-When `Field_Size` is 2, _the offset of 256 is added_.
+`Frame_Content_Size` uses a variable number of bytes, provided by `FCS_Field_Size`.
+`FCS_Field_Size` is provided by the value of `Frame_Content_Size_flag`.
+`FCS_Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes.
+
+| `FCS_Field_Size` |    Range   |
+| ---------------- | ---------- |
+|        0         |   unknown  |
+|        1         |   0 - 255  |
+|        2         | 256 - 65791|
+|        4         | 0 - 2^32-1 |
+|        8         | 0 - 2^64-1 |
+
+`Frame_Content_Size` format is __little-endian__.
+When `FCS_Field_Size` is 1, 4 or 8 bytes, the value is read directly.
+When `FCS_Field_Size` is 2, _the offset of 256 is added_.
 It's allowed to represent a small size (for example `18`) using any compatible variant.
 
+
 Blocks
 -------
-After the magic number and header of each block,
-there are some number of blocks.
-Each frame must have at least one block  but there is no upper limit
-on the number of blocks per frame.
+
+After `Magic_Number` and `Frame_Header`, there are some number of blocks.
+Each frame must have at least one block,
+but there is no upper limit on the number of blocks per frame.
 
 The structure of a block is as follows:
 
-| `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` |
-|:------------:|:------------:|:------------:|:---------------:|
-|   1 bit      |  2 bits      |  21 bits     |  n bytes        |
+| `Block_Header` | `Block_Content` |
+|:--------------:|:---------------:|
+|    3 bytes     |     n bytes     |
+
+`Block_Header` uses 3 bytes, written using __little-endian__ convention.
+It contains 3 fields :
 
-The block header (`Last_Block`, `Block_Type`, and `Block_Size`) uses 3-bytes.
+| `Last_Block` | `Block_Type` | `Block_Size` |
+|:------------:|:------------:|:------------:|
+|    bit 0     |  bits 1-2    |  bits 3-23   |
 
 __`Last_Block`__
 
 The lowest bit signals if this block is the last one.
-The frame will end after this one.
+The frame will end after this last block.
 It may be followed by an optional `Content_Checksum`
 (see [Zstandard Frames](#zstandard-frames)).
 
-__`Block_Type` and `Block_Size`__
-
-The next 2 bits represent the `Block_Type`,
-while the remaining 21 bits represent the `Block_Size`.
-Format is __little-endian__.
+__`Block_Type`__
 
+The next 2 bits represent the `Block_Type`.
 There are 4 block types :
 
-|    Value     |      0      |     1       |  2                 |    3      |
+|    Value     |      0      |      1      |         2          |     3     |
 | ------------ | ----------- | ----------- | ------------------ | --------- |
 | `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`|
 
 - `Raw_Block` - this is an uncompressed block.
-  `Block_Content` contains `Block_Size` bytes to read and copy
-  as decoded data.
+  `Block_Content` contains `Block_Size` bytes.
 
-- `RLE_Block` - this is a single byte, repeated N times.
-  `Block_Content` consists of a single byte,
-  and `Block_Size` is the number of times this byte should be repeated.
+- `RLE_Block` - this is a single byte, repeated `Block_Size` times.
+  `Block_Content` consists of a single byte.
+  On the decompression side, this byte must be repeated `Block_Size` times.
 
 - `Compressed_Block` - this is a [Zstandard compressed block](#compressed-blocks),
   explained later on.
   `Block_Size` is the length of `Block_Content`, the compressed data.
-  The decompressed size is unknown,
+  The decompressed size is not known,
   but its maximum possible value is guaranteed (see below)
 
 - `Reserved` - this is not a block.
   This value cannot be used with current version of this specification.
 
+__`Block_Size`__
+
+The upper 21 bits of `Block_Header` represent the `Block_Size`.
+
 Block sizes must respect a few rules :
-- In compressed mode, compressed size is always strictly less than decompressed size.
-- Block decompressed size is always <= maximum back-reference distance.
+- For `Compressed_Block`, `Block_Size` is always strictly less than decompressed size.
+- Block decompressed size is always <= `Window_Size`
 - Block decompressed size is always <= 128 KB.
 
-A data block is not necessarily "full" :
-since an arbitrary “flush” may happen anytime,
-block decompressed content can be any size (even empty),
+A block can contain any number of bytes (even empty),
 up to `Block_Maximum_Decompressed_Size`, which is the smallest of :
-- Maximum back-reference distance
+- `Window_Size`
 - 128 KB
 
+
 Compressed Blocks
 -----------------
-To decompress a compressed block, the compressed size must be provided from
-`Block_Size` field in the block header.
+To decompress a compressed block, the compressed size must be provided
+from `Block_Size` field within `Block_Header`.
 
 A compressed block consists of 2 sections :
 - [Literals Section](#literals-section)
@@ -381,36 +390,34 @@ data in [Sequence Execution](#sequence-execution)
 #### Prerequisites
 To decode a compressed block, the following elements are necessary :
 - Previous decoded data, up to a distance of `Window_Size`,
-  or all previous data when `Single_Segment_flag` is set.
-- List of "recent offsets" from the previous compressed block.
-- Decoding tables of the previous compressed block for each symbol type
+  or all previously decoded data when `Single_Segment_flag` is set.
+- List of "recent offsets" from previous `Compressed_Block`.
+- Decoding tables of previous `Compressed_Block` for each symbol type
   (literals, literals lengths, match lengths, offsets).
 
 Literals Section
 ----------------
-During sequence execution, symbols from the literals section
-During sequence phase, literals will be entangled with match copy operations.
 All literals are regrouped in the first part of the block.
-They can be decoded first, and then copied during sequence operations,
-or they can be decoded on the flow, as needed by sequence commands.
-
-| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
-| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- |
+They can be decoded first, and then copied during [Sequence Execution],
+or they can be decoded on the flow during [Sequence Execution].
 
 Literals can be stored uncompressed or compressed using Huffman prefix codes.
 When compressed, an optional tree description can be present,
 followed by 1 or 4 streams.
 
+| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
+| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- |
+
 
 #### `Literals_Section_Header`
 
 Header is in charge of describing how literals are packed.
 It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
-using little-endian convention.
+using __little-endian__ convention.
 
 | `Literals_Block_Type` | `Size_Format` | `Regenerated_Size` | [`Compressed_Size`] |
-| --------------------- | ------------- | ------------------ | ----------------- |
-|   2 bits              |  1 - 2 bits   |    5 - 20 bits     |    0 - 18 bits    |
+| --------------------- | ------------- | ------------------ | ------------------- |
+|       2 bits          |  1 - 2 bits   |    5 - 20 bits     |     0 - 18 bits     |
 
 In this representation, bits on the left are the lowest bits.
 
@@ -418,33 +425,38 @@ __`Literals_Block_Type`__
 
 This field uses 2 lowest bits of first byte, describing 4 different block types :
 
-| `Literals_Block_Type`         | Value |
-| ----------------------------- | ----- |
-| `Raw_Literals_Block`          |   0   |
-| `RLE_Literals_Block`          |   1   |
-| `Compressed_Literals_Block`   |   2   |
-| `Repeat_Stats_Literals_Block` |   3   |
+| `Literals_Block_Type`       | Value |
+| --------------------------- | ----- |
+| `Raw_Literals_Block`        |   0   |
+| `RLE_Literals_Block`        |   1   |
+| `Compressed_Literals_Block` |   2   |
+| `Treeless_Literals_Block`   |   3   |
 
 - `Raw_Literals_Block` - Literals are stored uncompressed.
-- `RLE_Literals_Block` - Literals consist of a single byte value repeated N times.
+- `RLE_Literals_Block` - Literals consist of a single byte value
+        repeated `Regenerated_Size` times.
 - `Compressed_Literals_Block` - This is a standard Huffman-compressed block,
         starting with a Huffman tree description.
         See details below.
-- `Repeat_Stats_Literals_Block` - This is a Huffman-compressed block,
+- `Treeless_Literals_Block` - This is a Huffman-compressed block,
         using Huffman tree _from previous Huffman-compressed literals block_.
-        Huffman tree description will be skipped.
-        Note: If this mode is used without any previous Huffman-table in the frame
-        (or [dictionary](#dictionary-format)), this should be treated as corruption.
+        `Huffman_Tree_Description` will be skipped.
+        Note: If this mode is triggered without any previous Huffman-table in the frame
+        (or [dictionary](#dictionary-format)), this should be treated as data corruption.
 
 __`Size_Format`__
 
 `Size_Format` is divided into 2 families :
 
-- For `Raw_Literals_Block` and `RLE_Literals_Block` it's enough to decode `Regenerated_Size`.
-- For `Compressed_Block`, its required to decode both `Compressed_Size`
-  and `Regenerated_Size` (the decompressed size). It will also decode the number of streams.
+- For `Raw_Literals_Block` and `RLE_Literals_Block`,
+  it's only necessary to decode `Regenerated_Size`.
+  There is no `Compressed_Size` field.
+- For `Compressed_Block` and `Treeless_Literals_Block`,
+  it's required to decode both `Compressed_Size`
+  and `Regenerated_Size` (the decompressed size).
+  It's also necessary to decode the number of streams (1 or 4).
 
-For values spanning several bytes, convention is little-endian.
+For values spanning several bytes, convention is __little-endian__.
 
 __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ :
 
@@ -463,9 +475,9 @@ __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ :
 
 Only Stream1 is present for these cases.
 Note : it's allowed to represent a short value (for example `13`)
-using a long format, accepting the increased compressed data size.
+using a long format, even if it's less efficient.
 
-__`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block`__ :
+__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :
 
 - Value 00 : _A single stream_.
                Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
@@ -480,67 +492,68 @@ __`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block
                Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
                `Literals_Section_Header` has 5 bytes.
 
-Both `Compressed_Size` and `Regenerated_Size` fields follow little-endian convention.
-Note: `Compressed_Size` __includes__ the size of the Huffman Tree description if it
-is present.
+Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
+Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
+_when_ it is present.
 
 ### Raw Literals Block
-The data in Stream1 is `Regenerated_Size` bytes long, and contains the raw literals data
-to be used in sequence execution.
+The data in Stream1 is `Regenerated_Size` bytes long,
+it contains the raw literals data to be used during [Sequence Execution].
 
 ### RLE Literals Block
 Stream1 consists of a single byte which should be repeated `Regenerated_Size` times
 to generate the decoded literals.
 
-### Compressed Literals Block and Repeat Stats Literals Block
-Both of these modes contain Huffman encoded data 
+### Compressed Literals Block and Treeless Literals Block
+Both of these modes contain Huffman encoded data.
+`Treeless_Literals_Block` does not have a `Huffman_Tree_Description`.
 
 #### `Huffman_Tree_Description`
 This section is only present when `Literals_Block_Type` type is `Compressed_Literals_Block` (`2`).
 The format of the Huffman tree description can be found at [Huffman Tree description](#huffman-tree-description).
-The size Huffman Tree description will be determined during the decoding process,
-and must be used to determine where the compressed Huffman streams begin.
+The size of `Huffman_Tree_Description` is determined during decoding process,
+it must be used to determine where streams begin.
+`Total_Streams_Size = Compressed_Size - Huffman_Tree_Description_Size`.
 
-If repeat stats mode is used, the Huffman table used in the previous compressed block will
-be used to decompress this block as well.
+For `Treeless_Literals_Block`,
+the Huffman table comes from previously compressed literals block.
 
-Huffman compressed data consists either 1 or 4 Huffman-coded streams.
+Huffman compressed data consists of either 1 or 4 Huffman-coded streams.
 
 If only one stream is present, it is a single bitstream occupying the entire
-remaining portion of the literals block, encoded as described at
+remaining portion of the literals block, encoded as described within
 [Huffman-Coded Streams](#huffman-coded-streams).
 
 If there are four streams, the literals section header only provides enough
-information to know the regenerated and compressed sizes of all four streams combined.
-The regenerated size of each stream is equal to `(totalSize+3)/4`, except for the last stream,
-which may be up to 3 bytes smaller, to reach a total decompressed size match that described
-in the literals header.
+information to know the decompressed and compressed sizes of all four streams _combined_.
+The decompressed size of each stream is equal to `(Regenerated_Size+3)/4`,
+except for the last stream which may be up to 3 bytes smaller,
+to reach a total decompressed size as specified in `Regenerated_Size`.
 
-The compressed size of each stream is provided explicitly: the first 6 bytes of the compressed
-data consist of three 2-byte little endian fields, describing the compressed sizes
-of the first three streams.
-The last streams size is computed from the total compressed size and the size of the other
-three streams.
+The compressed size of each stream is provided explicitly:
+the first 6 bytes of the compressed data consist of three 2-byte __little-endian__ fields,
+describing the compressed sizes of the first three streams.
+`Stream4_Size` is computed from total `Total_Streams_Size` minus sizes of other streams.
 
-`stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize`.
+`Stream4_Size = Total_Streams_Size - 6 - Stream1_Size - Stream2_Size - Stream3_Size`.
 
-Note: remember that totalCSize may be smaller than the `Compressed_Size` found in the literals
-block header as `Compressed_Size` also contains the size of the Huffman Tree description if it
-is present.
+Note: remember that `Total_Streams_Size` can be smaller than `Compressed_Size` in header,
+because `Compressed_Size` also contains `Huffman_Tree_Description_Size` when it is present.
 
 Each of these 4 bitstreams is then decoded independently as a Huffman-Coded stream,
 as described at [Huffman-Coded Streams](#huffman-coded-streams)
 
+
 Sequences Section
 -----------------
 A compressed block is a succession of _sequences_ .
 A sequence is a literal copy command, followed by a match copy command.
 A literal copy command specifies a length.
-It is the number of bytes to be copied (or extracted) from the literal section.
+It is the number of bytes to be copied (or extracted) from the Literals Section.
 A match copy command specifies an offset and a length.
 
 When all _sequences_ are decoded,
-if there is are any literals left in the _literal section_,
+if there are literals left in the _literal section_,
 these bytes are added at the end of the block.
 
 This is described in more detail in [Sequence Execution](#sequence-execution)
@@ -557,7 +570,7 @@ followed by the bitstream.
 | -------------------------- | ------------------------- | ---------------- | ---------------------- | --------- |
 
 To decode the `Sequences_Section`, it's required to know its size.
-This size is deduced from `blockSize - literalSectionSize`.
+This size is deduced from `Block_Size - Literals_Section_Size`.
 
 
 #### `Sequences_Section_Header`
@@ -572,7 +585,7 @@ This is a variable size field using between 1 and 3 bytes.
 Let's call its first byte `byte0`.
 - `if (byte0 == 0)` : there are no sequences.
             The sequence section stops there.
-            Regenerated content is defined entirely by literals section.
+            Decompressed content is defined entirely as Literals Section content.
 - `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
 - `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
 - `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
@@ -581,14 +594,14 @@ __Symbol compression modes__
 
 This is a single byte, defining the compression mode of each symbol type.
 
-|Bit number|   7-6                   |   5-4          |   3-2                |     1-0    |
+|Bit number|          7-6            |      5-4       |        3-2           |     1-0    |
 | -------- | ----------------------- | -------------- | -------------------- | ---------- |
 |Field name| `Literals_Lengths_Mode` | `Offsets_Mode` | `Match_Lengths_Mode` | `Reserved` |
 
 The last field, `Reserved`, must be all-zeroes.
 
 `Literals_Lengths_Mode`, `Offsets_Mode` and `Match_Lengths_Mode` define the `Compression_Mode` of
-literals lengths, offsets, and match lengths respectively.
+literals lengths, offsets, and match lengths symbols respectively.
 
 They follow the same enumeration :
 
@@ -598,17 +611,17 @@ They follow the same enumeration :
 
 - `Predefined_Mode` : A predefined FSE distribution table is used, defined in
           [default distributions](#default-distributions).
-          The table takes no space in the compressed data.
+          No distribution table will be present.
 - `RLE_Mode` : The table description consists of a single byte.
-          This code will be repeated for every sequence.
+          This code will be repeated for all sequences.
 - `Repeat_Mode` : The table used in the previous compressed block will be used again.
           No distribution table will be present.
-          Note: this includes RLE mode, so if repeat_mode follows rle_mode the same symbol will be repeated.
+          Note: this includes RLE mode, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated.
           If this mode is used without any previous sequence table in the frame
           (or [dictionary](#dictionary-format)) to repeat, this should be treated as corruption.
 - `FSE_Compressed_Mode` : standard FSE compression.
           A distribution table will be present.
-          The format of this distribution table is described in (FSE Table Description)[#fse-table-description].
+          The format of this distribution table is described in [FSE Table Description](#fse-table-description).
           Note that the maximum allowed accuracy log for literals length and match length tables is 9,
           and the maximum accuracy log for the offsets table is 8.
 
@@ -625,7 +638,7 @@ Literals length codes are values ranging from `0` to `35` included.
 They define lengths from 0 to 131071 bytes.
 The literals length is equal to the decoded `Baseline` plus
 the result of reading `Number_of_Bits` bits from the bitstream,
-as a little-endian value.
+as a __little-endian__ value.
 
 | `Literals_Length_Code` |         0-15           |
 | ---------------------- | ---------------------- |
@@ -654,7 +667,7 @@ Match length codes are values ranging from `0` to `52` included.
 They define lengths from 3 to 131074 bytes.
 The match length is equal to the decoded `Baseline` plus
 the result of reading `Number_of_Bits` bits from the bitstream,
-as a little-endian value.
+as a __little-endian__ value.
 
 | `Match_Length_Code` |         0-31            |
 | ------------------- | ----------------------- |
@@ -685,7 +698,7 @@ Recommendation is to support at least up to `22`.
 For information, at the time of this writing.
 the reference decoder supports a maximum `N` value of `28` in 64-bits mode.
 
-An offset code is also the number of additional bits to read in little-endian fashion,
+An offset code is also the number of additional bits to read in __little-endian__ fashion,
 and can be translated into an `Offset_Value` using the following formulas :
 
 ```
@@ -720,8 +733,8 @@ begins.
 FSE decoding requires a 'state' to be carried from symbol to symbol.
 For more explanation on FSE decoding, see the [FSE section](#fse).
 
-For sequence decoding, a separate state must be kept track of for each of
-literal lengths, offsets, and match lengths.
+For sequence decoding, a separate state keeps track of each
+literal lengths, offsets, and match lengths symbols.
 Some FSE primitives are also used.
 For more details on the operation of these primitives, see the [FSE section](#fse).
 
@@ -753,8 +766,7 @@ See the [description of the codes] for how to determine these values.
 [description of the codes]: #the-codes-for-literals-lengths-match-lengths-and-offsets
 
 Decoding starts by reading the `Number_of_Bits` required to decode `Offset`.
-It then does the same for `Match_Length`,
-and then for `Literals_Length`.
+It then does the same for `Match_Length`, and then for `Literals_Length`.
 This sequence is then used for [sequence execution](#sequence-execution).
 
 If it is not the last sequence in the block,
@@ -807,6 +819,7 @@ short offsetCodes_defaultDistribution[29] =
           1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
 ```
 
+
 Sequence Execution
 ------------------
 Once literals and sequences have been decoded,
@@ -826,7 +839,8 @@ in this case.
 
 The offset is defined as from the current position, so an offset of 6
 and a match length of 3 means that 3 bytes should be copied from 6 bytes back.
-Note that all offsets must be at most equal to the window size defined by the frame header.
+Note that all offsets leading to previously decoded data
+must be smaller than `Window_Size` defined in `Frame_Header_Descriptor`.
 
 #### Repeat offsets
 As seen in [Sequence Execution](#sequence-execution),
@@ -842,11 +856,10 @@ so an `offset_value` of 1 means `Repeated_Offset2`,
 an `offset_value` of 2 means `Repeated_Offset3`,
 and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`.
 
-In the first block, the offset history is populated with the following values : 1, 4 and 8 (in order).
+For the first block, the starting offset history is populated with the following values : 1, 4 and 8 (in order).
 
-Then each block gets its starting offset history from the ending values of the most recent compressed block.
-Note that non-compressed blocks are skipped,
-they do not contribute to offset history.
+Then each block gets its starting offset history from the ending values of the most recent `Compressed_Block`.
+Note that blocks which are not `Compressed_Block` are skipped, they do not contribute to offset history.
 
 [Offset Codes]: #offset-codes
 
@@ -859,6 +872,7 @@ This means that when `Repeated_Offset1` (most recent) is used, history is unmodi
 When `Repeated_Offset2` is used, it's swapped with `Repeated_Offset1`.
 If any other offset is used, it becomes `Repeated_Offset1` and the rest are shift back by one.
 
+
 Skippable Frames
 ----------------
 
@@ -878,7 +892,7 @@ Skippable frames defined in this specification are compatible with [LZ4] ones.
 
 __`Magic_Number`__
 
-4 Bytes, little-endian format.
+4 Bytes, __little-endian__ format.
 Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F.
 All 16 values are valid to identify a skippable frame.
 
@@ -886,13 +900,14 @@ __`Frame_Size`__
 
 This is the size, in bytes, of the following `User_Data`
 (without including the magic number nor the size field itself).
-This field is represented using 4 Bytes, little-endian format, unsigned 32-bits.
+This field is represented using 4 Bytes, __little-endian__ format, unsigned 32-bits.
 This means `User_Data` can’t be bigger than (2^32-1) bytes.
 
 __`User_Data`__
 
 The `User_Data` can be anything. Data will just be skipped by the decoder.
 
+
 Entropy Encoding
 ----------------
 Two types of entropy encoding are used by the Zstandard format:
@@ -900,7 +915,7 @@ FSE, and Huffman coding.
 
 FSE
 ---
-FSE, or FiniteStateEntropy is an entropy coding based on [ANS].
+FSE, short for Finite State Entropy, is an entropy codec based on [ANS].
 FSE encoding/decoding involves a state that is carried over between symbols,
 so decoding must be done in the opposite direction as encoding.
 Therefore, all FSE bitstreams are read from end to beginning.
@@ -909,15 +924,15 @@ For additional details on FSE, see [Finite State Entropy].
 
 [Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/
 
-FSE decoding involves a decoding table which has a power of 2 size and three elements:
+FSE decoding involves a decoding table which has a power of 2 size, and contain three elements:
 `Symbol`, `Num_Bits`, and `Baseline`.
 The `log2` of the table size is its `Accuracy_Log`.
 The FSE state represents an index in this table.
-The next symbol in the stream is the symbol indicated by the table value for that state.
-To obtain the next state value,
-the decoder should consume `Num_Bits` bits from the stream as a little endian value and add it to baseline.
 
-To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a little endian value.
+To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a __little-endian__ value.
+The next symbol in the stream is the `Symbol` indicated in the table for that state.
+To obtain the next state value,
+the decoder should consume `Num_Bits` bits from the stream as a __little-endian__ value and add it to `Baseline`.
 
 [ANS]: https://en.wikipedia.org/wiki/Asymmetric_Numeral_Systems
 
@@ -929,7 +944,7 @@ An FSE distribution table describes the probabilities of all symbols
 from `0` to the last present one (included)
 on a normalized scale of `1 << Accuracy_Log` .
 
-It's a bitstream which is read forward, in little-endian fashion.
+It's a bitstream which is read forward, in __little-endian__ fashion.
 It's not necessary to know its exact size,
 since it will be discovered and reported by the decoding process.
 
@@ -1064,7 +1079,7 @@ Huffman Coding
 --------------
 Zstandard Huffman-coded streams are read backwards,
 similar to the FSE bitstreams.
-Therefore, to find the start of the bitstream it is therefore necessary to
+Therefore, to find the start of the bitstream, it is therefore to
 know the offset of the last byte of the Huffman-coded stream.
 
 After writing the last bit containing information, the compressor
@@ -1077,7 +1092,7 @@ byte to read. The decompressor needs to skip 0-7 initial `0`-bits and
 the first `1`-bit it occurs. Afterwards, the useful part of the bitstream
 begins.
 
-The bitstream contains Huffman-coded symbols in little-endian order,
+The bitstream contains Huffman-coded symbols in __little-endian__ order,
 with the codes defined by the method below.
 
 ### Huffman Tree Description
@@ -1182,14 +1197,14 @@ The Huffman header compression uses 2 states,
 which share the same FSE distribution table.
 The first state (`State1`) encodes the even indexed symbols,
 and the second (`State2`) encodes the odd indexes.
-State1 is initialized first, and then State2, and they take turns decoding
-a single symbol and updating their state.
+`State1` is initialized first, and then `State2`, and they take turns
+decoding a single symbol and updating their state.
 For more details on these FSE operations, see the [FSE section](#fse).
 
 The number of symbols to decode is determined
 by tracking bitStream overflow condition:
 If updating state after decoding a symbol would require more bits than
-remain in the stream, it is assumed the extra bits are 0.  Then,
+remain in the stream, it is assumed that extra bits are 0.  Then,
 the symbols for each of the final states are decoded and the process is complete.
 
 ##### Conversion from weights to Huffman prefix codes
@@ -1245,7 +1260,7 @@ it would be encoded as:
 |Encoding|`0000`|`0001`|`01`|`1`| `10000` |
 
 Starting from the end,
-it's possible to read the bitstream in a little-endian fashion,
+it's possible to read the bitstream in a __little-endian__ fashion,
 keeping track of already used bits.  Since the bitstream is encoded in reverse
 order, by starting at the end the symbols can be read in forward order.
 
@@ -1258,13 +1273,14 @@ If a bitstream is not entirely and exactly consumed,
 hence reaching exactly its beginning position with _all_ bits consumed,
 the decoding process is considered faulty.
 
+
 Dictionary Format
 -----------------
 
-Zstandard is compatible with "raw content" dictionaries, free of any format restriction,
-except that they must be at least 8 bytes.
-These dictionaries function as if they were just the `Content` block of a formatted
-dictionary.
+Zstandard is compatible with "raw content" dictionaries,
+free of any format restriction, except that they must be at least 8 bytes.
+These dictionaries function as if they were just the `Content` part
+of a formatted dictionary.
 
 But dictionaries created by `zstd --train` follow a format, described here.
 
@@ -1274,9 +1290,9 @@ __Pre-requisites__ : a dictionary has a size,
 | `Magic_Number` | `Dictionary_ID` | `Entropy_Tables` | `Content` |
 | -------------- | --------------- | ---------------- | --------- |
 
-__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, little-endian format
+__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, __little-endian__ format
 
-__`Dictionary_ID`__ : 4 bytes, stored in little-endian format.
+__`Dictionary_ID`__ : 4 bytes, stored in __little-endian__ format.
               `Dictionary_ID` can be any value, except 0 (which means no `Dictionary_ID`).
               It's used by decoders to check if they use the correct dictionary.
 
@@ -1284,9 +1300,9 @@ _Reserved ranges :_
               If the frame is going to be distributed in a private environment,
               any `Dictionary_ID` can be used.
               However, for public distribution of compressed frames,
-              the following ranges are reserved for future use and should not be used :
+              the following ranges are reserved and shall not be used :
 
-              - low range : 1 - 32767
+              - low range  : <= 32767
               - high range : >= (2^31)
 
 __`Entropy_Tables`__ : following the same format as the tables in compressed blocks.
@@ -1298,26 +1314,30 @@ __`Entropy_Tables`__ : following the same format as the tables in compressed blo
               These tables populate the Repeat Stats literals mode and
               Repeat distribution mode for sequence decoding.
               It's finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`),
-              stored in order, 4-bytes little-endian each, for a total of 12 bytes.
+              stored in order, 4-bytes __little-endian__ each, for a total of 12 bytes.
               Each recent offset must have a value < dictionary size.
 
 __`Content`__ : The rest of the dictionary is its content.
               The content act as a "past" in front of data to compress or decompress,
               so it can be referenced in sequence commands.
               As long as the amount of data decoded from this frame is less than or
-              equal to the window-size, sequence commands may specify offsets longer
-              than the lenght of total decoded output so far to reference back to the
-              dictionary.  After the total output has surpassed the window size however,
+              equal to `Window_Size`, sequence commands may specify offsets longer
+              than the total length of decoded output so far to reference back to the
+              dictionary.  After the total output has surpassed `Window_Size` however,
               this is no longer allowed and the dictionary is no longer accessible.
 
 [compressed blocks]: #the-format-of-compressed_block
+
+
 Appendix A - Decoding tables for predefined codes
 -------------------------------------------------
 
-This appendix contains FSE decoding tables for the predefined literal length, match length, and offset
-codes. The tables have been constructed using the algorithm as given above in the
-"from normalized distribution to decoding tables" chapter. The tables here can be used as examples
-to crosscheck that an implementation implements the decoding table generation algorithm correctly.
+This appendix contains FSE decoding tables
+for the predefined literal length, match length, and offset codes.
+The tables have been constructed using the algorithm as given above in chapter
+"from normalized distribution to decoding tables".
+The tables here can be used as examples
+to crosscheck that an implementation build its decoding tables correctly.
 
 #### Literal Length Code:
 
@@ -1496,6 +1516,7 @@ to crosscheck that an implementation implements the decoding table generation al
 
 Version changes
 ---------------
+- 0.2.5 : minor typos and clarifications
 - 0.2.4 : section restructuring, by Sean Purcell
 - 0.2.3 : clarified several details, by Sean Purcell
 - 0.2.2 : added predefined codes, by Johannes Rudolph
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 204f56ea5f21..2e77e7742f60 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.1.4 Manual</title>
+<title>zstd 1.2.0 Manual</title>
 </head>
 <body>
-<h1>zstd 1.1.4 Manual</h1>
+<h1>zstd 1.2.0 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -57,46 +57,46 @@
 <pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
                 const void* src, size_t srcSize,
                       int compressionLevel);
-</b><p>    Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
-    Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
-    @return : compressed size written into `dst` (<= `dstCapacity),
-              or an error code if it fails (which can be tested using ZSTD_isError()). 
+</b><p>  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+  @return : compressed size written into `dst` (<= `dstCapacity),
+            or an error code if it fails (which can be tested using ZSTD_isError()). 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity,
                   const void* src, size_t compressedSize);
-</b><p>    `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
-    `dstCapacity` is an upper bound of originalSize.
-    If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
-    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
-              or an errorCode if it fails (which can be tested using ZSTD_isError()). 
+</b><p>  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+  `dstCapacity` is an upper bound of originalSize.
+  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+            or an errorCode if it fails (which can be tested using ZSTD_isError()). 
 </p></pre><BR>
 
 <pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-</b><p>   NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
-   ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
-   frame, but distinguishes empty frames from frames with an unknown size, or errors.
-
-   Additionally, ZSTD_findDecompressedSize can be used instead.  It can handle multiple
-   concatenated frames in one buffer, and so is more general.
-   As a result however, it requires more computation and entire frames to be passed to it,
-   as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
-
-   'src' is the start of a zstd compressed frame.
-   @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
-    note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
-             When `return==0`, data to decompress could be any size.
-             In which case, it's necessary to use streaming mode to decompress data.
-             Optionally, application can still use ZSTD_decompress() while relying on implied limits.
-             (For example, data may be necessarily cut into blocks <= 16 KB).
-    note 2 : decompressed size is always present when compression is done with ZSTD_compress()
-    note 3 : decompressed size can be very large (64-bits value),
-             potentially larger than what local system can handle as a single memory segment.
-             In which case, it's necessary to use streaming mode to decompress data.
-    note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
-             Always ensure result fits within application's authorized limits.
-             Each application can set its own limits.
-    note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. 
+</b><p>  NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
+  ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
+  frame, but distinguishes empty frames from frames with an unknown size, or errors.
+
+  Additionally, ZSTD_findDecompressedSize can be used instead.  It can handle multiple
+  concatenated frames in one buffer, and so is more general.
+  As a result however, it requires more computation and entire frames to be passed to it,
+  as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
+
+  'src' is the start of a zstd compressed frame.
+  @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
+   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+            When `return==0`, data to decompress could be any size.
+            In which case, it's necessary to use streaming mode to decompress data.
+            Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+            (For example, data may be necessarily cut into blocks <= 16 KB).
+   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+   note 3 : decompressed size can be very large (64-bits value),
+            potentially larger than what local system can handle as a single memory segment.
+            In which case, it's necessary to use streaming mode to decompress data.
+   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+            Always ensure result fits within application's authorized limits.
+            Each application can set its own limits.
+   note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. 
 </p></pre><BR>
 
 <h3>Helper functions</h3><pre></pre><b><pre>int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
@@ -106,28 +106,28 @@ const char* ZSTD_getErrorName(size_t code);     </b>/*!< provides readable strin
 </pre></b><BR>
 <a name="Chapter4"></a><h2>Explicit memory management</h2><pre></pre>
 
-<h3>Compression context</h3><pre>   When compressing many times,
-   it is recommended to allocate a context just once, and re-use it for each successive compression operation.
-   This will make workload friendlier for system's memory.
-   Use one context per thread for parallel execution in multi-threaded environments. 
+<h3>Compression context</h3><pre>  When compressing many times,
+  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+  This will make workload friendlier for system's memory.
+  Use one context per thread for parallel execution in multi-threaded environments. 
 </pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTD_CCtx* ZSTD_createCCtx(void);
 size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
 </pre></b><BR>
 <pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
-</b><p>    Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). 
+</b><p>  Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). 
 </p></pre><BR>
 
-<h3>Decompression context</h3><pre>   When decompressing many times,
-   it is recommended to allocate a context just once, and re-use it for each successive compression operation.
-   This will make workload friendlier for system's memory.
-   Use one context per thread for parallel execution in multi-threaded environments. 
+<h3>Decompression context</h3><pre>  When decompressing many times,
+  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+  This will make workload friendlier for system's memory.
+  Use one context per thread for parallel execution in multi-threaded environments. 
 </pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
 size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 </pre></b><BR>
 <pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-</b><p>   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). 
+</b><p>  Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). 
 </p></pre><BR>
 
 <a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
@@ -169,9 +169,10 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
                                 void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize,
                           const ZSTD_CDict* cdict);
-</b><p>   Compression using a digested Dictionary.
-   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-   Note that compression level is decided during dictionary creation. 
+</b><p>  Compression using a digested Dictionary.
+  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+  Note that compression level is decided during dictionary creation.
+  Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) 
 </p></pre><BR>
 
 <pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
@@ -399,7 +400,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
 </p></pre><BR>
 
 <pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
-                                      ZSTD_parameters params, ZSTD_customMem customMem);
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem);
 </b><p>  Create a ZSTD_CDict using external alloc and free, and customized compression parameters 
 </p></pre><BR>
 
@@ -426,12 +427,19 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
    both values are optional, select `0` if unknown. 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
-                               void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize,
-                         const void* dict,size_t dictSize,
-                               ZSTD_parameters params);
-</b><p>   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter 
+<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                const void* src, size_t srcSize,
+                const void* dict,size_t dictSize,
+                      ZSTD_parameters params);
+</b><p>   Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                const void* src, size_t srcSize,
+                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
+</b><p>   Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters 
 </p></pre><BR>
 
 <a name="Chapter15"></a><h2>Advanced decompression functions</h2><pre></pre>
@@ -491,20 +499,29 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
     Note : this use case also happens when using a non-conformant dictionary.
   - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
   - This is not a Zstandard frame.
-  When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. 
+  When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. 
 </p></pre><BR>
 
 <a name="Chapter16"></a><h2>Advanced streaming functions</h2><pre></pre>
 
 <h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);   </b>/**< size of CStream is variable, depending primarily on compression level */<b>
 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   </b>/**< pledgedSrcSize must be correct, a size of 0 means unknown.  for a frame size of 0 use initCStream_advanced */<b>
 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< note: a dict will not be used if dict == NULL or dictSize < 8 */<b>
 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
                                              ZSTD_parameters params, unsigned long long pledgedSrcSize);  </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b>
 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  </b>/**< note : cdict will just be referenced, and must outlive compression session */<b>
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  </b>/**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown.  for a frame size of 0 use initCStream_advanced */<b>
-size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams);  </b>/**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */<b>
 </pre></b><BR>
+<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+</b><p>  start a new compression job, using same parameters from previous job.
+  This is typically useful to skip dictionary loading stage, since it will re-use it in-place..
+  Note that zcs must be init at least once before using ZSTD_resetCStream().
+  pledgedSrcSize==0 means "srcSize unknown".
+  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+  @return : 0, or an error code (which can be tested using ZSTD_isError()) 
+</p></pre><BR>
+
 <h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: a dict will not be used if dict == NULL or dictSize < 8 */<b>
@@ -552,10 +569,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 <h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b>
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
+size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */<b>
 size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize can be 0, indicating unknown size.  if it is non-zero, it must be accurate.  for 0 size frames, use compressBegin_advanced */<b>
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize can be 0, indicating unknown size.  if it is non-zero, it must be accurate.  for 0 size frames, use compressBegin_advanced */<b>
-size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 </pre></b><BR>
 <a name="Chapter19"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
   A ZSTD_DCtx object is required to track streaming operations.
@@ -640,19 +656,20 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
     - Compressing and decompressing require a context structure
       + Use ZSTD_createCCtx() and ZSTD_createDCtx()
     - It is necessary to init context before starting
-      + compression : ZSTD_compressBegin()
-      + decompression : ZSTD_decompressBegin()
-      + variants _usingDict() are also allowed
-      + copyCCtx() and copyDCtx() work too
-    - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
-      + If you need to compress more, cut data into multiple blocks
-      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSizeMax() <= ZSTD_BLOCKSIZE_ABSOLUTEMAX
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
     - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
       In which case, nothing is produced into `dst`.
       + User must test for such outcome and deal directly with uncompressed data
       + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
-      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
-        Use ZSTD_insertBlock() in such a case.
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
 <BR></pre>
 
 <h3>Raw zstd block functions</h3><pre></pre><b><pre>size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
author	Baptiste Daroussin <bapt@FreeBSD.org>	2017-05-06 10:17:59 +0000
committer	Baptiste Daroussin <bapt@FreeBSD.org>	2017-05-06 10:17:59 +0000
commit	ffcbc2d7ba03067492045e4cbead519a3b3c27ef (patch)
tree	df436f4253158a7d5a4875e54cd7d273dd5334a6 /doc
parent	ab984b3e51d32af796fe89e130f57bf58b8a14b2 (diff)