src - FreeBSD source tree

diff options


context:
space:
mode:

author	Allan Jude <allanjude@FreeBSD.org>	2023-01-25 01:20:16 +0000
committer	Allan Jude <allanjude@FreeBSD.org>	2023-01-25 01:20:16 +0000
commit	b3392d84da5bf2162baf937c77e0557f3fd8a52b (patch)
tree	d9bfa8081b46f82d07e21f8ff8e70627a2aec35f /lib/compress/zstd_compress_internal.h
parent	f6ae97673c28bdb9ae795bd235ab6f26f2536a2d (diff)

vendor/zstd/1.5.2 vendor/zstd

Diffstat (limited to 'lib/compress/zstd_compress_internal.h')

-rw-r--r--

lib/compress/zstd_compress_internal.h

443

1 files changed, 349 insertions, 94 deletions

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index c04998b8b18f..c406e794bdb9 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h

@@ -1,5 +1,5 @@

+ * Copyright (c) Yann Collet, Facebook, Inc.

* This source code is licensed under both the BSD-style license (found in the

@@ -63,7 +63,7 @@ typedef struct {

} ZSTD_localDict;

typedef struct {

- HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];

+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];

HUF_repeat repeatMode;

} ZSTD_hufCTables_t;

@@ -81,8 +81,55 @@ typedef struct {

ZSTD_fseCTables_t fse;

} ZSTD_entropyCTables_t;

+/***********************************************

+* Entropy buffer statistics structs and funcs *

+***********************************************/

+/** ZSTD_hufCTablesMetadata_t :

+ * Stores Literals Block Type for a super-block in hType, and

+ * huffman tree description in hufDesBuffer.

+ * hufDesSize refers to the size of huffman tree description in bytes.

+ * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */

typedef struct {

- U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */

+ symbolEncodingType_e hType;

+ BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];

+ size_t hufDesSize;

+} ZSTD_hufCTablesMetadata_t;

+/** ZSTD_fseCTablesMetadata_t :

+ * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and

+ * fse tables in fseTablesBuffer.

+ * fseTablesSize refers to the size of fse tables in bytes.

+ * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */

+typedef struct {

+ symbolEncodingType_e llType;

+ symbolEncodingType_e ofType;

+ symbolEncodingType_e mlType;

+ BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];

+ size_t fseTablesSize;

+ size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */

+} ZSTD_fseCTablesMetadata_t;

+typedef struct {

+ ZSTD_hufCTablesMetadata_t hufMetadata;

+ ZSTD_fseCTablesMetadata_t fseMetadata;

+} ZSTD_entropyCTablesMetadata_t;

+/** ZSTD_buildBlockEntropyStats() :

+ * Builds entropy for the block.

+ * @return : 0 on success or error code */

+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,

+ const ZSTD_entropyCTables_t* prevEntropy,

+ ZSTD_entropyCTables_t* nextEntropy,

+ const ZSTD_CCtx_params* cctxParams,

+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,

+ void* workspace, size_t wkspSize);

+/*********************************

+* Compression internals structs *

+*********************************/

+typedef struct {

+ U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */

U32 len; /* Raw length of match */

} ZSTD_match_t;

@@ -132,7 +179,7 @@ typedef struct {

U32 offCodeSumBasePrice; /* to compare to log2(offreq) */

ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */

const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */

- ZSTD_literalCompressionMode_e literalCompressionMode;

+ ZSTD_paramSwitch_e literalCompressionMode;

} optState_t;

typedef struct {

@@ -141,14 +188,23 @@ typedef struct {

} ZSTD_compressedBlockState_t;

typedef struct {

- BYTE const* nextSrc; /* next block here to continue on current prefix */

- BYTE const* base; /* All regular indexes relative to this position */

- BYTE const* dictBase; /* extDict indexes relative to this position */

- U32 dictLimit; /* below that point, need extDict */

- U32 lowLimit; /* below that point, no more valid data */

+ BYTE const* nextSrc; /* next block here to continue on current prefix */

+ BYTE const* base; /* All regular indexes relative to this position */

+ BYTE const* dictBase; /* extDict indexes relative to this position */

+ U32 dictLimit; /* below that point, need extDict */

+ U32 lowLimit; /* below that point, no more valid data */

+ U32 nbOverflowCorrections; /* Number of times overflow correction has run since

+ * ZSTD_window_init(). Useful for debugging coredumps

+ * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.

+ */

} ZSTD_window_t;

+#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

+#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {

ZSTD_window_t window; /* State for window round buffer management */

U32 loadedDictEnd; /* index of end of dictionary, within context's referential.

@@ -160,9 +216,17 @@ struct ZSTD_matchState_t {

U32 nextToUpdate; /* index from which to continue table update */

U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */

+ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/

+ U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */

+ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

U32* hashTable;

U32* hashTable3;

U32* chainTable;

+ U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

int dedicatedDictSearch; /* Indicates whether this matchState is using the

* dedicated dictionary search structure.

@@ -184,16 +248,25 @@ typedef struct {

} ldmEntry_t;

typedef struct {

+ BYTE const* split;

+ U32 hash;

+ U32 checksum;

+ ldmEntry_t* bucket;

+} ldmMatchCandidate_t;

+#define LDM_BATCH_SIZE 64

+typedef struct {

ZSTD_window_t window; /* State for the window round buffer management */

ldmEntry_t* hashTable;

U32 loadedDictEnd;

BYTE* bucketOffsets; /* Next position in bucket to insert entry */

- U64 hashPower; /* Used to compute the rolling hash.

- * Depends on ldmParams.minMatchLength */

+ size_t splitIndices[LDM_BATCH_SIZE];

+ ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];

} ldmState_t;

typedef struct {

- U32 enableLdm; /* 1 if enable long distance matching */

+ ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */

U32 hashLog; /* Log size of hashTable */

U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */

U32 minMatchLength; /* Minimum match length */

@@ -224,7 +297,7 @@ struct ZSTD_CCtx_params_s {

* There is no guarantee that hint is close to actual source size */

ZSTD_dictAttachPref_e attachDictPref;

- ZSTD_literalCompressionMode_e literalCompressionMode;

+ ZSTD_paramSwitch_e literalCompressionMode;

/* Multithreading: used to pass parameters to mtctx */

int nbWorkers;

@@ -246,6 +319,15 @@ struct ZSTD_CCtx_params_s {

ZSTD_sequenceFormat_e blockDelimiters;

int validateSequences;

+ /* Block splitting */

+ ZSTD_paramSwitch_e useBlockSplitter;

+ /* Param for deciding whether to use row-based matchfinder */

+ ZSTD_paramSwitch_e useRowMatchFinder;

+ /* Always load a dictionary in ext-dict mode (not prefix mode)? */

+ int deterministicRefPrefix;

/* Internal use, for createCCtxParams() and freeCCtxParams() only */

ZSTD_customMem customMem;

}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

@@ -263,13 +345,31 @@ typedef enum {

ZSTDb_buffered

} ZSTD_buffered_policy_e;

+/**

+ * Struct that contains all elements of block splitter that should be allocated

+ * in a wksp.

+ */

+#define ZSTD_MAX_NB_BLOCK_SPLITS 196

+typedef struct {

+ seqStore_t fullSeqStoreChunk;

+ seqStore_t firstHalfSeqStore;

+ seqStore_t secondHalfSeqStore;

+ seqStore_t currSeqStore;

+ seqStore_t nextSeqStore;

+ U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];

+ ZSTD_entropyCTablesMetadata_t entropyMetadata;

+} ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {

ZSTD_compressionStage_e stage;

int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */

int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */

ZSTD_CCtx_params requestedParams;

ZSTD_CCtx_params appliedParams;

+ ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */

U32 dictID;

+ size_t dictContentSize;

ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */

size_t blockSize;

@@ -292,7 +392,7 @@ struct ZSTD_CCtx_s {

ZSTD_blockState_t blockState;

U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

- /* Wether we are streaming or not */

+ /* Whether we are streaming or not */

ZSTD_buffered_policy_e bufferedPolicy;

/* streaming */

@@ -321,6 +421,14 @@ struct ZSTD_CCtx_s {

#ifdef ZSTD_MULTITHREAD

ZSTDMT_CCtx* mtctx;

#endif

+ /* Tracing */

+#if ZSTD_TRACE

+ ZSTD_TraceCtx traceCtx;

+#endif

+ /* Workspace for block splitter */

+ ZSTD_blockSplitCtx blockSplitCtx;

};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

@@ -355,7 +463,7 @@ typedef enum {

typedef size_t (*ZSTD_blockCompressor) (

ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],

void const* src, size_t srcSize);

-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);

+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);

MEM_STATIC U32 ZSTD_LLcode(U32 litLength)

@@ -389,31 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)

return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];

}

-typedef struct repcodes_s {

- U32 rep[3];

-} repcodes_t;

-MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)

- repcodes_t newReps;

- if (offset >= ZSTD_REP_NUM) { /* full offset */

- newReps.rep[2] = rep[1];

- newReps.rep[1] = rep[0];

- newReps.rep[0] = offset - ZSTD_REP_MOVE;

- } else { /* repcode */

- U32 const repCode = offset + ll0;

- if (repCode > 0) { /* note : if repCode==0, no change */

- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];

- newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];

- newReps.rep[1] = rep[0];

- newReps.rep[0] = currentOffset;

- } else { /* repCode == 0 */

- ZSTD_memcpy(&newReps, rep, sizeof(newReps));

- }

- return newReps;

/* ZSTD_cParam_withinBounds:

* @return 1 if value is within cParam bounds,

* 0 otherwise */

@@ -462,17 +545,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)

return (srcSize >> minlog) + 2;

}

-MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)

+MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)

{

switch (cctxParams->literalCompressionMode) {

- case ZSTD_lcm_huffman:

+ case ZSTD_ps_enable:

return 0;

- case ZSTD_lcm_uncompressed:

+ case ZSTD_ps_disable:

return 1;

default:

assert(0 /* impossible: pre-validated */);

- /* fall-through */

- case ZSTD_lcm_auto:

+ ZSTD_FALLTHROUGH;

+ case ZSTD_ps_auto:

return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);

}

@@ -482,7 +565,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam

* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single

* large copies.

-static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {

+static void

+ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)

assert(iend > ilimit_w);

if (ip <= ilimit_w) {

ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);

@@ -492,14 +577,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie

while (ip < iend) *op++ = *ip++;

}

+#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)

+#define STORE_REPCODE_1 STORE_REPCODE(1)

+#define STORE_REPCODE_2 STORE_REPCODE(2)

+#define STORE_REPCODE_3 STORE_REPCODE(3)

+#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)

+#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)

+#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)

+#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)

+#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)

+#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */

+#define STORED_TO_OFFBASE(o) ((o)+1)

+#define OFFBASE_TO_STORED(o) ((o)-1)

/*! ZSTD_storeSeq() :

- * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.

- * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).

- * `mlBase` : matchLength - MINMATCH

+ * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.

+ * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().

+ * @matchLength : must be >= MINMATCH

* Allowed to overread literals up to litLimit.

-HINT_INLINE UNUSED_ATTR

-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)

+HINT_INLINE UNUSED_ATTR void

+ZSTD_storeSeq(seqStore_t* seqStorePtr,

+ size_t litLength, const BYTE* literals, const BYTE* litLimit,

+ U32 offBase_minus1,

+ size_t matchLength)

{

BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;

BYTE const* const litEnd = literals + litLength;

@@ -508,7 +609,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera

if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */

{ U32 const pos = (U32)((const BYTE*)literals - g_start);

DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",

- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);

+ pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);

}

#endif

assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);

@@ -532,26 +633,66 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera

/* literal Length */

if (litLength>0xFFFF) {

- assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */

- seqStorePtr->longLengthID = 1;

+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */

+ seqStorePtr->longLengthType = ZSTD_llt_literalLength;

seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);

}

seqStorePtr->sequences[0].litLength = (U16)litLength;

/* match offset */

- seqStorePtr->sequences[0].offset = offCode + 1;

+ seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);

/* match Length */

- if (mlBase>0xFFFF) {

- assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */

- seqStorePtr->longLengthID = 2;

- seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);

+ assert(matchLength >= MINMATCH);

+ { size_t const mlBase = matchLength - MINMATCH;

+ if (mlBase>0xFFFF) {

+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */

+ seqStorePtr->longLengthType = ZSTD_llt_matchLength;

+ seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);

+ }

+ seqStorePtr->sequences[0].mlBase = (U16)mlBase;

}

- seqStorePtr->sequences[0].matchLength = (U16)mlBase;

seqStorePtr->sequences++;

}

+/* ZSTD_updateRep() :

+ * updates in-place @rep (array of repeat offsets)

+ * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()

+ */

+MEM_STATIC void

+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)

+ if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */

+ rep[2] = rep[1];

+ rep[1] = rep[0];

+ rep[0] = STORED_OFFSET(offBase_minus1);

+ } else { /* repcode */

+ U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;

+ if (repCode > 0) { /* note : if repCode==0, no change */

+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];

+ rep[2] = (repCode >= 2) ? rep[1] : rep[2];

+ rep[1] = rep[0];

+ rep[0] = currentOffset;

+ } else { /* repCode == 0 */

+ /* nothing to do */

+ }

+typedef struct repcodes_s {

+ U32 rep[3];

+} repcodes_t;

+MEM_STATIC repcodes_t

+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)

+ repcodes_t newReps;

+ ZSTD_memcpy(&newReps, rep, sizeof(newReps));

+ ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);

+ return newReps;

/*-*************************************

* Match length counter

@@ -564,8 +705,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)

# if STATIC_BMI2

return _tzcnt_u64(val) >> 3;

# else

- unsigned long r = 0;

- return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;

+ if (val != 0) {

+ unsigned long r;

+ _BitScanForward64(&r, (U64)val);

+ return (unsigned)(r >> 3);

+ } else {

+ /* Should not reach this code path */

+ __assume(0);

+ }

# endif

# elif defined(__GNUC__) && (__GNUC__ >= 4)

return (__builtin_ctzll((U64)val) >> 3);

@@ -582,8 +729,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)

# endif

} else { /* 32 bits */

# if defined(_MSC_VER)

- unsigned long r=0;

- return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;

+ if (val != 0) {

+ unsigned long r;

+ _BitScanForward(&r, (U32)val);

+ return (unsigned)(r >> 3);

+ } else {

+ /* Should not reach this code path */

+ __assume(0);

+ }

# elif defined(__GNUC__) && (__GNUC__ >= 3)

return (__builtin_ctz((U32)val) >> 3);

# else

@@ -600,8 +753,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)

# if STATIC_BMI2

return _lzcnt_u64(val) >> 3;

# else

- unsigned long r = 0;

- return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;

+ if (val != 0) {

+ unsigned long r;

+ _BitScanReverse64(&r, (U64)val);

+ return (unsigned)(r >> 3);

+ } else {

+ /* Should not reach this code path */

+ __assume(0);

+ }

# endif

# elif defined(__GNUC__) && (__GNUC__ >= 4)

return (__builtin_clzll(val) >> 3);

@@ -615,8 +774,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)

# endif

} else { /* 32 bits */

# if defined(_MSC_VER)

- unsigned long r = 0;

- return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;

+ if (val != 0) {

+ unsigned long r;

+ _BitScanReverse(&r, (unsigned long)val);

+ return (unsigned)(r >> 3);

+ } else {

+ /* Should not reach this code path */

+ __assume(0);

+ }

# elif defined(__GNUC__) && (__GNUC__ >= 3)

return (__builtin_clz((U32)val) >> 3);

# else

@@ -795,6 +960,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)

window->dictLimit = end;

}

+MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)

+ return window.dictLimit == ZSTD_WINDOW_START_INDEX &&

+ window.lowLimit == ZSTD_WINDOW_START_INDEX &&

+ (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;

/**

* ZSTD_window_hasExtDict():

* Returns non-zero if the window has a non-empty extDict.

@@ -818,15 +990,71 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)

ZSTD_noDict;

}

+/* Defining this macro to non-zero tells zstd to run the overflow correction

+ * code much more frequently. This is very inefficient, and should only be

+ * used for tests and fuzzers.

+ */

+#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY

+# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION

+# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1

+# else

+# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0

+# endif

+#endif

+/**

+ * ZSTD_window_canOverflowCorrect():

+ * Returns non-zero if the indices are large enough for overflow correction

+ * to work correctly without impacting compression ratio.

+ */

+MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,

+ U32 cycleLog,

+ U32 maxDist,

+ U32 loadedDictEnd,

+ void const* src)

+ U32 const cycleSize = 1u << cycleLog;

+ U32 const curr = (U32)((BYTE const*)src - window.base);

+ U32 const minIndexToOverflowCorrect = cycleSize

+ + MAX(maxDist, cycleSize)

+ + ZSTD_WINDOW_START_INDEX;

+ /* Adjust the min index to backoff the overflow correction frequency,

+ * so we don't waste too much CPU in overflow correction. If this

+ * computation overflows we don't really care, we just need to make

+ * sure it is at least minIndexToOverflowCorrect.

+ */

+ U32 const adjustment = window.nbOverflowCorrections + 1;

+ U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,

+ minIndexToOverflowCorrect);

+ U32 const indexLargeEnough = curr > adjustedIndex;

+ /* Only overflow correct early if the dictionary is invalidated already,

+ * so we don't hurt compression ratio.

+ */

+ U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

+ return indexLargeEnough && dictionaryInvalidated;

/**

* ZSTD_window_needOverflowCorrection():

* Returns non-zero if the indices are getting too large and need overflow

* protection.

MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,

+ U32 cycleLog,

+ U32 maxDist,

+ U32 loadedDictEnd,

+ void const* src,

void const* srcEnd)

{

U32 const curr = (U32)((BYTE const*)srcEnd - window.base);

+ if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {

+ if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {

+ return 1;

+ }

return curr > ZSTD_CURRENT_MAX;

}

@@ -838,7 +1066,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,

* The least significant cycleLog bits of the indices must remain the same,

* which may be 0. Every index up to maxDist in the past must be valid.

- * NOTE: (maxDist & cycleMask) must be zero.

MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,

U32 maxDist, void const* src)

@@ -862,32 +1089,52 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,

* 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:

* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.

- U32 const cycleMask = (1U << cycleLog) - 1;

+ U32 const cycleSize = 1u << cycleLog;

+ U32 const cycleMask = cycleSize - 1;

U32 const curr = (U32)((BYTE const*)src - window->base);

- U32 const currentCycle0 = curr & cycleMask;

- /* Exclude zero so that newCurrent - maxDist >= 1. */

- U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;

- U32 const newCurrent = currentCycle1 + maxDist;

+ U32 const currentCycle = curr & cycleMask;

+ /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */

+ U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX

+ ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)

+ : 0;

+ U32 const newCurrent = currentCycle

+ + currentCycleCorrection

+ + MAX(maxDist, cycleSize);

U32 const correction = curr - newCurrent;

- assert((maxDist & cycleMask) == 0);

+ /* maxDist must be a power of two so that:

+ * (newCurrent & cycleMask) == (curr & cycleMask)

+ * This is required to not corrupt the chains / binary tree.

+ */

+ assert((maxDist & (maxDist - 1)) == 0);

+ assert((curr & cycleMask) == (newCurrent & cycleMask));

assert(curr > newCurrent);

- /* Loose bound, should be around 1<<29 (see above) */

- assert(correction > 1<<28);

+ if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {

+ /* Loose bound, should be around 1<<29 (see above) */

+ assert(correction > 1<<28);

+ }

window->base += correction;

window->dictBase += correction;

- if (window->lowLimit <= correction) window->lowLimit = 1;

- else window->lowLimit -= correction;

- if (window->dictLimit <= correction) window->dictLimit = 1;

- else window->dictLimit -= correction;

+ if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {

+ window->lowLimit = ZSTD_WINDOW_START_INDEX;

+ } else {

+ window->lowLimit -= correction;

+ }

+ if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {

+ window->dictLimit = ZSTD_WINDOW_START_INDEX;

+ } else {

+ window->dictLimit -= correction;

+ }

/* Ensure we can still reference the full window. */

assert(newCurrent >= maxDist);

- assert(newCurrent - maxDist >= 1);

+ assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);

/* Ensure that lowLimit and dictLimit didn't underflow. */

assert(window->lowLimit <= newCurrent);

assert(window->dictLimit <= newCurrent);

+ ++window->nbOverflowCorrections;

DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,

window->lowLimit);

return correction;

@@ -992,11 +1239,13 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {

ZSTD_memset(window, 0, sizeof(*window));

- window->base = (BYTE const*)"";

- window->dictBase = (BYTE const*)"";

- window->dictLimit = 1; /* start from 1, so that 1st position is valid */

- window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */

- window->nextSrc = window->base + 1; /* see issue #1241 */

+ window->base = (BYTE const*)" ";

+ window->dictBase = (BYTE const*)" ";

+ ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */

+ window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */

+ window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */

+ window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */

+ window->nbOverflowCorrections = 0;

}

/**

@@ -1007,7 +1256,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {

* Returns non-zero if the segment is contiguous.

MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,

- void const* src, size_t srcSize)

+ void const* src, size_t srcSize,

+ int forceNonContiguous)

{

BYTE const* const ip = (BYTE const*)src;

U32 contiguous = 1;

@@ -1017,7 +1267,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,

assert(window->base != NULL);

assert(window->dictBase != NULL);

/* Check if blocks follow each other */

- if (src != window->nextSrc) {

+ if (src != window->nextSrc || forceNonContiguous) {

/* not contiguous */

size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);

DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);

@@ -1047,15 +1297,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,

MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)

{

- U32 const maxDistance = 1U << windowLog;

- U32 const lowestValid = ms->window.lowLimit;

- U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;

- U32 const isDictionary = (ms->loadedDictEnd != 0);

+ U32 const maxDistance = 1U << windowLog;

+ U32 const lowestValid = ms->window.lowLimit;

+ U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;

+ U32 const isDictionary = (ms->loadedDictEnd != 0);

/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary

* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't

* valid for the entire block. So this check is sufficient to find the lowest valid match index.

- U32 const matchLowest = isDictionary ? lowestValid : withinWindow;

+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;

return matchLowest;

}

@@ -1200,4 +1450,9 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe

* condition for correct operation : hashLog > 1 */

U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

+/** ZSTD_CCtx_trace() :

+ * Trace the end of a compression call.

+ */

+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */