summary | refs | log | tree | commit | diff
path: root/lib/common
diff options
context:
space:
mode:
Diffstat (limited to 'lib/common')
-rw-r--r-- lib/common/bitstream.h 44
-rw-r--r-- lib/common/compiler.h 42
-rw-r--r-- lib/common/cpu.h 5
-rw-r--r-- lib/common/debug.c 44
-rw-r--r-- lib/common/debug.h 123
-rw-r--r-- lib/common/entropy_common.c 17
-rw-r--r-- lib/common/fse.h 86
-rw-r--r-- lib/common/fse_decompress.c 2
-rw-r--r-- lib/common/huf.h 61
-rw-r--r-- lib/common/mem.h 22
-rw-r--r-- lib/common/pool.c 121
-rw-r--r-- lib/common/pool.h 48
-rw-r--r-- lib/common/xxhash.c 1
-rw-r--r-- lib/common/zstd_common.c 5
-rw-r--r-- lib/common/zstd_internal.h 45
15 files changed, 456 insertions, 210 deletions
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index f7f389fe0fa8..ef89b9878e22 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -1,8 +1,7 @@
/* ******************************************************************
bitstream
Part of FSE library
- header file (to include)
- Copyright (C) 2013-2017, Yann Collet.
+ Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -49,21 +48,10 @@ extern "C" {
* Dependencies
******************************************/
#include "mem.h" /* unaligned access routines */
+#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
-/*-*************************************
-* Debug
-***************************************/
-#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
-# include <assert.h>
-#else
-# ifndef assert
-# define assert(condition) ((void)0)
-# endif
-#endif
-
-
/*=========================================
* Target specific
=========================================*/
@@ -83,8 +71,7 @@ extern "C" {
* A critical property of these streams is that they encode and decode in **reverse** direction.
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
*/
-typedef struct
-{
+typedef struct {
size_t bitContainer;
unsigned bitPos;
char* startPtr;
@@ -118,8 +105,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
/*-********************************************
* bitStream decoding API (read backward)
**********************************************/
-typedef struct
-{
+typedef struct {
size_t bitContainer;
unsigned bitsConsumed;
const char* ptr;
@@ -236,7 +222,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
}
/*! BIT_addBitsFast() :
- * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
+ * works only if `value` is _clean_,
+ * meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
{
@@ -352,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{
-#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
-# if defined(__x86_64__)
- if (sizeof(bitContainer)==8)
- return _bextr_u64(bitContainer, start, nbBits);
- else
-# endif
- return _bextr_u32(bitContainer, start, nbBits);
-#else
+ U32 const regMask = sizeof(bitContainer)*8 - 1;
+ /* if start > regMask, bitstream is corrupted, and result is undefined */
assert(nbBits < BIT_MASK_SIZE);
- return (bitContainer >> start) & BIT_mask[nbBits];
-#endif
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
@@ -379,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
-#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
+ /* arbitrate between double-shift and shift+mask */
+#if 1
+ /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
+ * bitstream is likely corrupted, and result is undefined */
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
#else
+ /* this code path is slower on my os-x laptop */
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
#endif
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index e90a3bcde36c..07f875e4d38e 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -77,9 +77,9 @@
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
- #if (defined(__clang__) && __has_attribute(__target__)) \
+ #if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
- && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
@@ -88,15 +88,37 @@
#endif
#endif
-/* prefetch */
-#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
-# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
-#elif defined(__GNUC__)
-# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH macro
+ * All prefetch invocations use a single default locality 2,
+ * generating instruction prefetcht1,
+ * which, according to Intel, means "load data into L2 cache".
+ * This is a good enough "middle ground" for the time being,
+ * though in theory, it would be better to specialize locality depending on data being prefetched.
+ * Tests could not determine any noticeable difference based on locality value. */
+#if defined(NO_PREFETCH)
+# define PREFETCH(ptr) (void)(ptr) /* disabled */
#else
-# define PREFETCH(ptr) /* disabled */
-#endif
+# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+# else
+# define PREFETCH(ptr) (void)(ptr) /* disabled */
+# endif
+#endif /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s) { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH(_ptr + _pos); \
+ } \
+}
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
diff --git a/lib/common/cpu.h b/lib/common/cpu.h
index 4eb48e39e10e..eeb428ad5f6a 100644
--- a/lib/common/cpu.h
+++ b/lib/common/cpu.h
@@ -36,7 +36,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f1d = 0;
U32 f7b = 0;
U32 f7c = 0;
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
int reg[4];
__cpuid((int*)reg, 0);
{
@@ -72,8 +72,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
- : "a"(1)
- :);
+ : "a"(1));
}
if (n >= 7) {
__asm__(
diff --git a/lib/common/debug.c b/lib/common/debug.c
new file mode 100644
index 000000000000..3ebdd1cb15a6
--- /dev/null
+++ b/lib/common/debug.c
@@ -0,0 +1,44 @@
+/* ******************************************************************
+ debug
+ Part of FSE library
+ Copyright (C) 2013-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+
+/*
+ * This module only hosts one global variable
+ * which can be used to dynamically influence the verbosity of traces,
+ * such as DEBUGLOG and RAWLOG
+ */
+
+#include "debug.h"
+
+int g_debuglevel = DEBUGLEVEL;
diff --git a/lib/common/debug.h b/lib/common/debug.h
new file mode 100644
index 000000000000..0c04ad2cc98c
--- /dev/null
+++ b/lib/common/debug.h
@@ -0,0 +1,123 @@
+/* ******************************************************************
+ debug
+ Part of FSE library
+ Copyright (C) 2013-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+
+/*
+ * The purpose of this header is to enable debug functions.
+ * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
+ * and DEBUG_STATIC_ASSERT() for compile-time.
+ *
+ * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
+ *
+ * Level 1 enables assert() only.
+ * Starting level 2, traces can be generated and pushed to stderr.
+ * The higher the level, the more verbose the traces.
+ *
+ * It's possible to dynamically adjust level using variable g_debuglevel,
+ * which is only declared if DEBUGLEVEL>=2,
+ * and is a global variable, not multi-thread protected (use with care)
+ */
+
+#ifndef DEBUG_H_12987983217
+#define DEBUG_H_12987983217
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* static assert is triggered at compile time, leaving no runtime artefact,
+ * but can only work with compile-time constants.
+ * This variant can only be used inside a function. */
+#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
+
+
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+# define DEBUGLEVEL 0
+#endif
+
+/* recommended values for DEBUGLEVEL :
+ * 0 : no debug, all run-time functions disabled
+ * 1 : no display, enables assert() only
+ * 2 : reserved, for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (verbose)
+ * 7+: events at every position (*very* verbose)
+ *
+ * It's generally inconvenient to output traces > 5.
+ * In which case, it's possible to selectively enable higher verbosity levels
+ * by modifying g_debuglevel.
+ */
+
+#if (DEBUGLEVEL>=1)
+# include <assert.h>
+#else
+# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
+# define assert(condition) ((void)0) /* disable assert (default) */
+# endif
+#endif
+
+#if (DEBUGLEVEL>=2)
+# include <stdio.h>
+extern int g_debuglevel; /* here, this variable is only declared,
+ it actually lives in debug.c,
+ and is shared by the whole process.
+ It's typically used to enable very verbose levels
+ on selective conditions (such as position in src) */
+
+# define RAWLOG(l, ...) { \
+ if (l<=g_debuglevel) { \
+ fprintf(stderr, __VA_ARGS__); \
+ } }
+# define DEBUGLOG(l, ...) { \
+ if (l<=g_debuglevel) { \
+ fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define RAWLOG(l, ...) {} /* disabled */
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* DEBUG_H_12987983217 */
diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index b37a082fee2c..b12944e1de93 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -72,7 +72,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
unsigned charnum = 0;
int previous0 = 0;
- if (hbSize < 4) return ERROR(srcSize_wrong);
+ if (hbSize < 4) {
+ /* This function only works when hbSize >= 4 */
+ char buffer[4];
+ memset(buffer, 0, sizeof(buffer));
+ memcpy(buffer, headerBuffer, hbSize);
+ { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+ buffer, sizeof(buffer));
+ if (FSE_isError(countSize)) return countSize;
+ if (countSize > hbSize) return ERROR(corruption_detected);
+ return countSize;
+ } }
+ assert(hbSize >= 4);
+
+ /* init */
+ memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -105,6 +119,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
diff --git a/lib/common/fse.h b/lib/common/fse.h
index 6a1d272be5cb..a5a6b6d4db70 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -72,6 +72,7 @@ extern "C" {
#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
+
/*-****************************************
* FSE simple functions
******************************************/
@@ -129,7 +130,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src,
******************************************/
/*!
FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
+1. count symbol occurrence from source[] into table count[] (see hist.h)
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
3. save normalized counters to memory buffer using writeNCount()
4. build encoding table 'CTable' from normalized counters
@@ -147,15 +148,6 @@ or to save and provide normalized distribution using external method.
/* *** COMPRESSION *** */
-/*! FSE_count():
- Provides the precise count of each byte within a table 'count'.
- 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
- *maxSymbolValuePtr will be updated if detected smaller than initial value.
- @return : the count of the most frequent symbol (which is not identified).
- if return == srcSize, there is only one symbol.
- Can also return an error code, which can be tested with FSE_isError(). */
-FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-
/*! FSE_optimalTableLog():
dynamically downsize 'tableLog' when conditions are met.
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
@@ -167,7 +159,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -178,8 +171,9 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab
Compactly save 'normalizedCounter' into 'buffer'.
@return : size of the compressed table,
or an errorCode, which can be tested using FSE_isError(). */
-FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+ const short* normalizedCounter,
+ unsigned maxSymbolValue, unsigned tableLog);
/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
@@ -250,7 +244,9 @@ If there is an error, the function will return an ErrorCode (which can be tested
@return : size read from 'rBuffer',
or an errorCode, which can be tested using FSE_isError().
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+ const void* rBuffer, size_t rBuffSize);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
@@ -325,33 +321,8 @@ If there is an error, the function will return an error code, which can be teste
/* *****************************************
-* FSE advanced API
-*******************************************/
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned
- */
-size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
- const void* source, size_t sourceSize, unsigned* workSpace);
-
-/** FSE_countFast() :
- * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
- */
-size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` must be a table of minimum `1024` unsigned
- */
-size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
-
-/*! FSE_count_simple() :
- * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
- * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
-*/
-size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-
-
+ * FSE advanced API
+ ***************************************** */
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */
@@ -576,6 +547,39 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
}
+/* FSE_getMaxNbBits() :
+ * Approximate maximum cost of a symbol, in bits.
+ * Fractional costs get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+{
+ const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+ return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
+}
+
+/* FSE_bitCost() :
+ * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
+{
+ const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+ U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
+ U32 const threshold = (minNbBits+1) << 16;
+ assert(tableLog < 16);
+ assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
+ { U32 const tableSize = 1 << tableLog;
+ U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
+ U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
+ U32 const bitMultiplier = 1 << accuracyLog;
+ assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
+ assert(normalizedDeltaFromThreshold <= bitMultiplier);
+ return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
+ }
+}
+
+
/* ====== Decompression ====== */
typedef struct {
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index 4c66c3b77464..72bbead5beea 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -49,7 +49,7 @@
* Error Management
****************************************************************/
#define FSE_isError ERR_isError
-#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
/* check and forward error code */
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
diff --git a/lib/common/huf.h b/lib/common/huf.h
index b4645b4e5197..de9464111064 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -1,7 +1,7 @@
/* ******************************************************************
- Huffman coder, part of New Generation Entropy library
- header file
- Copyright (C) 2013-2016, Yann Collet.
+ huff0 huffman codec,
+ part of Finite State Entropy library
+ Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -163,25 +163,25 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
/* ****************************************
* Advanced decompression functions
******************************************/
-size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
-size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
-size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
-size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
+size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
/* ****************************************
@@ -208,7 +208,7 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
- HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
+ HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
@@ -227,7 +227,9 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
*/
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
-size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
+size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+ const U32* count, U32 maxSymbolValue, U32 maxNbBits,
+ void* workSpace, size_t wkspSize);
/*! HUF_readStats() :
* Read compact Huffman tree, saved by HUF_writeCTable().
@@ -242,10 +244,15 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+/** HUF_getNbBits() :
+ * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+ * Note 1 : is not inlined, as HUF_CElt definition is private
+ * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
/*
* HUF_decompress() does the following:
- * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+ * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
* 2. build Huffman table from save, using HUF_readDTableX?()
* 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
*/
@@ -253,13 +260,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
/** HUF_selectDecoder() :
* Tells which decoder is likely to decode faster,
* based on a set of pre-computed metrics.
- * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
* Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
/**
* The minimum workspace size for the `workSpace` used in
- * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
*
* The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
* HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
@@ -270,14 +277,14 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
-size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
-size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
/* ====================== */
@@ -298,25 +305,25 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
-size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
-size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
-size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
/* BMI2 variants.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
-size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 47d2300177c0..5da248756ffd 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -39,6 +39,10 @@ extern "C" {
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
+#ifndef __has_builtin
+# define __has_builtin(x) 0 /* compat. with non-clang compilers */
+#endif
+
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
@@ -57,11 +61,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
typedef uint64_t U64;
typedef int64_t S64;
#else
+# include <limits.h>
+#if CHAR_BIT != 8
+# error "this implementation requires char to be exactly 8-bit type"
+#endif
typedef unsigned char BYTE;
+#if USHRT_MAX != 65535
+# error "this implementation requires short to be exactly 16-bit type"
+#endif
typedef unsigned short U16;
typedef signed short S16;
+#if UINT_MAX != 4294967295
+# error "this implementation requires int to be exactly 32-bit type"
+#endif
typedef unsigned int U32;
typedef signed int S32;
+/* note : there are no limits defined for long long type in C90.
+ * limits exist in C99, however, in such case, <stdint.h> is preferred */
typedef unsigned long long U64;
typedef signed long long S64;
#endif
@@ -186,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_ulong(in);
-#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
+#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
+ || (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#else
return ((in << 24) & 0xff000000 ) |
@@ -200,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_uint64(in);
-#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
+#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
+ || (defined(__clang__) && __has_builtin(__builtin_bswap64))
return __builtin_bswap64(in);
#else
return ((in << 56) & 0xff00000000000000ULL) |
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 773488b07255..281b3824ac4d 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -10,9 +10,10 @@
/* ====== Dependencies ======= */
-#include <stddef.h> /* size_t */
-#include "pool.h"
+#include <stddef.h> /* size_t */
+#include "debug.h" /* assert */
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
+#include "pool.h"
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
@@ -33,8 +34,9 @@ typedef struct POOL_job_s {
struct POOL_ctx_s {
ZSTD_customMem customMem;
/* Keep track of the threads */
- ZSTD_pthread_t *threads;
- size_t numThreads;
+ ZSTD_pthread_t* threads;
+ size_t threadCapacity;
+ size_t threadLimit;
/* The queue is a circular buffer */
POOL_job *queue;
@@ -58,10 +60,10 @@ struct POOL_ctx_s {
};
/* POOL_thread() :
- Work thread for the thread pool.
- Waits for jobs and executes them.
- @returns : NULL on failure else non-null.
-*/
+ * Work thread for the thread pool.
+ * Waits for jobs and executes them.
+ * @returns : NULL on failure else non-null.
+ */
static void* POOL_thread(void* opaque) {
POOL_ctx* const ctx = (POOL_ctx*)opaque;
if (!ctx) { return NULL; }
@@ -69,14 +71,17 @@ static void* POOL_thread(void* opaque) {
/* Lock the mutex and wait for a non-empty queue or until shutdown */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
- while (ctx->queueEmpty && !ctx->shutdown) {
+ while ( ctx->queueEmpty
+ || (ctx->numThreadsBusy >= ctx->threadLimit) ) {
+ if (ctx->shutdown) {
+ /* even if !queueEmpty (possible when numThreadsBusy >= threadLimit),
+ * a few threads will shut down while jobs are still queued,
+ * but enough threads will remain active to finish the queue */
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+ return opaque;
+ }
ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
}
- /* empty => shutting down: so stop */
- if (ctx->queueEmpty) {
- ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
- return opaque;
- }
/* Pop a job off the queue */
{ POOL_job const job = ctx->queue[ctx->queueHead];
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
@@ -89,30 +94,32 @@ static void* POOL_thread(void* opaque) {
job.function(job.opaque);
/* If the intended queue size was 0, signal after finishing job */
+ ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+ ctx->numThreadsBusy--;
if (ctx->queueSize == 1) {
- ZSTD_pthread_mutex_lock(&ctx->queueMutex);
- ctx->numThreadsBusy--;
- ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
- } }
+ }
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+ }
} /* for (;;) */
- /* Unreachable */
+ assert(0); /* Unreachable */
}
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
-POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
+ ZSTD_customMem customMem) {
POOL_ctx* ctx;
- /* Check the parameters */
+ /* Check parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
if (!ctx) { return NULL; }
/* Initialize the job queue.
- * It needs one extra space since one space is wasted to differentiate empty
- * and full queues.
+ * It needs one extra space since one space is wasted to differentiate
+ * empty and full queues.
*/
ctx->queueSize = queueSize + 1;
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
@@ -126,7 +133,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
- ctx->numThreads = 0;
+ ctx->threadCapacity = 0;
ctx->customMem = customMem;
/* Check for errors */
if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
@@ -134,11 +141,12 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
{ size_t i;
for (i = 0; i < numThreads; ++i) {
if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
- ctx->numThreads = i;
+ ctx->threadCapacity = i;
POOL_free(ctx);
return NULL;
} }
- ctx->numThreads = numThreads;
+ ctx->threadCapacity = numThreads;
+ ctx->threadLimit = numThreads;
}
return ctx;
}
@@ -156,8 +164,8 @@ static void POOL_join(POOL_ctx* ctx) {
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
/* Join all of the threads */
{ size_t i;
- for (i = 0; i < ctx->numThreads; ++i) {
- ZSTD_pthread_join(ctx->threads[i], NULL);
+ for (i = 0; i < ctx->threadCapacity; ++i) {
+ ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
} }
}
@@ -172,24 +180,68 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_free(ctx, ctx->customMem);
}
+
+
size_t POOL_sizeof(POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx)
+ ctx->queueSize * sizeof(POOL_job)
- + ctx->numThreads * sizeof(ZSTD_pthread_t);
+ + ctx->threadCapacity * sizeof(ZSTD_pthread_t);
+}
+
+
+/* @return : 0 on success, 1 on error */
+static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
+{
+ if (numThreads <= ctx->threadCapacity) {
+ if (!numThreads) return 1;
+ ctx->threadLimit = numThreads;
+ return 0;
+ }
+ /* numThreads > threadCapacity */
+ { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
+ if (!threadPool) return 1;
+ /* replace existing thread pool */
+ memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
+ ZSTD_free(ctx->threads, ctx->customMem);
+ ctx->threads = threadPool;
+ /* Initialize additional threads */
+ { size_t threadId;
+ for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
+ if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) {
+ ctx->threadCapacity = threadId;
+ return 1;
+ } }
+ } }
+ /* successfully expanded */
+ ctx->threadCapacity = numThreads;
+ ctx->threadLimit = numThreads;
+ return 0;
+}
+
+/* @return : 0 on success, 1 on error */
+int POOL_resize(POOL_ctx* ctx, size_t numThreads)
+{
+ int result;
+ if (ctx==NULL) return 1;
+ ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+ result = POOL_resize_internal(ctx, numThreads);
+ ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+ return result;
}
/**
* Returns 1 if the queue is full and 0 otherwise.
*
- * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
- * then a queue is empty if there is a thread free and no job is waiting.
+ * When queueSize is 1 (pool was created with an intended queueSize of 0),
+ * then a queue is empty if there is a thread free _and_ no job is waiting.
*/
static int isQueueFull(POOL_ctx const* ctx) {
if (ctx->queueSize > 1) {
return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
} else {
- return ctx->numThreadsBusy == ctx->numThreads ||
+ return (ctx->numThreadsBusy == ctx->threadLimit) ||
!ctx->queueEmpty;
}
}
@@ -263,6 +315,11 @@ void POOL_free(POOL_ctx* ctx) {
(void)ctx;
}
+int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
+ (void)ctx; (void)numThreads;
+ return 0;
+}
+
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
(void)ctx;
function(opaque);
diff --git a/lib/common/pool.h b/lib/common/pool.h
index a57e9b4fabc2..458d37f13c3e 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -30,40 +30,50 @@ typedef struct POOL_ctx_s POOL_ctx;
*/
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
-POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
+ ZSTD_customMem customMem);
/*! POOL_free() :
- Free a thread pool returned by POOL_create().
-*/
+ * Free a thread pool returned by POOL_create().
+ */
void POOL_free(POOL_ctx* ctx);
+/*! POOL_resize() :
+ * Expands or shrinks pool's number of threads.
+ * This is more efficient than releasing + creating a new context,
+ * since it tries to preserve and re-use existing threads.
+ * `numThreads` must be at least 1.
+ * @return : 0 when resize was successful,
+ * !0 (typically 1) if there is an error.
+ * note : only numThreads can be resized, queueSize remains unchanged.
+ */
+int POOL_resize(POOL_ctx* ctx, size_t numThreads);
+
/*! POOL_sizeof() :
- return memory usage of pool returned by POOL_create().
-*/
+ * @return threadpool memory usage
+ * note : compatible with NULL (returns 0 in this case)
+ */
size_t POOL_sizeof(POOL_ctx* ctx);
/*! POOL_function :
- The function type that can be added to a thread pool.
-*/
+ * The function type that can be added to a thread pool.
+ */
typedef void (*POOL_function)(void*);
-/*! POOL_add_function :
- The function type for a generic thread pool add function.
-*/
-typedef void (*POOL_add_function)(void*, POOL_function, void*);
/*! POOL_add() :
- Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
- Possibly blocks until there is room in the queue.
- Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
-*/
+ * Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
+ * Possibly blocks until there is room in the queue.
+ * Note : The function may be executed asynchronously,
+ * therefore, `opaque` must live until function has been completed.
+ */
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
/*! POOL_tryAdd() :
- Add the job `function(opaque)` to the thread pool if a worker is available.
- return immediately otherwise.
- @return : 1 if successful, 0 if not.
-*/
+ * Add the job `function(opaque)` to thread pool _if_ a worker is available.
+ * Returns immediately even when no worker is available (does not block).
+ * @return : 1 if successful, 0 if not.
+ */
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index 9d9c0e963cbf..532b8161929d 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -98,6 +98,7 @@
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
+#include <stddef.h> /* size_t */
static void* XXH_malloc(size_t s) { return malloc(s); }
static void XXH_free (void* p) { free(p); }
/* for memcpy() */
diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c
index bccc948892d8..6f05d240e43c 100644
--- a/lib/common/zstd_common.c
+++ b/lib/common/zstd_common.c
@@ -46,11 +46,6 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
* provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
-/*! g_debuglog_enable :
- * turn on/off debug traces (global switch) */
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
-int g_debuglog_enable = 1;
-#endif
/*=**************************************************************
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 65c08a825706..e75adfa61323 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -21,6 +21,7 @@
***************************************/
#include "compiler.h"
#include "mem.h"
+#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
@@ -38,43 +39,8 @@
extern "C" {
#endif
-
-/*-*************************************
-* Debug
-***************************************/
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
-# include <assert.h>
-#else
-# ifndef assert
-# define assert(condition) ((void)0)
-# endif
-#endif
-
-#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
-
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
-# include <stdio.h>
-extern int g_debuglog_enable;
-/* recommended values for ZSTD_DEBUG display levels :
- * 1 : no display, enables assert() only
- * 2 : reserved for currently active debug path
- * 3 : events once per object lifetime (CCtx, CDict, etc.)
- * 4 : events once per frame
- * 5 : events once per block
- * 6 : events once per sequence (*very* verbose) */
-# define RAWLOG(l, ...) { \
- if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
- fprintf(stderr, __VA_ARGS__); \
- } }
-# define DEBUGLOG(l, ...) { \
- if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
- fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
- fprintf(stderr, " \n"); \
- } }
-#else
-# define RAWLOG(l, ...) {} /* disabled */
-# define DEBUGLOG(l, ...) {} /* disabled */
-#endif
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
/*-*************************************
@@ -113,8 +79,7 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
-#define ZSTD_FRAMEIDSIZE 4
-static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
+#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
@@ -227,6 +192,8 @@ typedef struct {
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
+ size_t maxNbSeq;
+ size_t maxNbLit;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
} seqStore_t;