diff options
Diffstat (limited to 'lib/legacy/zstd_v01.c')
-rw-r--r-- | lib/legacy/zstd_v01.c | 196 |
1 files changed, 95 insertions, 101 deletions
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index cad2b99b4a732..ae8cba2a3fa20 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ const void* cSrc, size_t cSrcSize, const U16* DTable) { - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-15; - - const void* ptr = DTable; - const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; - const U32 dtLog = DTable[0]; - size_t errorCode; - U32 reloadStatus; - - /* Init */ - - const U16* jumpTable = (const U16*)cSrc; - const size_t length1 = FSE_readLE16(jumpTable); - const size_t length2 = FSE_readLE16(jumpTable+1); - const size_t length3 = FSE_readLE16(jumpTable+2); - const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! - const char* const start1 = (const char*)(cSrc) + 6; - const char* const start2 = start1 + length1; - const char* const start3 = start2 + length2; - const char* const start4 = start3 + length3; - FSE_DStream_t bitD1, bitD2, bitD3, bitD4; - - if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - - errorCode = FSE_initDStream(&bitD1, start1, length1); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD2, start2, length2); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD3, start3, length3); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD4, start4, length4); - if (FSE_isError(errorCode)) return errorCode; - - reloadStatus=FSE_reloadDStream(&bitD2); - - /* 16 symbols per loop */ - for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */ - op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1)) + if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong; { -#define HUF_DECODE_SYMBOL_0(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); - -#define HUF_DECODE_SYMBOL_1(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ - if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream) - -#define HUF_DECODE_SYMBOL_2(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ - if (FSE_32bits()) FSE_reloadDStream(&Dstream) - - HUF_DECODE_SYMBOL_1( 0, bitD1); - HUF_DECODE_SYMBOL_1( 1, bitD2); - HUF_DECODE_SYMBOL_1( 2, bitD3); - HUF_DECODE_SYMBOL_1( 3, bitD4); - HUF_DECODE_SYMBOL_2( 4, bitD1); - HUF_DECODE_SYMBOL_2( 5, bitD2); - HUF_DECODE_SYMBOL_2( 6, bitD3); - HUF_DECODE_SYMBOL_2( 7, bitD4); - HUF_DECODE_SYMBOL_1( 8, bitD1); - HUF_DECODE_SYMBOL_1( 9, bitD2); - HUF_DECODE_SYMBOL_1(10, bitD3); - HUF_DECODE_SYMBOL_1(11, bitD4); - HUF_DECODE_SYMBOL_0(12, bitD1); - HUF_DECODE_SYMBOL_0(13, bitD2); - HUF_DECODE_SYMBOL_0(14, bitD3); - HUF_DECODE_SYMBOL_0(15, bitD4); - } + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-15; + + const void* ptr = DTable; + const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; + const U32 dtLog = DTable[0]; + size_t errorCode; + U32 reloadStatus; + + /* Init */ + + const U16* jumpTable = (const U16*)cSrc; + const size_t length1 = FSE_readLE16(jumpTable); + const size_t length2 = FSE_readLE16(jumpTable+1); + const size_t length3 = FSE_readLE16(jumpTable+2); + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! + const char* const start1 = (const char*)(cSrc) + 6; + const char* const start2 = start1 + length1; + const char* const start3 = start2 + length2; + const char* const start4 = start3 + length3; + FSE_DStream_t bitD1, bitD2, bitD3, bitD4; + + if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + + errorCode = FSE_initDStream(&bitD1, start1, length1); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD2, start2, length2); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD3, start3, length3); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD4, start4, length4); + if (FSE_isError(errorCode)) return errorCode; + + reloadStatus=FSE_reloadDStream(&bitD2); + + /* 16 symbols per loop */ + for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */ + op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1)) + { + #define HUF_DECODE_SYMBOL_0(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); + + #define HUF_DECODE_SYMBOL_1(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream) + + #define HUF_DECODE_SYMBOL_2(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits()) FSE_reloadDStream(&Dstream) + + HUF_DECODE_SYMBOL_1( 0, bitD1); + HUF_DECODE_SYMBOL_1( 1, bitD2); + HUF_DECODE_SYMBOL_1( 2, bitD3); + HUF_DECODE_SYMBOL_1( 3, bitD4); + HUF_DECODE_SYMBOL_2( 4, bitD1); + HUF_DECODE_SYMBOL_2( 5, bitD2); + HUF_DECODE_SYMBOL_2( 6, bitD3); + HUF_DECODE_SYMBOL_2( 7, bitD4); + HUF_DECODE_SYMBOL_1( 8, bitD1); + HUF_DECODE_SYMBOL_1( 9, bitD2); + HUF_DECODE_SYMBOL_1(10, bitD3); + HUF_DECODE_SYMBOL_1(11, bitD4); + HUF_DECODE_SYMBOL_0(12, bitD1); + HUF_DECODE_SYMBOL_0(13, bitD2); + HUF_DECODE_SYMBOL_0(14, bitD3); + HUF_DECODE_SYMBOL_0(15, bitD4); + } - if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; + if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ + return (size_t)-FSE_ERROR_corruptionDetected; - /* tail */ - { - // bitTail = bitD1; // *much* slower : -20% !??! - FSE_DStream_t bitTail; - bitTail.ptr = bitD1.ptr; - bitTail.bitsConsumed = bitD1.bitsConsumed; - bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer - bitTail.start = start1; - for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++) + /* tail */ { - HUF_DECODE_SYMBOL_0(0, bitTail); - } + // bitTail = bitD1; // *much* slower : -20% !??! + FSE_DStream_t bitTail; + bitTail.ptr = bitD1.ptr; + bitTail.bitsConsumed = bitD1.bitsConsumed; + bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer + bitTail.start = start1; + for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++) + { + HUF_DECODE_SYMBOL_0(0, bitTail); + } - if (FSE_endOfDStream(&bitTail)) - return op-ostart; - } + if (FSE_endOfDStream(&bitTail)) + return op-ostart; + } - if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ + if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; + return (size_t)-FSE_ERROR_corruptionDetected; + } } @@ -1355,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void) static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; } -static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; } - static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } @@ -1381,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr) } } - -static U32 ZSTD_readLE32(const void* memPtr) +static U32 ZSTD_readLE24(const void* memPtr) { - if (ZSTD_isLittleEndian()) - return ZSTD_read32(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); - } + return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); } static U32 ZSTD_readBE32(const void* memPtr) @@ -1704,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) seqState->prevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = dumps<de ? *dumps++ : 0; + const U32 add = dumps<de ? *dumps++ : 0; if (add < 255) litLength += add; else { if (dumps<=(de-3)) { - litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ + litLength = ZSTD_readLE24(dumps); dumps += 3; } } @@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = dumps<de ? *dumps++ : 0; + const U32 add = dumps<de ? *dumps++ : 0; if (add < 255) matchLength += add; else { if (dumps<=(de-3)) { - matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ + matchLength = ZSTD_readLE24(dumps); dumps += 3; } } |