diff options
Diffstat (limited to 'lib/compress/zstd_lazy.c')
| -rw-r--r-- | lib/compress/zstd_lazy.c | 749 | 
1 files changed, 749 insertions, 0 deletions
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c new file mode 100644 index 000000000000..2a7f6a0fe2be --- /dev/null +++ b/lib/compress/zstd_lazy.c @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_lazy.h" + + +/*-************************************* +*  Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. +*   ip : assumed <= iend-8 . +*   @return : nb of positions added */ +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, +                          U32 extDict) +{ +    U32*   const hashTable = zc->hashTable; +    U32    const hashLog = zc->appliedParams.cParams.hashLog; +    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls); +    U32*   const bt = zc->chainTable; +    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1; +    U32    const btMask = (1 << btLog) - 1; +    U32 matchIndex = hashTable[h]; +    size_t commonLengthSmaller=0, commonLengthLarger=0; +    const BYTE* const base = zc->base; +    const BYTE* const dictBase = zc->dictBase; +    const U32 dictLimit = zc->dictLimit; +    const BYTE* const dictEnd = dictBase + dictLimit; +    const BYTE* const prefixStart = base + dictLimit; +    const BYTE* match; +    const U32 current = (U32)(ip-base); +    const U32 btLow = btMask >= current ? 0 : current - btMask; +    U32* smallerPtr = bt + 2*(current&btMask); +    U32* largerPtr  = smallerPtr + 1; +    U32 dummy32;   /* to be nullified at the end */ +    U32 const windowLow = zc->lowLimit; +    U32 matchEndIdx = current+8; +    size_t bestLength = 8; +#ifdef ZSTD_C_PREDICT +    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); +    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); +    predictedSmall += (predictedSmall>0); +    predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + +    assert(ip <= iend-8);   /* required for h calculation */ +    hashTable[h] = current;   /* Update Hash Table */ + +    while (nbCompares-- && (matchIndex > windowLow)) { +        U32* const nextPtr = bt + 2*(matchIndex & btMask); +        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */ + +#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */ +        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */ +        if (matchIndex == predictedSmall) { +            /* no need to check length, result known */ +            *smallerPtr = matchIndex; +            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */ +            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */ +            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */ +            predictedSmall = predictPtr[1] + (predictPtr[1]>0); +            continue; +        } +        if (matchIndex == predictedLarge) { +            *largerPtr = matchIndex; +            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */ +            largerPtr = nextPtr; +            matchIndex = nextPtr[0]; +            predictedLarge = predictPtr[0] + (predictPtr[0]>0); +            continue; +        } +#endif +        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { +            match = base + matchIndex; +            if (match[matchLength] == ip[matchLength]) +                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; +        } else { +            match = dictBase + matchIndex; +            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); +            if (matchIndex+matchLength >= dictLimit) +                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */ +        } + +        if (matchLength > bestLength) { +            bestLength = matchLength; +            if (matchLength > matchEndIdx - matchIndex) +                matchEndIdx = matchIndex + (U32)matchLength; +        } + +        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */ +            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + +        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */ +            /* match+1 is smaller than current */ +            *smallerPtr = matchIndex;             /* update smaller idx */ +            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */ +            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */ +            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */ +            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */ +        } else { +            /* match is larger than current */ +            *largerPtr = matchIndex; +            commonLengthLarger = matchLength; +            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */ +            largerPtr = nextPtr; +            matchIndex = nextPtr[0]; +    }   } + +    *smallerPtr = *largerPtr = 0; +    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */ +    if (matchEndIdx > current + 8) return matchEndIdx - (current + 8); +    return 1; +} + + +static size_t ZSTD_insertBtAndFindBestMatch ( +                        ZSTD_CCtx* zc, +                        const BYTE* const ip, const BYTE* const iend, +                        size_t* offsetPtr, +                        U32 nbCompares, const U32 mls, +                        U32 extDict) +{ +    U32*   const hashTable = zc->hashTable; +    U32    const hashLog = zc->appliedParams.cParams.hashLog; +    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls); +    U32*   const bt = zc->chainTable; +    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1; +    U32    const btMask = (1 << btLog) - 1; +    U32 matchIndex  = hashTable[h]; +    size_t commonLengthSmaller=0, commonLengthLarger=0; +    const BYTE* const base = zc->base; +    const BYTE* const dictBase = zc->dictBase; +    const U32 dictLimit = zc->dictLimit; +    const BYTE* const dictEnd = dictBase + dictLimit; +    const BYTE* const prefixStart = base + dictLimit; +    const U32 current = (U32)(ip-base); +    const U32 btLow = btMask >= current ? 0 : current - btMask; +    const U32 windowLow = zc->lowLimit; +    U32* smallerPtr = bt + 2*(current&btMask); +    U32* largerPtr  = bt + 2*(current&btMask) + 1; +    U32 matchEndIdx = current+8; +    U32 dummy32;   /* to be nullified at the end */ +    size_t bestLength = 0; + +    assert(ip <= iend-8);   /* required for h calculation */ +    hashTable[h] = current;   /* Update Hash Table */ + +    while (nbCompares-- && (matchIndex > windowLow)) { +        U32* const nextPtr = bt + 2*(matchIndex & btMask); +        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */ +        const BYTE* match; + +        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { +            match = base + matchIndex; +            if (match[matchLength] == ip[matchLength]) +                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; +        } else { +            match = dictBase + matchIndex; +            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); +            if (matchIndex+matchLength >= dictLimit) +                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */ +        } + +        if (matchLength > bestLength) { +            if (matchLength > matchEndIdx - matchIndex) +                matchEndIdx = matchIndex + (U32)matchLength; +            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) +                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; +            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */ +                break;   /* drop, to guarantee consistency (miss a little bit of compression) */ +        } + +        if (match[matchLength] < ip[matchLength]) { +            /* match is smaller than current */ +            *smallerPtr = matchIndex;             /* update smaller idx */ +            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */ +            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */ +            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */ +            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */ +        } else { +            /* match is larger than current */ +            *largerPtr = matchIndex; +            commonLengthLarger = matchLength; +            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */ +            largerPtr = nextPtr; +            matchIndex = nextPtr[0]; +    }   } + +    *smallerPtr = *largerPtr = 0; + +    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; +    return bestLength; +} + + +void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ +    const BYTE* const base = zc->base; +    const U32 target = (U32)(ip - base); +    U32 idx = zc->nextToUpdate; + +    while(idx < target) +        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); +} + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( +                        ZSTD_CCtx* zc, +                        const BYTE* const ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 mls) +{ +    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */ +    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); +    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( +                        ZSTD_CCtx* zc,   /* Index table will be updated */ +                        const BYTE* ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 matchLengthSearch) +{ +    switch(matchLengthSearch) +    { +    default : /* includes case 3 */ +    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); +    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); +    case 7 : +    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); +    } +} + + +void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ +    const BYTE* const base = zc->base; +    const U32 target = (U32)(ip - base); +    U32 idx = zc->nextToUpdate; + +    while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); +} + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( +                        ZSTD_CCtx* zc, +                        const BYTE* const ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 mls) +{ +    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */ +    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); +    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( +                        ZSTD_CCtx* zc,   /* Index table will be updated */ +                        const BYTE* ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 matchLengthSearch) +{ +    switch(matchLengthSearch) +    { +    default : /* includes case 3 */ +    case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); +    case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); +    case 7 : +    case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); +    } +} + + + +/* ********************************* +*  Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask] + +/* Update chains up to ip (excluded) +   Assumption : always within prefix (i.e. not within extDict) */ +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ +    U32* const hashTable  = zc->hashTable; +    const U32 hashLog = zc->appliedParams.cParams.hashLog; +    U32* const chainTable = zc->chainTable; +    const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1; +    const BYTE* const base = zc->base; +    const U32 target = (U32)(ip - base); +    U32 idx = zc->nextToUpdate; + +    while(idx < target) { /* catch up */ +        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); +        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; +        hashTable[h] = idx; +        idx++; +    } + +    zc->nextToUpdate = target; +    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( +                        ZSTD_CCtx* zc,   /* Index table will be updated */ +                        const BYTE* const ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 mls, const U32 extDict) +{ +    U32* const chainTable = zc->chainTable; +    const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog); +    const U32 chainMask = chainSize-1; +    const BYTE* const base = zc->base; +    const BYTE* const dictBase = zc->dictBase; +    const U32 dictLimit = zc->dictLimit; +    const BYTE* const prefixStart = base + dictLimit; +    const BYTE* const dictEnd = dictBase + dictLimit; +    const U32 lowLimit = zc->lowLimit; +    const U32 current = (U32)(ip-base); +    const U32 minChain = current > chainSize ? current - chainSize : 0; +    int nbAttempts=maxNbAttempts; +    size_t ml=4-1; + +    /* HC4 match finder */ +    U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); + +    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { +        const BYTE* match; +        size_t currentMl=0; +        if ((!extDict) || matchIndex >= dictLimit) { +            match = base + matchIndex; +            if (match[ml] == ip[ml])   /* potentially better */ +                currentMl = ZSTD_count(ip, match, iLimit); +        } else { +            match = dictBase + matchIndex; +            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */ +                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; +        } + +        /* save best solution */ +        if (currentMl > ml) { +            ml = currentMl; +            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; +            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ +        } + +        if (matchIndex <= minChain) break; +        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); +    } + +    return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( +                        ZSTD_CCtx* zc, +                        const BYTE* ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 matchLengthSearch) +{ +    switch(matchLengthSearch) +    { +    default : /* includes case 3 */ +    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); +    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); +    case 7 : +    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); +    } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( +                        ZSTD_CCtx* zc, +                        const BYTE* ip, const BYTE* const iLimit, +                        size_t* offsetPtr, +                        const U32 maxNbAttempts, const U32 matchLengthSearch) +{ +    switch(matchLengthSearch) +    { +    default : /* includes case 3 */ +    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); +    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); +    case 7 : +    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); +    } +} + + +/* ******************************* +*  Common parser - lazy strategy +*********************************/ +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, +                                       const void* src, size_t srcSize, +                                       const U32 searchMethod, const U32 depth) +{ +    seqStore_t* seqStorePtr = &(ctx->seqStore); +    const BYTE* const istart = (const BYTE*)src; +    const BYTE* ip = istart; +    const BYTE* anchor = istart; +    const BYTE* const iend = istart + srcSize; +    const BYTE* const ilimit = iend - 8; +    const BYTE* const base = ctx->base + ctx->dictLimit; + +    U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog; +    U32 const mls = ctx->appliedParams.cParams.searchLength; + +    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, +                        size_t* offsetPtr, +                        U32 maxNbAttempts, U32 matchLengthSearch); +    searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; +    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0; + +    /* init */ +    ip += (ip==base); +    ctx->nextToUpdate3 = ctx->nextToUpdate; +    {   U32 const maxRep = (U32)(ip-base); +        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; +        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; +    } + +    /* Match Loop */ +    while (ip < ilimit) { +        size_t matchLength=0; +        size_t offset=0; +        const BYTE* start=ip+1; + +        /* check repCode */ +        if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { +            /* repcode : we take it */ +            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; +            if (depth==0) goto _storeSequence; +        } + +        /* first search (depth 0) */ +        {   size_t offsetFound = 99999999; +            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); +            if (ml2 > matchLength) +                matchLength = ml2, start = ip, offset=offsetFound; +        } + +        if (matchLength < 4) { +            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */ +            continue; +        } + +        /* let's try to find a better solution */ +        if (depth>=1) +        while (ip<ilimit) { +            ip ++; +            if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { +                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; +                int const gain2 = (int)(mlRep * 3); +                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); +                if ((mlRep >= 4) && (gain2 > gain1)) +                    matchLength = mlRep, offset = 0, start = ip; +            } +            {   size_t offset2=99999999; +                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); +                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */ +                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); +                if ((ml2 >= 4) && (gain2 > gain1)) { +                    matchLength = ml2, offset = offset2, start = ip; +                    continue;   /* search a better one */ +            }   } + +            /* let's find an even better one */ +            if ((depth==2) && (ip<ilimit)) { +                ip ++; +                if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { +                    size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; +                    int const gain2 = (int)(ml2 * 4); +                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); +                    if ((ml2 >= 4) && (gain2 > gain1)) +                        matchLength = ml2, offset = 0, start = ip; +                } +                {   size_t offset2=99999999; +                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); +                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */ +                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); +                    if ((ml2 >= 4) && (gain2 > gain1)) { +                        matchLength = ml2, offset = offset2, start = ip; +                        continue; +            }   }   } +            break;  /* nothing found : store previous solution */ +        } + +        /* NOTE: +         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. +         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which +         * overflows the pointer, which is undefined behavior. +         */ +        /* catch up */ +        if (offset) { +            while ( (start > anchor) +                 && (start > base+offset-ZSTD_REP_MOVE) +                 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) )  /* only search for offset within prefix */ +                { start--; matchLength++; } +            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); +        } +        /* store sequence */ +_storeSequence: +        {   size_t const litLength = start - anchor; +            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); +            anchor = ip = start + matchLength; +        } + +        /* check immediate repcode */ +        while ( (ip <= ilimit) +             && ((offset_2>0) +             & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { +            /* store sequence */ +            matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; +            offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ +            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); +            ip += matchLength; +            anchor = ip; +            continue;   /* faster when present ... (?) */ +    }   } + +    /* Save reps for next block */ +    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; +    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; + +    /* Return the last literals size */ +    return iend - anchor; +} + + +size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); +} + +size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); +} + +size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); +} + +size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, +                                     const void* src, size_t srcSize, +                                     const U32 searchMethod, const U32 depth) +{ +    seqStore_t* seqStorePtr = &(ctx->seqStore); +    const BYTE* const istart = (const BYTE*)src; +    const BYTE* ip = istart; +    const BYTE* anchor = istart; +    const BYTE* const iend = istart + srcSize; +    const BYTE* const ilimit = iend - 8; +    const BYTE* const base = ctx->base; +    const U32 dictLimit = ctx->dictLimit; +    const U32 lowestIndex = ctx->lowLimit; +    const BYTE* const prefixStart = base + dictLimit; +    const BYTE* const dictBase = ctx->dictBase; +    const BYTE* const dictEnd  = dictBase + dictLimit; +    const BYTE* const dictStart  = dictBase + ctx->lowLimit; + +    const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog; +    const U32 mls = ctx->appliedParams.cParams.searchLength; + +    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, +                        size_t* offsetPtr, +                        U32 maxNbAttempts, U32 matchLengthSearch); +    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + +    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1]; + +    /* init */ +    ctx->nextToUpdate3 = ctx->nextToUpdate; +    ip += (ip == prefixStart); + +    /* Match Loop */ +    while (ip < ilimit) { +        size_t matchLength=0; +        size_t offset=0; +        const BYTE* start=ip+1; +        U32 current = (U32)(ip-base); + +        /* check repCode */ +        {   const U32 repIndex = (U32)(current+1 - offset_1); +            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; +            const BYTE* const repMatch = repBase + repIndex; +            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */ +            if (MEM_read32(ip+1) == MEM_read32(repMatch)) { +                /* repcode detected we should take it */ +                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; +                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; +                if (depth==0) goto _storeSequence; +        }   } + +        /* first search (depth 0) */ +        {   size_t offsetFound = 99999999; +            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); +            if (ml2 > matchLength) +                matchLength = ml2, start = ip, offset=offsetFound; +        } + +         if (matchLength < 4) { +            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */ +            continue; +        } + +        /* let's try to find a better solution */ +        if (depth>=1) +        while (ip<ilimit) { +            ip ++; +            current++; +            /* check repCode */ +            if (offset) { +                const U32 repIndex = (U32)(current - offset_1); +                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; +                const BYTE* const repMatch = repBase + repIndex; +                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */ +                if (MEM_read32(ip) == MEM_read32(repMatch)) { +                    /* repcode detected */ +                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; +                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; +                    int const gain2 = (int)(repLength * 3); +                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); +                    if ((repLength >= 4) && (gain2 > gain1)) +                        matchLength = repLength, offset = 0, start = ip; +            }   } + +            /* search match, depth 1 */ +            {   size_t offset2=99999999; +                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); +                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */ +                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); +                if ((ml2 >= 4) && (gain2 > gain1)) { +                    matchLength = ml2, offset = offset2, start = ip; +                    continue;   /* search a better one */ +            }   } + +            /* let's find an even better one */ +            if ((depth==2) && (ip<ilimit)) { +                ip ++; +                current++; +                /* check repCode */ +                if (offset) { +                    const U32 repIndex = (U32)(current - offset_1); +                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; +                    const BYTE* const repMatch = repBase + repIndex; +                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */ +                    if (MEM_read32(ip) == MEM_read32(repMatch)) { +                        /* repcode detected */ +                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; +                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; +                        int const gain2 = (int)(repLength * 4); +                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); +                        if ((repLength >= 4) && (gain2 > gain1)) +                            matchLength = repLength, offset = 0, start = ip; +                }   } + +                /* search match, depth 2 */ +                {   size_t offset2=99999999; +                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); +                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */ +                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); +                    if ((ml2 >= 4) && (gain2 > gain1)) { +                        matchLength = ml2, offset = offset2, start = ip; +                        continue; +            }   }   } +            break;  /* nothing found : store previous solution */ +        } + +        /* catch up */ +        if (offset) { +            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); +            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; +            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; +            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */ +            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); +        } + +        /* store sequence */ +_storeSequence: +        {   size_t const litLength = start - anchor; +            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); +            anchor = ip = start + matchLength; +        } + +        /* check immediate repcode */ +        while (ip <= ilimit) { +            const U32 repIndex = (U32)((ip-base) - offset_2); +            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; +            const BYTE* const repMatch = repBase + repIndex; +            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */ +            if (MEM_read32(ip) == MEM_read32(repMatch)) { +                /* repcode detected we should take it */ +                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; +                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; +                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */ +                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); +                ip += matchLength; +                anchor = ip; +                continue;   /* faster when present ... (?) */ +            } +            break; +    }   } + +    /* Save reps for next block */ +    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; + +    /* Return the last literals size */ +    return iend - anchor; +} + + +size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); +}  | 
