/*
 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"


#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
#  define BITCOST_ACCURACY 0
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
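
/* Note : prices are expressed in fixed-point units of 1/BITCOST_MULTIPLIER bit,
 * i.e. 1/256th of a bit at the default BITCOST_ACCURACY of 8.
 * For example, a price of 640 represents 640/256 = 2.5 bits.
 * Working in scaled integers keeps sub-bit resolution without floating-point. */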

MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}
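
/* Note : ZSTD_fracWeight() linearly interpolates log2() between powers of 2 :
 * writing stat = 2^hb * (1+f) with f in [0,1), FWeight = (1+f) * BITCOST_MULTIPLIER,
 * so weight / BITCOST_MULTIPLIER = hb + 1 + f, which approximates log2(stat) + 1.
 * The constant +1 bias is harmless, since prices are always differences of weights,
 * so it cancels out. Worked example : rawStat=5 => stat=6, hb=2, BWeight=512,
 * FWeight=(6<<8)>>2=384, weight=896 = 3.5 bits in fixed-point,
 * vs the exact value log2(6)+1 = 3.58 bits. */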

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        table[s] = 1 + (table[s] >> shift);
        sum += table[s];
    }
    return sum;
}
/* ZSTD_scaleStats() :
 * reduce all elements in table if their sum is too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
}
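
/* Note, as a worked example : with logTarget=11 (target sum ~2K),
 * a table summing to prevsum=10000 gives factor = 10000>>11 = 4,
 * hence shift = ZSTD_highbit32(4) = 2 : each element becomes 1 + (elt>>2),
 * bringing the sum back near the target, while the +1 guarantees
 * every symbol keeps a non-zero frequency (hence a finite price). */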

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* first block : init */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
            /* huffman table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

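            /* Note : the loops below invert entropy tables into pseudo-frequencies.
             * A symbol coded in bitCost bits has an implied probability of 2^-bitCost,
             * so its frequency within a table scaled to roughly 2^scaleLog is
             * 1 << (scaleLog - bitCost); symbols absent from the table get
             * frequency 1, the minimum required to compute a finite price. */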
            if (compressedLiterals) {
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* not a dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }


        }

    } else {   /* new block : re-use previous statistics, scaled down */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (whose length may be 0).
 * does not include the price of the literalLength symbol */
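/* Note : with dynamic statistics, each literal byte c is priced at approximately
 * log2(litSum) - log2(litFreq[c]) = -log2(P(c)) bits, i.e. its entropy cost,
 * computed below as litLength * litSumBasePrice minus the summed weights,
 * all in BITCOST_MULTIPLIER fixed-point units. */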
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = litLength * optPtr->litSumBasePrice;
        U32 u;
        for (u=0; u < litLength; u++) {
            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
        }
        return price;
    }
}

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);
    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
     * because it isn't representable in the zstd format. So instead just
     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
     * would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence
 * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice()) to get the full cost of a sequence.
 * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
 */
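/* Note : offCode = ZSTD_highbit32(offbase) is the zstd offset code : an offset
 * with code N is transmitted as the entropy-coded symbol N followed by N raw
 * extra bits. The price below therefore adds offCode extra bits to the entropy
 * cost of the offCode symbol (offCodeSumBasePrice - WEIGHT(offCodeFreq[offCode])),
 * and similarly ML_bits[mlCode] extra bits for the match length. */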
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode,
                   U32 const matchLength,
             const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly, to favor fewer sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offsetCode, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : expected to follow storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
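/* Note : for length==3, a full 32-bit word is read, then the byte which does not
 * belong to the 3-byte prefix is shifted out : on little-endian hosts the extra
 * byte occupies the most significant position, hence the left shift; on
 * big-endian hosts it occupies the least significant position, hence the right
 * shift. Two locations therefore compare equal iff their first 3 bytes match. */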
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                              U32* nextToUpdate3,
                                              const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
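/* Note on the tree layout : the binary tree lives inside ms->chainTable.
 * Each position idx owns two consecutive slots : bt[2*(idx&btMask)] links to the
 * subtree of lexicographically smaller suffixes, bt[2*(idx&btMask)+1] to the
 * larger ones. Insertion walks down from the hash-table root, maintaining
 * commonLengthSmaller / commonLengthLarger as guaranteed common-prefix lengths,
 * so each comparison can resume past the already-known prefix. Since the table
 * is a rolling buffer of (1<<btLog) positions, links at or below btLow are
 * treated as leaves. */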
static U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}

FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
                    ZSTD_matchState_t* ms,
                    U32* nextToUpdate3,
                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
                    const U32 rep[ZSTD_REP_NUM],
                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                    const U32 lengthToBeat,
                    U32 const mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = STORE_OFFSET(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}

typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_matchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
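/* Note, as a worked example : with a single stored sequence
 * {litLength=5, matchLength=10} and posInSequence=0, skipping 8 bytes leaves
 * posInSequence=8 (inside the match), while skipping 15 bytes consumes the
 * sequence entirely : pos advances by 1 and posInSequence resets to 0. */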
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
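/* Note, as a worked example : at currPosInBlock=100, with the next stored
 * sequence {litLength=4, matchLength=20} and posInSequence=0, the candidate
 * becomes startPosInBlock=104 and endPosInBlock=124, clamped to the block end
 * if the match runs past it. */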
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on its 'startPosInBlock' and 'endPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
                 candidateOffCode, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffCode;
        (*nbMatches)++;
    }
}

/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * beyond 'endPosInBlock'. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    return sol.litlen + sol.mlen;
}

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif

FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                               seqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
                         const int optLevel,
                         const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastSequence;
    ZSTD_optLdm_t optLdm;

    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

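    /* Note : opt[] implements a forward dynamic program over the next
     * ZSTD_OPT_NUM positions : opt[pos].price is the cheapest cost found so far
     * to reach ip+pos (up to a constant literals cost shared by all paths), and
     * each entry records the last step taken (mlen==0 for a literal, otherwise
     * a match of length mlen with offset code off). The loop below relaxes each
     * reachable position with one extra literal and with every match found
     * there; _shortestPath then walks backward from the cheapest endpoint to
     * emit the selected sequences. */
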
    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                              (U32)(ip-istart), (U32)(iend - ip));
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
            opt[0].mlen = 0;  /* means is_a_literal */
            opt[0].litlen = litlen;
            /* We don't need to include the actual price of the literals because
             * it is static for the duration of the forward pass, and is included
             * in every price. We include the literal length to avoid negative
             * prices when we subtract the previous literal length.
             */
            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffcode = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastSequence.litlen = litlen;
                    lastSequence.mlen = maxML;
                    lastSequence.off = maxOffcode;
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offcode = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
                        U32 const sequencePrice = literalsPrice + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offcode;
                        opt[pos].litlen = litlen;
                        opt[pos].price = (int)sequencePrice;
                }   }
                last_pos = pos-1;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
1146
1147            /* Fix current position with one literal if cheaper */
1148            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1149                int const price = opt[cur-1].price
1150                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1153                assert(price < 1000000000); /* overflow check */
1154                if (price <= opt[cur].price) {
1155                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158                    opt[cur].mlen = 0;
1159                    opt[cur].off = 0;
1160                    opt[cur].litlen = litlen;
1161                    opt[cur].price = price;
1162                } else {
1163                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1166                }
1167            }
1168
1169            /* Set the repcodes of the current position. We must do it here
1170             * because we rely on the repcodes of the 2nd to last sequence being
1171             * correct to set the next chunks repcodes during the backward
1172             * traversal.
1173             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].mlen != 0) {
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            } else {
                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

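            /* note : prices carry BITCOST_ACCURACY fractional bits,
             * so (BITCOST_MULTIPLIER/2) below represents half a bit */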
            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
                continue;  /* skip unpromising positions; about +6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = (U32)opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr));

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const maxML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
                                inr-istart, cur, nbMatches, maxML);

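                    /* note : a match large enough (> sufficient_len), or one that
                     * would overflow the opt[] table, ends the search immediately :
                     * it gets encoded as-is, on the assumption that optimizing
                     * further would not pay for its cpu cost */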
                    if ( (maxML > sufficient_len)
                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
                        lastSequence.mlen = maxML;
                        lastSequence.off = matches[nbMatches-1].off;
                        lastSequence.litlen = litlen;
                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case it's the first sequence, and that's okay */
                        last_pos = cur + ZSTD_totalLen(lastSequence);
                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, litlen);

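                    /* note : matches[] is sorted by increasing length, so each entry
                     * only needs to price lengths in ]matches[matchNb-1].len, lastML].
                     * Scanning downward lets optLevel==0 abort early, on the heuristic
                     * that shorter lengths are then unlikely to improve prices either. */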
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastSequence = opt[last_pos];
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
        assert(cur < ZSTD_OPT_NUM);  /* overflow control */

_shortestPath:   /* cur, last_pos and lastSequence have to be set */
        assert(opt[0].mlen == 0);

        /* Set the next chunk's repcodes based on the repcodes of the beginning
         * of the last match, and the last sequence. This avoids having to
         * update them while traversing the sequences.
         */
        if (lastSequence.mlen != 0) {
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(reps));
        } else {
            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
        }

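        /* note : opt[] only links each position to its predecessor (via mlen+litlen),
         * so the selected parse is recovered backward from opt[last_pos],
         * then compacted into opt[storeStart..storeEnd] for left-to-right emission */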
        {   U32 const storeEnd = cur + 1;
            U32 storeStart = storeEnd;
            U32 seqPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_NUM);
            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
            opt[storeEnd] = lastSequence;
            while (seqPos > 0) {
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
                storeStart--;
                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
                opt[storeStart] = opt[seqPos];
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore");
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offCode = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
                    anchor += advance;
                    ip = anchor;
            }   }
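            /* note : ZSTD_updateStats() just changed the symbol frequencies,
             * so the per-symbol base prices are refreshed before pricing the next chunk */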
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

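/* note : optLevel 0 (btopt) trades a little ratio for speed : it uses approximate
 * bit costs (ZSTD_bitWeight) plus the early-abort heuristics above, while
 * optLevel 2 (btultra) prices everything with fractional-bit accuracy */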
static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}

static size_t ZSTD_compressBlock_opt2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}

size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}


/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot fail, hence its contract must be respected.
 */
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                     seqStore_t* seqStore,
                     U32 rep[ZSTD_REP_NUM],
               const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as two's complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */

    /* invalidate first scan from history */
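    /* note : rewinding window.base while raising dictLimit/lowLimit keeps current
     * indices consistent, but pushes every position of the first pass below lowLimit,
     * so the real pass re-searches from scratch while keeping the seeded stats */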
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
}

size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-pass strategy:
     * this strategy makes a first pass over the first block, in order to collect
     * statistics and seed the next round's statistics with them.
     * After the 1st pass, the function forgets everything, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded into tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
     * the cost is 2x cpu time on first block. */
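    /* note : blocks of at most ZSTD_PREDEF_THRESHOLD bytes are priced with
     * predefined distributions anyway, so seeding stats would be wasted work ;
     * hence the srcSize condition below */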
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)   /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and is specific to the first block (no prefix) */