/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/

/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 *
 */

#include <sys/zfs_context.h>

int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
 *	Lowering this value reduces memory usage; reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the algorithm
 *	skip data segments considered "incompressible" sooner. This may
 *	decrease the compression ratio dramatically, but it is faster on
 *	incompressible data. Increasing this value makes the algorithm search
 *	longer before declaring a segment "incompressible". This could improve
 *	compression a bit, but is slower on incompressible data. The default
 *	value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*-************************************
*  CPU Feature Detection
**************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable across compilers but violates the C standard,
 *            and it can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2).
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif

#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#ifndef _KERNEL
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */
#endif
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)

#define MEM_INIT(p,v,s)   memset((p),(v),(s))


/*-************************************
*  Common Constants
**************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
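
/*
 * Added note (not in upstream lz4): these size macros are written postfix,
 * so an expression like (64 KB), as used in the dictSize check inside
 * LZ4_decompress_generic() below, expands to 64 *(1 <<10) == 65536 bytes.
 */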

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
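
/*
 * Added note (not in upstream lz4): every LZ4 sequence starts with a token
 * byte whose high RUN_BITS bits hold the literal length and whose low
 * ML_BITS bits hold the match length.  For example, a token of 0x52 decodes
 * as:
 *
 *     literal_length = token >> ML_BITS;              -> 0x52 >> 4  == 5
 *     match_length   = (token & ML_MASK) + MINMATCH;  -> (0x52 & 0xF) + 4 == 6
 *
 * A field equal to its mask (RUN_MASK or ML_MASK) means additional length
 * bytes follow; see read_variable_length() below.
 */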

#define DEBUGLOG(l, ...) {}    /* disabled */

#ifndef assert
#define assert ASSERT
#endif

/*-************************************
*  Types
**************************************/
#ifndef _KERNEL
#include <limits.h>
#endif
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
# if UINT_MAX != 4294967295UL
#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;


/*-************************************
*  Reading and writing into memory
**************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif

static unsigned LZ4_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}


#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }

static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write32(void* memPtr, U32 value)
{
    LZ4_memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */

static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}
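
/*
 * Usage sketch (added note, not in upstream lz4): because LZ4_wildCopy8()
 * copies in whole 8-byte strides, callers must guarantee up to
 * WILDCOPYLENGTH - 1 bytes of slack past dstEnd, e.g.:
 *
 *     BYTE out[len + WILDCOPYLENGTH];     // hypothetical buffer with slack
 *     LZ4_wildCopy8(out, in, out + len);  // may write a few bytes past out+len
 *
 * The decoder below guarantees that slack through its MFLIMIT/LASTLITERALS
 * and oCopyLimit checks rather than by over-allocating.
 */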

static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
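
/*
 * Added note (not in upstream lz4): these tables let the decoder expand an
 * overlapping match (offset < 8) eight bytes at a time.  The first four
 * bytes are copied one at a time, then `match += inc32table[offset]` allows
 * a 4-byte copy for the next four, and `match -= dec64table[offset]` leaves
 * the source trailing the output by a multiple of the offset that is >= 8,
 * so plain 8-byte copies can continue the pattern.  For offset == 3 and a
 * repeating pattern "abc":
 *
 *     before : ... a b c                (match == op - 3)
 *     after  : ... a b c a b c a b      (match now trails op by 9)
 */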


#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there are at least 8 bytes available to write after dstEnd */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    switch(offset) {
    case 1:
        MEM_INIT(v, *srcPtr, 8);
        break;
    case 2:
        LZ4_memcpy(v, srcPtr, 2);
        LZ4_memcpy(&v[2], srcPtr, 2);
        LZ4_memcpy(&v[4], v, 4);
        break;
    case 4:
        LZ4_memcpy(v, srcPtr, 4);
        LZ4_memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    LZ4_memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        LZ4_memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
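
/*
 * Added example (not in upstream lz4): for the special-cased offsets 1, 2
 * and 4 above, an 8-byte repeating pattern is materialized once in v[] and
 * then stamped repeatedly.  With offset == 2 and source bytes "xy":
 *
 *     v   = { x, y, x, y, x, y, x, y };
 *     dst = x y x y x y x y | x y x y x y x y | ...
 *
 * Each iteration writes a full 8 bytes, possibly running a little past
 * dstEnd, which the caller's required slack allows.
 */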
#endif


/*-************************************
*  Local Structures and types
**************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
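
/*
 * Added sketch (not in upstream lz4) of the usingExtDict layout assumed by
 * the decoder below:
 *
 *     dictStart .......... dictEnd        lowPrefix ............ op ... oend
 *     |<------ dictSize ------>|          |<-- already decoded -->|
 *
 * A match pointer computed as op - offset that falls below lowPrefix starts
 * inside the external dictionary; the copy then sources from
 * dictEnd - (lowPrefix - match) and may continue from lowPrefix.
 */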

/*-*******************************
 *  Decompression functions
 ********************************/

typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;

LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE**ip, const BYTE* lencheck,
                     int loop_check, int initial_check,
                     variable_length_error* error)
{
    U32 length = 0;
    U32 s;
    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
        *error = initial_error;
        return length;
    }
    do {
        s = **ip;
        (*ip)++;
        length += s;
        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
            *error = loop_error;
            return length;
        }
    } while (s==255);

    return length;
}
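
/*
 * Added example (not in upstream lz4): a length field equal to its mask is
 * extended by extra bytes, each added to the total, until a byte below 255
 * terminates the run.  Given the input bytes
 *
 *     0xFF 0xFF 0x21
 *
 * read_variable_length() returns 255 + 255 + 33 == 543, which the caller
 * adds to the 15 already encoded in the token (plus MINMATCH for match
 * lengths).
 */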

#define	LZ4_STATIC_ASSERT(c)	ASSERT(c)


/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
                 const char* const src,
                 char* const dst,
                 int srcSize,
                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
                 earlyEnd_directive partialDecoding,  /* full, partial */
                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
                 const size_t dictSize         /* note : = 0 if noDict */
                 )
{
    if ((src == NULL) || (outputSize < 0)) { return -1; }

    {   const BYTE* ip = (const BYTE*) src;
        const BYTE* const iend = ip + srcSize;

        BYTE* op = (BYTE*) dst;
        BYTE* const oend = op + outputSize;
        BYTE* cpy;

        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

        const int safeDecode = (endOnInput==endOnInputSize);
        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));


        /* Set up the "end" pointers for the shortcut. */
        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

        const BYTE* match;
        size_t offset;
        unsigned token;
        size_t length;


        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

        /* Special cases */
        assert(lowPrefix <= op);
        if ((endOnInput) && (unlikely(outputSize==0))) {
            /* Empty output buffer */
            if (partialDecoding) return 0;
            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
        }
        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }

	/* Currently the fast loop shows a regression on qualcomm arm chips. */
#if LZ4_FAST_DEC_LOOP
        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
            DEBUGLOG(6, "skip fast decode loop");
            goto safe_decode;
        }

        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
        while (1) {
            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
            if (endOnInput) { assert(ip < iend); }
            token = *ip++;
            length = token >> ML_BITS;  /* literal length */

            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */

                /* copy literals */
                cpy = op+length;
                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
                if (endOnInput) {  /* LZ4_decompress_safe() */
                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
                    LZ4_wildCopy32(op, ip, cpy);
                } else {   /* LZ4_decompress_fast() */
                    if (cpy>oend-8) { goto safe_literal_copy; }
                    LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                                                 * it doesn't know input length, and only relies on end-of-block properties */
                }
                ip += length; op = cpy;
            } else {
                cpy = op+length;
                if (endOnInput) {  /* LZ4_decompress_safe() */
                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
                    /* We don't need to check oend, since we check it once for each loop below */
                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
                    /* Literals can only be <= 14 here, but hope compilers optimize if we copy by a register size */
                    LZ4_memcpy(op, ip, 16);
                } else {  /* LZ4_decompress_fast() */
                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                     * it doesn't know input length, and relies on end-of-block properties */
                    LZ4_memcpy(op, ip, 8);
                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
                }
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;
            assert(match <= op);

            /* get matchlength */
            length = token & ML_MASK;

            if (length == ML_MASK) {
                variable_length_error error = ok;
                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }
            } else {
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }

                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                    if (offset >= 8) {
                        assert(match >= lowPrefix);
                        assert(match <= op);
                        assert(op + 18 <= oend);

                        LZ4_memcpy(op, match, 8);
                        LZ4_memcpy(op+8, match+8, 8);
                        LZ4_memcpy(op+16, match+16, 2);
                        op += length;
                        continue;
            }   }   }

            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) {
                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
                        length = MIN(length, (size_t)(oend-op));
                    } else {
                        goto _output_error;  /* end-of-block condition violated */
                }   }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) { *op++ = *copyFrom++; }
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }

            /* copy match within block */
            cpy = op + length;

            assert((op <= oend) && (oend-op >= 32));
            if (unlikely(offset<16)) {
                LZ4_memcpy_using_offset(op, match, cpy, offset);
            } else {
                LZ4_wildCopy32(op, match, cpy);
            }

            op = cpy;   /* wildcopy correction */
        }
    safe_decode:
#endif

        /* Main Loop : decode remaining sequences, where fewer than FASTLOOP_SAFE_DISTANCE output bytes remain */
        while (1) {
            token = *ip++;
            length = token >> ML_BITS;  /* literal length */

            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */

            /* A two-stage shortcut for the most common case:
             * 1) If the literal length is 0..14, and there is enough space,
             * enter the shortcut and copy 16 bytes on behalf of the literals
             * (in the fast mode, only 8 bytes can be safely copied this way).
             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
             * manner; but we ensure that there's enough space in the output for
             * those 18 bytes earlier, upon entering the shortcut (in other words,
             * there is a combined check for both stages).
             */
            if ( (endOnInput ? length != RUN_MASK : length <= 8)
                /* strictly "less than" on input, to re-enter the loop with at least one byte */
              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
                /* Copy the literals */
                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
                op += length; ip += length;

                /* The second stage: prepare for match copying, decode full info.
                 * If it doesn't work out, the info won't be wasted. */
                length = token & ML_MASK; /* match length */
                offset = LZ4_readLE16(ip); ip += 2;
                match = op - offset;
                assert(match <= op); /* check overflow */

                /* Do not deal with overlapping matches. */
                if ( (length != ML_MASK)
                  && (offset >= 8)
                  && (dict==withPrefix64k || match >= lowPrefix) ) {
                    /* Copy the match. */
                    LZ4_memcpy(op + 0, match + 0, 8);
                    LZ4_memcpy(op + 8, match + 8, 8);
                    LZ4_memcpy(op +16, match +16, 2);
                    op += length + MINMATCH;
                    /* Both stages worked, load the next token. */
                    continue;
                }

                /* The second stage didn't work out, but the info is ready.
                 * Propel it right to the point of match copying. */
                goto _copy_match;
            }

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
            }

            /* copy literals */
            cpy = op+length;
#if LZ4_FAST_DEC_LOOP
        safe_literal_copy:
#endif
            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
            {
                /* We've either hit the input parsing restriction or the output parsing restriction.
                 * In the normal scenario, decoding a full block, it must be the last sequence,
                 * otherwise it's an error (invalid input or dimensions).
                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                 */
                if (partialDecoding) {
                    /* Since we are partially decoding, we may be in this block because of the output parsing
                     * restriction, which does not apply here since an undersized output buffer is allowed.
                     */
                    assert(endOnInput);
                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
                    /* Finishing in the middle of a literals segment,
                     * due to lack of input.
                     */
                    if (ip+length > iend) {
                        length = (size_t)(iend-ip);
                        cpy = op + length;
                    }
                    /* Finishing in the middle of a literals segment,
                     * due to lack of output space.
                     */
                    if (cpy > oend) {
                        cpy = oend;
                        assert(op<=oend);
                        length = (size_t)(oend-op);
                    }
                } else {
                    /* We must be on the last sequence because of the parsing limitations so check
                     * that we exactly regenerate the original size (must be exact when !endOnInput).
                     */
                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
                     /* We must be on the last sequence (or invalid) because of the parsing limitations
                      * so check that we exactly consume the input and don't overrun the output buffer.
                      */
                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
                        DEBUGLOG(6, "should have been last run of literals")
                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
                        goto _output_error;
                    }
                }
                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                ip += length;
                op += length;
                /* Necessarily EOF when !partialDecoding.
                 * When partialDecoding, it is EOF if we've either
                 * filled the output buffer or
                 * can't proceed with reading an offset for following match.
                 */
                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                    break;
                }
            } else {
                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;

            /* get matchlength */
            length = token & ML_MASK;

    _copy_match:
            if (length == ML_MASK) {
              variable_length_error error = ok;
              length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
              if (error != ok) goto _output_error;
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
            }
            length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
        safe_match_copy:
#endif
            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
                    else goto _output_error;   /* doesn't respect parsing restriction */
                }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) *op++ = *copyFrom++;
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }
            assert(match >= lowPrefix);

            /* copy match within block */
            cpy = op + length;

            /* partialDecoding : may end anywhere within the block */
            assert(op<=oend);
            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                size_t const mlen = MIN(length, (size_t)(oend-op));
                const BYTE* const matchEnd = match + mlen;
                BYTE* const copyEnd = op + mlen;
                if (matchEnd > op) {   /* overlap copy */
                    while (op < copyEnd) { *op++ = *match++; }
                } else {
                    LZ4_memcpy(op, match, mlen);
                }
                op = copyEnd;
                if (op == oend) { break; }
                continue;
            }

            if (unlikely(offset<8)) {
                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                op[0] = match[0];
                op[1] = match[1];
                op[2] = match[2];
                op[3] = match[3];
                match += inc32table[offset];
                LZ4_memcpy(op+4, match, 4);
                match -= dec64table[offset];
            } else {
                LZ4_memcpy(op, match, 8);
                match += 8;
            }
            op += 8;

            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                if (op < oCopyLimit) {
                    LZ4_wildCopy8(op, match, oCopyLimit);
                    match += oCopyLimit - op;
                    op = oCopyLimit;
                }
                while (op < cpy) { *op++ = *match++; }
            } else {
                LZ4_memcpy(op, match, 8);
                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
            }
            op = cpy;   /* wildcopy correction */
        }

        /* end of decoding */
        if (endOnInput) {
            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
           return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
       } else {
           return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
       }

        /* Overflow error detected */
    _output_error:
        return (int) (-(((const char*)ip)-src))-1;
    }
}

/*
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 */

/*
 * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
 *       wrapper for LZ4_decompress_safe which is a wrapper for
 *	 LZ4_decompress_generic; this wrapper flattens that, rather than
 *	 rewriting the callers.
 */
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
                                  endOnInputSize, decode_full_block, noDict,
                                  (BYTE*)dest, NULL, 0);
}

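/*
 * Usage sketch (added for illustration only; `src`, `src_len`, and
 * DST_CAPACITY are hypothetical):
 *
 *     char dst[DST_CAPACITY];
 *     int n = LZ4_uncompress_unknownOutputSize(src, dst, src_len,
 *         sizeof (dst));
 *     if (n < 0)
 *             return (-1);    // malformed or truncated input
 *     // n bytes of decompressed data are now in dst
 */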