/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */
/*
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/zfs_context.h>
#elif defined(_STANDALONE)
#include <sys/cdefs.h>
#include <stand.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <assert.h>

#undef ASSERT
#define	ASSERT	assert
#else
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <assert.h>

#undef ASSERT
#define	ASSERT	assert
#endif
#include "lz4.h"

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
static kmem_cache_t *lz4_ctx_cache;
#endif

size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	*(uint32_t *)(void *)dest = BE_32(bufsiz);
#else
	*(uint32_t *)(void *)dest = htonl(bufsiz);
#endif

	return (bufsiz + sizeof (bufsiz));
}
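
/*
 * Illustrative sketch (not compiled): the buffer produced above is a 4-byte
 * big-endian length header followed by the raw LZ4 stream, so for a
 * hypothetical destination buffer d_buf:
 *
 *	d_buf[0..3]	: compressed payload size, big-endian
 *	d_buf[4..ret-1]	: LZ4-compressed payload
 *
 * where ret is the value returned by lz4_compress(). A return equal to
 * s_len signals failure, in which case the caller (zio_compress_data)
 * stores the block uncompressed.
 */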

int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	const char *src = s_start;
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	uint32_t bufsiz = BE_IN32(s_start);
#else
	uint32_t bufsiz = htonl(*(uint32_t *)s_start);
#endif

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		destination buffer must be sized to handle worst case
 * 		situations (input data not compressible).
 *
 * Advanced Functions
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 */
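
/*
 * Illustrative sketch (not compiled; src, dst, out and their sizes are
 * hypothetical caller-owned buffers) of the round trip through the two
 * entry points described above:
 *
 *	int c = real_LZ4_compress(src, dst, src_len, dst_len);
 *	if (c == 0) {
 *		// output did not fit in dst_len bytes: give up
 *	} else {
 *		int d = LZ4_uncompress_unknownOutputSize(dst, out, c, out_len);
 *		// d >= 0 : number of bytes decoded into out (<= out_len)
 *		// d <  0 : malformed stream; -d is the offending input offset
 *	}
 */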

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip data segments considered "incompressible" faster.
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0
/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

#define	expect(expr, value)    (__builtin_expect((expr), (value)))

#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
#define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
#define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)
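
/*
 * Illustrative note (not compiled): A16/A32/A64 perform possibly unaligned
 * loads and stores through the packed wrapper structs above, e.g. for
 * hypothetical BYTE pointers ip and op:
 *
 *	U32 w = A32(ip);	// 4-byte load from ip, any alignment
 *	A32(op) = w;		// 4-byte store to op, any alignment
 *
 * When LZ4_FORCE_UNALIGNED_ACCESS is defined, the #pragma pack(1) above is
 * skipped, so the compiler assumes natural alignment and emits plain word
 * accesses.
 */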

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
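
/*
 * Illustrative note (not compiled): every encoded sequence starts with a
 * one-byte token whose upper 4 bits hold the literal run length (RUN_MASK)
 * and whose lower 4 bits hold the match length (ML_MASK).  A field equal
 * to its mask (15) means the length continues in extra bytes: 255-valued
 * bytes accumulate until a byte below 255 ends the count.  For example,
 * the encoding loops below emit a 300-byte literal run as a token nibble
 * of 15 followed by the bytes 255 and 30 (15 + 255 + 30 = 300).
 */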


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
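
/*
 * Illustrative note (not compiled): LZ4_HASH_VALUE() is a multiplicative
 * hash of the 4 bytes at p.  With the default HASH_LOG of 12 it expands to
 *
 *	(A32(p) * 2654435761U) >> 20
 *
 * i.e. the top 12 bits of the 32-bit product, indexing the 2^12-entry
 * hashTable in struct refTables above.
 */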


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
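
/*
 * Illustrative note (not compiled): given the XOR of two words,
 * LZ4_NbCommonBytes() returns how many bytes, counted from the lowest
 * memory address, the two words have in common.  On a little-endian 64-bit
 * build, for example, diff = 0x0000000000FF0000 yields
 * __builtin_ctzll(diff) = 16 and 16 >> 3 = 2 matching bytes; the match
 * loops below use this to extend matches a whole word at a time.
 */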

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
#else
	void *ctx = calloc(1, sizeof(struct refTables));
#endif
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	bzero(ctx, sizeof(struct refTables));
	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	kmem_cache_free(lz4_ctx_cache, ctx);
#else
	free(ctx);
#endif
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *	against "buffer overflow" attacks. It will never write nor
 *	read outside of the provided output buffers.
 *	LZ4_uncompress_unknownOutputSize() also ensures that it will never
 *	read outside of the input buffer.  A corrupted input will produce
 *	an error result, a negative int, indicating the position of the
 *	error within the input stream.
 */
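
/*
 * Illustrative sketch (not compiled): the lz4_decompress() wrapper near the
 * top of this file reduces that contract to a simple pass/fail, e.g. for a
 * hypothetical compressed buffer cbuf of clen bytes:
 *
 *	if (lz4_decompress(cbuf, obuf, clen, olen, 0) != 0) {
 *		// bad length header, or a negative (malformed stream)
 *		// result from LZ4_uncompress_unknownOutputSize()
 *	}
 */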

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((const char *)ip) - source));
}

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
void
lz4_init(void)
{

#if HEAPMODE
	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif
}

void
lz4_fini(void)
{

#if HEAPMODE
	kmem_cache_destroy(lz4_ctx_cache);
#endif
}
#endif /* _KERNEL || _FAKE_KERNEL */
