/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */
/*
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static kmem_cache_t *lz4_ctx_cache;

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

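/*
 * Usage sketch (added commentary; the caller, buffer names and the 4KB
 * size below are hypothetical, not part of this file). lz4_compress()
 * prepends the compressed length as a big-endian uint32_t, so the
 * destination must reserve room for that header; a return value equal
 * to s_len signals that compression failed or did not help:
 *
 *	char dst[4096 + sizeof (uint32_t)], out[4096];
 *	size_t c_len = lz4_compress(src, dst, 4096, sizeof (dst), 0);
 *	if (c_len == 4096)
 *		... failed or incompressible: keep the original data ...
 *	else if (lz4_decompress(dst, out, c_len, sizeof (out), 0) != 0)
 *		... corrupt or truncated compressed stream ...
 */
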
/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		destination buffer must be sized to handle worst-case
 * 		situations (input data not compressible); worst-case size
 * 		evaluation is provided by function LZ4_compressBound().
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible), primarily useful for memory
 * 	allocation of the output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 */

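/*
 * Illustrative sketch of the CTX contract described above (hypothetical
 * caller; struct refTables is defined further down in this file, and
 * real_LZ4_compress() below does the equivalent through a kmem cache):
 *
 *	struct refTables *ctx = kmem_zalloc(sizeof (struct refTables),
 *	    KM_SLEEP);
 *	int c_len = LZ4_compressCtx(ctx, src, dst, isize, osize);
 *	kmem_free(ctx, sizeof (struct refTables));
 */
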
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio;
 *	lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster over data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * FreeBSD: can't use GCC's __builtin_ctz when targeting sparc64 because
 * gcc currently relies on libcompiler_rt.
 *
 * TODO: revisit this when the situation changes.
 */
#if defined(__sparc64__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

#define	expect(expr, value)    (__builtin_expect((expr), (value)))

#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)

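/*
 * Worked example of the sequence token implied by ML_BITS/RUN_MASK above
 * (added commentary, not normative): each sequence begins with a token
 * byte whose high RUN_BITS (4) bits hold the literal-run length and whose
 * low ML_BITS (4) bits hold the match length beyond MINMATCH. A field
 * value of RUN_MASK/ML_MASK (15) means "long field": keep adding the
 * following bytes until one is != 255. E.g. 20 literals followed by a
 * 6-byte match (6 - MINMATCH = 2) encode as:
 *
 *	*token = (RUN_MASK << ML_BITS) | 2;	(0xf2)
 *	*op++ = 20 - RUN_MASK;			(extra length byte: 5)
 */
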
/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
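/*
 * Added commentary: LZ4_HASH_FUNCTION() is a Knuth-style multiplicative
 * hash over the four bytes loaded by A32(p). With MINMATCH * 8 = 32 and
 * HASH_LOG = 12, the 32-bit product is shifted right by 20, so exactly
 * the top HASH_LOG bits survive and no additional masking is needed to
 * index the 2^12-entry hash table.
 */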
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
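
/*
 * Worked example (added commentary): for a 4096-byte input the bound is
 * 4096 + (4096 / 255) + 16 = 4096 + 16 + 16 = 4128 bytes: at worst one
 * extra byte of literal-length encoding per 255 input bytes, plus a
 * small constant for the final token and last literals.
 */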

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

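/*
 * Added commentary: inputs below LZ4_64KLIMIT take the 64k path, where
 * hash-table entries are 16-bit offsets from the buffer start (U16)
 * instead of U32/pointer-sized values. Twice as many entries therefore
 * fit in the same refTables storage (HASHLOG64K = HASH_LOG + 1), and the
 * MAX_DISTANCE check can be dropped because no match in a <64KB buffer
 * can be farther away than 2^16 - 1 bytes.
 */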
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	bzero(ctx, sizeof (struct refTables));
	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_ctx_cache, ctx);
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *	against "buffer overflow" attacks. It will never write nor
 *	read outside of the provided output buffer.
 *	LZ4_uncompress_unknownOutputSize() also ensures that it will never
 *	read outside of the input buffer. A corrupted input will produce
 *	an error result, a negative int, indicating the position of the
 *	error within the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif
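
	/*
	 * Added commentary on the tables above: when the match offset
	 * (op - ref) is smaller than STEPSIZE, a word-at-a-time copy would
	 * read bytes that have not been written yet. The byte-wise copy of
	 * the first four bytes below, followed by the dec32table/dec64table
	 * adjustments of ref, makes the subsequent word copies re-read
	 * already-written output so that the short repeating pattern is
	 * replicated correctly.
	 */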

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

extern void
lz4_init(void)
{

#if HEAPMODE
	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof (struct refTables),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif
}

extern void
lz4_fini(void)
{

#if HEAPMODE
	kmem_cache_destroy(lz4_ctx_cache);
#endif
}