/* lz4.c revision 294718 */
1/*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 *     * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *     * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
33 *
34 * $FreeBSD: stable/10/sys/cddl/boot/zfs/lz4.c 294718 2016-01-25 10:45:18Z smh $
35 */
36
37static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
38					    int isize, int maxOutputSize);
39
40/* ARGSUSED */
41static int
42lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int dummy __unused)
43{
44	const uint8_t *src = s_start;
45	uint32_t bufsiz = htonl(*(uint32_t *)src);
46
47	/* invalid compressed buffer size encoded at start */
48	if (bufsiz + 4 > s_len)
49		return (1);
50
51	/*
52	 * Returns 0 on success (decompression function returned non-negative)
53	 * and non-zero on failure (decompression function returned negative).
54	 */
55	return (LZ4_uncompress_unknownOutputSize((const char *)s_start + 4, d_start, bufsiz,
56	    d_len) < 0);
57}
58
59/*
60 * CPU Feature Detection
61 */
62
63/* 32 or 64 bits ? */
64#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
65	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
66	defined(__LP64__) || defined(_LP64))
67#define	LZ4_ARCH64	1
68#else
69#define	LZ4_ARCH64	0
70#endif
71
72/*
73 * Little Endian or Big Endian?
74 * Note: overwrite the below #define if you know your architecture endianess.
75 */
76#if BYTE_ORDER == BIG_ENDIAN
77#define	LZ4_BIG_ENDIAN	1
78#else
79	/*
80	 * Little Endian assumed. PDP Endian and other very rare endian format
81	 * are unsupported.
82	 */
83#endif
84
85/*
86 * Unaligned memory access is automatically enabled for "common" CPU,
87 * such as x86. For others CPU, the compiler will be more cautious, and
88 * insert extra code to ensure aligned access is respected. If you know
89 * your target CPU supports unaligned memory access, you may want to
90 * force this option manually to improve performance
91 */
92#if defined(__ARM_FEATURE_UNALIGNED)
93#define	LZ4_FORCE_UNALIGNED_ACCESS 1
94#endif
95
96/*
97 * Compiler Options
98 */
99#if __STDC_VERSION__ >= 199901L	/* C99 */
100/* "restrict" is a known keyword */
101#else
102/* Disable restrict */
103#define	restrict
104#endif
105
106#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
107
108#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) \
109	| (((x) & 0xffu) << 8)))
110
111#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
112#define	expect(expr, value)    (__builtin_expect((expr), (value)))
113#else
114#define	expect(expr, value)    (expr)
115#endif
116
117#define	likely(expr)	expect((expr) != 0, 1)
118#define	unlikely(expr)	expect((expr) != 0, 0)
119
120/* Basic types */
121#define	BYTE	uint8_t
122#define	U16	uint16_t
123#define	U32	uint32_t
124#define	S32	int32_t
125#define	U64	uint64_t
126
127#ifndef LZ4_FORCE_UNALIGNED_ACCESS
128#pragma pack(1)
129#endif
130
131typedef struct _U16_S {
132	U16 v;
133} U16_S;
134typedef struct _U32_S {
135	U32 v;
136} U32_S;
137typedef struct _U64_S {
138	U64 v;
139} U64_S;
140
141#ifndef LZ4_FORCE_UNALIGNED_ACCESS
142#pragma pack()
143#endif
144
145#define	A64(x)	(((U64_S *)(x))->v)
146#define	A32(x)	(((U32_S *)(x))->v)
147#define	A16(x)	(((U16_S *)(x))->v)
148
149/*
150 * Constants
151 */
152#define	MINMATCH 4
153
154#define	COPYLENGTH 8
155#define	LASTLITERALS 5
156
157#define	ML_BITS 4
158#define	ML_MASK ((1U<<ML_BITS)-1)
159#define	RUN_BITS (8-ML_BITS)
160#define	RUN_MASK ((1U<<RUN_BITS)-1)
161
162/*
163 * Architecture-specific macros
164 */
165#if LZ4_ARCH64
166#define	STEPSIZE 8
167#define	UARCH U64
168#define	AARCH A64
169#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
170#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
171#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
172#define	HTYPE U32
173#define	INITBASE(base)		const BYTE* const base = ip
174#else
175#define	STEPSIZE 4
176#define	UARCH U32
177#define	AARCH A32
178#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
179#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
180#define	LZ4_SECURECOPY		LZ4_WILDCOPY
181#define	HTYPE const BYTE*
182#define	INITBASE(base)		const int base = 0
183#endif
184
185#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
186#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
187	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
188#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
189	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
190#else
191#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
192#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
193#endif
194
195/* Macros */
196#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
197
198/* Decompression functions */
199
/*
 * Decode an LZ4-compressed block whose decompressed size is not known
 * in advance.
 *
 * source:		compressed input (without the 4-byte size prefix)
 * dest:		output buffer
 * isize:		number of valid bytes at source
 * maxOutputSize:	capacity of dest
 *
 * Returns the number of bytes written to dest on success, or a negative
 * value (the negated input offset at which decoding stopped) on
 * malformed input or when the output would overflow dest.
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source,
    char *dest, int isize, int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *restrict ref;

	BYTE *restrict op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/*
	 * Rewind table for overlapping matches (offset < 4): after the
	 * initial byte-by-byte copy below, ref is pulled back so the
	 * following word copy re-reads already-written output.
	 */
	size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 };

	/* Main Loop: one iteration decodes one literals+match sequence. */
	while (ip < iend) {
		BYTE token;
		int length;

		/*
		 * get runlength: high 4 bits of the token; the value
		 * RUN_MASK means additional length bytes follow (each 255
		 * byte extends the run, a byte < 255 terminates it).
		 */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			/*
			 * Slow path: too close to either buffer end for a
			 * wild copy; validate exactly and use memcpy.
			 */
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer.
				 */
				goto _output_error;
			if (ip + length > iend)
				/*
				 * Error : request to read beyond source
				 * buffer.
				 */
				goto _output_error;
			memcpy(op, ip, length);
			op += length;
			ip += length;
			if (ip < iend)
				/* Error : LZ4 format violation */
				goto _output_error;
			/* Necessarily EOF, due to parsing restrictions. */
			break;
		}
		/* Fast path: wild copy, then rewind ip by the overshoot. */
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset: 16-bit little-endian distance back from cpy */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer.
			 */
			goto _output_error;

		/* get matchlength: low 4 bits, same extension scheme */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match (offset < STEPSIZE): copy the
			 * first 4 bytes one at a time, then fix up ref via
			 * dec/dec2 so the word copies below stay correct.
			 */
#if LZ4_ARCH64
			size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
			size_t dec2 = dec2table[op - ref];
#else
			const int dec2 = 0;
#endif
			*op++ = *ref++;
			*op++ = *ref++;
			*op++ = *ref++;
			*op++ = *ref++;
			ref -= dec[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec2;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		/* STEPSIZE bytes already copied above; copy the rest. */
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer.
				 */
				goto _output_error;
			/* Wild-copy only up to the last safe point... */
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			/* ...then finish byte-by-byte near the buffer end. */
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals).
				 */
				break;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding: report number of bytes produced */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}
328