1/*-
2 * Copyright (c) 2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#include <stdio.h>
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
35#ifdef HAVE_STRING_H
36#include <string.h>
37#endif
38#ifdef HAVE_LZ4_H
39#include <lz4.h>
40#endif
41#ifdef HAVE_LZ4HC_H
42#include <lz4hc.h>
43#endif
44
45#include "archive.h"
46#include "archive_endian.h"
47#include "archive_private.h"
48#include "archive_write_private.h"
49#include "archive_xxhash.h"
50
51#define LZ4_MAGICNUMBER	0x184d2204
52
/*
 * Per-filter state.  The leading members are shared by both build
 * flavours; the rest depends on whether the filter is backed by liblz4
 * or by an external "lz4" program.
 */
struct private_data {
	/* Level chosen through the "compression-level" option. */
	int		 compression_level;
	/* Set once the stream descriptor has been emitted. */
	unsigned	 header_written:1;
	/* The following six flags are packed into the FLG byte. */
	unsigned	 version_number:1;
	unsigned	 block_independence:1;
	unsigned	 block_checksum:1;
	unsigned	 stream_size:1;
	unsigned	 stream_checksum:1;
	unsigned	 preset_dictionary:1;
	/* Block size code (4..7) stored in the BD byte. */
	unsigned	 block_maximum_size:3;
#if defined(HAVE_LIBLZ4) && LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 2
	/* Running count of bytes handed to the write callback. */
	int64_t		 total_in;
	/* Output staging: write cursor, base, capacity, flush unit. */
	char		*out;
	char		*out_buffer;
	size_t		 out_buffer_size;
	size_t		 out_block_size;
	/* Input staging: fill cursor, malloc'ed base, block start, size. */
	char		*in;
	char		*in_buffer_allocated;
	char		*in_buffer;
	size_t		 in_buffer_size;
	/* Uncompressed block size in bytes derived from the size code. */
	size_t		 block_size;

	/* Streaming hash state used for the stream checksum. */
	void		*xxh32_state;
	/* liblz4 streaming context used for dependent blocks. */
	void		*lz4_stream;
#else
	/* Handle for the external compression program. */
	struct archive_write_program_data *pdata;
#endif
};
81
/*
 * Filter callbacks installed by archive_write_add_filter_lz4().
 * open/write/close/free are each implemented twice below: once on top
 * of liblz4 and once by piping through an external "lz4" program.
 */
static int archive_filter_lz4_open(struct archive_write_filter *f);
static int archive_filter_lz4_options(struct archive_write_filter *f,
		    const char *key, const char *value);
static int archive_filter_lz4_write(struct archive_write_filter *f,
		    const void *buff, size_t length);
static int archive_filter_lz4_close(struct archive_write_filter *f);
static int archive_filter_lz4_free(struct archive_write_filter *f);
89
90/*
91 * Add a lz4 compression filter to this write handle.
92 */
93int
94archive_write_add_filter_lz4(struct archive *_a)
95{
96	struct archive_write *a = (struct archive_write *)_a;
97	struct archive_write_filter *f = __archive_write_allocate_filter(_a);
98	struct private_data *data;
99
100	archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
101	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lz4");
102
103	data = calloc(1, sizeof(*data));
104	if (data == NULL) {
105		archive_set_error(&a->archive, ENOMEM, "Out of memory");
106		return (ARCHIVE_FATAL);
107	}
108
109	/*
110	 * Setup default settings.
111	 */
112	data->compression_level = 1;
113	data->version_number = 0x01;
114	data->block_independence = 1;
115	data->block_checksum = 0;
116	data->stream_size = 0;
117	data->stream_checksum = 1;
118	data->preset_dictionary = 0;
119	data->block_maximum_size = 7;
120
121	/*
122	 * Setup a filter setting.
123	 */
124	f->data = data;
125	f->options = &archive_filter_lz4_options;
126	f->close = &archive_filter_lz4_close;
127	f->free = &archive_filter_lz4_free;
128	f->open = &archive_filter_lz4_open;
129	f->code = ARCHIVE_FILTER_LZ4;
130	f->name = "lz4";
131#if defined(HAVE_LIBLZ4) && LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 2
132	return (ARCHIVE_OK);
133#else
134	/*
135	 * We don't have lz4 library, and execute external lz4 program
136	 * instead.
137	 */
138	data->pdata = __archive_write_program_allocate("lz4");
139	if (data->pdata == NULL) {
140		free(data);
141		archive_set_error(&a->archive, ENOMEM, "Out of memory");
142		return (ARCHIVE_FATAL);
143	}
144	data->compression_level = 0;
145	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
146	    "Using external lz4 program");
147	return (ARCHIVE_WARN);
148#endif
149}
150
151/*
152 * Set write options.
153 */
154static int
155archive_filter_lz4_options(struct archive_write_filter *f,
156    const char *key, const char *value)
157{
158	struct private_data *data = (struct private_data *)f->data;
159
160	if (strcmp(key, "compression-level") == 0) {
161		int val;
162		if (value == NULL || !((val = value[0] - '0') >= 1 && val <= 9) ||
163		    value[1] != '\0')
164			return (ARCHIVE_WARN);
165
166#ifndef HAVE_LZ4HC_H
167		if(val >= 3)
168		{
169			archive_set_error(f->archive, ARCHIVE_ERRNO_PROGRAMMER,
170				"High compression not included in this build");
171			return (ARCHIVE_FATAL);
172		}
173#endif
174		data->compression_level = val;
175		return (ARCHIVE_OK);
176	}
177	if (strcmp(key, "stream-checksum") == 0) {
178		data->stream_checksum = value != NULL;
179		return (ARCHIVE_OK);
180	}
181	if (strcmp(key, "block-checksum") == 0) {
182		data->block_checksum = value != NULL;
183		return (ARCHIVE_OK);
184	}
185	if (strcmp(key, "block-size") == 0) {
186		if (value == NULL || !(value[0] >= '4' && value[0] <= '7') ||
187		    value[1] != '\0')
188			return (ARCHIVE_WARN);
189		data->block_maximum_size = value[0] - '0';
190		return (ARCHIVE_OK);
191	}
192	if (strcmp(key, "block-dependence") == 0) {
193		data->block_independence = value == NULL;
194		return (ARCHIVE_OK);
195	}
196
197	/* Note: The "warn" return is just to inform the options
198	 * supervisor that we didn't handle it.  It will generate
199	 * a suitable error if no one used this option. */
200	return (ARCHIVE_WARN);
201}
202
203#if defined(HAVE_LIBLZ4) && LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 2
204/* Don't compile this if we don't have liblz4. */
205
/* Helpers private to the liblz4-backed implementation. */
static int lz4_write_stream_descriptor(struct archive_write_filter *f);
static ssize_t lz4_write_one_block(struct archive_write_filter *f,
    const char *p, size_t length);
static int drive_compressor(struct archive_write_filter *f, const char *p,
    size_t length);
static int drive_compressor_independence(struct archive_write_filter *f,
    const char *p, size_t length);
static int drive_compressor_dependence(struct archive_write_filter *f,
    const char *p, size_t length);
215
216
217/*
218 * Setup callback.
219 */
220static int
221archive_filter_lz4_open(struct archive_write_filter *f)
222{
223	struct private_data *data = (struct private_data *)f->data;
224	size_t required_size;
225	static size_t const bkmap[] = { 64 * 1024, 256 * 1024, 1 * 1024 * 1024,
226			   4 * 1024 * 1024 };
227	size_t pre_block_size;
228
229	if (data->block_maximum_size < 4)
230		data->block_size = bkmap[0];
231	else
232		data->block_size = bkmap[data->block_maximum_size - 4];
233
234	required_size = 4 + 15 + 4 + data->block_size + 4 + 4;
235	if (data->out_buffer_size < required_size) {
236		size_t bs = required_size, bpb;
237		free(data->out_buffer);
238		if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
239			/* Buffer size should be a multiple number of
240			 * the of bytes per block for performance. */
241			bpb = archive_write_get_bytes_per_block(f->archive);
242			if (bpb > bs)
243				bs = bpb;
244			else if (bpb != 0) {
245				bs += bpb;
246				bs -= bs % bpb;
247			}
248		}
249		data->out_block_size = bs;
250		bs += required_size;
251		data->out_buffer = malloc(bs);
252		data->out = data->out_buffer;
253		data->out_buffer_size = bs;
254	}
255
256	pre_block_size = (data->block_independence)? 0: 64 * 1024;
257	if (data->in_buffer_size < data->block_size + pre_block_size) {
258		free(data->in_buffer_allocated);
259		data->in_buffer_size = data->block_size;
260		data->in_buffer_allocated =
261		    malloc(data->in_buffer_size + pre_block_size);
262		data->in_buffer = data->in_buffer_allocated + pre_block_size;
263		if (!data->block_independence && data->compression_level >= 3)
264		    data->in_buffer = data->in_buffer_allocated;
265		data->in = data->in_buffer;
266		data->in_buffer_size = data->block_size;
267	}
268
269	if (data->out_buffer == NULL || data->in_buffer_allocated == NULL) {
270		archive_set_error(f->archive, ENOMEM,
271		    "Can't allocate data for compression buffer");
272		return (ARCHIVE_FATAL);
273	}
274
275	f->write = archive_filter_lz4_write;
276
277	return (ARCHIVE_OK);
278}
279
280/*
281 * Write data to the out stream.
282 *
283 * Returns ARCHIVE_OK if all data written, error otherwise.
284 */
285static int
286archive_filter_lz4_write(struct archive_write_filter *f,
287    const void *buff, size_t length)
288{
289	struct private_data *data = (struct private_data *)f->data;
290	int ret = ARCHIVE_OK;
291	const char *p;
292	size_t remaining;
293	ssize_t size;
294
295	/* If we haven't written a stream descriptor, we have to do it first. */
296	if (!data->header_written) {
297		ret = lz4_write_stream_descriptor(f);
298		if (ret != ARCHIVE_OK)
299			return (ret);
300		data->header_written = 1;
301	}
302
303	/* Update statistics */
304	data->total_in += length;
305
306	p = (const char *)buff;
307	remaining = length;
308	while (remaining) {
309		size_t l;
310		/* Compress input data to output buffer */
311		size = lz4_write_one_block(f, p, remaining);
312		if (size < ARCHIVE_OK)
313			return (ARCHIVE_FATAL);
314		l = data->out - data->out_buffer;
315		if (l >= data->out_block_size) {
316			ret = __archive_write_filter(f->next_filter,
317			    data->out_buffer, data->out_block_size);
318			l -= data->out_block_size;
319			memcpy(data->out_buffer,
320			    data->out_buffer + data->out_block_size, l);
321			data->out = data->out_buffer + l;
322			if (ret < ARCHIVE_WARN)
323				break;
324		}
325		p += size;
326		remaining -= size;
327	}
328
329	return (ret);
330}
331
332/*
333 * Finish the compression.
334 */
335static int
336archive_filter_lz4_close(struct archive_write_filter *f)
337{
338	struct private_data *data = (struct private_data *)f->data;
339	int ret;
340
341	/* Finish compression cycle. */
342	ret = (int)lz4_write_one_block(f, NULL, 0);
343	if (ret >= 0) {
344		/*
345		 * Write the last block and the end of the stream data.
346		 */
347
348		/* Write End Of Stream. */
349		memset(data->out, 0, 4); data->out += 4;
350		/* Write Stream checksum if needed. */
351		if (data->stream_checksum) {
352			unsigned int checksum;
353			checksum = __archive_xxhash.XXH32_digest(
354					data->xxh32_state);
355			data->xxh32_state = NULL;
356			archive_le32enc(data->out, checksum);
357			data->out += 4;
358		}
359		ret = __archive_write_filter(f->next_filter,
360			    data->out_buffer, data->out - data->out_buffer);
361	}
362	return ret;
363}
364
365static int
366archive_filter_lz4_free(struct archive_write_filter *f)
367{
368	struct private_data *data = (struct private_data *)f->data;
369
370	if (data->lz4_stream != NULL) {
371#ifdef HAVE_LZ4HC_H
372		if (data->compression_level >= 3)
373#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
374			LZ4_freeStreamHC(data->lz4_stream);
375#else
376			LZ4_freeHC(data->lz4_stream);
377#endif
378		else
379#endif
380#if LZ4_VERSION_MINOR >= 3
381			LZ4_freeStream(data->lz4_stream);
382#else
383			LZ4_free(data->lz4_stream);
384#endif
385	}
386	free(data->out_buffer);
387	free(data->in_buffer_allocated);
388	free(data->xxh32_state);
389	free(data);
390	f->data = NULL;
391	return (ARCHIVE_OK);
392}
393
394static int
395lz4_write_stream_descriptor(struct archive_write_filter *f)
396{
397	struct private_data *data = (struct private_data *)f->data;
398	uint8_t *sd;
399
400	sd = (uint8_t *)data->out;
401	/* Write Magic Number. */
402	archive_le32enc(&sd[0], LZ4_MAGICNUMBER);
403	/* FLG */
404	sd[4] = (data->version_number << 6)
405	      | (data->block_independence << 5)
406	      | (data->block_checksum << 4)
407	      | (data->stream_size << 3)
408	      | (data->stream_checksum << 2)
409	      | (data->preset_dictionary << 0);
410	/* BD */
411	sd[5] = (data->block_maximum_size << 4);
412	sd[6] = (__archive_xxhash.XXH32(&sd[4], 2, 0) >> 8) & 0xff;
413	data->out += 7;
414	if (data->stream_checksum)
415		data->xxh32_state = __archive_xxhash.XXH32_init(0);
416	else
417		data->xxh32_state = NULL;
418	return (ARCHIVE_OK);
419}
420
421static ssize_t
422lz4_write_one_block(struct archive_write_filter *f, const char *p,
423    size_t length)
424{
425	struct private_data *data = (struct private_data *)f->data;
426	ssize_t r;
427
428	if (p == NULL) {
429		/* Compress remaining uncompressed data. */
430		if (data->in_buffer == data->in)
431			return 0;
432		else {
433			size_t l = data->in - data->in_buffer;
434			r = drive_compressor(f, data->in_buffer, l);
435			if (r == ARCHIVE_OK)
436				r = (ssize_t)l;
437		}
438	} else if ((data->block_independence || data->compression_level < 3) &&
439	    data->in_buffer == data->in && length >= data->block_size) {
440		r = drive_compressor(f, p, data->block_size);
441		if (r == ARCHIVE_OK)
442			r = (ssize_t)data->block_size;
443	} else {
444		size_t remaining_size = data->in_buffer_size -
445			(data->in - data->in_buffer);
446		size_t l = (remaining_size > length)? length: remaining_size;
447		memcpy(data->in, p, l);
448		data->in += l;
449		if (l == remaining_size) {
450			r = drive_compressor(f, data->in_buffer,
451			    data->block_size);
452			if (r == ARCHIVE_OK)
453				r = (ssize_t)l;
454			data->in = data->in_buffer;
455		} else
456			r = (ssize_t)l;
457	}
458
459	return (r);
460}
461
462
463/*
464 * Utility function to push input data through compressor, writing
465 * full output blocks as necessary.
466 *
467 * Note that this handles both the regular write case (finishing ==
468 * false) and the end-of-archive case (finishing == true).
469 */
470static int
471drive_compressor(struct archive_write_filter *f, const char *p, size_t length)
472{
473	struct private_data *data = (struct private_data *)f->data;
474
475	if (data->stream_checksum)
476		__archive_xxhash.XXH32_update(data->xxh32_state,
477			p, (int)length);
478	if (data->block_independence)
479		return drive_compressor_independence(f, p, length);
480	else
481		return drive_compressor_dependence(f, p, length);
482}
483
484static int
485drive_compressor_independence(struct archive_write_filter *f, const char *p,
486    size_t length)
487{
488	struct private_data *data = (struct private_data *)f->data;
489	unsigned int outsize;
490
491#ifdef HAVE_LZ4HC_H
492	if (data->compression_level >= 3)
493#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
494		outsize = LZ4_compress_HC(p, data->out + 4,
495		     (int)length, (int)data->block_size,
496		    data->compression_level);
497#else
498		outsize = LZ4_compressHC2_limitedOutput(p, data->out + 4,
499		    (int)length, (int)data->block_size,
500		    data->compression_level);
501#endif
502	else
503#endif
504#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
505		outsize = LZ4_compress_default(p, data->out + 4,
506		    (int)length, (int)data->block_size);
507#else
508		outsize = LZ4_compress_limitedOutput(p, data->out + 4,
509		    (int)length, (int)data->block_size);
510#endif
511
512	if (outsize) {
513		/* The buffer is compressed. */
514		archive_le32enc(data->out, outsize);
515		data->out += 4;
516	} else {
517		/* The buffer is not compressed. The compressed size was
518		 * bigger than its uncompressed size. */
519		archive_le32enc(data->out, (uint32_t)(length | 0x80000000));
520		data->out += 4;
521		memcpy(data->out, p, length);
522		outsize = (uint32_t)length;
523	}
524	data->out += outsize;
525	if (data->block_checksum) {
526		unsigned int checksum =
527		    __archive_xxhash.XXH32(data->out - outsize, outsize, 0);
528		archive_le32enc(data->out, checksum);
529		data->out += 4;
530	}
531	return (ARCHIVE_OK);
532}
533
534static int
535drive_compressor_dependence(struct archive_write_filter *f, const char *p,
536    size_t length)
537{
538	struct private_data *data = (struct private_data *)f->data;
539	int outsize;
540
541#define DICT_SIZE	(64 * 1024)
542#ifdef HAVE_LZ4HC_H
543	if (data->compression_level >= 3) {
544		if (data->lz4_stream == NULL) {
545#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
546			data->lz4_stream = LZ4_createStreamHC();
547			LZ4_resetStreamHC(data->lz4_stream, data->compression_level);
548#else
549			data->lz4_stream =
550			    LZ4_createHC(data->in_buffer_allocated);
551#endif
552			if (data->lz4_stream == NULL) {
553				archive_set_error(f->archive, ENOMEM,
554				    "Can't allocate data for compression"
555				    " buffer");
556				return (ARCHIVE_FATAL);
557			}
558		}
559		else
560			LZ4_loadDictHC(data->lz4_stream, data->in_buffer_allocated, DICT_SIZE);
561
562#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
563		outsize = LZ4_compress_HC_continue(
564		    data->lz4_stream, p, data->out + 4, (int)length,
565		    (int)data->block_size);
566#else
567		outsize = LZ4_compressHC2_limitedOutput_continue(
568		    data->lz4_stream, p, data->out + 4, (int)length,
569		    (int)data->block_size, data->compression_level);
570#endif
571	} else
572#endif
573	{
574		if (data->lz4_stream == NULL) {
575			data->lz4_stream = LZ4_createStream();
576			if (data->lz4_stream == NULL) {
577				archive_set_error(f->archive, ENOMEM,
578				    "Can't allocate data for compression"
579				    " buffer");
580				return (ARCHIVE_FATAL);
581			}
582		}
583		else
584			LZ4_loadDict(data->lz4_stream, data->in_buffer_allocated, DICT_SIZE);
585
586#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
587		outsize = LZ4_compress_fast_continue(
588		    data->lz4_stream, p, data->out + 4, (int)length,
589		    (int)data->block_size, 1);
590#else
591		outsize = LZ4_compress_limitedOutput_continue(
592		    data->lz4_stream, p, data->out + 4, (int)length,
593		    (int)data->block_size);
594#endif
595	}
596
597	if (outsize) {
598		/* The buffer is compressed. */
599		archive_le32enc(data->out, outsize);
600		data->out += 4;
601	} else {
602		/* The buffer is not compressed. The compressed size was
603		 * bigger than its uncompressed size. */
604		archive_le32enc(data->out, (uint32_t)(length | 0x80000000));
605		data->out += 4;
606		memcpy(data->out, p, length);
607		outsize = (uint32_t)length;
608	}
609	data->out += outsize;
610	if (data->block_checksum) {
611		unsigned int checksum =
612		    __archive_xxhash.XXH32(data->out - outsize, outsize, 0);
613		archive_le32enc(data->out, checksum);
614		data->out += 4;
615	}
616
617	if (length == data->block_size) {
618#ifdef HAVE_LZ4HC_H
619		if (data->compression_level >= 3) {
620#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
621			LZ4_saveDictHC(data->lz4_stream, data->in_buffer_allocated, DICT_SIZE);
622#else
623			LZ4_slideInputBufferHC(data->lz4_stream);
624#endif
625			data->in_buffer = data->in_buffer_allocated + DICT_SIZE;
626		}
627		else
628#endif
629			LZ4_saveDict(data->lz4_stream,
630			    data->in_buffer_allocated, DICT_SIZE);
631#undef DICT_SIZE
632	}
633	return (ARCHIVE_OK);
634}
635
636#else /* HAVE_LIBLZ4 */
637
638static int
639archive_filter_lz4_open(struct archive_write_filter *f)
640{
641	struct private_data *data = (struct private_data *)f->data;
642	struct archive_string as;
643	int r;
644
645	archive_string_init(&as);
646	archive_strcpy(&as, "lz4 -z -q -q");
647
648	/* Specify a compression level. */
649	if (data->compression_level > 0) {
650		archive_strcat(&as, " -");
651		archive_strappend_char(&as, '0' + data->compression_level);
652	}
653	/* Specify a block size. */
654	archive_strcat(&as, " -B");
655	archive_strappend_char(&as, '0' + data->block_maximum_size);
656
657	if (data->block_checksum)
658		archive_strcat(&as, " -BX");
659	if (data->stream_checksum == 0)
660		archive_strcat(&as, " --no-frame-crc");
661	if (data->block_independence == 0)
662		archive_strcat(&as, " -BD");
663
664	f->write = archive_filter_lz4_write;
665
666	r = __archive_write_program_open(f, data->pdata, as.s);
667	archive_string_free(&as);
668	return (r);
669}
670
671static int
672archive_filter_lz4_write(struct archive_write_filter *f, const void *buff,
673    size_t length)
674{
675	struct private_data *data = (struct private_data *)f->data;
676
677	return __archive_write_program_write(f, data->pdata, buff, length);
678}
679
680static int
681archive_filter_lz4_close(struct archive_write_filter *f)
682{
683	struct private_data *data = (struct private_data *)f->data;
684
685	return __archive_write_program_close(f, data->pdata);
686}
687
688static int
689archive_filter_lz4_free(struct archive_write_filter *f)
690{
691	struct private_data *data = (struct private_data *)f->data;
692
693	__archive_write_program_free(data->pdata);
694	free(data);
695	return (ARCHIVE_OK);
696}
697
698#endif /* HAVE_LIBLZ4 */
699