archive_write_add_filter_xz.c revision 248616
1/*-
2 * Copyright (c) 2003-2010 Tim Kientzle
3 * Copyright (c) 2009-2012 Michihiro NAKAJIMA
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "archive_platform.h"
28
29__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_xz.c 201108 2009-12-28 03:28:21Z kientzle $");
30
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40#include <time.h>
41#ifdef HAVE_LZMA_H
42#include <lzma.h>
43#endif
44
45#include "archive.h"
46#include "archive_endian.h"
47#include "archive_private.h"
48#include "archive_write_private.h"
49
50#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Compatibility entry point, compiled only while
 * ARCHIVE_VERSION_NUMBER < 4000000.  Calls __archive_write_filters_free()
 * on the handle (presumably discarding filters already attached -- confirm
 * against its definition) and then adds the lzip filter.
 *
 * Returns whatever archive_write_add_filter_lzip() returns.
 */
int
archive_write_set_compression_lzip(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_lzip(a);
	return (status);
}
57
/*
 * Compatibility entry point, compiled only while
 * ARCHIVE_VERSION_NUMBER < 4000000.  Calls __archive_write_filters_free()
 * on the handle (presumably discarding filters already attached -- confirm
 * against its definition) and then adds the lzma filter.
 *
 * Returns whatever archive_write_add_filter_lzma() returns.
 */
int
archive_write_set_compression_lzma(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_lzma(a);
	return (status);
}
64
/*
 * Compatibility entry point, compiled only while
 * ARCHIVE_VERSION_NUMBER < 4000000.  Calls __archive_write_filters_free()
 * on the handle (presumably discarding filters already attached -- confirm
 * against its definition) and then adds the xz filter.
 *
 * Returns whatever archive_write_add_filter_xz() returns.
 */
int
archive_write_set_compression_xz(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_xz(a);
	return (status);
}
71
72#endif
73
74#ifndef HAVE_LZMA_H
75int
76archive_write_add_filter_xz(struct archive *a)
77{
78	archive_set_error(a, ARCHIVE_ERRNO_MISC,
79	    "xz compression not supported on this platform");
80	return (ARCHIVE_FATAL);
81}
82
83int
84archive_write_add_filter_lzma(struct archive *a)
85{
86	archive_set_error(a, ARCHIVE_ERRNO_MISC,
87	    "lzma compression not supported on this platform");
88	return (ARCHIVE_FATAL);
89}
90
91int
92archive_write_add_filter_lzip(struct archive *a)
93{
94	archive_set_error(a, ARCHIVE_ERRNO_MISC,
95	    "lzma compression not supported on this platform");
96	return (ARCHIVE_FATAL);
97}
98#else
99/* Don't compile this if we don't have liblzma. */
100
/*
 * Per-filter state shared by the xz, lzma and lzip write filters.
 * Allocated zero-filled in common_setup() and released in
 * archive_compressor_xz_free().
 */
struct private_data {
	/* Compression level; starts at LZMA_PRESET_DEFAULT and can be
	 * overridden through the "compression-level" option. */
	int		 compression_level;
	/* liblzma encoder stream state. */
	lzma_stream	 stream;
	/* Filter chain handed to liblzma: one LZMA1/LZMA2 entry followed
	 * by the LZMA_VLI_UNKNOWN terminator. */
	lzma_filter	 lzmafilters[2];
	/* Encoder options that lzmafilters[0].options points at. */
	lzma_options_lzma lzma_opt;
	/* Running count of uncompressed bytes accepted by the write
	 * callback. */
	int64_t		 total_in;
	/* Output staging buffer (allocated in the open callback). */
	unsigned char	*compressed;
	/* Capacity of `compressed` in bytes. */
	size_t		 compressed_buffer_size;
	/* Running count of compressed bytes handed to the next filter. */
	int64_t		 total_out;
	/* the CRC32 value of uncompressed data for lzip */
	uint32_t	 crc32;
};
113
/*
 * Forward declarations for the filter callbacks (options/open/write/
 * close/free) and for the shared helper that drives lzma_code().
 */
static int	archive_compressor_xz_options(struct archive_write_filter *,
		    const char *, const char *);
static int	archive_compressor_xz_open(struct archive_write_filter *);
static int	archive_compressor_xz_write(struct archive_write_filter *,
		    const void *, size_t);
static int	archive_compressor_xz_close(struct archive_write_filter *);
static int	archive_compressor_xz_free(struct archive_write_filter *);
static int	drive_compressor(struct archive_write_filter *,
		    struct private_data *, int finishing);
123
/*
 * Encoder parameters used for lzip output.  The open callback indexes
 * option_values[] with the filter's compression_level and copies the
 * selected row into lzma_options_lzma.
 */
struct option_value {
	uint32_t dict_size;	/* copied to lzma_opt.dict_size */
	uint32_t nice_len;	/* copied to lzma_opt.nice_len */
	lzma_match_finder mf;	/* copied to lzma_opt.mf */
};
/* Ten rows, one per level 0..9, with dictionary sizes from 64 KiB
 * (1 << 16) up to 32 MiB (1 << 25). */
static const struct option_value option_values[] = {
	{ 1 << 16, 32, LZMA_MF_HC3},
	{ 1 << 20, 32, LZMA_MF_HC3},
	{ 3 << 19, 32, LZMA_MF_HC4},
	{ 1 << 21, 32, LZMA_MF_BT4},
	{ 3 << 20, 32, LZMA_MF_BT4},
	{ 1 << 22, 32, LZMA_MF_BT4},
	{ 1 << 23, 64, LZMA_MF_BT4},
	{ 1 << 24, 64, LZMA_MF_BT4},
	{ 3 << 23, 64, LZMA_MF_BT4},
	{ 1 << 25, 64, LZMA_MF_BT4}
};
141
142static int
143common_setup(struct archive_write_filter *f)
144{
145	struct private_data *data;
146	struct archive_write *a = (struct archive_write *)f->archive;
147	data = calloc(1, sizeof(*data));
148	if (data == NULL) {
149		archive_set_error(&a->archive, ENOMEM, "Out of memory");
150		return (ARCHIVE_FATAL);
151	}
152	f->data = data;
153	data->compression_level = LZMA_PRESET_DEFAULT;
154	f->open = &archive_compressor_xz_open;
155	f->close = archive_compressor_xz_close;
156	f->free = archive_compressor_xz_free;
157	f->options = &archive_compressor_xz_options;
158	return (ARCHIVE_OK);
159}
160
161/*
162 * Add an xz compression filter to this write handle.
163 */
164int
165archive_write_add_filter_xz(struct archive *_a)
166{
167	struct archive_write_filter *f;
168	int r;
169
170	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
171	    ARCHIVE_STATE_NEW, "archive_write_add_filter_xz");
172	f = __archive_write_allocate_filter(_a);
173	r = common_setup(f);
174	if (r == ARCHIVE_OK) {
175		f->code = ARCHIVE_FILTER_XZ;
176		f->name = "xz";
177	}
178	return (r);
179}
180
181/* LZMA is handled identically, we just need a different compression
182 * code set.  (The liblzma setup looks at the code to determine
183 * the one place that XZ and LZMA require different handling.) */
184int
185archive_write_add_filter_lzma(struct archive *_a)
186{
187	struct archive_write_filter *f;
188	int r;
189
190	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
191	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lzma");
192	f = __archive_write_allocate_filter(_a);
193	r = common_setup(f);
194	if (r == ARCHIVE_OK) {
195		f->code = ARCHIVE_FILTER_LZMA;
196		f->name = "lzma";
197	}
198	return (r);
199}
200
201int
202archive_write_add_filter_lzip(struct archive *_a)
203{
204	struct archive_write_filter *f;
205	int r;
206
207	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
208	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lzip");
209	f = __archive_write_allocate_filter(_a);
210	r = common_setup(f);
211	if (r == ARCHIVE_OK) {
212		f->code = ARCHIVE_FILTER_LZIP;
213		f->name = "lzip";
214	}
215	return (r);
216}
217
218static int
219archive_compressor_xz_init_stream(struct archive_write_filter *f,
220    struct private_data *data)
221{
222	static const lzma_stream lzma_stream_init_data = LZMA_STREAM_INIT;
223	int ret;
224
225	data->stream = lzma_stream_init_data;
226	data->stream.next_out = data->compressed;
227	data->stream.avail_out = data->compressed_buffer_size;
228	if (f->code == ARCHIVE_FILTER_XZ)
229		ret = lzma_stream_encoder(&(data->stream),
230		    data->lzmafilters, LZMA_CHECK_CRC64);
231	else if (f->code == ARCHIVE_FILTER_LZMA)
232		ret = lzma_alone_encoder(&(data->stream), &data->lzma_opt);
233	else {	/* ARCHIVE_FILTER_LZIP */
234		int dict_size = data->lzma_opt.dict_size;
235		int ds, log2dic, wedges;
236
237		/* Calculate a coded dictionary size */
238		if (dict_size < (1 << 12) || dict_size > (1 << 27)) {
239			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
240			    "Unacceptable dictionary dize for lzip: %d",
241			    dict_size);
242			return (ARCHIVE_FATAL);
243		}
244		for (log2dic = 27; log2dic >= 12; log2dic--) {
245			if (dict_size & (1 << log2dic))
246				break;
247		}
248		if (dict_size > (1 << log2dic)) {
249			log2dic++;
250			wedges =
251			    ((1 << log2dic) - dict_size) / (1 << (log2dic - 4));
252		} else
253			wedges = 0;
254		ds = ((wedges << 5) & 0xe0) | (log2dic & 0x1f);
255
256		data->crc32 = 0;
257		/* Make a header */
258		data->compressed[0] = 0x4C;
259		data->compressed[1] = 0x5A;
260		data->compressed[2] = 0x49;
261		data->compressed[3] = 0x50;
262		data->compressed[4] = 1;/* Version */
263		data->compressed[5] = (unsigned char)ds;
264		data->stream.next_out += 6;
265		data->stream.avail_out -= 6;
266
267		ret = lzma_raw_encoder(&(data->stream), data->lzmafilters);
268	}
269	if (ret == LZMA_OK)
270		return (ARCHIVE_OK);
271
272	switch (ret) {
273	case LZMA_MEM_ERROR:
274		archive_set_error(f->archive, ENOMEM,
275		    "Internal error initializing compression library: "
276		    "Cannot allocate memory");
277		break;
278	default:
279		archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
280		    "Internal error initializing compression library: "
281		    "It's a bug in liblzma");
282		break;
283	}
284	return (ARCHIVE_FATAL);
285}
286
287/*
288 * Setup callback.
289 */
290static int
291archive_compressor_xz_open(struct archive_write_filter *f)
292{
293	struct private_data *data = f->data;
294	int ret;
295
296	ret = __archive_write_open_filter(f->next_filter);
297	if (ret != ARCHIVE_OK)
298		return (ret);
299
300	if (data->compressed == NULL) {
301		size_t bs = 65536, bpb;
302		if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
303			/* Buffer size should be a multiple number of the of bytes
304			 * per block for performance. */
305			bpb = archive_write_get_bytes_per_block(f->archive);
306			if (bpb > bs)
307				bs = bpb;
308			else if (bpb != 0)
309				bs -= bs % bpb;
310		}
311		data->compressed_buffer_size = bs;
312		data->compressed
313		    = (unsigned char *)malloc(data->compressed_buffer_size);
314		if (data->compressed == NULL) {
315			archive_set_error(f->archive, ENOMEM,
316			    "Can't allocate data for compression buffer");
317			return (ARCHIVE_FATAL);
318		}
319	}
320
321	f->write = archive_compressor_xz_write;
322
323	/* Initialize compression library. */
324	if (f->code == ARCHIVE_FILTER_LZIP) {
325		const struct option_value *val =
326		    &option_values[data->compression_level];
327
328		data->lzma_opt.dict_size = val->dict_size;
329		data->lzma_opt.preset_dict = NULL;
330		data->lzma_opt.preset_dict_size = 0;
331		data->lzma_opt.lc = LZMA_LC_DEFAULT;
332		data->lzma_opt.lp = LZMA_LP_DEFAULT;
333		data->lzma_opt.pb = LZMA_PB_DEFAULT;
334		data->lzma_opt.mode =
335		    data->compression_level<= 2? LZMA_MODE_FAST:LZMA_MODE_NORMAL;
336		data->lzma_opt.nice_len = val->nice_len;
337		data->lzma_opt.mf = val->mf;
338		data->lzma_opt.depth = 0;
339		data->lzmafilters[0].id = LZMA_FILTER_LZMA1;
340		data->lzmafilters[0].options = &data->lzma_opt;
341		data->lzmafilters[1].id = LZMA_VLI_UNKNOWN;/* Terminate */
342	} else {
343		if (lzma_lzma_preset(&data->lzma_opt, data->compression_level)) {
344			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
345			    "Internal error initializing compression library");
346		}
347		data->lzmafilters[0].id = LZMA_FILTER_LZMA2;
348		data->lzmafilters[0].options = &data->lzma_opt;
349		data->lzmafilters[1].id = LZMA_VLI_UNKNOWN;/* Terminate */
350	}
351	ret = archive_compressor_xz_init_stream(f, data);
352	if (ret == LZMA_OK) {
353		f->data = data;
354		return (0);
355	}
356	return (ARCHIVE_FATAL);
357}
358
359/*
360 * Set write options.
361 */
362static int
363archive_compressor_xz_options(struct archive_write_filter *f,
364    const char *key, const char *value)
365{
366	struct private_data *data = (struct private_data *)f->data;
367
368	if (strcmp(key, "compression-level") == 0) {
369		if (value == NULL || !(value[0] >= '0' && value[0] <= '9') ||
370		    value[1] != '\0')
371			return (ARCHIVE_WARN);
372		data->compression_level = value[0] - '0';
373		if (data->compression_level > 6)
374			data->compression_level = 6;
375		return (ARCHIVE_OK);
376	}
377
378	/* Note: The "warn" return is just to inform the options
379	 * supervisor that we didn't handle it.  It will generate
380	 * a suitable error if no one used this option. */
381	return (ARCHIVE_WARN);
382}
383
384/*
385 * Write data to the compressed stream.
386 */
387static int
388archive_compressor_xz_write(struct archive_write_filter *f,
389    const void *buff, size_t length)
390{
391	struct private_data *data = (struct private_data *)f->data;
392	int ret;
393
394	/* Update statistics */
395	data->total_in += length;
396	if (f->code == ARCHIVE_FILTER_LZIP)
397		data->crc32 = lzma_crc32(buff, length, data->crc32);
398
399	/* Compress input data to output buffer */
400	data->stream.next_in = buff;
401	data->stream.avail_in = length;
402	if ((ret = drive_compressor(f, data, 0)) != ARCHIVE_OK)
403		return (ret);
404
405	return (ARCHIVE_OK);
406}
407
408
409/*
410 * Finish the compression...
411 */
412static int
413archive_compressor_xz_close(struct archive_write_filter *f)
414{
415	struct private_data *data = (struct private_data *)f->data;
416	int ret, r1;
417
418	ret = drive_compressor(f, data, 1);
419	if (ret == ARCHIVE_OK) {
420		data->total_out +=
421		    data->compressed_buffer_size - data->stream.avail_out;
422		ret = __archive_write_filter(f->next_filter,
423		    data->compressed,
424		    data->compressed_buffer_size - data->stream.avail_out);
425		if (f->code == ARCHIVE_FILTER_LZIP && ret == ARCHIVE_OK) {
426			archive_le32enc(data->compressed, data->crc32);
427			archive_le64enc(data->compressed+4, data->total_in);
428			archive_le64enc(data->compressed+12, data->total_out + 20);
429			ret = __archive_write_filter(f->next_filter,
430			    data->compressed, 20);
431		}
432	}
433	lzma_end(&(data->stream));
434	r1 = __archive_write_close_filter(f->next_filter);
435	return (r1 < ret ? r1 : ret);
436}
437
438static int
439archive_compressor_xz_free(struct archive_write_filter *f)
440{
441	struct private_data *data = (struct private_data *)f->data;
442	free(data->compressed);
443	free(data);
444	f->data = NULL;
445	return (ARCHIVE_OK);
446}
447
448/*
449 * Utility function to push input data through compressor,
450 * writing full output blocks as necessary.
451 *
452 * Note that this handles both the regular write case (finishing ==
453 * false) and the end-of-archive case (finishing == true).
454 */
455static int
456drive_compressor(struct archive_write_filter *f,
457    struct private_data *data, int finishing)
458{
459	int ret;
460
461	for (;;) {
462		if (data->stream.avail_out == 0) {
463			data->total_out += data->compressed_buffer_size;
464			ret = __archive_write_filter(f->next_filter,
465			    data->compressed,
466			    data->compressed_buffer_size);
467			if (ret != ARCHIVE_OK)
468				return (ARCHIVE_FATAL);
469			data->stream.next_out = data->compressed;
470			data->stream.avail_out = data->compressed_buffer_size;
471		}
472
473		/* If there's nothing to do, we're done. */
474		if (!finishing && data->stream.avail_in == 0)
475			return (ARCHIVE_OK);
476
477		ret = lzma_code(&(data->stream),
478		    finishing ? LZMA_FINISH : LZMA_RUN );
479
480		switch (ret) {
481		case LZMA_OK:
482			/* In non-finishing case, check if compressor
483			 * consumed everything */
484			if (!finishing && data->stream.avail_in == 0)
485				return (ARCHIVE_OK);
486			/* In finishing case, this return always means
487			 * there's more work */
488			break;
489		case LZMA_STREAM_END:
490			/* This return can only occur in finishing case. */
491			if (finishing)
492				return (ARCHIVE_OK);
493			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
494			    "lzma compression data error");
495			return (ARCHIVE_FATAL);
496		case LZMA_MEMLIMIT_ERROR:
497			archive_set_error(f->archive, ENOMEM,
498			    "lzma compression error: "
499			    "%ju MiB would have been needed",
500			    (uintmax_t)((lzma_memusage(&(data->stream))
501				    + 1024 * 1024 -1)
502				/ (1024 * 1024)));
503			return (ARCHIVE_FATAL);
504		default:
505			/* Any other return value indicates an error. */
506			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
507			    "lzma compression failed:"
508			    " lzma_code() call returned status %d",
509			    ret);
510			return (ARCHIVE_FATAL);
511		}
512	}
513}
514
515#endif /* HAVE_LZMA_H */
516