1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/types.h>
30#include <sys/endian.h>
31#include <sys/param.h>
32#include <sys/sysctl.h>
33#include <sys/stat.h>
34#include <sys/uio.h>
35#include <netinet/in.h>
36#include <assert.h>
37#include <ctype.h>
38#include <err.h>
39#include <fcntl.h>
40#include <pthread.h>
41#include <signal.h>
42#include <stdint.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47
48#include "mkuzip.h"
49#include "mkuz_cloop.h"
50#include "mkuz_blockcache.h"
51#include "mkuz_lzma.h"
52#include "mkuz_zlib.h"
53#include "mkuz_zstd.h"
54#include "mkuz_blk.h"
55#include "mkuz_cfg.h"
56#include "mkuz_conveyor.h"
57#include "mkuz_format.h"
58#include "mkuz_fqueue.h"
59#include "mkuz_time.h"
60#include "mkuz_insize.h"
61
62#define DEFAULT_CLSTSIZE	16384
63
64enum UZ_ALGORITHM {
65	UZ_ZLIB = 0,
66	UZ_LZMA,
67	UZ_ZSTD,
68	UZ_INVALID
69};
70
71static const struct mkuz_format uzip_fmts[] = {
72	[UZ_ZLIB] = {
73		.option = "zlib",
74		.magic = CLOOP_MAGIC_ZLIB,
75		.default_sufx = DEFAULT_SUFX_ZLIB,
76		.f_compress_bound = mkuz_zlib_cbound,
77		.f_init = mkuz_zlib_init,
78		.f_compress = mkuz_zlib_compress,
79	},
80	[UZ_LZMA] = {
81		.option = "lzma",
82		.magic = CLOOP_MAGIC_LZMA,
83		.default_sufx = DEFAULT_SUFX_LZMA,
84		.f_compress_bound = mkuz_lzma_cbound,
85		.f_init = mkuz_lzma_init,
86		.f_compress = mkuz_lzma_compress,
87	},
88	[UZ_ZSTD] = {
89		.option = "zstd",
90		.magic = CLOOP_MAGIC_ZSTD,
91		.default_sufx = DEFAULT_SUFX_ZSTD,
92		.f_compress_bound = mkuz_zstd_cbound,
93		.f_init = mkuz_zstd_init,
94		.f_compress = mkuz_zstd_compress,
95	},
96};
97
98static struct mkuz_blk *readblock(int, u_int32_t);
99static void usage(void) __dead2;
100static void cleanup(void);
101
102static char *cleanfile = NULL;
103
104static int
105cmp_blkno(const struct mkuz_blk *bp, void *p)
106{
107	uint32_t *ap;
108
109	ap = (uint32_t *)p;
110
111	return (bp->info.blkno == *ap);
112}
113
114int main(int argc, char **argv)
115{
116	struct mkuz_cfg cfs;
117	char *oname;
118	uint64_t *toc;
119	int i, io, opt, tmp;
120	struct {
121		int en;
122		FILE *f;
123	} summary;
124	struct iovec iov[2];
125	uint64_t offset, last_offset;
126	struct cloop_header hdr;
127	struct mkuz_conveyor *cvp;
128	struct mkuz_blk_info *chit;
129	size_t ncpusz, ncpu, magiclen;
130	double st, et;
131	enum UZ_ALGORITHM comp_alg;
132	int comp_level;
133
134	st = getdtime();
135
136	ncpusz = sizeof(size_t);
137	if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
138		ncpu = 1;
139	} else if (ncpu > MAX_WORKERS_AUTO) {
140		ncpu = MAX_WORKERS_AUTO;
141	}
142
143	memset(&hdr, 0, sizeof(hdr));
144	cfs.blksz = DEFAULT_CLSTSIZE;
145	oname = NULL;
146	cfs.verbose = 0;
147	cfs.no_zcomp = 0;
148	cfs.en_dedup = 0;
149	summary.en = 0;
150	summary.f = stderr;
151	comp_alg = UZ_ZLIB;
152	comp_level = USE_DEFAULT_LEVEL;
153	cfs.nworkers = ncpu;
154	struct mkuz_blk *iblk, *oblk;
155
156	while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
157		switch(opt) {
158		case 'A':
159			for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
160				if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
161					break;
162			}
163			if (tmp == UZ_INVALID)
164				errx(1, "invalid algorithm specified: %s",
165				    optarg);
166				/* Not reached */
167			comp_alg = tmp;
168			break;
169		case 'C':
170			comp_level = atoi(optarg);
171			break;
172		case 'o':
173			oname = optarg;
174			break;
175
176		case 's':
177			tmp = atoi(optarg);
178			if (tmp <= 0) {
179				errx(1, "invalid cluster size specified: %s",
180				    optarg);
181				/* Not reached */
182			}
183			cfs.blksz = tmp;
184			break;
185
186		case 'v':
187			cfs.verbose = 1;
188			break;
189
190		case 'Z':
191			cfs.no_zcomp = 1;
192			break;
193
194		case 'd':
195			cfs.en_dedup = 1;
196			break;
197
198		case 'L':
199			comp_alg = UZ_LZMA;
200			break;
201
202		case 'S':
203			summary.en = 1;
204			summary.f = stdout;
205			break;
206
207		case 'j':
208			tmp = atoi(optarg);
209			if (tmp <= 0) {
210				errx(1, "invalid number of compression threads"
211                                    " specified: %s", optarg);
212				/* Not reached */
213			}
214			cfs.nworkers = tmp;
215			break;
216
217		default:
218			usage();
219			/* Not reached */
220		}
221	}
222	argc -= optind;
223	argv += optind;
224
225	if (argc != 1) {
226		usage();
227		/* Not reached */
228	}
229
230	cfs.handler = &uzip_fmts[comp_alg];
231
232	magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
233	assert(magiclen < sizeof(hdr.magic));
234
235	if (cfs.en_dedup != 0) {
236		/*
237		 * Dedupe requires a version 3 format.  Don't downgrade newer
238		 * formats.
239		 */
240		if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
241			hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
242		hdr.magic[CLOOP_OFS_COMPR] =
243		    tolower(hdr.magic[CLOOP_OFS_COMPR]);
244	}
245
246	if (cfs.blksz % DEV_BSIZE != 0)
247		errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
248
249	cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
250	if (cfs.cbound_blksz > MAXPHYS)
251		errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
252		    cfs.cbound_blksz, (size_t)MAXPHYS);
253
254	cfs.handler->f_init(&comp_level);
255	cfs.comp_level = comp_level;
256
257	cfs.iname = argv[0];
258	if (oname == NULL) {
259		asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
260		if (oname == NULL) {
261			err(1, "can't allocate memory");
262			/* Not reached */
263		}
264	}
265
266	signal(SIGHUP, exit);
267	signal(SIGINT, exit);
268	signal(SIGTERM, exit);
269	signal(SIGXCPU, exit);
270	signal(SIGXFSZ, exit);
271	atexit(cleanup);
272
273	cfs.fdr = open(cfs.iname, O_RDONLY);
274	if (cfs.fdr < 0) {
275		err(1, "open(%s)", cfs.iname);
276		/* Not reached */
277	}
278	cfs.isize = mkuz_get_insize(&cfs);
279	if (cfs.isize < 0) {
280		errx(1, "can't determine input image size");
281		/* Not reached */
282	}
283	hdr.nblocks = cfs.isize / cfs.blksz;
284	if ((cfs.isize % cfs.blksz) != 0) {
285		if (cfs.verbose != 0)
286			fprintf(stderr, "file size is not multiple "
287			"of %d, padding data\n", cfs.blksz);
288		hdr.nblocks++;
289	}
290	toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
291
292	/*
293	 * Initialize last+1 entry with non-heap trash.  If final padding is
294	 * added later, it may or may not be overwritten with an offset
295	 * representing the length of the final compressed block.  If not,
296	 * initialize to a defined value.
297	 */
298	toc[hdr.nblocks] = 0;
299
300	cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
301		   S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
302	if (cfs.fdw < 0) {
303		err(1, "open(%s)", oname);
304		/* Not reached */
305	}
306	cleanfile = oname;
307
308	/* Prepare header that we will write later when we have index ready. */
309	iov[0].iov_base = (char *)&hdr;
310	iov[0].iov_len = sizeof(hdr);
311	iov[1].iov_base = (char *)toc;
312	iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
313	offset = iov[0].iov_len + iov[1].iov_len;
314
315	/* Reserve space for header */
316	lseek(cfs.fdw, offset, SEEK_SET);
317
318	if (cfs.verbose != 0) {
319		fprintf(stderr, "data size %ju bytes, number of clusters "
320		    "%u, index length %zu bytes\n", cfs.isize,
321		    hdr.nblocks, iov[1].iov_len);
322	}
323
324	cvp = mkuz_conveyor_ctor(&cfs);
325
326	last_offset = 0;
327        iblk = oblk = NULL;
328	for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
329		iblk = readblock(cfs.fdr, cfs.blksz);
330		mkuz_fqueue_enq(cvp->wrk_queue, iblk);
331		if (iblk != MKUZ_BLK_EOF &&
332		    (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
333			continue;
334		}
335drain:
336		oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
337		assert(oblk->info.blkno == (unsigned)io);
338		oblk->info.offset = offset;
339		chit = NULL;
340		if (cfs.en_dedup != 0 && oblk->info.len > 0) {
341			chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
342			/*
343			 * There should be at least one non-empty block
344			 * between us and the backref'ed offset, otherwise
345			 * we won't be able to parse that sequence correctly
346			 * as it would be indistinguishible from another
347			 * empty block.
348			 */
349			if (chit != NULL && chit->offset == last_offset) {
350				chit = NULL;
351			}
352		}
353		if (chit != NULL) {
354			toc[io] = htobe64(chit->offset);
355			oblk->info.len = 0;
356		} else {
357			if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
358			    oblk->info.len) < 0) {
359				err(1, "write(%s)", oname);
360				/* Not reached */
361			}
362			toc[io] = htobe64(offset);
363			last_offset = offset;
364			offset += oblk->info.len;
365		}
366		if (cfs.verbose != 0) {
367			fprintf(stderr, "cluster #%d, in %u bytes, "
368			    "out len=%lu offset=%lu", io, cfs.blksz,
369			    (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
370			if (chit != NULL) {
371				fprintf(stderr, " (backref'ed to #%d)",
372				    chit->blkno);
373			}
374			fprintf(stderr, "\n");
375		}
376		free(oblk);
377		io += 1;
378		if (iblk == MKUZ_BLK_EOF) {
379			if (io < i)
380				goto drain;
381			/* Last block, see if we need to add some padding */
382			if ((offset % DEV_BSIZE) == 0)
383				continue;
384			oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
385			oblk->info.blkno = io;
386			oblk->info.len = oblk->alen;
387			if (cfs.verbose != 0) {
388				fprintf(stderr, "padding data with %lu bytes "
389				    "so that file size is multiple of %d\n",
390				    (u_long)oblk->alen, DEV_BSIZE);
391			}
392			mkuz_fqueue_enq(cvp->results, oblk);
393			goto drain;
394		}
395	}
396
397	close(cfs.fdr);
398
399	if (cfs.verbose != 0 || summary.en != 0) {
400		et = getdtime();
401		fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
402		    "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
403		    (long long)(cfs.isize - offset),
404		    100.0 * (long long)(cfs.isize - offset) /
405		    (float)cfs.isize, (float)cfs.isize / (et - st));
406	}
407
408	/* Convert to big endian */
409	hdr.blksz = htonl(cfs.blksz);
410	hdr.nblocks = htonl(hdr.nblocks);
411	/* Write headers into pre-allocated space */
412	lseek(cfs.fdw, 0, SEEK_SET);
413	if (writev(cfs.fdw, iov, 2) < 0) {
414		err(1, "writev(%s)", oname);
415		/* Not reached */
416	}
417	cleanfile = NULL;
418	close(cfs.fdw);
419
420	exit(0);
421}
422
423static struct mkuz_blk *
424readblock(int fd, u_int32_t clstsize)
425{
426	int numread;
427	struct mkuz_blk *rval;
428	static int blockcnt;
429	off_t cpos;
430
431	rval = mkuz_blk_ctor(clstsize);
432
433	rval->info.blkno = blockcnt;
434	blockcnt += 1;
435	cpos = lseek(fd, 0, SEEK_CUR);
436	if (cpos < 0) {
437		err(1, "readblock: lseek() failed");
438		/* Not reached */
439	}
440	rval->info.offset = cpos;
441
442	numread = read(fd, rval->data, clstsize);
443	if (numread < 0) {
444		err(1, "readblock: read() failed");
445		/* Not reached */
446	}
447	if (numread == 0) {
448		free(rval);
449		return MKUZ_BLK_EOF;
450	}
451	rval->info.len = numread;
452	return rval;
453}
454
455static void
456usage(void)
457{
458
459	fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
460	    "[-j ncompr] infile\n");
461	exit(1);
462}
463
464void *
465mkuz_safe_malloc(size_t size)
466{
467	void *retval;
468
469	retval = malloc(size);
470	if (retval == NULL) {
471		err(1, "can't allocate memory");
472		/* Not reached */
473	}
474	return retval;
475}
476
477void *
478mkuz_safe_zmalloc(size_t size)
479{
480	void *retval;
481
482	retval = mkuz_safe_malloc(size);
483	bzero(retval, size);
484	return retval;
485}
486
487static void
488cleanup(void)
489{
490
491	if (cleanfile != NULL)
492		unlink(cleanfile);
493}
494
495int
496mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
497{
498    const u_char *mm;
499
500    mm = (const u_char *)memory;
501    return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
502}
503