1/*-
2 * Copyright (c) 2014 Juniper Networks, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/mman.h>
31#include <sys/queue.h>
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <assert.h>
35#include <err.h>
36#include <errno.h>
37#include <limits.h>
38#include <paths.h>
39#include <stdint.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44
45#include "image.h"
46#include "mkimg.h"
47
/*
 * A chunk describes a contiguous run of image blocks and where the
 * data for those blocks comes from.  Depending on ch_type the blocks
 * are either a gap without data, backed by a range of a file, or
 * backed by a buffer in memory.
 */
struct chunk {
	STAILQ_ENTRY(chunk) ch_list;	/* Linkage in image_chunks. */
	size_t	ch_size;		/* Size of chunk in bytes. */
	lba_t	ch_block;		/* Block address in image. */
	union {
		/* Valid when ch_type is CH_TYPE_FILE. */
		struct {
			off_t	ofs;	/* Offset in backing file. */
			int	fd;	/* FD of backing file. */
		} file;
		/* Valid when ch_type is CH_TYPE_MEMORY. */
		struct {
			void	*ptr;	/* Pointer to data in memory */
		} mem;
	} ch_u;
	u_int	ch_type;		/* One of the CH_TYPE_* values below. */
#define	CH_TYPE_ZEROES		0	/* Chunk is a gap (no data). */
#define	CH_TYPE_FILE		1	/* File-backed chunk. */
#define	CH_TYPE_MEMORY		2	/* Memory-backed chunk */
};
66
/*
 * The list of chunks that make up the image.  Chunks are appended at
 * the tail, or inserted after an existing chunk when that chunk is split.
 */
static STAILQ_HEAD(chunk_head, chunk) image_chunks;
/* Number of chunks inserted into image_chunks. */
static u_int image_nchunks;

/* Path of the swap file; filled in from a mkstemp(3) template by image_init(). */
static char image_swap_file[PATH_MAX];
/* Descriptor of the swap file, or -1 when there is none. */
static int image_swap_fd = -1;
/* Page size as reported by getpagesize(3). */
static u_int image_swap_pgsz;
/* Number of bytes handed out from the swap file so far. */
static off_t image_swap_size;

/* Size of the image in blocks, as recorded by image_set_size(). */
static lba_t image_size;
76
77static int
78is_empty_sector(void *buf)
79{
80	uint64_t *p = buf;
81	size_t n, max;
82
83	assert(((uintptr_t)p & 3) == 0);
84
85	max = secsz / sizeof(uint64_t);
86	for (n = 0; n < max; n++) {
87		if (p[n] != 0UL)
88			return (0);
89	}
90	return (1);
91}
92
/*
 * Swap file handling.
 */
96
97static off_t
98image_swap_alloc(size_t size)
99{
100	off_t ofs;
101	size_t unit;
102
103	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
104	assert((unit & (unit - 1)) == 0);
105
106	size = (size + unit - 1) & ~(unit - 1);
107
108	ofs = image_swap_size;
109	image_swap_size += size;
110	if (ftruncate(image_swap_fd, image_swap_size) == -1) {
111		image_swap_size = ofs;
112		ofs = -1LL;
113	}
114	return (ofs);
115}
116
117/*
118 * Image chunk handling.
119 */
120
121static struct chunk *
122image_chunk_find(lba_t blk)
123{
124	static struct chunk *last = NULL;
125	struct chunk *ch;
126
127	ch = (last != NULL && last->ch_block <= blk)
128	    ? last : STAILQ_FIRST(&image_chunks);
129	while (ch != NULL) {
130		if (ch->ch_block <= blk &&
131		    (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
132			last = ch;
133			break;
134		}
135		ch = STAILQ_NEXT(ch, ch_list);
136	}
137	return (ch);
138}
139
140static size_t
141image_chunk_grow(struct chunk *ch, size_t sz)
142{
143	size_t dsz, newsz;
144
145	newsz = ch->ch_size + sz;
146	if (newsz > ch->ch_size) {
147		ch->ch_size = newsz;
148		return (0);
149	}
150	/* We would overflow -- create new chunk for remainder. */
151	dsz = SIZE_MAX - ch->ch_size;
152	assert(dsz < sz);
153	ch->ch_size = SIZE_MAX;
154	return (sz - dsz);
155}
156
157static struct chunk *
158image_chunk_memory(struct chunk *ch, lba_t blk)
159{
160	struct chunk *new;
161	void *ptr;
162
163	ptr = calloc(1, secsz);
164	if (ptr == NULL)
165		return (NULL);
166
167	if (ch->ch_block < blk) {
168		new = malloc(sizeof(*new));
169		if (new == NULL) {
170			free(ptr);
171			return (NULL);
172		}
173		memcpy(new, ch, sizeof(*new));
174		ch->ch_size = (blk - ch->ch_block) * secsz;
175		new->ch_block = blk;
176		new->ch_size -= ch->ch_size;
177		STAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
178		image_nchunks++;
179		ch = new;
180	}
181
182	if (ch->ch_size > secsz) {
183		new = malloc(sizeof(*new));
184		if (new == NULL) {
185			free(ptr);
186			return (NULL);
187		}
188		memcpy(new, ch, sizeof(*new));
189		ch->ch_size = secsz;
190		new->ch_block++;
191		new->ch_size -= secsz;
192		STAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
193		image_nchunks++;
194	}
195
196	ch->ch_type = CH_TYPE_MEMORY;
197	ch->ch_u.mem.ptr = ptr;
198	return (ch);
199}
200
201static int
202image_chunk_skipto(lba_t to)
203{
204	struct chunk *ch;
205	lba_t from;
206	size_t sz;
207
208	ch = STAILQ_LAST(&image_chunks, chunk, ch_list);
209	from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
210
211	assert(from <= to);
212
213	/* Nothing to do? */
214	if (from == to)
215		return (0);
216	/* Avoid bugs due to overflows. */
217	if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
218		return (EFBIG);
219	sz = (to - from) * secsz;
220	if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
221		sz = image_chunk_grow(ch, sz);
222		if (sz == 0)
223			return (0);
224		from = ch->ch_block + (ch->ch_size / secsz);
225	}
226	ch = malloc(sizeof(*ch));
227	if (ch == NULL)
228		return (ENOMEM);
229	memset(ch, 0, sizeof(*ch));
230	ch->ch_block = from;
231	ch->ch_size = sz;
232	ch->ch_type = CH_TYPE_ZEROES;
233	STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
234	image_nchunks++;
235	return (0);
236}
237
238static int
239image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
240{
241	struct chunk *ch;
242
243	ch = STAILQ_LAST(&image_chunks, chunk, ch_list);
244	if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
245		if (fd == ch->ch_u.file.fd &&
246		    blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
247		    ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
248			sz = image_chunk_grow(ch, sz);
249			if (sz == 0)
250				return (0);
251			blk = ch->ch_block + (ch->ch_size / secsz);
252			ofs = ch->ch_u.file.ofs + ch->ch_size;
253		}
254	}
255	ch = malloc(sizeof(*ch));
256	if (ch == NULL)
257		return (ENOMEM);
258	memset(ch, 0, sizeof(*ch));
259	ch->ch_block = blk;
260	ch->ch_size = sz;
261	ch->ch_type = CH_TYPE_FILE;
262	ch->ch_u.file.ofs = ofs;
263	ch->ch_u.file.fd = fd;
264	STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
265	image_nchunks++;
266	return (0);
267}
268
269static int
270image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
271{
272	uint8_t *p = buf;
273	int error;
274
275	error = 0;
276	sz = (sz + secsz - 1) & ~(secsz - 1);
277	while (!error && sz > 0) {
278		if (is_empty_sector(p))
279			error = image_chunk_skipto(blk + 1);
280		else
281			error = image_chunk_append(blk, secsz, ofs, fd);
282		blk++;
283		p += secsz;
284		sz -= secsz;
285		ofs += secsz;
286	}
287	return (error);
288}
289
290/*
291 * File mapping support.
292 */
293
294static void *
295image_file_map(int fd, off_t ofs, size_t sz)
296{
297	void *ptr;
298	size_t unit;
299	int flags, prot;
300
301	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
302	assert((unit & (unit - 1)) == 0);
303
304	flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
305	/* Allow writing to our swap file only. */
306	prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
307	sz = (sz + unit - 1) & ~(unit - 1);
308	ptr = mmap(NULL, sz, prot, flags, fd, ofs);
309	return ((ptr == MAP_FAILED) ? NULL : ptr);
310}
311
312static int
313image_file_unmap(void *buffer, size_t sz)
314{
315	size_t unit;
316
317	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
318	sz = (sz + unit - 1) & ~(unit - 1);
319	if (madvise(buffer, sz, MADV_DONTNEED) != 0)
320		warn("madvise");
321	munmap(buffer, sz);
322	return (0);
323}
324
325/*
326 * Input/source file handling.
327 */
328
329static int
330image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
331{
332	char *buffer;
333	uint64_t bytesize;
334	off_t swofs;
335	size_t iosz;
336	ssize_t rdsz;
337	int error;
338
339	/*
340	 * This makes sure we're doing I/O in multiples of the page
341	 * size as well as of the sector size. 2MB is the minimum
342	 * by virtue of secsz at least 512 bytes and the page size
343	 * at least 4K bytes.
344	 */
345	iosz = secsz * image_swap_pgsz;
346
347	bytesize = 0;
348	do {
349		swofs = image_swap_alloc(iosz);
350		if (swofs == -1LL)
351			return (errno);
352		buffer = image_file_map(image_swap_fd, swofs, iosz);
353		if (buffer == NULL)
354			return (errno);
355		rdsz = read(fd, buffer, iosz);
356		if (rdsz > 0)
357			error = image_chunk_copyin(blk, buffer, rdsz, swofs,
358			    image_swap_fd);
359		else if (rdsz < 0)
360			error = errno;
361		else
362			error = 0;
363		image_file_unmap(buffer, iosz);
364		/* XXX should we relinguish unused swap space? */
365		if (error)
366			return (error);
367
368		bytesize += rdsz;
369		blk += (rdsz + secsz - 1) / secsz;
370	} while (rdsz > 0);
371
372	if (sizep != NULL)
373		*sizep = bytesize;
374	return (0);
375}
376
377static int
378image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
379{
380	off_t cur, data, end, hole, pos;
381	void *buf;
382	uint64_t bytesize;
383	size_t iosz, sz;
384	int error;
385
386	/*
387	 * We'd like to know the size of the file and we must
388	 * be able to seek in order to mmap(2). If this isn't
389	 * possible, then treat the file as a stream/pipe.
390	 */
391	end = lseek(fd, 0L, SEEK_END);
392	if (end == -1L)
393		return (image_copyin_stream(blk, fd, sizep));
394
395	/*
396	 * We need the file opened for the duration and our
397	 * caller is going to close the file. Make a dup(2)
398	 * so that control the faith of the descriptor.
399	 */
400	fd = dup(fd);
401	if (fd == -1)
402		return (errno);
403
404	iosz = secsz * image_swap_pgsz;
405
406	bytesize = 0;
407	cur = pos = 0;
408	error = 0;
409	while (!error && cur < end) {
410		hole = lseek(fd, cur, SEEK_HOLE);
411		if (hole == -1)
412			hole = end;
413		data = lseek(fd, cur, SEEK_DATA);
414		if (data == -1)
415			data = end;
416
417		/*
418		 * Treat the entire file as data if sparse files
419		 * are not supported by the underlying file system.
420		 */
421		if (hole == end && data == end)
422			data = cur;
423
424		if (cur == hole && data > hole) {
425			hole = pos;
426			pos = data & ~((uint64_t)secsz - 1);
427
428			blk += (pos - hole) / secsz;
429			error = image_chunk_skipto(blk);
430
431			bytesize += pos - hole;
432			cur = data;
433		} else if (cur == data && hole > data) {
434			data = pos;
435			pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
436
437			while (data < pos) {
438				sz = (pos - data > (off_t)iosz)
439				    ? iosz : (size_t)(pos - data);
440
441				buf = image_file_map(fd, data, sz);
442				if (buf != NULL) {
443					error = image_chunk_copyin(blk, buf,
444					    sz, data, fd);
445					image_file_unmap(buf, sz);
446				} else
447					error = errno;
448
449				blk += sz / secsz;
450				bytesize += sz;
451				data += sz;
452			}
453			cur = hole;
454		} else {
455			/*
456			 * I don't know what this means or whether it
457			 * can happen at all...
458			 */
459			error = EDOOFUS;
460			break;
461		}
462	}
463	if (error)
464		close(fd);
465	if (!error && sizep != NULL)
466		*sizep = bytesize;
467	return (error);
468}
469
470int
471image_copyin(lba_t blk, int fd, uint64_t *sizep)
472{
473	struct stat sb;
474	int error;
475
476	error = image_chunk_skipto(blk);
477	if (!error) {
478		if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
479			error = image_copyin_stream(blk, fd, sizep);
480		else
481			error = image_copyin_mapped(blk, fd, sizep);
482	}
483	return (error);
484}
485
486/*
487 * Output/sink file handling.
488 */
489
490int
491image_copyout(int fd)
492{
493	int error;
494
495	error = image_copyout_region(fd, 0, image_size);
496	if (!error)
497		error = image_copyout_done(fd);
498	return (error);
499}
500
/*
 * Finish writing to descriptor 'fd' by setting the size of the file
 * to the current file offset.
 *
 * When the offset cannot be obtained with lseek(2) there is nothing
 * to do and 0 is returned.  Otherwise 0 is returned on success and
 * the errno value of ftruncate(2) on failure.
 */
int
image_copyout_done(int fd)
{
	off_t end;

	end = lseek(fd, 0L, SEEK_CUR);
	if (end == -1)
		return (0);
	if (ftruncate(fd, end) == -1)
		return (errno);
	return (0);
}
513
/*
 * Write the 'size' bytes at 'ptr' to descriptor 'fd'.
 *
 * write(2) is allowed to transfer fewer bytes than asked for, so the
 * call is repeated for the remainder until everything is written.
 *
 * Returns 0 when all bytes were written, the errno value of write(2)
 * when it failed, and EIO when write(2) made no progress at all.
 */
static int
image_copyout_memory(int fd, size_t size, void *ptr)
{
	const char *p = ptr;
	ssize_t n;

	while (size > 0) {
		n = write(fd, p, size);
		if (n == -1)
			return (errno);
		/* Don't spin forever when nothing gets written. */
		if (n == 0)
			return (EIO);
		p += n;
		size -= (size_t)n;
	}
	return (0);
}
522
523int
524image_copyout_zeroes(int fd, size_t count)
525{
526	static uint8_t *zeroes = NULL;
527	size_t sz;
528	int error;
529
530	if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
531		return (0);
532
533	/*
534	 * If we can't seek, we must write.
535	 */
536
537	if (zeroes == NULL) {
538		zeroes = calloc(1, secsz);
539		if (zeroes == NULL)
540			return (ENOMEM);
541	}
542
543	while (count > 0) {
544		sz = (count > secsz) ? secsz : count;
545		error = image_copyout_memory(fd, sz, zeroes);
546		if (error)
547			return (error);
548		count -= sz;
549	}
550	return (0);
551}
552
553static int
554image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
555{
556	void *buf;
557	size_t iosz, sz;
558	int error;
559
560	iosz = secsz * image_swap_pgsz;
561
562	while (size > 0) {
563		sz = (size > iosz) ? iosz : size;
564		buf = image_file_map(ifd, iofs, sz);
565		if (buf == NULL)
566			return (errno);
567		error = image_copyout_memory(fd, sz, buf);
568		image_file_unmap(buf, sz);
569		if (error)
570			return (error);
571		size -= sz;
572		iofs += sz;
573	}
574	return (0);
575}
576
577int
578image_copyout_region(int fd, lba_t blk, lba_t size)
579{
580	struct chunk *ch;
581	size_t ofs, sz;
582	int error;
583
584	size *= secsz;
585
586	while (size > 0) {
587		ch = image_chunk_find(blk);
588		if (ch == NULL)
589			return (EINVAL);
590		ofs = (blk - ch->ch_block) * secsz;
591		sz = ch->ch_size - ofs;
592		sz = ((lba_t)sz < size) ? sz : (size_t)size;
593		switch (ch->ch_type) {
594		case CH_TYPE_ZEROES:
595			error = image_copyout_zeroes(fd, sz);
596			break;
597		case CH_TYPE_FILE:
598			error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
599			    ch->ch_u.file.ofs + ofs);
600			break;
601		case CH_TYPE_MEMORY:
602			error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
603			break;
604		default:
605			return (EDOOFUS);
606		}
607		size -= sz;
608		blk += sz / secsz;
609	}
610	return (0);
611}
612
613int
614image_data(lba_t blk, lba_t size)
615{
616	struct chunk *ch;
617	lba_t lim;
618
619	while (1) {
620		ch = image_chunk_find(blk);
621		if (ch == NULL)
622			return (0);
623		if (ch->ch_type != CH_TYPE_ZEROES)
624			return (1);
625		lim = ch->ch_block + (ch->ch_size / secsz);
626		if (lim >= blk + size)
627			return (0);
628		size -= lim - blk;
629		blk = lim;
630	}
631	/*NOTREACHED*/
632}
633
634lba_t
635image_get_size(void)
636{
637
638	return (image_size);
639}
640
641int
642image_set_size(lba_t blk)
643{
644	int error;
645
646	error = image_chunk_skipto(blk);
647	if (!error)
648		image_size = blk;
649	return (error);
650}
651
652int
653image_write(lba_t blk, void *buf, ssize_t len)
654{
655	struct chunk *ch;
656
657	while (len > 0) {
658		if (!is_empty_sector(buf)) {
659			ch = image_chunk_find(blk);
660			if (ch == NULL)
661				return (ENXIO);
662			/* We may not be able to write to files. */
663			if (ch->ch_type == CH_TYPE_FILE)
664				return (EINVAL);
665			if (ch->ch_type == CH_TYPE_ZEROES) {
666				ch = image_chunk_memory(ch, blk);
667				if (ch == NULL)
668					return (ENOMEM);
669			}
670			assert(ch->ch_type == CH_TYPE_MEMORY);
671			memcpy(ch->ch_u.mem.ptr, buf, secsz);
672		}
673		blk++;
674		buf = (char *)buf + secsz;
675		len--;
676	}
677	return (0);
678}
679
680static void
681image_cleanup(void)
682{
683	struct chunk *ch;
684
685	while ((ch = STAILQ_FIRST(&image_chunks)) != NULL) {
686		switch (ch->ch_type) {
687		case CH_TYPE_FILE:
688			/* We may be closing the same file multiple times. */
689			if (ch->ch_u.file.fd != -1)
690				close(ch->ch_u.file.fd);
691			break;
692		case CH_TYPE_MEMORY:
693			free(ch->ch_u.mem.ptr);
694			break;
695		default:
696			break;
697		}
698		STAILQ_REMOVE_HEAD(&image_chunks, ch_list);
699		free(ch);
700	}
701	if (image_swap_fd != -1)
702		close(image_swap_fd);
703	unlink(image_swap_file);
704}
705
706int
707image_init(void)
708{
709	const char *tmpdir;
710
711	STAILQ_INIT(&image_chunks);
712	image_nchunks = 0;
713
714	image_swap_size = 0;
715	image_swap_pgsz = getpagesize();
716
717	if (atexit(image_cleanup) == -1)
718		return (errno);
719	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
720		tmpdir = _PATH_TMP;
721	snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
722	    tmpdir);
723	image_swap_fd = mkstemp(image_swap_file);
724	if (image_swap_fd == -1)
725		return (errno);
726	return (0);
727}
728