1/*-
2 * Copyright (c) 2014 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28#include <sys/errno.h>
29#include <assert.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "endian.h"
35#include "image.h"
36#include "format.h"
37#include "mkimg.h"
38
/*
 * Default cluster sizes, expressed as log2 of the size in bytes.
 * qcow_resize() picks one of these based on the QCOW version.
 */
#define	QCOW1_CLSTR_LOG2SZ	12	/* 4KB */
#define	QCOW2_CLSTR_LOG2SZ	16	/* 64KB */

/*
 * Flag bits in cluster offsets.  qcow_write() adds QCOW_CLSTR_COPIED to
 * every L1 and L2 table entry it emits for version 2 images;
 * QCOW_CLSTR_COMPRESSED is not referenced by the code in this file.
 */
#define	QCOW_CLSTR_COMPRESSED	(1ULL << 62)
#define	QCOW_CLSTR_COPIED	(1ULL << 63)
46
/*
 * Image header, written by qcow_write() at the very start of the output
 * in a zero-filled buffer of one cluster.  Multi-byte fields are stored
 * with be32enc()/be64enc(), i.e. in big-endian byte order.  The leading
 * fields are shared by both versions; the union holds the
 * version-specific remainder.  Fields that qcow_write() does not set
 * explicitly are written as zero.
 */
struct qcow_header {
	uint32_t	magic;		/* always QCOW_MAGIC */
#define	QCOW_MAGIC		0x514649fb
	uint32_t	version;	/* QCOW_VERSION_1 or QCOW_VERSION_2 */
#define	QCOW_VERSION_1		1
#define	QCOW_VERSION_2		2
	uint64_t	path_offset;	/* not set in this file; left zero */
	uint32_t	path_length;	/* not set in this file; left zero */
	uint32_t	clstr_log2sz;	/* v2 only */
	uint64_t	disk_size;	/* image size in bytes */
	union {
		/* Version 1 layout. */
		struct {
			uint8_t		clstr_log2sz;	/* log2 of cluster size */
			uint8_t		l2_log2sz;	/* clstr_log2sz - 3 */
			uint16_t	_pad;
			uint32_t	encryption;	/* left zero */
			uint64_t	l1_offset;	/* byte offset of L1 table */
		} v1;
		/* Version 2 layout. */
		struct {
			uint32_t	encryption;	/* left zero */
			uint32_t	l1_entries;	/* number of L2 tables */
			uint64_t	l1_offset;	/* byte offset of L1 table */
			uint64_t	refcnt_offset;	/* byte offset of RC table */
			uint32_t	refcnt_clstrs;	/* clusters in RC table */
			uint32_t	snapshot_count;	/* left zero */
			uint64_t	snapshot_offset; /* left zero */
		} v2;
	} u;
};
76
/*
 * log2 of the cluster size in bytes.  Assigned by qcow_resize() according
 * to the QCOW version and read by round_clstr() and qcow_write(); the
 * latter asserts that it is non-zero before using it.
 */
static u_int clstr_log2sz;
78
79static uint64_t
80round_clstr(uint64_t ofs)
81{
82	uint64_t clstrsz;
83
84	clstrsz = 1UL << clstr_log2sz;
85	return ((ofs + clstrsz - 1) & ~(clstrsz - 1));
86}
87
88static int
89qcow_resize(lba_t imgsz, u_int version)
90{
91	uint64_t imagesz;
92
93	switch (version) {
94	case QCOW_VERSION_1:
95		clstr_log2sz = QCOW1_CLSTR_LOG2SZ;
96		break;
97	case QCOW_VERSION_2:
98		clstr_log2sz = QCOW2_CLSTR_LOG2SZ;
99		break;
100	default:
101		assert(0);
102	}
103
104	imagesz = round_clstr(imgsz * secsz);
105
106	if (verbose)
107		fprintf(stderr, "QCOW: image size = %ju, cluster size = %u\n",
108		    (uintmax_t)imagesz, (u_int)(1U << clstr_log2sz));
109
110	return (image_set_size(imagesz / secsz));
111}
112
113static int
114qcow1_resize(lba_t imgsz)
115{
116
117	return (qcow_resize(imgsz, QCOW_VERSION_1));
118}
119
120static int
121qcow2_resize(lba_t imgsz)
122{
123
124	return (qcow_resize(imgsz, QCOW_VERSION_2));
125}
126
127static int
128qcow_write(int fd, u_int version)
129{
130	struct qcow_header *hdr;
131	uint64_t *l1tbl, *l2tbl, *rctbl;
132	uint16_t *rcblk;
133	uint64_t clstr_imgsz, clstr_l2tbls, clstr_l1tblsz;
134	uint64_t clstr_rcblks, clstr_rctblsz;
135	uint64_t n, imagesz, nclstrs, ofs, ofsflags;
136	lba_t blk, blkofs, blk_imgsz;
137	u_int l1clno, l2clno, rcclno;
138	u_int blk_clstrsz, refcnt_clstrs;
139	u_int clstrsz, l1idx, l2idx;
140	int error;
141
142	assert(clstr_log2sz != 0);
143
144	clstrsz = 1U << clstr_log2sz;
145	blk_clstrsz = clstrsz / secsz;
146	blk_imgsz = image_get_size();
147	imagesz = blk_imgsz * secsz;
148	clstr_imgsz = imagesz >> clstr_log2sz;
149	clstr_l2tbls = round_clstr(clstr_imgsz * 8) >> clstr_log2sz;
150	clstr_l1tblsz = round_clstr(clstr_l2tbls * 8) >> clstr_log2sz;
151	nclstrs = clstr_imgsz + clstr_l2tbls + clstr_l1tblsz + 1;
152	clstr_rcblks = clstr_rctblsz = 0;
153	do {
154		n = clstr_rcblks + clstr_rctblsz;
155		clstr_rcblks = round_clstr((nclstrs + n) * 2) >> clstr_log2sz;
156		clstr_rctblsz = round_clstr(clstr_rcblks * 8) >> clstr_log2sz;
157	} while (n < (clstr_rcblks + clstr_rctblsz));
158
159	/*
160	 * We got all the sizes in clusters. Start the layout.
161	 * 0 - header
162	 * 1 - L1 table
163	 * 2 - RC table (v2 only)
164	 * 3 - L2 tables
165	 * 4 - RC block (v2 only)
166	 * 5 - data
167	 */
168
169	l1clno = 1;
170	rcclno = 0;
171	rctbl = l2tbl = l1tbl = NULL;
172	rcblk = NULL;
173
174	hdr = calloc(1, clstrsz);
175	if (hdr == NULL)
176		return (errno);
177
178	be32enc(&hdr->magic, QCOW_MAGIC);
179	be32enc(&hdr->version, version);
180	be64enc(&hdr->disk_size, imagesz);
181	switch (version) {
182	case QCOW_VERSION_1:
183		ofsflags = 0;
184		l2clno = l1clno + clstr_l1tblsz;
185		hdr->u.v1.clstr_log2sz = clstr_log2sz;
186		hdr->u.v1.l2_log2sz = clstr_log2sz - 3;
187		be64enc(&hdr->u.v1.l1_offset, clstrsz * l1clno);
188		break;
189	case QCOW_VERSION_2:
190		ofsflags = QCOW_CLSTR_COPIED;
191		rcclno = l1clno + clstr_l1tblsz;
192		l2clno = rcclno + clstr_rctblsz;
193		be32enc(&hdr->clstr_log2sz, clstr_log2sz);
194		be32enc(&hdr->u.v2.l1_entries, clstr_l2tbls);
195		be64enc(&hdr->u.v2.l1_offset, clstrsz * l1clno);
196		be64enc(&hdr->u.v2.refcnt_offset, clstrsz * rcclno);
197		refcnt_clstrs = round_clstr(clstr_rcblks * 8) >> clstr_log2sz;
198		be32enc(&hdr->u.v2.refcnt_clstrs, refcnt_clstrs);
199		break;
200	default:
201		assert(0);
202	}
203
204	if (sparse_write(fd, hdr, clstrsz) < 0) {
205		error = errno;
206		goto out;
207	}
208
209	free(hdr);
210	hdr = NULL;
211
212	ofs = clstrsz * l2clno;
213	nclstrs = 1 + clstr_l1tblsz + clstr_rctblsz;
214
215	l1tbl = calloc(clstr_l1tblsz, clstrsz);
216	if (l1tbl == NULL) {
217		error = ENOMEM;
218		goto out;
219	}
220
221	for (n = 0; n < clstr_imgsz; n++) {
222		blk = n * blk_clstrsz;
223		if (image_data(blk, blk_clstrsz)) {
224			nclstrs++;
225			l1idx = n >> (clstr_log2sz - 3);
226			if (l1tbl[l1idx] == 0) {
227				be64enc(l1tbl + l1idx, ofs + ofsflags);
228				ofs += clstrsz;
229				nclstrs++;
230			}
231		}
232	}
233
234	if (sparse_write(fd, l1tbl, clstrsz * clstr_l1tblsz) < 0) {
235		error = errno;
236		goto out;
237	}
238
239	clstr_rcblks = 0;
240	do {
241		n = clstr_rcblks;
242		clstr_rcblks = round_clstr((nclstrs + n) * 2) >> clstr_log2sz;
243	} while (n < clstr_rcblks);
244
245	if (rcclno > 0) {
246		rctbl = calloc(clstr_rctblsz, clstrsz);
247		if (rctbl == NULL) {
248			error = ENOMEM;
249			goto out;
250		}
251		for (n = 0; n < clstr_rcblks; n++) {
252			be64enc(rctbl + n, ofs);
253			ofs += clstrsz;
254			nclstrs++;
255		}
256		if (sparse_write(fd, rctbl, clstrsz * clstr_rctblsz) < 0) {
257			error = errno;
258			goto out;
259		}
260		free(rctbl);
261		rctbl = NULL;
262	}
263
264	l2tbl = malloc(clstrsz);
265	if (l2tbl == NULL) {
266		error = ENOMEM;
267		goto out;
268	}
269
270	for (l1idx = 0; l1idx < clstr_l2tbls; l1idx++) {
271		if (l1tbl[l1idx] == 0)
272			continue;
273		memset(l2tbl, 0, clstrsz);
274		blkofs = (lba_t)l1idx * blk_clstrsz * (clstrsz >> 3);
275		for (l2idx = 0; l2idx < (clstrsz >> 3); l2idx++) {
276			blk = blkofs + (lba_t)l2idx * blk_clstrsz;
277			if (blk >= blk_imgsz)
278				break;
279			if (image_data(blk, blk_clstrsz)) {
280				be64enc(l2tbl + l2idx, ofs + ofsflags);
281				ofs += clstrsz;
282			}
283		}
284		if (sparse_write(fd, l2tbl, clstrsz) < 0) {
285			error = errno;
286			goto out;
287		}
288	}
289
290	free(l2tbl);
291	l2tbl = NULL;
292	free(l1tbl);
293	l1tbl = NULL;
294
295	if (rcclno > 0) {
296		rcblk = calloc(clstr_rcblks, clstrsz);
297		if (rcblk == NULL) {
298			error = ENOMEM;
299			goto out;
300		}
301		for (n = 0; n < nclstrs; n++)
302			be16enc(rcblk + n, 1);
303		if (sparse_write(fd, rcblk, clstrsz * clstr_rcblks) < 0) {
304			error = errno;
305			goto out;
306		}
307		free(rcblk);
308		rcblk = NULL;
309	}
310
311	error = 0;
312	for (n = 0; n < clstr_imgsz; n++) {
313		blk = n * blk_clstrsz;
314		if (image_data(blk, blk_clstrsz)) {
315			error = image_copyout_region(fd, blk, blk_clstrsz);
316			if (error)
317				break;
318		}
319	}
320	if (!error)
321		error = image_copyout_done(fd);
322
323 out:
324	if (rcblk != NULL)
325		free(rcblk);
326	if (l2tbl != NULL)
327		free(l2tbl);
328	if (rctbl != NULL)
329		free(rctbl);
330	if (l1tbl != NULL)
331		free(l1tbl);
332	if (hdr != NULL)
333		free(hdr);
334	return (error);
335}
336
337static int
338qcow1_write(int fd)
339{
340
341	return (qcow_write(fd, QCOW_VERSION_1));
342}
343
344static int
345qcow2_write(int fd)
346{
347
348	return (qcow_write(fd, QCOW_VERSION_2));
349}
350
/*
 * Descriptor for the QCOW version 1 output format, selected by the name
 * "qcow".  FORMAT_DEFINE is declared outside this file; presumably it
 * registers the descriptor with mkimg's list of formats (confirm against
 * format.h).
 */
static struct mkimg_format qcow1_format = {
	.name = "qcow",
	.description = "QEMU Copy-On-Write, version 1",
	.resize = qcow1_resize,
	.write = qcow1_write,
};
FORMAT_DEFINE(qcow1_format);
358
/*
 * Descriptor for the QCOW version 2 output format, selected by the name
 * "qcow2".  FORMAT_DEFINE is declared outside this file; presumably it
 * registers the descriptor with mkimg's list of formats (confirm against
 * format.h).
 */
static struct mkimg_format qcow2_format = {
	.name = "qcow2",
	.description = "QEMU Copy-On-Write, version 2",
	.resize = qcow2_resize,
	.write = qcow2_write,
};
FORMAT_DEFINE(qcow2_format);
366