1/*-
2 * Copyright (c) 2014 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/types.h>
31#include <sys/endian.h>
32#include <sys/errno.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38
39#include "image.h"
40#include "format.h"
41#include "mkimg.h"
42
/*
 * Default cluster sizes, expressed as the log2 of the size in bytes.
 * qcow_resize() picks one of these based on the format version.
 */
#define	QCOW1_CLSTR_LOG2SZ	12	/* 4KB */
#define	QCOW2_CLSTR_LOG2SZ	16	/* 64KB */

/*
 * Flag bits in cluster offsets. qcow_write() adds QCOW_CLSTR_COPIED to
 * the L1 and L2 entries of version 2 images; QCOW_CLSTR_COMPRESSED is
 * not referenced by the code in this file.
 */
#define	QCOW_CLSTR_COMPRESSED	(1ULL << 62)
#define	QCOW_CLSTR_COPIED	(1ULL << 63)
50
/*
 * Header placed at the start of the output file by qcow_write(). The
 * multi-byte fields that qcow_write() fills in are stored big-endian
 * (be32enc/be64enc); fields it does not touch stay zero because the
 * header buffer comes from calloc(). The leading fields are shared by
 * both versions, the union holds the version specific remainder.
 */
struct qcow_header {
	uint32_t	magic;		/* always QCOW_MAGIC */
#define	QCOW_MAGIC		0x514649fb
	uint32_t	version;	/* QCOW_VERSION_1 or QCOW_VERSION_2 */
#define	QCOW_VERSION_1		1
#define	QCOW_VERSION_2		2
	uint64_t	path_offset;	/* never set by this file */
	uint32_t	path_length;	/* never set by this file */
	uint32_t	clstr_log2sz;	/* v2 only */
	uint64_t	disk_size;	/* image size in bytes */
	union {
		/* Version 1 layout. */
		struct {
			uint8_t		clstr_log2sz;	/* log2 of cluster size */
			uint8_t		l2_log2sz;	/* clstr_log2sz - 3 */
			uint16_t	_pad;
			uint32_t	encryption;	/* left zero */
			uint64_t	l1_offset;	/* byte offset of L1 table */
		} v1;
		/* Version 2 layout. */
		struct {
			uint32_t	encryption;	/* left zero */
			uint32_t	l1_entries;	/* number of L2 tables */
			uint64_t	l1_offset;	/* byte offset of L1 table */
			uint64_t	refcnt_offset;	/* byte offset of RC table */
			/*
			 * NOTE(review): the QCOW2 specification appears to
			 * define this slot as the number of clusters taken
			 * up by the refcount table -- confirm against the
			 * value qcow_write() stores here.
			 */
			uint32_t	refcnt_entries;
			uint32_t	snapshot_count;	/* left zero */
			uint64_t	snapshot_offset; /* left zero */
		} v2;
	} u;
};
80
/*
 * Log2 of the cluster size in bytes. Assigned by qcow_resize() according
 * to the format version; qcow_write() fails with EDOOFUS while it is
 * still zero.
 */
static u_int clstr_log2sz;
82
83static uint64_t
84round_clstr(uint64_t ofs)
85{
86	uint64_t clstrsz;
87
88	clstrsz = 1UL << clstr_log2sz;
89	return ((ofs + clstrsz - 1) & ~(clstrsz - 1));
90}
91
92static int
93qcow_resize(lba_t imgsz, u_int version)
94{
95	uint64_t imagesz;
96
97	switch (version) {
98	case QCOW_VERSION_1:
99		clstr_log2sz = QCOW1_CLSTR_LOG2SZ;
100		break;
101	case QCOW_VERSION_2:
102		clstr_log2sz = QCOW2_CLSTR_LOG2SZ;
103		break;
104	default:
105		return (EDOOFUS);
106	}
107
108	imagesz = round_clstr(imgsz * secsz);
109
110	if (verbose)
111		fprintf(stderr, "QCOW: image size = %ju, cluster size = %u\n",
112		    (uintmax_t)imagesz, (u_int)(1U << clstr_log2sz));
113
114	return (image_set_size(imagesz / secsz));
115}
116
117static int
118qcow1_resize(lba_t imgsz)
119{
120
121	return (qcow_resize(imgsz, QCOW_VERSION_1));
122}
123
124static int
125qcow2_resize(lba_t imgsz)
126{
127
128	return (qcow_resize(imgsz, QCOW_VERSION_2));
129}
130
131static int
132qcow_write(int fd, u_int version)
133{
134	struct qcow_header *hdr;
135	uint64_t *l1tbl, *l2tbl, *rctbl;
136	uint16_t *rcblk;
137	uint64_t clstr_imgsz, clstr_l2tbls, clstr_l1tblsz;
138	uint64_t clstr_rcblks, clstr_rctblsz;
139	uint64_t n, imagesz, nclstrs, ofs, ofsflags;
140	lba_t blk, blkofs, blk_imgsz;
141	u_int l1clno, l2clno, rcclno;
142	u_int blk_clstrsz;
143	u_int clstrsz, l1idx, l2idx;
144	int error;
145
146	if (clstr_log2sz == 0)
147		return (EDOOFUS);
148
149	clstrsz = 1U << clstr_log2sz;
150	blk_clstrsz = clstrsz / secsz;
151	blk_imgsz = image_get_size();
152	imagesz = blk_imgsz * secsz;
153	clstr_imgsz = imagesz >> clstr_log2sz;
154	clstr_l2tbls = round_clstr(clstr_imgsz * 8) >> clstr_log2sz;
155	clstr_l1tblsz = round_clstr(clstr_l2tbls * 8) >> clstr_log2sz;
156	nclstrs = clstr_imgsz + clstr_l2tbls + clstr_l1tblsz + 1;
157	clstr_rcblks = clstr_rctblsz = 0;
158	do {
159		n = clstr_rcblks + clstr_rctblsz;
160		clstr_rcblks = round_clstr((nclstrs + n) * 2) >> clstr_log2sz;
161		clstr_rctblsz = round_clstr(clstr_rcblks * 8) >> clstr_log2sz;
162	} while (n < (clstr_rcblks + clstr_rctblsz));
163
164	/*
165	 * We got all the sizes in clusters. Start the layout.
166	 * 0 - header
167	 * 1 - L1 table
168	 * 2 - RC table (v2 only)
169	 * 3 - L2 tables
170	 * 4 - RC block (v2 only)
171	 * 5 - data
172	 */
173
174	l1clno = 1;
175	rcclno = 0;
176	rctbl = l2tbl = l1tbl = NULL;
177	rcblk = NULL;
178
179	hdr = calloc(1, clstrsz);
180	if (hdr == NULL)
181		return (errno);
182
183	be32enc(&hdr->magic, QCOW_MAGIC);
184	be32enc(&hdr->version, version);
185	be64enc(&hdr->disk_size, imagesz);
186	switch (version) {
187	case QCOW_VERSION_1:
188		ofsflags = 0;
189		l2clno = l1clno + clstr_l1tblsz;
190		hdr->u.v1.clstr_log2sz = clstr_log2sz;
191		hdr->u.v1.l2_log2sz = clstr_log2sz - 3;
192		be64enc(&hdr->u.v1.l1_offset, clstrsz * l1clno);
193		break;
194	case QCOW_VERSION_2:
195		ofsflags = QCOW_CLSTR_COPIED;
196		rcclno = l1clno + clstr_l1tblsz;
197		l2clno = rcclno + clstr_rctblsz;
198		be32enc(&hdr->clstr_log2sz, clstr_log2sz);
199		be32enc(&hdr->u.v2.l1_entries, clstr_l2tbls);
200		be64enc(&hdr->u.v2.l1_offset, clstrsz * l1clno);
201		be64enc(&hdr->u.v2.refcnt_offset, clstrsz * rcclno);
202		be32enc(&hdr->u.v2.refcnt_entries, clstr_rcblks);
203		break;
204	default:
205		return (EDOOFUS);
206	}
207
208	if (sparse_write(fd, hdr, clstrsz) < 0) {
209                error = errno;
210		goto out;
211	}
212
213	free(hdr);
214	hdr = NULL;
215
216	ofs = clstrsz * l2clno;
217	nclstrs = 1 + clstr_l1tblsz + clstr_rctblsz;
218
219	l1tbl = calloc(1, clstrsz * clstr_l1tblsz);
220	if (l1tbl == NULL) {
221		error = ENOMEM;
222		goto out;
223	}
224
225	for (n = 0; n < clstr_imgsz; n++) {
226		blk = n * blk_clstrsz;
227		if (image_data(blk, blk_clstrsz)) {
228			nclstrs++;
229			l1idx = n >> (clstr_log2sz - 3);
230			if (l1tbl[l1idx] == 0) {
231				be64enc(l1tbl + l1idx, ofs + ofsflags);
232				ofs += clstrsz;
233				nclstrs++;
234			}
235		}
236	}
237
238	if (sparse_write(fd, l1tbl, clstrsz * clstr_l1tblsz) < 0) {
239		error = errno;
240		goto out;
241	}
242
243	clstr_rcblks = 0;
244	do {
245		n = clstr_rcblks;
246		clstr_rcblks = round_clstr((nclstrs + n) * 2) >> clstr_log2sz;
247	} while (n < clstr_rcblks);
248
249	if (rcclno > 0) {
250		rctbl = calloc(1, clstrsz * clstr_rctblsz);
251		if (rctbl == NULL) {
252			error = ENOMEM;
253			goto out;
254		}
255		for (n = 0; n < clstr_rcblks; n++) {
256			be64enc(rctbl + n, ofs);
257			ofs += clstrsz;
258			nclstrs++;
259		}
260		if (sparse_write(fd, rctbl, clstrsz * clstr_rctblsz) < 0) {
261			error = errno;
262			goto out;
263		}
264		free(rctbl);
265		rctbl = NULL;
266	}
267
268	l2tbl = malloc(clstrsz);
269	if (l2tbl == NULL) {
270		error = ENOMEM;
271		goto out;
272	}
273
274	for (l1idx = 0; l1idx < clstr_l2tbls; l1idx++) {
275		if (l1tbl[l1idx] == 0)
276			continue;
277		memset(l2tbl, 0, clstrsz);
278		blkofs = (lba_t)l1idx * blk_clstrsz * (clstrsz >> 3);
279		for (l2idx = 0; l2idx < (clstrsz >> 3); l2idx++) {
280			blk = blkofs + (lba_t)l2idx * blk_clstrsz;
281			if (blk >= blk_imgsz)
282				break;
283			if (image_data(blk, blk_clstrsz)) {
284				be64enc(l2tbl + l2idx, ofs + ofsflags);
285				ofs += clstrsz;
286			}
287		}
288		if (sparse_write(fd, l2tbl, clstrsz) < 0) {
289			error = errno;
290			goto out;
291		}
292	}
293
294	free(l2tbl);
295	l2tbl = NULL;
296	free(l1tbl);
297	l1tbl = NULL;
298
299	if (rcclno > 0) {
300		rcblk = calloc(1, clstrsz * clstr_rcblks);
301		if (rcblk == NULL) {
302			error = ENOMEM;
303			goto out;
304		}
305		for (n = 0; n < nclstrs; n++)
306			be16enc(rcblk + n, 1);
307		if (sparse_write(fd, rcblk, clstrsz * clstr_rcblks) < 0) {
308			error = errno;
309			goto out;
310		}
311		free(rcblk);
312		rcblk = NULL;
313	}
314
315	error = 0;
316	for (n = 0; n < clstr_imgsz; n++) {
317		blk = n * blk_clstrsz;
318		if (image_data(blk, blk_clstrsz)) {
319			error = image_copyout_region(fd, blk, blk_clstrsz);
320			if (error)
321				break;
322		}
323	}
324	if (!error)
325		error = image_copyout_done(fd);
326
327 out:
328	if (rcblk != NULL)
329		free(rcblk);
330	if (l2tbl != NULL)
331		free(l2tbl);
332	if (rctbl != NULL)
333		free(rctbl);
334	if (l1tbl != NULL)
335		free(l1tbl);
336	if (hdr != NULL)
337		free(hdr);
338	return (error);
339}
340
341static int
342qcow1_write(int fd)
343{
344
345	return (qcow_write(fd, QCOW_VERSION_1));
346}
347
348static int
349qcow2_write(int fd)
350{
351
352	return (qcow_write(fd, QCOW_VERSION_2));
353}
354
355static struct mkimg_format qcow1_format = {
356	.name = "qcow",
357	.description = "QEMU Copy-On-Write, version 1",
358	.resize = qcow1_resize,
359	.write = qcow1_write,
360};
361FORMAT_DEFINE(qcow1_format);
362
363static struct mkimg_format qcow2_format = {
364	.name = "qcow2",
365	.description = "QEMU Copy-On-Write, version 2",
366	.resize = qcow2_resize,
367	.write = qcow2_write,
368};
369FORMAT_DEFINE(qcow2_format);
370