qcow.c revision 272773
1228753Smm/*-
2228753Smm * Copyright (c) 2014 Marcel Moolenaar
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17228753Smm * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24228753Smm * SUCH DAMAGE.
25228753Smm */
26228753Smm
27228753Smm#include <sys/cdefs.h>
28228753Smm__FBSDID("$FreeBSD: stable/10/usr.bin/mkimg/qcow.c 272773 2014-10-08 22:01:35Z marcel $");
29228753Smm
30228753Smm#include <sys/types.h>
31228753Smm#include <sys/endian.h>
32228753Smm#include <sys/errno.h>
33228753Smm#include <stdint.h>
34228753Smm#include <stdio.h>
35228753Smm#include <stdlib.h>
36228753Smm#include <string.h>
37228753Smm#include <unistd.h>
38228753Smm
39228753Smm#include "image.h"
40228753Smm#include "format.h"
41228753Smm#include "mkimg.h"
42228753Smm
/*
 * Default cluster sizes, expressed as log2 of the size in bytes.
 * qcow_resize() selects one of these based on the format version.
 */
#define	QCOW1_CLSTR_LOG2SZ	12	/* 1 << 12 = 4KB */
#define	QCOW2_CLSTR_LOG2SZ	16	/* 1 << 16 = 64KB */

/*
 * Flag bits that live in the two topmost bits of a 64-bit cluster offset.
 * qcow_write() adds QCOW_CLSTR_COPIED to every L1/L2 entry of a version 2
 * image; QCOW_CLSTR_COMPRESSED is defined but not used in this file.
 */
#define	QCOW_CLSTR_COMPRESSED	(1ULL << 62)
#define	QCOW_CLSTR_COPIED	(1ULL << 63)
50228753Smm
/* Values stored in the magic and version fields of the header. */
#define	QCOW_MAGIC		0x514649fb
#define	QCOW_VERSION_1		1
#define	QCOW_VERSION_2		2

/*
 * Header placed at the start of the output file by qcow_write().  All
 * multi-byte fields are stored big-endian (via be32enc()/be64enc()).
 * The first 32 bytes are shared by both versions; the remainder differs
 * per version and is described by the union.  Fields that qcow_write()
 * never assigns stay zero because the header buffer comes from calloc().
 */
struct qcow_header {
	uint32_t	magic;		/* QCOW_MAGIC */
	uint32_t	version;	/* QCOW_VERSION_1 or QCOW_VERSION_2 */
	uint64_t	path_offset;	/* not set by this file */
	uint32_t	path_length;	/* not set by this file */
	uint32_t	clstr_log2sz;	/* v2 only */
	uint64_t	disk_size;	/* image size in bytes */
	union {
		/* Version 1 tail. */
		struct {
			uint8_t		clstr_log2sz;
			uint8_t		l2_log2sz;	/* clstr_log2sz - 3 */
			uint16_t	_pad;
			uint32_t	encryption;	/* not set by this file */
			uint64_t	l1_offset;	/* byte offset of L1 table */
		} v1;
		/* Version 2 tail. */
		struct {
			uint32_t	encryption;	/* not set by this file */
			uint32_t	l1_entries;	/* number of L1 entries */
			uint64_t	l1_offset;	/* byte offset of L1 table */
			uint64_t	refcnt_offset;	/* byte offset of RC table */
			/*
			 * NOTE(review): the QCOW2 specification names this
			 * slot refcount_table_clusters; confirm the units
			 * against what qcow_write() stores here.
			 */
			uint32_t	refcnt_entries;
			uint32_t	snapshot_count;	/* not set by this file */
			uint64_t	snapshot_offset; /* not set by this file */
		} v2;
	} u;
};
80228753Smm
81228753Smmstatic u_int clstr_log2sz;
82228753Smm
83228753Smmstatic uint64_t
84228753Smmround_clstr(uint64_t ofs)
85228753Smm{
86228753Smm	uint64_t clstrsz;
87228753Smm
88348608Smm	clstrsz = 1UL << clstr_log2sz;
89228753Smm	return ((ofs + clstrsz - 1) & ~(clstrsz - 1));
90228753Smm}
91228753Smm
92228753Smmstatic int
93228753Smmqcow_resize(lba_t imgsz, u_int version)
94228753Smm{
95228753Smm	uint64_t imagesz;
96228753Smm
97228753Smm	switch (version) {
98228753Smm	case QCOW_VERSION_1:
99228753Smm		clstr_log2sz = QCOW1_CLSTR_LOG2SZ;
100302001Smm		break;
101228753Smm	case QCOW_VERSION_2:
102228753Smm		clstr_log2sz = QCOW2_CLSTR_LOG2SZ;
103228753Smm		break;
104228753Smm	default:
105228753Smm		return (EDOOFUS);
106228753Smm	}
107228753Smm
108228753Smm	imagesz = round_clstr(imgsz * secsz);
109228753Smm
110228753Smm	if (verbose)
111228753Smm		fprintf(stderr, "QCOW: image size = %ju, cluster size = %u\n",
112228753Smm		    (uintmax_t)imagesz, (u_int)(1U << clstr_log2sz));
113228753Smm
114228753Smm	return (image_set_size(imagesz / secsz));
115228753Smm}
116228753Smm
117228753Smmstatic int
118228753Smmqcow1_resize(lba_t imgsz)
119228753Smm{
120228753Smm
121228753Smm	return (qcow_resize(imgsz, QCOW_VERSION_1));
122228753Smm}
123228753Smm
124228753Smmstatic int
125228753Smmqcow2_resize(lba_t imgsz)
126228753Smm{
127228753Smm
128228753Smm	return (qcow_resize(imgsz, QCOW_VERSION_2));
129228753Smm}
130228753Smm
131228753Smmstatic int
132228753Smmqcow_write(int fd, u_int version)
133228753Smm{
134228753Smm	struct qcow_header *hdr;
135228753Smm	uint64_t *l1tbl, *l2tbl, *rctbl;
136228753Smm	uint16_t *rcblk;
137228753Smm	uint64_t clstr_imgsz, clstr_l2tbls, clstr_l1tblsz;
138228753Smm	uint64_t clstr_rcblks, clstr_rctblsz;
139228753Smm	uint64_t n, imagesz, nclstrs, ofs, ofsflags;
140228753Smm	lba_t blk, blkofs, blk_imgsz;
141228753Smm	u_int l1clno, l2clno, rcclno;
142228753Smm	u_int blk_clstrsz;
143228753Smm	u_int clstrsz, l1idx, l2idx;
144228753Smm	int error;
145228753Smm
146228753Smm	if (clstr_log2sz == 0)
147228753Smm		return (EDOOFUS);
148228753Smm
149228753Smm	clstrsz = 1U << clstr_log2sz;
150228753Smm	blk_clstrsz = clstrsz / secsz;
151228753Smm	blk_imgsz = image_get_size();
152228753Smm	imagesz = blk_imgsz * secsz;
153228753Smm	clstr_imgsz = imagesz >> clstr_log2sz;
154228753Smm	clstr_l2tbls = round_clstr(clstr_imgsz * 8) >> clstr_log2sz;
155228753Smm	clstr_l1tblsz = round_clstr(clstr_l2tbls * 8) >> clstr_log2sz;
156228753Smm	nclstrs = clstr_imgsz + clstr_l2tbls + clstr_l1tblsz + 1;
157228753Smm	clstr_rcblks = clstr_rctblsz = 0;
158228753Smm	do {
159228753Smm		n = clstr_rcblks + clstr_rctblsz;
160228753Smm		clstr_rcblks = round_clstr((nclstrs + n) * 2) >> clstr_log2sz;
161228753Smm		clstr_rctblsz = round_clstr(clstr_rcblks * 8) >> clstr_log2sz;
162228753Smm	} while (n < (clstr_rcblks + clstr_rctblsz));
163228753Smm
164228753Smm	/*
165228753Smm	 * We got all the sizes in clusters. Start the layout.
166228753Smm	 * 0 - header
167228753Smm	 * 1 - L1 table
168228753Smm	 * 2 - RC table (v2 only)
169228753Smm	 * 3 - L2 tables
170228753Smm	 * 4 - RC block (v2 only)
171228753Smm	 * 5 - data
172228753Smm	 */
173228753Smm
174228753Smm	l1clno = 1;
175228753Smm	rcclno = 0;
176228753Smm	rctbl = l2tbl = l1tbl = NULL;
177228753Smm	rcblk = NULL;
178228753Smm
179228753Smm	hdr = calloc(1, clstrsz);
180228753Smm	if (hdr == NULL)
181228753Smm		return (errno);
182228753Smm
183228753Smm	be32enc(&hdr->magic, QCOW_MAGIC);
184228753Smm	be32enc(&hdr->version, version);
185228753Smm	be64enc(&hdr->disk_size, imagesz);
186228753Smm	switch (version) {
187228753Smm	case QCOW_VERSION_1:
188228753Smm		ofsflags = 0;
189228753Smm		l2clno = l1clno + clstr_l1tblsz;
190232153Smm		hdr->u.v1.clstr_log2sz = clstr_log2sz;
191228753Smm		hdr->u.v1.l2_log2sz = clstr_log2sz - 3;
192228753Smm		be64enc(&hdr->u.v1.l1_offset, clstrsz * l1clno);
193228753Smm		break;
194232153Smm	case QCOW_VERSION_2:
195228753Smm		ofsflags = QCOW_CLSTR_COPIED;
196228753Smm		rcclno = l1clno + clstr_l1tblsz;
197228753Smm		l2clno = rcclno + clstr_rctblsz;
198232153Smm		be32enc(&hdr->clstr_log2sz, clstr_log2sz);
199232153Smm		be32enc(&hdr->u.v2.l1_entries, clstr_l2tbls);
200232153Smm		be64enc(&hdr->u.v2.l1_offset, clstrsz * l1clno);
201228753Smm		be64enc(&hdr->u.v2.refcnt_offset, clstrsz * rcclno);
202232153Smm		be32enc(&hdr->u.v2.refcnt_entries, clstr_rcblks);
203228753Smm		break;
204228753Smm	default:
205228753Smm		return (EDOOFUS);
206228753Smm	}
207228753Smm
208228753Smm	if (sparse_write(fd, hdr, clstrsz) < 0) {
209228753Smm                error = errno;
210228753Smm		goto out;
211228753Smm	}
212228753Smm
213228753Smm	free(hdr);
214228753Smm	hdr = NULL;
215228753Smm
216228753Smm	ofs = clstrsz * l2clno;
217228753Smm	nclstrs = 1 + clstr_l1tblsz + clstr_rctblsz;
218228753Smm
219228753Smm	l1tbl = calloc(1, clstrsz * clstr_l1tblsz);
220228753Smm	if (l1tbl == NULL) {
221228753Smm		error = ENOMEM;
222228753Smm		goto out;
223228753Smm	}
224228753Smm
225228753Smm	for (n = 0; n < clstr_imgsz; n++) {
226228753Smm		blk = n * blk_clstrsz;
227228753Smm		if (image_data(blk, blk_clstrsz)) {
228228753Smm			nclstrs++;
229228753Smm			l1idx = n >> (clstr_log2sz - 3);
230			if (l1tbl[l1idx] == 0) {
231				be64enc(l1tbl + l1idx, ofs + ofsflags);
232				ofs += clstrsz;
233				nclstrs++;
234			}
235		}
236	}
237
238	if (sparse_write(fd, l1tbl, clstrsz * clstr_l1tblsz) < 0) {
239		error = errno;
240		goto out;
241	}
242
243	clstr_rcblks = 0;
244	do {
245		n = clstr_rcblks;
246		clstr_rcblks = round_clstr((nclstrs + n) * 2) >> clstr_log2sz;
247	} while (n < clstr_rcblks);
248
249	if (rcclno > 0) {
250		rctbl = calloc(1, clstrsz * clstr_rctblsz);
251		if (rctbl == NULL) {
252			error = ENOMEM;
253			goto out;
254		}
255		for (n = 0; n < clstr_rcblks; n++) {
256			be64enc(rctbl + n, ofs);
257			ofs += clstrsz;
258			nclstrs++;
259		}
260		if (sparse_write(fd, rctbl, clstrsz * clstr_rctblsz) < 0) {
261			error = errno;
262			goto out;
263		}
264		free(rctbl);
265		rctbl = NULL;
266	}
267
268	l2tbl = malloc(clstrsz);
269	if (l2tbl == NULL) {
270		error = ENOMEM;
271		goto out;
272	}
273
274	for (l1idx = 0; l1idx < clstr_l2tbls; l1idx++) {
275		if (l1tbl[l1idx] == 0)
276			continue;
277		memset(l2tbl, 0, clstrsz);
278		blkofs = (lba_t)l1idx * blk_clstrsz * (clstrsz >> 3);
279		for (l2idx = 0; l2idx < (clstrsz >> 3); l2idx++) {
280			blk = blkofs + (lba_t)l2idx * blk_clstrsz;
281			if (blk >= blk_imgsz)
282				break;
283			if (image_data(blk, blk_clstrsz)) {
284				be64enc(l2tbl + l2idx, ofs + ofsflags);
285				ofs += clstrsz;
286			}
287		}
288		if (sparse_write(fd, l2tbl, clstrsz) < 0) {
289			error = errno;
290			goto out;
291		}
292	}
293
294	free(l2tbl);
295	l2tbl = NULL;
296	free(l1tbl);
297	l1tbl = NULL;
298
299	if (rcclno > 0) {
300		rcblk = calloc(1, clstrsz * clstr_rcblks);
301		if (rcblk == NULL) {
302			error = ENOMEM;
303			goto out;
304		}
305		for (n = 0; n < nclstrs; n++)
306			be16enc(rcblk + n, 1);
307		if (sparse_write(fd, rcblk, clstrsz * clstr_rcblks) < 0) {
308			error = errno;
309			goto out;
310		}
311		free(rcblk);
312		rcblk = NULL;
313	}
314
315	error = 0;
316	for (n = 0; n < clstr_imgsz; n++) {
317		blk = n * blk_clstrsz;
318		if (image_data(blk, blk_clstrsz)) {
319			error = image_copyout_region(fd, blk, blk_clstrsz);
320			if (error)
321				break;
322		}
323	}
324	if (!error)
325		error = image_copyout_done(fd);
326
327 out:
328	if (rcblk != NULL)
329		free(rcblk);
330	if (l2tbl != NULL)
331		free(l2tbl);
332	if (rctbl != NULL)
333		free(rctbl);
334	if (l1tbl != NULL)
335		free(l1tbl);
336	if (hdr != NULL)
337		free(hdr);
338	return (error);
339}
340
341static int
342qcow1_write(int fd)
343{
344
345	return (qcow_write(fd, QCOW_VERSION_1));
346}
347
348static int
349qcow2_write(int fd)
350{
351
352	return (qcow_write(fd, QCOW_VERSION_2));
353}
354
355static struct mkimg_format qcow1_format = {
356	.name = "qcow",
357	.description = "QEMU Copy-On-Write, version 1",
358	.resize = qcow1_resize,
359	.write = qcow1_write,
360};
361FORMAT_DEFINE(qcow1_format);
362
363static struct mkimg_format qcow2_format = {
364	.name = "qcow2",
365	.description = "QEMU Copy-On-Write, version 2",
366	.resize = qcow2_resize,
367	.write = qcow2_write,
368};
369FORMAT_DEFINE(qcow2_format);
370