g_journal.h revision 299497
1/*-
2 * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/geom/journal/g_journal.h 299497 2016-05-12 00:45:57Z pfg $
27 */
28
29#ifndef	_G_JOURNAL_H_
30#define	_G_JOURNAL_H_
31
32#include <sys/endian.h>
33#include <sys/md5.h>
34#ifdef _KERNEL
35#include <sys/bio.h>
36#endif
37
/* Name of the journal GEOM class. */
#define	G_JOURNAL_CLASS_NAME	"JOURNAL"

/*
 * Magic string identifying journal metadata.
 * NOTE(review): presumably the value expected in md_magic -- confirm
 * against the code that validates decoded metadata.
 */
#define	G_JOURNAL_MAGIC		"GEOM::JOURNAL"

/*
 * Metadata format version.
 *
 * Version history:
 * 0 - Initial version number.
 */
#define	G_JOURNAL_VERSION	0
46
47#ifdef _KERNEL
/* Debug verbosity; a message is printed when this is >= the message level. */
extern int g_journal_debug;

/*
 * Print the prefix shared by all log lines: "GEOM_JOURNAL", then the level
 * in square brackets (only when g_journal_debug is greater than zero), then
 * ": ".
 */
#define	GJ_LOG_PREFIX(lvl)	do {					\
	printf("GEOM_JOURNAL");						\
	if (g_journal_debug > 0)					\
		printf("[%u]", lvl);					\
	printf(": ");							\
} while (0)

/*
 * Print a printf-style message at debug level lvl, terminated by a newline.
 * Nothing is printed when g_journal_debug is lower than lvl.
 */
#define	GJ_DEBUG(lvl, ...)	do {					\
	if (g_journal_debug >= (lvl)) {					\
		GJ_LOG_PREFIX(lvl);					\
		printf(__VA_ARGS__);					\
		printf("\n");						\
	}								\
} while (0)

/*
 * Same as GJ_DEBUG(), but the message is followed by a space and the
 * description of request bp as printed by g_print_bio().
 */
#define	GJ_LOGREQ(lvl, bp, ...)	do {					\
	if (g_journal_debug >= (lvl)) {					\
		GJ_LOG_PREFIX(lvl);					\
		printf(__VA_ARGS__);					\
		printf(" ");						\
		g_print_bio(bp);					\
		printf("\n");						\
	}								\
} while (0)
72
/*
 * True when the journal holds nothing: the current journal offset lies
 * exactly one journal-provider sector past the start of the active journal
 * and no requests are counted in the current queue.
 */
#define	JEMPTY(sc)							\
	(((sc)->sc_journal_offset - (sc)->sc_jprovider->sectorsize) ==	\
	    (sc)->sc_active.jj_offset &&				\
	 ((sc)->sc_current_count == 0))
77
/*
 * Request kinds.  GJ_BIO_MASK selects the low four bits, which is wide
 * enough for every kind defined here.
 */
#define	GJ_BIO_REGULAR		0x00
#define	GJ_BIO_READ		0x01
#define	GJ_BIO_JOURNAL		0x02
#define	GJ_BIO_COPY		0x03
#define	GJ_BIO_MASK		0x0f

/* Disabled: per-request flag bits that would occupy the high nibble. */
#if 0
#define	GJF_BIO_DONT_FREE	0x10
#define	GJF_BIO_MASK		0xf0
#endif

/*
 * Per-device flag bits.
 * NOTE(review): presumably kept in sc_flags of struct g_journal_softc --
 * confirm against g_journal.c.
 */
#define	GJF_DEVICE_HARDCODED		0x0001
#define	GJF_DEVICE_DESTROY		0x0010
#define	GJF_DEVICE_SWITCH		0x0020
#define	GJF_DEVICE_BEFORE_SWITCH	0x0040
#define	GJF_DEVICE_CLEAN		0x0080
#define	GJF_DEVICE_CHECKSUM		0x0100

/* NOTE(review): what this limit bounds is not visible in this header. */
#define	GJ_HARD_LIMIT		64
97
/*
 * Pointers to journaled data are kept in bio structures.  Two off_t values
 * have to be stored per request (the offset in the data provider and the
 * offset in the journal), so the bio_completed field is borrowed to hold
 * the journal offset.
 */
#define	bio_joffset	bio_completed

/*
 * The bio_caller1 field is used as the "next" pointer when a bio is linked
 * into a queue.
 */
#define	bio_next	bio_caller1
108
/*
 * There are two such structures maintained inside each journaled device.
 * One describes the active part of the journal, where recent requests are
 * stored.  The second describes the last consistent part of the journal,
 * with requests that are copied to the destination provider.
 */
struct g_journal_journal {
	/* Cached journal entries (head of a bio queue). */
	struct bio	*jj_queue;
	/* Journal's start offset. */
	off_t		 jj_offset;
};
119
120struct g_journal_softc {
121	uint32_t	 sc_id;
122	uint8_t		 sc_type;
123	uint8_t		 sc_orig_type;
124	struct g_geom	*sc_geom;
125	u_int		 sc_flags;
126	struct mtx	 sc_mtx;
127	off_t		 sc_mediasize;
128	u_int		 sc_sectorsize;
129#define	GJ_FLUSH_DATA		0x01
130#define	GJ_FLUSH_JOURNAL	0x02
131	u_int		 sc_bio_flush;
132
133	uint32_t	 sc_journal_id;
134	uint32_t	 sc_journal_next_id;
135	int		 sc_journal_copying;
136	off_t		 sc_journal_offset;
137	off_t		 sc_journal_previous_id;
138
139	struct bio_queue_head sc_back_queue;
140	struct bio_queue_head sc_regular_queue;
141
142	struct bio_queue_head sc_delayed_queue;
143	int		 sc_delayed_count;
144
145	struct bio	*sc_current_queue;
146	int		 sc_current_count;
147
148	struct bio	*sc_flush_queue;
149	int		 sc_flush_count;
150	int		 sc_flush_in_progress;
151
152	struct bio	*sc_copy_queue;
153	int		 sc_copy_in_progress;
154
155	struct g_consumer *sc_dconsumer;
156	struct g_consumer *sc_jconsumer;
157
158	struct g_journal_journal sc_inactive;
159	struct g_journal_journal sc_active;
160
161	off_t		 sc_jstart;	/* Journal space start offset. */
162	off_t		 sc_jend;	/* Journal space end offset. */
163
164	struct callout	 sc_callout;
165	struct proc	*sc_worker;
166
167	struct root_hold_token *sc_rootmount;
168};
169#define	sc_dprovider	sc_dconsumer->provider
170#define	sc_jprovider	sc_jconsumer->provider
171#define	sc_name		sc_dprovider->name
172
/*
 * Minimal singly-linked queue of bios.  The queue head is a plain
 * "struct bio *" lvalue (NULL when the queue is empty) and the elements are
 * chained through bio_next.  These are macros: arguments may be evaluated
 * more than once.
 */

/* Put bp at the front of the queue. */
#define	GJQ_INSERT_HEAD(head, bp)	do {				\
	(bp)->bio_next = (head);					\
	(head) = (bp);							\
} while (0)

/* Link bp right behind pbp; a NULL pbp means "insert at the front". */
#define	GJQ_INSERT_AFTER(head, bp, pbp)	do {				\
	if ((pbp) != NULL) {						\
		(bp)->bio_next = (pbp)->bio_next;			\
		(pbp)->bio_next = (bp);					\
	} else								\
		GJQ_INSERT_HEAD(head, bp);				\
} while (0)

/* First element of the queue, or NULL when it is empty. */
#define	GJQ_FIRST(head)	(head)

/*
 * Unlink bp from the queue and clear its bio_next.  When bp is not the
 * first element the list is walked to find its predecessor; the KASSERTs
 * fire if the end of the list is reached without finding bp.
 */
#define	GJQ_REMOVE(head, bp)	do {					\
	struct bio *_gjq_prev;						\
									\
	if ((head) == (bp)) {						\
		(head) = (bp)->bio_next;				\
	} else {							\
		_gjq_prev = (head);					\
		while (_gjq_prev->bio_next != NULL &&			\
		    _gjq_prev->bio_next != (bp))			\
			_gjq_prev = _gjq_prev->bio_next;		\
		KASSERT(_gjq_prev->bio_next != NULL,			\
		    ("NULL bio_next"));					\
		KASSERT(_gjq_prev->bio_next == (bp),			\
		    ("bio_next != bp"));				\
		_gjq_prev->bio_next = (bp)->bio_next;			\
	}								\
	(bp)->bio_next = NULL;						\
} while (0)

/* Visit every element of the queue in order, using bp as the cursor. */
#define GJQ_FOREACH(head, bp)						\
	for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next)
205
/* Magic string placed at the start of a journal header. */
#define	GJ_HEADER_MAGIC	"GJHDR"

/*
 * Journal header.  The structure is packed; jh_magic is sized to hold
 * GJ_HEADER_MAGIC including its terminating NUL.
 */
struct g_journal_header {
	char		jh_magic[sizeof(GJ_HEADER_MAGIC)];
	uint32_t	jh_journal_id;		/* ID of this journal. */
	uint32_t	jh_journal_next_id;	/* ID of the next journal. */
} __packed;
213
/*
 * One entry of a journal record header (see jrh_entries).  Packed, three
 * 64-bit fields.
 * NOTE(review): by their names, je_joffset is the position in the journal,
 * je_offset the position in the data provider and je_length the size of
 * the request -- confirm against g_journal.c.
 */
struct g_journal_entry {
	uint64_t	je_joffset;
	uint64_t	je_offset;
	uint64_t	je_length;
} __packed;
219
/* Magic string placed at the start of a record header. */
#define	GJ_RECORD_HEADER_MAGIC		"GJRHDR"
/* Number of entries carried by one record header. */
#define	GJ_RECORD_HEADER_NENTRIES	(20)

/*
 * Upper bound on the size of a single record: one sector of the journal
 * provider plus GJ_RECORD_HEADER_NENTRIES requests of MAXPHYS bytes each.
 */
#define	GJ_RECORD_MAX_SIZE(sc)						\
	((sc)->sc_jprovider->sectorsize +				\
	    GJ_RECORD_HEADER_NENTRIES * MAXPHYS)

/*
 * Wrap offset (an lvalue) back to the start of the journal space when a
 * maximum-size record placed at offset would reach or pass sc_jend.
 */
#define	GJ_VALIDATE_OFFSET(offset, sc)	do {				\
	if ((sc)->sc_jend <= (offset) + GJ_RECORD_MAX_SIZE(sc)) {	\
		(offset) = (sc)->sc_jstart;				\
		GJ_DEBUG(2, "Starting from the beginning (%s).",	\
		    (sc)->sc_name);					\
	}								\
} while (0)
231
232struct g_journal_record_header {
233	char		jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)];
234	uint32_t	jrh_journal_id;
235	uint16_t	jrh_nentries;
236	u_char		jrh_sum[8];
237	struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES];
238} __packed;
239
/*
 * Function types of the per-file-system hooks: "clean" takes a mount point
 * and returns an int, "dirty" takes a GEOM consumer and returns nothing.
 */
typedef int (g_journal_clean_t)(struct mount *mp);
typedef void (g_journal_dirty_t)(struct g_consumer *cp);

/* Descriptor tying a file system type name to its two hooks. */
struct g_journal_desc {
	const char		*jd_fstype;	/* File system type name. */
	g_journal_clean_t	*jd_clean;
	g_journal_dirty_t	*jd_dirty;
};

/* Supported file systems. */
extern const struct g_journal_desc g_journal_ufs;
251
/*
 * Record the current uptime in *bt, but only when the debug level is at
 * least lvl.
 */
#define	GJ_TIMER_START(lvl, bt)	do {					\
	if (g_journal_debug >= (lvl))					\
		binuptime(bt);						\
} while (0)

/*
 * When the debug level is at least lvl, print the printf-style message
 * followed by the time elapsed since *bt was taken, formatted as
 * seconds.microseconds.
 */
#define	GJ_TIMER_STOP(lvl, bt, ...)	do {				\
	if (g_journal_debug >= (lvl)) {					\
		struct bintime _gj_now;					\
		struct timeval _gj_elapsed;				\
									\
		/* elapsed = now - start, converted to a timeval. */	\
		binuptime(&_gj_now);					\
		bintime_sub(&_gj_now, bt);				\
		bintime2timeval(&_gj_now, &_gj_elapsed);		\
									\
		printf("GEOM_JOURNAL");					\
		if (g_journal_debug > 0)				\
			printf("[%u]", lvl);				\
		printf(": ");						\
		printf(__VA_ARGS__);					\
		printf(": %jd.%06jds\n",				\
		    (intmax_t)_gj_elapsed.tv_sec,			\
		    (intmax_t)_gj_elapsed.tv_usec);			\
	}								\
} while (0)
273#endif	/* _KERNEL */
274
/*
 * Provider type bits.  A provider of type GJ_TYPE_COMPLETE has both the
 * data and the journal bit set.
 */
#define	GJ_TYPE_DATA		0x01
#define	GJ_TYPE_JOURNAL		0x02
#define	GJ_TYPE_COMPLETE	(GJ_TYPE_DATA|GJ_TYPE_JOURNAL)

/*
 * Journal flag bits.
 * NOTE(review): presumably the values stored in md_flags -- confirm.
 */
#define	GJ_FLAG_CLEAN		0x01
#define	GJ_FLAG_CHECKSUM	0x02
281
282struct g_journal_metadata {
283	char		md_magic[16];	/* Magic value. */
284	uint32_t	md_version;	/* Version number. */
285	uint32_t	md_id;		/* Journal unique ID. */
286	uint8_t		md_type;	/* Provider type. */
287	uint64_t	md_jstart;	/* Journal space start offset. */
288	uint64_t	md_jend;	/* Journal space end offset. */
289	uint64_t	md_joffset;	/* Last known consistent journal offset. */
290	uint32_t	md_jid;		/* Last known consistent journal ID. */
291	uint64_t	md_flags;	/* Journal flags. */
292	char		md_provider[16]; /* Hardcoded provider. */
293	uint64_t	md_provsize;	/* Provider's size. */
294	u_char		md_hash[16];	/* MD5 hash. */
295};
296static __inline void
297journal_metadata_encode(struct g_journal_metadata *md, u_char *data)
298{
299	MD5_CTX ctx;
300
301	bcopy(md->md_magic, data, 16);
302	le32enc(data + 16, md->md_version);
303	le32enc(data + 20, md->md_id);
304	*(data + 24) = md->md_type;
305	le64enc(data + 25, md->md_jstart);
306	le64enc(data + 33, md->md_jend);
307	le64enc(data + 41, md->md_joffset);
308	le32enc(data + 49, md->md_jid);
309	le64enc(data + 53, md->md_flags);
310	bcopy(md->md_provider, data + 61, 16);
311	le64enc(data + 77, md->md_provsize);
312	MD5Init(&ctx);
313	MD5Update(&ctx, data, 85);
314	MD5Final(md->md_hash, &ctx);
315	bcopy(md->md_hash, data + 85, 16);
316}
317static __inline int
318journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md)
319{
320	MD5_CTX ctx;
321
322	md->md_id = le32dec(data + 20);
323	md->md_type = *(data + 24);
324	md->md_jstart = le64dec(data + 25);
325	md->md_jend = le64dec(data + 33);
326	md->md_joffset = le64dec(data + 41);
327	md->md_jid = le32dec(data + 49);
328	md->md_flags = le64dec(data + 53);
329	bcopy(data + 61, md->md_provider, 16);
330	md->md_provsize = le64dec(data + 77);
331	MD5Init(&ctx);
332	MD5Update(&ctx, data, 85);
333	MD5Final(md->md_hash, &ctx);
334	if (bcmp(md->md_hash, data + 85, 16) != 0)
335		return (EINVAL);
336	return (0);
337}
338static __inline int
339journal_metadata_decode(const u_char *data, struct g_journal_metadata *md)
340{
341	int error;
342
343	bcopy(data, md->md_magic, 16);
344	md->md_version = le32dec(data + 16);
345	switch (md->md_version) {
346	case 0:
347		error = journal_metadata_decode_v0(data, md);
348		break;
349	default:
350		error = EINVAL;
351		break;
352	}
353	return (error);
354}
355
356static __inline void
357journal_metadata_dump(const struct g_journal_metadata *md)
358{
359	static const char hex[] = "0123456789abcdef";
360	char hash[16 * 2 + 1];
361	u_int i;
362
363	printf("     magic: %s\n", md->md_magic);
364	printf("   version: %u\n", (u_int)md->md_version);
365	printf("        id: %u\n", (u_int)md->md_id);
366	printf("      type: %u\n", (u_int)md->md_type);
367	printf("     start: %ju\n", (uintmax_t)md->md_jstart);
368	printf("       end: %ju\n", (uintmax_t)md->md_jend);
369	printf("   joffset: %ju\n", (uintmax_t)md->md_joffset);
370	printf("       jid: %u\n", (u_int)md->md_jid);
371	printf("     flags: %u\n", (u_int)md->md_flags);
372	printf("hcprovider: %s\n", md->md_provider);
373	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
374	bzero(hash, sizeof(hash));
375	for (i = 0; i < 16; i++) {
376		hash[i * 2] = hex[md->md_hash[i] >> 4];
377		hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
378	}
379	printf("  MD5 hash: %s\n", hash);
380}
381#endif	/* !_G_JOURNAL_H_ */
382