1/*-
2 * Copyright (c) 2010-2012 Semihalf
3 * Copyright (c) 2008, 2009 Reinoud Zandijk
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * From: NetBSD: nilfs_vfsops.c,v 1.1 2009/07/18 16:31:42 reinoud Exp
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/fcntl.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/mount.h>
39#include <sys/namei.h>
40#include <sys/proc.h>
41#include <sys/priv.h>
42#include <sys/vnode.h>
43#include <sys/buf.h>
44#include <sys/sysctl.h>
45#include <sys/libkern.h>
46
47#include <geom/geom.h>
48#include <geom/geom_vfs.h>
49
50#include <machine/_inttypes.h>
51
52#include <fs/nandfs/nandfs_mount.h>
53#include <fs/nandfs/nandfs.h>
54#include <fs/nandfs/nandfs_subr.h>
55
56static MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount structure");
57
58#define	NANDFS_SET_SYSTEMFILE(vp) {	\
59	(vp)->v_vflag |= VV_SYSTEM;	\
60	vref(vp);			\
61	vput(vp); }
62
63#define	NANDFS_UNSET_SYSTEMFILE(vp) {	\
64	VOP_LOCK(vp, LK_EXCLUSIVE);	\
65	MPASS(vp->v_bufobj.bo_dirty.bv_cnt == 0); \
66	(vp)->v_vflag &= ~VV_SYSTEM;	\
67	vgone(vp);			\
68	vput(vp); }
69
/* Globals */
/* List of NANDFS devices; nandfs_mount_device() searches and extends it. */
struct _nandfs_devices nandfs_devices;

/* Parameters */
/* Verbosity knob; seeded from the "vfs.nandfs.verbose" tunable below. */
int nandfs_verbose = 0;

/*
 * SYSINIT hook: fetch the "vfs.nandfs.verbose" tunable into
 * nandfs_verbose.  The argument is not used.
 */
static void
nandfs_tunable_init(void *arg)
{

	TUNABLE_INT_FETCH("vfs.nandfs.verbose", &nandfs_verbose);
}
/* Register the hook at SI_SUB_VFS / SI_ORDER_ANY with a NULL argument. */
SYSINIT(nandfs_tunables, SI_SUB_VFS, SI_ORDER_ANY, nandfs_tunable_init, NULL);
83
84static SYSCTL_NODE(_vfs, OID_AUTO, nandfs, CTLFLAG_RD, 0, "NAND filesystem");
85static SYSCTL_NODE(_vfs_nandfs, OID_AUTO, mount, CTLFLAG_RD, 0,
86    "NANDFS mountpoints");
87SYSCTL_INT(_vfs_nandfs, OID_AUTO, verbose, CTLFLAG_RW, &nandfs_verbose, 0, "");
88
89#define NANDFS_CONSTR_INTERVAL	5
90int nandfs_sync_interval = NANDFS_CONSTR_INTERVAL; /* sync every 5 seconds */
91SYSCTL_UINT(_vfs_nandfs, OID_AUTO, sync_interval, CTLFLAG_RW,
92    &nandfs_sync_interval, 0, "");
93
94#define NANDFS_MAX_DIRTY_SEGS	5
95int nandfs_max_dirty_segs = NANDFS_MAX_DIRTY_SEGS; /* sync when 5 dirty seg */
96SYSCTL_UINT(_vfs_nandfs, OID_AUTO, max_dirty_segs, CTLFLAG_RW,
97    &nandfs_max_dirty_segs, 0, "");
98
99#define NANDFS_CPS_BETWEEN_SBLOCKS 5
100int nandfs_cps_between_sblocks = NANDFS_CPS_BETWEEN_SBLOCKS; /* write superblock every 5 checkpoints */
101SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cps_between_sblocks, CTLFLAG_RW,
102    &nandfs_cps_between_sblocks, 0, "");
103
104#define NANDFS_CLEANER_ENABLE 1
105int nandfs_cleaner_enable = NANDFS_CLEANER_ENABLE;
106SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_enable, CTLFLAG_RW,
107    &nandfs_cleaner_enable, 0, "");
108
109#define NANDFS_CLEANER_INTERVAL 5
110int nandfs_cleaner_interval = NANDFS_CLEANER_INTERVAL;
111SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_interval, CTLFLAG_RW,
112    &nandfs_cleaner_interval, 0, "");
113
114#define NANDFS_CLEANER_SEGMENTS 5
115int nandfs_cleaner_segments = NANDFS_CLEANER_SEGMENTS;
116SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_segments, CTLFLAG_RW,
117    &nandfs_cleaner_segments, 0, "");
118
/* Forward declarations of the mount helper and the VFS entry points. */
static int nandfs_mountfs(struct vnode *devvp, struct mount *mp);
static vfs_mount_t	nandfs_mount;
static vfs_root_t	nandfs_root;
static vfs_statfs_t	nandfs_statfs;
static vfs_unmount_t	nandfs_unmount;
static vfs_vget_t	nandfs_vget;
static vfs_sync_t	nandfs_sync;
/*
 * NULL-terminated list of mount option names.
 * NOTE(review): the consumer is outside this excerpt, presumably the
 * option filtering done in nandfs_mount(); confirm.
 */
static const char *nandfs_opts[] = {
	"snap", "from", "noatime", NULL
};
129
130/* System nodes */
131static int
132nandfs_create_system_nodes(struct nandfs_device *nandfsdev)
133{
134	int error;
135
136	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_DAT_INO,
137	    &nandfsdev->nd_super_root.sr_dat, &nandfsdev->nd_dat_node);
138	if (error)
139		goto errorout;
140
141	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_CPFILE_INO,
142	    &nandfsdev->nd_super_root.sr_cpfile, &nandfsdev->nd_cp_node);
143	if (error)
144		goto errorout;
145
146	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_SUFILE_INO,
147	    &nandfsdev->nd_super_root.sr_sufile, &nandfsdev->nd_su_node);
148	if (error)
149		goto errorout;
150
151	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_GC_INO,
152	    NULL, &nandfsdev->nd_gc_node);
153	if (error)
154		goto errorout;
155
156	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
157	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
158	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
159	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
160
161	DPRINTF(VOLUMES, ("System vnodes: dat: %p cp: %p su: %p\n",
162	    NTOV(nandfsdev->nd_dat_node), NTOV(nandfsdev->nd_cp_node),
163	    NTOV(nandfsdev->nd_su_node)));
164	return (0);
165
166errorout:
167	nandfs_dispose_node(&nandfsdev->nd_gc_node);
168	nandfs_dispose_node(&nandfsdev->nd_dat_node);
169	nandfs_dispose_node(&nandfsdev->nd_cp_node);
170	nandfs_dispose_node(&nandfsdev->nd_su_node);
171
172	return (error);
173}
174
175static void
176nandfs_release_system_nodes(struct nandfs_device *nandfsdev)
177{
178
179	if (!nandfsdev)
180		return;
181	if (nandfsdev->nd_refcnt > 0)
182		return;
183
184	if (nandfsdev->nd_gc_node)
185		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
186	if (nandfsdev->nd_dat_node)
187		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
188	if (nandfsdev->nd_cp_node)
189		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
190	if (nandfsdev->nd_su_node)
191		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
192}
193
194static int
195nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata)
196{
197	uint32_t fsdata_crc, comp_crc;
198
199	if (fsdata->f_magic != NANDFS_FSDATA_MAGIC)
200		return (0);
201
202	/* Preserve CRC */
203	fsdata_crc = fsdata->f_sum;
204
205	/* Calculate */
206	fsdata->f_sum = (0);
207	comp_crc = crc32((uint8_t *)fsdata, fsdata->f_bytes);
208
209	/* Restore */
210	fsdata->f_sum = fsdata_crc;
211
212	/* Check CRC */
213	return (fsdata_crc == comp_crc);
214}
215
216static int
217nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata,
218    struct nandfs_super_block *super)
219{
220	uint32_t super_crc, comp_crc;
221
222	/* Check super block magic */
223	if (super->s_magic != NANDFS_SUPER_MAGIC)
224		return (0);
225
226	/* Preserve CRC */
227	super_crc = super->s_sum;
228
229	/* Calculate */
230	super->s_sum = (0);
231	comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes);
232
233	/* Restore */
234	super->s_sum = super_crc;
235
236	/* Check CRC */
237	return (super_crc == comp_crc);
238}
239
240static void
241nandfs_calc_superblock_crc(struct nandfs_fsdata *fsdata,
242    struct nandfs_super_block *super)
243{
244	uint32_t comp_crc;
245
246	/* Calculate */
247	super->s_sum = 0;
248	comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes);
249
250	/* Restore */
251	super->s_sum = comp_crc;
252}
253
/*
 * Report whether the first 'size' bytes of 'area' are all 0xff.
 * Returns 1 if so (also when size is not positive), 0 otherwise.
 */
static int
nandfs_is_empty(u_char *area, int size)
{
	const u_char *cur = area;
	int left = size;

	while (left-- > 0) {
		if (*cur++ != 0xff)
			return (0);
	}

	return (1);
}
265
266static __inline int
267nandfs_sblocks_in_esize(struct nandfs_device *fsdev)
268{
269
270	return ((fsdev->nd_erasesize - NANDFS_SBLOCK_OFFSET_BYTES) /
271	    sizeof(struct nandfs_super_block));
272}
273
274static __inline int
275nandfs_max_sblocks(struct nandfs_device *fsdev)
276{
277
278	return (NANDFS_NFSAREAS * nandfs_sblocks_in_esize(fsdev));
279}
280
281static __inline int
282nandfs_sblocks_in_block(struct nandfs_device *fsdev)
283{
284
285	return (fsdev->nd_devblocksize / sizeof(struct nandfs_super_block));
286}
287
288static __inline int
289nandfs_sblocks_in_first_block(struct nandfs_device *fsdev)
290{
291	int n;
292
293	n = nandfs_sblocks_in_block(fsdev) -
294	    NANDFS_SBLOCK_OFFSET_BYTES / sizeof(struct nandfs_super_block);
295	if (n < 0)
296		n = 0;
297
298	return (n);
299}
300
301static int
302nandfs_write_superblock_at(struct nandfs_device *fsdev,
303    struct nandfs_fsarea *fstp)
304{
305	struct nandfs_super_block *super, *supert;
306	struct buf *bp;
307	int sb_per_sector, sbs_in_fsd, read_block;
308	int index, pos, error;
309	off_t offset;
310
311	DPRINTF(SYNC, ("%s: last_used %d nandfs_sblocks_in_esize %d\n",
312	    __func__, fstp->last_used, nandfs_sblocks_in_esize(fsdev)));
313	if (fstp->last_used == nandfs_sblocks_in_esize(fsdev) - 1)
314		index = 0;
315	else
316		index = fstp->last_used + 1;
317
318	super = &fsdev->nd_super;
319	supert = NULL;
320
321	sb_per_sector = nandfs_sblocks_in_block(fsdev);
322	sbs_in_fsd = sizeof(struct nandfs_fsdata) /
323	    sizeof(struct nandfs_super_block);
324	index += sbs_in_fsd;
325	offset = fstp->offset;
326
327	DPRINTF(SYNC, ("%s: offset %#jx s_last_pseg %#jx s_last_cno %#jx "
328	    "s_last_seq %#jx wtime %jd index %d\n", __func__, offset,
329	    super->s_last_pseg, super->s_last_cno, super->s_last_seq,
330	    super->s_wtime, index));
331
332	read_block = btodb(offset + ((index / sb_per_sector) * sb_per_sector)
333	    * sizeof(struct nandfs_super_block));
334
335	DPRINTF(SYNC, ("%s: read_block %#x\n", __func__, read_block));
336
337	if (index == sbs_in_fsd) {
338		error = nandfs_erase(fsdev, offset, fsdev->nd_erasesize);
339		if (error)
340			return (error);
341
342		error = bread(fsdev->nd_devvp, btodb(offset),
343		    fsdev->nd_devblocksize, NOCRED, &bp);
344		if (error) {
345			printf("NANDFS: couldn't read initial data: %d\n",
346			    error);
347			brelse(bp);
348			return (error);
349		}
350		memcpy(bp->b_data, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
351		/*
352		 * 0xff-out the rest. This bp could be cached, so potentially
353		 * b_data contains stale super blocks.
354		 *
355		 * We don't mind cached bp since most of the time we just add
356		 * super blocks to already 0xff-out b_data and don't need to
357		 * perform actual read.
358		 */
359		if (fsdev->nd_devblocksize > sizeof(fsdev->nd_fsdata))
360			memset(bp->b_data + sizeof(fsdev->nd_fsdata), 0xff,
361			    fsdev->nd_devblocksize - sizeof(fsdev->nd_fsdata));
362		error = bwrite(bp);
363		if (error) {
364			printf("NANDFS: cannot rewrite initial data at %jx\n",
365			    offset);
366			return (error);
367		}
368	}
369
370	error = bread(fsdev->nd_devvp, read_block, fsdev->nd_devblocksize,
371	    NOCRED, &bp);
372	if (error) {
373		brelse(bp);
374		return (error);
375	}
376
377	supert = (struct nandfs_super_block *)(bp->b_data);
378	pos = index % sb_per_sector;
379
380	DPRINTF(SYNC, ("%s: storing at %d\n", __func__, pos));
381	memcpy(&supert[pos], super, sizeof(struct nandfs_super_block));
382
383	/*
384	 * See comment above in code that performs erase.
385	 */
386	if (pos == 0)
387		memset(&supert[1], 0xff,
388		    (sb_per_sector - 1) * sizeof(struct nandfs_super_block));
389
390	error = bwrite(bp);
391	if (error) {
392		printf("NANDFS: cannot update superblock at %jx\n", offset);
393		return (error);
394	}
395
396	DPRINTF(SYNC, ("%s: fstp->last_used %d -> %d\n", __func__,
397	    fstp->last_used, index - sbs_in_fsd));
398	fstp->last_used = index - sbs_in_fsd;
399
400	return (0);
401}
402
403int
404nandfs_write_superblock(struct nandfs_device *fsdev)
405{
406	struct nandfs_super_block *super;
407	struct timespec ts;
408	int error;
409	int i, j;
410
411	vfs_timestamp(&ts);
412
413	super = &fsdev->nd_super;
414
415	super->s_last_pseg = fsdev->nd_last_pseg;
416	super->s_last_cno = fsdev->nd_last_cno;
417	super->s_last_seq = fsdev->nd_seg_sequence;
418	super->s_wtime = ts.tv_sec;
419
420	nandfs_calc_superblock_crc(&fsdev->nd_fsdata, super);
421
422	error = 0;
423	for (i = 0, j = fsdev->nd_last_fsarea; i < NANDFS_NFSAREAS;
424	    i++, j = (j + 1 % NANDFS_NFSAREAS)) {
425		if (fsdev->nd_fsarea[j].flags & NANDFS_FSSTOR_FAILED) {
426			DPRINTF(SYNC, ("%s: skipping %d\n", __func__, j));
427			continue;
428		}
429		error = nandfs_write_superblock_at(fsdev, &fsdev->nd_fsarea[j]);
430		if (error) {
431			printf("NANDFS: writing superblock at offset %d failed:"
432			    "%d\n", j * fsdev->nd_erasesize, error);
433			fsdev->nd_fsarea[j].flags |= NANDFS_FSSTOR_FAILED;
434		} else
435			break;
436	}
437
438	if (i == NANDFS_NFSAREAS) {
439		printf("NANDFS: superblock was not written\n");
440		/*
441		 * TODO: switch to read-only?
442		 */
443		return (error);
444	} else
445		fsdev->nd_last_fsarea = (j + 1) % NANDFS_NFSAREAS;
446
447	return (0);
448}
449
450static int
451nandfs_select_fsdata(struct nandfs_device *fsdev,
452    struct nandfs_fsdata *fsdatat, struct nandfs_fsdata **fsdata, int nfsds)
453{
454	int i;
455
456	*fsdata = NULL;
457	for (i = 0; i < nfsds; i++) {
458		DPRINTF(VOLUMES, ("%s: i %d f_magic %x f_crc %x\n", __func__,
459		    i, fsdatat[i].f_magic, fsdatat[i].f_sum));
460		if (!nandfs_check_fsdata_crc(&fsdatat[i]))
461			continue;
462		*fsdata = &fsdatat[i];
463		break;
464	}
465
466	return (*fsdata != NULL ? 0 : EINVAL);
467}
468
469static int
470nandfs_select_sb(struct nandfs_device *fsdev,
471    struct nandfs_super_block *supert, struct nandfs_super_block **super,
472    int nsbs)
473{
474	int i;
475
476	*super = NULL;
477	for (i = 0; i < nsbs; i++) {
478		if (!nandfs_check_superblock_crc(&fsdev->nd_fsdata, &supert[i]))
479			continue;
480		DPRINTF(SYNC, ("%s: i %d s_last_cno %jx s_magic %x "
481		    "s_wtime %jd\n", __func__, i, supert[i].s_last_cno,
482		    supert[i].s_magic, supert[i].s_wtime));
483		if (*super == NULL || supert[i].s_last_cno >
484		    (*super)->s_last_cno)
485			*super = &supert[i];
486	}
487
488	return (*super != NULL ? 0 : EINVAL);
489}
490
491static int
492nandfs_read_structures_at(struct nandfs_device *fsdev,
493    struct nandfs_fsarea *fstp, struct nandfs_fsdata *fsdata,
494    struct nandfs_super_block *super)
495{
496	struct nandfs_super_block *tsuper, *tsuperd;
497	struct buf *bp;
498	int error, read_size;
499	int i;
500	int offset;
501
502	offset = fstp->offset;
503
504	if (fsdev->nd_erasesize > MAXBSIZE)
505		read_size = MAXBSIZE;
506	else
507		read_size = fsdev->nd_erasesize;
508
509	error = bread(fsdev->nd_devvp, btodb(offset), read_size, NOCRED, &bp);
510	if (error) {
511		printf("couldn't read: %d\n", error);
512		brelse(bp);
513		fstp->flags |= NANDFS_FSSTOR_FAILED;
514		return (error);
515	}
516
517	tsuper = super;
518
519	memcpy(fsdata, bp->b_data, sizeof(struct nandfs_fsdata));
520	memcpy(tsuper, (bp->b_data + sizeof(struct nandfs_fsdata)),
521	    read_size - sizeof(struct nandfs_fsdata));
522	brelse(bp);
523
524	tsuper += (read_size - sizeof(struct nandfs_fsdata)) /
525	    sizeof(struct nandfs_super_block);
526
527	for (i = 1; i < fsdev->nd_erasesize / read_size; i++) {
528		error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
529		    read_size, NOCRED, &bp);
530		if (error) {
531			printf("couldn't read: %d\n", error);
532			brelse(bp);
533			fstp->flags |= NANDFS_FSSTOR_FAILED;
534			return (error);
535		}
536		memcpy(tsuper, bp->b_data, read_size);
537		tsuper += read_size / sizeof(struct nandfs_super_block);
538		brelse(bp);
539	}
540
541	tsuper -= 1;
542	fstp->last_used = nandfs_sblocks_in_esize(fsdev) - 1;
543	for (tsuperd = super - 1; (tsuper != tsuperd); tsuper -= 1) {
544		if (nandfs_is_empty((u_char *)tsuper, sizeof(*tsuper)))
545			fstp->last_used--;
546		else
547			break;
548	}
549
550	DPRINTF(VOLUMES, ("%s: last_used %d\n", __func__, fstp->last_used));
551
552	return (0);
553}
554
555static int
556nandfs_read_structures(struct nandfs_device *fsdev)
557{
558	struct nandfs_fsdata *fsdata, *fsdatat;
559	struct nandfs_super_block *sblocks, *ssblock;
560	int nsbs, nfsds, i;
561	int error = 0;
562	int nrsbs;
563
564	nfsds = NANDFS_NFSAREAS;
565	nsbs = nandfs_max_sblocks(fsdev);
566
567	fsdatat = malloc(sizeof(struct nandfs_fsdata) * nfsds, M_NANDFSTEMP,
568	    M_WAITOK | M_ZERO);
569	sblocks = malloc(sizeof(struct nandfs_super_block) * nsbs, M_NANDFSTEMP,
570	    M_WAITOK | M_ZERO);
571
572	nrsbs = 0;
573	for (i = 0; i < NANDFS_NFSAREAS; i++) {
574		fsdev->nd_fsarea[i].offset = i * fsdev->nd_erasesize;
575		error = nandfs_read_structures_at(fsdev, &fsdev->nd_fsarea[i],
576		    &fsdatat[i], sblocks + nrsbs);
577		if (error)
578			continue;
579		nrsbs += (fsdev->nd_fsarea[i].last_used + 1);
580		if (fsdev->nd_fsarea[fsdev->nd_last_fsarea].last_used >
581		    fsdev->nd_fsarea[i].last_used)
582			fsdev->nd_last_fsarea = i;
583	}
584
585	if (nrsbs == 0) {
586		printf("nandfs: no valid superblocks found\n");
587		error = EINVAL;
588		goto out;
589	}
590
591	error = nandfs_select_fsdata(fsdev, fsdatat, &fsdata, nfsds);
592	if (error)
593		goto out;
594	memcpy(&fsdev->nd_fsdata, fsdata, sizeof(struct nandfs_fsdata));
595
596	error = nandfs_select_sb(fsdev, sblocks, &ssblock, nsbs);
597	if (error)
598		goto out;
599
600	memcpy(&fsdev->nd_super, ssblock, sizeof(struct nandfs_super_block));
601out:
602	free(fsdatat, M_NANDFSTEMP);
603	free(sblocks, M_NANDFSTEMP);
604
605	if (error == 0)
606		DPRINTF(VOLUMES, ("%s: selected sb with w_time %jd "
607		    "last_pseg %#jx\n", __func__, fsdev->nd_super.s_wtime,
608		    fsdev->nd_super.s_last_pseg));
609
610	return (error);
611}
612
613static void
614nandfs_unmount_base(struct nandfs_device *nandfsdev)
615{
616	int error;
617
618	if (!nandfsdev)
619		return;
620
621	/* Remove all our information */
622	error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0);
623	if (error) {
624		/*
625		 * Flushing buffers failed when fs was umounting, can't do
626		 * much now, just printf error and continue with umount.
627		 */
628		nandfs_error("%s(): error:%d when umounting FS\n",
629		    __func__, error);
630	}
631
632	/* Release the device's system nodes */
633	nandfs_release_system_nodes(nandfsdev);
634}
635
636static void
637nandfs_get_ncleanseg(struct nandfs_device *nandfsdev)
638{
639	struct nandfs_seg_stat nss;
640
641	nandfs_get_seg_stat(nandfsdev, &nss);
642	nandfsdev->nd_clean_segs = nss.nss_ncleansegs;
643	DPRINTF(VOLUMES, ("nandfs_mount: clean segs: %jx\n",
644	    (uintmax_t)nandfsdev->nd_clean_segs));
645}
646
647
648static int
649nandfs_mount_base(struct nandfs_device *nandfsdev, struct mount *mp,
650    struct nandfs_args *args)
651{
652	uint32_t log_blocksize;
653	int error;
654
655	/* Flush out any old buffers remaining from a previous use. */
656	if ((error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0)))
657		return (error);
658
659	error = nandfs_read_structures(nandfsdev);
660	if (error) {
661		printf("nandfs: could not get valid filesystem structures\n");
662		return (error);
663	}
664
665	if (nandfsdev->nd_fsdata.f_rev_level != NANDFS_CURRENT_REV) {
666		printf("nandfs: unsupported file system revision: %d "
667		    "(supported is %d).\n", nandfsdev->nd_fsdata.f_rev_level,
668		    NANDFS_CURRENT_REV);
669		return (EINVAL);
670	}
671
672	if (nandfsdev->nd_fsdata.f_erasesize != nandfsdev->nd_erasesize) {
673		printf("nandfs: erasesize mismatch (device %#x, fs %#x)\n",
674		    nandfsdev->nd_erasesize, nandfsdev->nd_fsdata.f_erasesize);
675		return (EINVAL);
676	}
677
678	/* Get our blocksize */
679	log_blocksize = nandfsdev->nd_fsdata.f_log_block_size;
680	nandfsdev->nd_blocksize = (uint64_t) 1 << (log_blocksize + 10);
681	DPRINTF(VOLUMES, ("%s: blocksize:%x\n", __func__,
682	    nandfsdev->nd_blocksize));
683
684	DPRINTF(VOLUMES, ("%s: accepted super block with cp %#jx\n", __func__,
685	    (uintmax_t)nandfsdev->nd_super.s_last_cno));
686
687	/* Calculate dat structure parameters */
688	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_dat_mdt,
689	    nandfsdev->nd_fsdata.f_dat_entry_size);
690	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_ifile_mdt,
691	    nandfsdev->nd_fsdata.f_inode_size);
692
693	/* Search for the super root and roll forward when needed */
694	if (nandfs_search_super_root(nandfsdev)) {
695		printf("Cannot find valid SuperRoot\n");
696		return (EINVAL);
697	}
698
699	nandfsdev->nd_mount_state = nandfsdev->nd_super.s_state;
700	if (nandfsdev->nd_mount_state != NANDFS_VALID_FS) {
701		printf("FS is seriously damaged, needs repairing\n");
702		printf("aborting mount\n");
703		return (EINVAL);
704	}
705
706	/*
707	 * FS should be ok now. The superblock and the last segsum could be
708	 * updated from the repair so extract running values again.
709	 */
710	nandfsdev->nd_last_pseg = nandfsdev->nd_super.s_last_pseg;
711	nandfsdev->nd_seg_sequence = nandfsdev->nd_super.s_last_seq;
712	nandfsdev->nd_seg_num = nandfs_get_segnum_of_block(nandfsdev,
713	    nandfsdev->nd_last_pseg);
714	nandfsdev->nd_next_seg_num = nandfs_get_segnum_of_block(nandfsdev,
715	    nandfsdev->nd_last_segsum.ss_next);
716	nandfsdev->nd_ts.tv_sec = nandfsdev->nd_last_segsum.ss_create;
717	nandfsdev->nd_last_cno = nandfsdev->nd_super.s_last_cno;
718	nandfsdev->nd_fakevblk = 1;
719	nandfsdev->nd_last_ino  = NANDFS_USER_INO;
720	DPRINTF(VOLUMES, ("%s: last_pseg %#jx last_cno %#jx last_seq %#jx\n"
721	    "fsdev: last_seg: seq %#jx num %#jx, next_seg_num %#jx\n",
722	    __func__, (uintmax_t)nandfsdev->nd_last_pseg,
723	    (uintmax_t)nandfsdev->nd_last_cno,
724	    (uintmax_t)nandfsdev->nd_seg_sequence,
725	    (uintmax_t)nandfsdev->nd_seg_sequence,
726	    (uintmax_t)nandfsdev->nd_seg_num,
727	    (uintmax_t)nandfsdev->nd_next_seg_num));
728
729	DPRINTF(VOLUMES, ("nandfs_mount: accepted super root\n"));
730
731	/* Create system vnodes for DAT, CP and SEGSUM */
732	error = nandfs_create_system_nodes(nandfsdev);
733	if (error)
734		nandfs_unmount_base(nandfsdev);
735
736	nandfs_get_ncleanseg(nandfsdev);
737
738	return (error);
739}
740
741static void
742nandfs_unmount_device(struct nandfs_device *nandfsdev)
743{
744
745	/* Is there anything? */
746	if (nandfsdev == NULL)
747		return;
748
749	/* Remove the device only if we're the last reference */
750	nandfsdev->nd_refcnt--;
751	if (nandfsdev->nd_refcnt >= 1)
752		return;
753
754	MPASS(nandfsdev->nd_syncer == NULL);
755	MPASS(nandfsdev->nd_cleaner == NULL);
756	MPASS(nandfsdev->nd_free_base == NULL);
757
758	/* Unmount our base */
759	nandfs_unmount_base(nandfsdev);
760
761	/* Remove from our device list */
762	SLIST_REMOVE(&nandfs_devices, nandfsdev, nandfs_device, nd_next_device);
763
764	DROP_GIANT();
765	g_topology_lock();
766	g_vfs_close(nandfsdev->nd_gconsumer);
767	g_topology_unlock();
768	PICKUP_GIANT();
769
770	DPRINTF(VOLUMES, ("closing device\n"));
771
772	/* Clear our mount reference and release device node */
773	vrele(nandfsdev->nd_devvp);
774
775	dev_rel(nandfsdev->nd_devvp->v_rdev);
776
777	/* Free our device info */
778	cv_destroy(&nandfsdev->nd_sync_cv);
779	mtx_destroy(&nandfsdev->nd_sync_mtx);
780	cv_destroy(&nandfsdev->nd_clean_cv);
781	mtx_destroy(&nandfsdev->nd_clean_mtx);
782	mtx_destroy(&nandfsdev->nd_mutex);
783	lockdestroy(&nandfsdev->nd_seg_const);
784	free(nandfsdev, M_NANDFSMNT);
785}
786
787static int
788nandfs_check_mounts(struct nandfs_device *nandfsdev, struct mount *mp,
789    struct nandfs_args *args)
790{
791	struct nandfsmount *nmp;
792	uint64_t last_cno;
793
794	/* no double-mounting of the same checkpoint */
795	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
796		if (nmp->nm_mount_args.cpno == args->cpno)
797			return (EBUSY);
798	}
799
800	/* Allow readonly mounts without questioning here */
801	if (mp->mnt_flag & MNT_RDONLY)
802		return (0);
803
804	/* Read/write mount */
805	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
806		/* Only one RW mount on this device! */
807		if ((nmp->nm_vfs_mountp->mnt_flag & MNT_RDONLY)==0)
808			return (EROFS);
809		/* RDONLY on last mountpoint is device busy */
810		last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
811		if (nmp->nm_mount_args.cpno == last_cno)
812			return (EBUSY);
813	}
814
815	/* OK for now */
816	return (0);
817}
818
819static int
820nandfs_mount_device(struct vnode *devvp, struct mount *mp,
821    struct nandfs_args *args, struct nandfs_device **nandfsdev_p)
822{
823	struct nandfs_device *nandfsdev;
824	struct g_provider *pp;
825	struct g_consumer *cp;
826	struct cdev *dev;
827	uint32_t erasesize;
828	int error, size;
829	int ronly;
830
831	DPRINTF(VOLUMES, ("Mounting NANDFS device\n"));
832
833	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
834
835	/* Look up device in our nandfs_mountpoints */
836	*nandfsdev_p = NULL;
837	SLIST_FOREACH(nandfsdev, &nandfs_devices, nd_next_device)
838		if (nandfsdev->nd_devvp == devvp)
839			break;
840
841	if (nandfsdev) {
842		DPRINTF(VOLUMES, ("device already mounted\n"));
843		error = nandfs_check_mounts(nandfsdev, mp, args);
844		if (error)
845			return error;
846		nandfsdev->nd_refcnt++;
847		*nandfsdev_p = nandfsdev;
848
849		if (!ronly) {
850			DROP_GIANT();
851			g_topology_lock();
852			error = g_access(nandfsdev->nd_gconsumer, 0, 1, 0);
853			g_topology_unlock();
854			PICKUP_GIANT();
855		}
856		return (error);
857	}
858
859	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
860	dev = devvp->v_rdev;
861	dev_ref(dev);
862	DROP_GIANT();
863	g_topology_lock();
864	error = g_vfs_open(devvp, &cp, "nandfs", ronly ? 0 : 1);
865	pp = g_dev_getprovider(dev);
866	g_topology_unlock();
867	PICKUP_GIANT();
868	VOP_UNLOCK(devvp, 0);
869	if (error) {
870		dev_rel(dev);
871		return (error);
872	}
873
874	nandfsdev = malloc(sizeof(struct nandfs_device), M_NANDFSMNT, M_WAITOK | M_ZERO);
875
876	/* Initialise */
877	nandfsdev->nd_refcnt = 1;
878	nandfsdev->nd_devvp = devvp;
879	nandfsdev->nd_syncing = 0;
880	nandfsdev->nd_cleaning = 0;
881	nandfsdev->nd_gconsumer = cp;
882	cv_init(&nandfsdev->nd_sync_cv, "nandfssync");
883	mtx_init(&nandfsdev->nd_sync_mtx, "nffssyncmtx", NULL, MTX_DEF);
884	cv_init(&nandfsdev->nd_clean_cv, "nandfsclean");
885	mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF);
886	mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF);
887	lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT,
888	    LK_CANRECURSE);
889	STAILQ_INIT(&nandfsdev->nd_mounts);
890
891	nandfsdev->nd_devsize = pp->mediasize;
892	nandfsdev->nd_devblocksize = pp->sectorsize;
893
894	size = sizeof(erasesize);
895	error = g_io_getattr("NAND::blocksize", nandfsdev->nd_gconsumer, &size,
896	    &erasesize);
897	if (error) {
898		DPRINTF(VOLUMES, ("couldn't get erasesize: %d\n", error));
899
900		if (error == ENOIOCTL || error == EOPNOTSUPP) {
901			/*
902			 * We conclude that this is not NAND storage
903			 */
904			nandfsdev->nd_erasesize = NANDFS_DEF_ERASESIZE;
905			nandfsdev->nd_is_nand = 0;
906		} else {
907			DROP_GIANT();
908			g_topology_lock();
909			g_vfs_close(nandfsdev->nd_gconsumer);
910			g_topology_unlock();
911			PICKUP_GIANT();
912			dev_rel(dev);
913			free(nandfsdev, M_NANDFSMNT);
914			return (error);
915		}
916	} else {
917		nandfsdev->nd_erasesize = erasesize;
918		nandfsdev->nd_is_nand = 1;
919	}
920
921	DPRINTF(VOLUMES, ("%s: erasesize %x\n", __func__,
922	    nandfsdev->nd_erasesize));
923
924	/* Register nandfs_device in list */
925	SLIST_INSERT_HEAD(&nandfs_devices, nandfsdev, nd_next_device);
926
927	error = nandfs_mount_base(nandfsdev, mp, args);
928	if (error) {
929		/* Remove all our information */
930		nandfs_unmount_device(nandfsdev);
931		return (EINVAL);
932	}
933
934	nandfsdev->nd_maxfilesize = nandfs_get_maxfilesize(nandfsdev);
935
936	*nandfsdev_p = nandfsdev;
937	DPRINTF(VOLUMES, ("NANDFS device mounted ok\n"));
938
939	return (0);
940}
941
942static int
943nandfs_mount_checkpoint(struct nandfsmount *nmp)
944{
945	struct nandfs_cpfile_header *cphdr;
946	struct nandfs_checkpoint *cp;
947	struct nandfs_inode ifile_inode;
948	struct nandfs_node *cp_node;
949	struct buf *bp;
950	uint64_t ncp, nsn, cpno, fcpno, blocknr, last_cno;
951	uint32_t off, dlen;
952	int cp_per_block, error;
953
954	cpno = nmp->nm_mount_args.cpno;
955	if (cpno == 0)
956		cpno = nmp->nm_nandfsdev->nd_super.s_last_cno;
957
958	DPRINTF(VOLUMES, ("%s: trying to mount checkpoint number %"PRIu64"\n",
959	    __func__, cpno));
960
961	cp_node = nmp->nm_nandfsdev->nd_cp_node;
962
963	VOP_LOCK(NTOV(cp_node), LK_SHARED);
964	/* Get cpfile header from 1st block of cp file */
965	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
966	if (error) {
967		brelse(bp);
968		VOP_UNLOCK(NTOV(cp_node), 0);
969		return (error);
970	}
971
972	cphdr = (struct nandfs_cpfile_header *) bp->b_data;
973	ncp = cphdr->ch_ncheckpoints;
974	nsn = cphdr->ch_nsnapshots;
975
976	brelse(bp);
977
978	DPRINTF(VOLUMES, ("mount_nandfs: checkpoint header read in\n"));
979	DPRINTF(VOLUMES, ("\tNumber of checkpoints %"PRIu64"\n", ncp));
980	DPRINTF(VOLUMES, ("\tNumber of snapshots %"PRIu64"\n", nsn));
981
982	/* Read in our specified checkpoint */
983	dlen = nmp->nm_nandfsdev->nd_fsdata.f_checkpoint_size;
984	cp_per_block = nmp->nm_nandfsdev->nd_blocksize / dlen;
985
986	fcpno = cpno + NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1;
987	blocknr = fcpno / cp_per_block;
988	off = (fcpno % cp_per_block) * dlen;
989	error = nandfs_bread(cp_node, blocknr, NOCRED, 0, &bp);
990	if (error) {
991		brelse(bp);
992		VOP_UNLOCK(NTOV(cp_node), 0);
993		printf("mount_nandfs: couldn't read cp block %"PRIu64"\n",
994		    fcpno);
995		return (EINVAL);
996	}
997
998	/* Needs to be a valid checkpoint */
999	cp = (struct nandfs_checkpoint *) ((uint8_t *) bp->b_data + off);
1000	if (cp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
1001		printf("mount_nandfs: checkpoint marked invalid\n");
1002		brelse(bp);
1003		VOP_UNLOCK(NTOV(cp_node), 0);
1004		return (EINVAL);
1005	}
1006
1007	/* Is this really the checkpoint we want? */
1008	if (cp->cp_cno != cpno) {
1009		printf("mount_nandfs: checkpoint file corrupt? "
1010		    "expected cpno %"PRIu64", found cpno %"PRIu64"\n",
1011		    cpno, cp->cp_cno);
1012		brelse(bp);
1013		VOP_UNLOCK(NTOV(cp_node), 0);
1014		return (EINVAL);
1015	}
1016
1017	/* Check if it's a snapshot ! */
1018	last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
1019	if (cpno != last_cno) {
1020		/* Only allow snapshots if not mounting on the last cp */
1021		if ((cp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) == 0) {
1022			printf( "mount_nandfs: checkpoint %"PRIu64" is not a "
1023			    "snapshot\n", cpno);
1024			brelse(bp);
1025			VOP_UNLOCK(NTOV(cp_node), 0);
1026			return (EINVAL);
1027		}
1028	}
1029
1030	ifile_inode = cp->cp_ifile_inode;
1031	brelse(bp);
1032
1033	/* Get ifile inode */
1034	error = nandfs_get_node_raw(nmp->nm_nandfsdev, NULL, NANDFS_IFILE_INO,
1035	    &ifile_inode, &nmp->nm_ifile_node);
1036	if (error) {
1037		printf("mount_nandfs: can't read ifile node\n");
1038		VOP_UNLOCK(NTOV(cp_node), 0);
1039		return (EINVAL);
1040	}
1041
1042	NANDFS_SET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
1043	VOP_UNLOCK(NTOV(cp_node), 0);
1044	/* Get root node? */
1045
1046	return (0);
1047}
1048
1049static void
1050free_nandfs_mountinfo(struct mount *mp)
1051{
1052	struct nandfsmount *nmp = VFSTONANDFS(mp);
1053
1054	if (nmp == NULL)
1055		return;
1056
1057	free(nmp, M_NANDFSMNT);
1058}
1059
1060void
1061nandfs_wakeup_wait_sync(struct nandfs_device *nffsdev, int reason)
1062{
1063	char *reasons[] = {
1064	    "umount",
1065	    "vfssync",
1066	    "bdflush",
1067	    "fforce",
1068	    "fsync",
1069	    "ro_upd"
1070	};
1071
1072	DPRINTF(SYNC, ("%s: %s\n", __func__, reasons[reason]));
1073	mtx_lock(&nffsdev->nd_sync_mtx);
1074	if (nffsdev->nd_syncing)
1075		cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
1076	if (reason == SYNCER_UMOUNT)
1077		nffsdev->nd_syncer_exit = 1;
1078	nffsdev->nd_syncing = 1;
1079	wakeup(&nffsdev->nd_syncing);
1080	cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
1081
1082	mtx_unlock(&nffsdev->nd_sync_mtx);
1083}
1084
1085static void
1086nandfs_gc_finished(struct nandfs_device *nffsdev, int exit)
1087{
1088	int error;
1089
1090	mtx_lock(&nffsdev->nd_sync_mtx);
1091	nffsdev->nd_syncing = 0;
1092	DPRINTF(SYNC, ("%s: cleaner finish\n", __func__));
1093	cv_broadcast(&nffsdev->nd_sync_cv);
1094	mtx_unlock(&nffsdev->nd_sync_mtx);
1095	if (!exit) {
1096		error = tsleep(&nffsdev->nd_syncing, PRIBIO, "-",
1097		    hz * nandfs_sync_interval);
1098		DPRINTF(SYNC, ("%s: cleaner waked up: %d\n",
1099		    __func__, error));
1100	}
1101}
1102
1103static void
1104nandfs_syncer(struct nandfsmount *nmp)
1105{
1106	struct nandfs_device *nffsdev;
1107	struct mount *mp;
1108	int flags, error;
1109
1110	mp = nmp->nm_vfs_mountp;
1111	nffsdev = nmp->nm_nandfsdev;
1112	tsleep(&nffsdev->nd_syncing, PRIBIO, "-", hz * nandfs_sync_interval);
1113
1114	while (!nffsdev->nd_syncer_exit) {
1115		DPRINTF(SYNC, ("%s: syncer run\n", __func__));
1116		nffsdev->nd_syncing = 1;
1117
1118		flags = (nmp->nm_flags & (NANDFS_FORCE_SYNCER | NANDFS_UMOUNT));
1119
1120		error = nandfs_segment_constructor(nmp, flags);
1121		if (error)
1122			nandfs_error("%s: error:%d when creating segments\n",
1123			    __func__, error);
1124
1125		nmp->nm_flags &= ~flags;
1126
1127		nandfs_gc_finished(nffsdev, 0);
1128	}
1129
1130	MPASS(nffsdev->nd_cleaner == NULL);
1131	error = nandfs_segment_constructor(nmp,
1132	    NANDFS_FORCE_SYNCER | NANDFS_UMOUNT);
1133	if (error)
1134		nandfs_error("%s: error:%d when creating segments\n",
1135		    __func__, error);
1136	nandfs_gc_finished(nffsdev, 1);
1137	nffsdev->nd_syncer = NULL;
1138	MPASS(nffsdev->nd_free_base == NULL);
1139
1140	DPRINTF(SYNC, ("%s: exiting\n", __func__));
1141	kthread_exit();
1142}
1143
1144static int
1145start_syncer(struct nandfsmount *nmp)
1146{
1147	int error;
1148
1149	MPASS(nmp->nm_nandfsdev->nd_syncer == NULL);
1150
1151	DPRINTF(SYNC, ("%s: start syncer\n", __func__));
1152
1153	nmp->nm_nandfsdev->nd_syncer_exit = 0;
1154
1155	error = kthread_add((void(*)(void *))nandfs_syncer, nmp, NULL,
1156	    &nmp->nm_nandfsdev->nd_syncer, 0, 0, "nandfs_syncer");
1157
1158	if (error)
1159		printf("nandfs: could not start syncer: %d\n", error);
1160
1161	return (error);
1162}
1163
1164static int
1165stop_syncer(struct nandfsmount *nmp)
1166{
1167
1168	MPASS(nmp->nm_nandfsdev->nd_syncer != NULL);
1169
1170	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_UMOUNT);
1171
1172	DPRINTF(SYNC, ("%s: stop syncer\n", __func__));
1173	return (0);
1174}
1175
1176/*
1177 * Mount null layer
1178 */
1179static int
1180nandfs_mount(struct mount *mp)
1181{
1182	struct nandfsmount *nmp;
1183	struct vnode *devvp;
1184	struct nameidata nd;
1185	struct vfsoptlist *opts;
1186	struct thread *td;
1187	char *from;
1188	int error = 0, flags;
1189
1190	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
1191
1192	td = curthread;
1193	opts = mp->mnt_optnew;
1194
1195	if (vfs_filteropt(opts, nandfs_opts))
1196		return (EINVAL);
1197
1198	/*
1199	 * Update is a no-op
1200	 */
1201	if (mp->mnt_flag & MNT_UPDATE) {
1202		nmp = VFSTONANDFS(mp);
1203		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
1204			return (error);
1205		}
1206		if (!(nmp->nm_ronly) && vfs_flagopt(opts, "ro", NULL, 0)) {
1207			vn_start_write(NULL, &mp, V_WAIT);
1208			error = VFS_SYNC(mp, MNT_WAIT);
1209			if (error)
1210				return (error);
1211			vn_finished_write(mp);
1212
1213			flags = WRITECLOSE;
1214			if (mp->mnt_flag & MNT_FORCE)
1215				flags |= FORCECLOSE;
1216
1217			nandfs_wakeup_wait_sync(nmp->nm_nandfsdev,
1218			    SYNCER_ROUPD);
1219			error = vflush(mp, 0, flags, td);
1220			if (error)
1221				return (error);
1222
1223			nandfs_stop_cleaner(nmp->nm_nandfsdev);
1224			stop_syncer(nmp);
1225			DROP_GIANT();
1226			g_topology_lock();
1227			g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0);
1228			g_topology_unlock();
1229			PICKUP_GIANT();
1230			MNT_ILOCK(mp);
1231			mp->mnt_flag |= MNT_RDONLY;
1232			MNT_IUNLOCK(mp);
1233			nmp->nm_ronly = 1;
1234
1235		} else if ((nmp->nm_ronly) &&
1236		    !vfs_flagopt(opts, "ro", NULL, 0)) {
1237			/*
1238			 * Don't allow read-write snapshots.
1239			 */
1240			if (nmp->nm_mount_args.cpno != 0)
1241				return (EROFS);
1242			/*
1243			 * If upgrade to read-write by non-root, then verify
1244			 * that user has necessary permissions on the device.
1245			 */
1246			devvp = nmp->nm_nandfsdev->nd_devvp;
1247			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1248			error = VOP_ACCESS(devvp, VREAD | VWRITE,
1249			    td->td_ucred, td);
1250			if (error) {
1251				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
1252				if (error) {
1253					VOP_UNLOCK(devvp, 0);
1254					return (error);
1255				}
1256			}
1257
1258			VOP_UNLOCK(devvp, 0);
1259			DROP_GIANT();
1260			g_topology_lock();
1261			error = g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, 1,
1262			    0);
1263			g_topology_unlock();
1264			PICKUP_GIANT();
1265			if (error)
1266				return (error);
1267
1268			MNT_ILOCK(mp);
1269			mp->mnt_flag &= ~MNT_RDONLY;
1270			MNT_IUNLOCK(mp);
1271			error = start_syncer(nmp);
1272			if (error == 0)
1273				error = nandfs_start_cleaner(nmp->nm_nandfsdev);
1274			if (error) {
1275				DROP_GIANT();
1276				g_topology_lock();
1277				g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1,
1278				    0);
1279				g_topology_unlock();
1280				PICKUP_GIANT();
1281				return (error);
1282			}
1283
1284			nmp->nm_ronly = 0;
1285		}
1286		return (0);
1287	}
1288
1289	from = vfs_getopts(opts, "from", &error);
1290	if (error)
1291		return (error);
1292
1293	/*
1294	 * Find device node
1295	 */
1296	NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, from, curthread);
1297	error = namei(&nd);
1298	if (error)
1299		return (error);
1300	NDFREE(&nd, NDF_ONLY_PNBUF);
1301
1302	devvp = nd.ni_vp;
1303
1304	if (!vn_isdisk(devvp, &error)) {
1305		vput(devvp);
1306		return (error);
1307	}
1308
1309	/* Check the access rights on the mount device */
1310	error = VOP_ACCESS(devvp, VREAD, curthread->td_ucred, curthread);
1311	if (error)
1312		error = priv_check(curthread, PRIV_VFS_MOUNT_PERM);
1313	if (error) {
1314		vput(devvp);
1315		return (error);
1316	}
1317
1318	vfs_getnewfsid(mp);
1319
1320	error = nandfs_mountfs(devvp, mp);
1321	if (error)
1322		return (error);
1323	vfs_mountedfrom(mp, from);
1324
1325	return (0);
1326}
1327
1328static int
1329nandfs_mountfs(struct vnode *devvp, struct mount *mp)
1330{
1331	struct nandfsmount *nmp = NULL;
1332	struct nandfs_args *args = NULL;
1333	struct nandfs_device *nandfsdev;
1334	char *from;
1335	int error, ronly;
1336	char *cpno;
1337
1338	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
1339
1340	if (devvp->v_rdev->si_iosize_max != 0)
1341		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
1342	VOP_UNLOCK(devvp, 0);
1343
1344	if (mp->mnt_iosize_max > MAXPHYS)
1345		mp->mnt_iosize_max = MAXPHYS;
1346
1347	from = vfs_getopts(mp->mnt_optnew, "from", &error);
1348	if (error)
1349		goto error;
1350
1351	error = vfs_getopt(mp->mnt_optnew, "snap", (void **)&cpno, NULL);
1352	if (error == ENOENT)
1353		cpno = NULL;
1354	else if (error)
1355		goto error;
1356
1357	args = (struct nandfs_args *)malloc(sizeof(struct nandfs_args),
1358	    M_NANDFSMNT, M_WAITOK | M_ZERO);
1359
1360	if (cpno != NULL)
1361		args->cpno = strtoul(cpno, (char **)NULL, 10);
1362	else
1363		args->cpno = 0;
1364	args->fspec = from;
1365
1366	if (args->cpno != 0 && !ronly) {
1367		error = EROFS;
1368		goto error;
1369	}
1370
1371	printf("WARNING: NANDFS is considered to be a highly experimental "
1372	    "feature in FreeBSD.\n");
1373
1374	error = nandfs_mount_device(devvp, mp, args, &nandfsdev);
1375	if (error)
1376		goto error;
1377
1378	nmp = (struct nandfsmount *) malloc(sizeof(struct nandfsmount),
1379	    M_NANDFSMNT, M_WAITOK | M_ZERO);
1380
1381	mp->mnt_data = nmp;
1382	nmp->nm_vfs_mountp = mp;
1383	nmp->nm_ronly = ronly;
1384	MNT_ILOCK(mp);
1385	mp->mnt_flag |= MNT_LOCAL;
1386	MNT_IUNLOCK(mp);
1387	nmp->nm_nandfsdev = nandfsdev;
1388	/* Add our mountpoint */
1389	STAILQ_INSERT_TAIL(&nandfsdev->nd_mounts, nmp, nm_next_mount);
1390
1391	if (args->cpno > nandfsdev->nd_last_cno) {
1392		printf("WARNING: supplied checkpoint number (%jd) is greater "
1393		    "than last known checkpoint on filesystem (%jd). Mounting"
1394		    " checkpoint %jd\n", (uintmax_t)args->cpno,
1395		    (uintmax_t)nandfsdev->nd_last_cno,
1396		    (uintmax_t)nandfsdev->nd_last_cno);
1397		args->cpno = nandfsdev->nd_last_cno;
1398	}
1399
1400	/* Setting up other parameters */
1401	nmp->nm_mount_args = *args;
1402	free(args, M_NANDFSMNT);
1403	error = nandfs_mount_checkpoint(nmp);
1404	if (error) {
1405		nandfs_unmount(mp, MNT_FORCE);
1406		goto unmounted;
1407	}
1408
1409	if (!ronly) {
1410		error = start_syncer(nmp);
1411		if (error == 0)
1412			error = nandfs_start_cleaner(nmp->nm_nandfsdev);
1413		if (error)
1414			nandfs_unmount(mp, MNT_FORCE);
1415	}
1416
1417	return (0);
1418
1419error:
1420	if (args != NULL)
1421		free(args, M_NANDFSMNT);
1422
1423	if (nmp != NULL) {
1424		free(nmp, M_NANDFSMNT);
1425		mp->mnt_data = NULL;
1426	}
1427unmounted:
1428	return (error);
1429}
1430
1431static int
1432nandfs_unmount(struct mount *mp, int mntflags)
1433{
1434	struct nandfs_device *nandfsdev;
1435	struct nandfsmount *nmp;
1436	int error;
1437	int flags = 0;
1438
1439	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
1440
1441	if (mntflags & MNT_FORCE)
1442		flags |= FORCECLOSE;
1443
1444	nmp = mp->mnt_data;
1445	nandfsdev = nmp->nm_nandfsdev;
1446
1447	error = vflush(mp, 0, flags | SKIPSYSTEM, curthread);
1448	if (error)
1449		return (error);
1450
1451	if (!(nmp->nm_ronly)) {
1452		nandfs_stop_cleaner(nandfsdev);
1453		stop_syncer(nmp);
1454	}
1455
1456	if (nmp->nm_ifile_node)
1457		NANDFS_UNSET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
1458
1459	/* Remove our mount point */
1460	STAILQ_REMOVE(&nandfsdev->nd_mounts, nmp, nandfsmount, nm_next_mount);
1461
1462	/* Unmount the device itself when we're the last one */
1463	nandfs_unmount_device(nandfsdev);
1464
1465	free_nandfs_mountinfo(mp);
1466
1467	/*
1468	 * Finally, throw away the null_mount structure
1469	 */
1470	mp->mnt_data = 0;
1471	MNT_ILOCK(mp);
1472	mp->mnt_flag &= ~MNT_LOCAL;
1473	MNT_IUNLOCK(mp);
1474
1475	return (0);
1476}
1477
1478static int
1479nandfs_statfs(struct mount *mp, struct statfs *sbp)
1480{
1481	struct nandfsmount *nmp;
1482	struct nandfs_device *nandfsdev;
1483	struct nandfs_fsdata *fsdata;
1484	struct nandfs_super_block *sb;
1485	struct nandfs_block_group_desc *groups;
1486	struct nandfs_node *ifile;
1487	struct nandfs_mdt *mdt;
1488	struct buf *bp;
1489	int i, error;
1490	uint32_t entries_per_group;
1491	uint64_t files = 0;
1492
1493	nmp = mp->mnt_data;
1494	nandfsdev = nmp->nm_nandfsdev;
1495	fsdata = &nandfsdev->nd_fsdata;
1496	sb = &nandfsdev->nd_super;
1497	ifile = nmp->nm_ifile_node;
1498	mdt = &nandfsdev->nd_ifile_mdt;
1499	entries_per_group = mdt->entries_per_group;
1500
1501	VOP_LOCK(NTOV(ifile), LK_SHARED);
1502	error = nandfs_bread(ifile, 0, NOCRED, 0, &bp);
1503	if (error) {
1504		brelse(bp);
1505		VOP_UNLOCK(NTOV(ifile), 0);
1506		return (error);
1507	}
1508
1509	groups = (struct nandfs_block_group_desc *)bp->b_data;
1510
1511	for (i = 0; i < mdt->groups_per_desc_block; i++)
1512		files += (entries_per_group - groups[i].bg_nfrees);
1513
1514	brelse(bp);
1515	VOP_UNLOCK(NTOV(ifile), 0);
1516
1517	sbp->f_bsize = nandfsdev->nd_blocksize;
1518	sbp->f_iosize = sbp->f_bsize;
1519	sbp->f_blocks = fsdata->f_blocks_per_segment * fsdata->f_nsegments;
1520	sbp->f_bfree = sb->s_free_blocks_count;
1521	sbp->f_bavail = sbp->f_bfree;
1522	sbp->f_files = files;
1523	sbp->f_ffree = 0;
1524	return (0);
1525}
1526
1527static int
1528nandfs_root(struct mount *mp, int flags, struct vnode **vpp)
1529{
1530	struct nandfsmount *nmp = VFSTONANDFS(mp);
1531	struct nandfs_node *node;
1532	int error;
1533
1534	error = nandfs_get_node(nmp, NANDFS_ROOT_INO, &node);
1535	if (error)
1536		return (error);
1537
1538	KASSERT(NTOV(node)->v_vflag & VV_ROOT,
1539	    ("root_vp->v_vflag & VV_ROOT"));
1540
1541	*vpp = NTOV(node);
1542
1543	return (error);
1544}
1545
1546static int
1547nandfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1548{
1549	struct nandfsmount *nmp = VFSTONANDFS(mp);
1550	struct nandfs_node *node;
1551	int error;
1552
1553	error = nandfs_get_node(nmp, ino, &node);
1554	if (node)
1555		*vpp = NTOV(node);
1556
1557	return (error);
1558}
1559
1560static int
1561nandfs_sync(struct mount *mp, int waitfor)
1562{
1563	struct nandfsmount *nmp = VFSTONANDFS(mp);
1564
1565	DPRINTF(SYNC, ("%s: mp %p waitfor %d\n", __func__, mp, waitfor));
1566
1567	/*
1568	 * XXX: A hack to be removed soon
1569	 */
1570	if (waitfor == MNT_LAZY)
1571		return (0);
1572	if (waitfor == MNT_SUSPEND)
1573		return (0);
1574	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_VFS_SYNC);
1575	return (0);
1576}
1577
1578static struct vfsops nandfs_vfsops = {
1579	.vfs_init =		nandfs_init,
1580	.vfs_mount =		nandfs_mount,
1581	.vfs_root =		nandfs_root,
1582	.vfs_statfs =		nandfs_statfs,
1583	.vfs_uninit =		nandfs_uninit,
1584	.vfs_unmount =		nandfs_unmount,
1585	.vfs_vget =		nandfs_vget,
1586	.vfs_sync =		nandfs_sync,
1587};
1588
1589VFS_SET(nandfs_vfsops, nandfs, VFCF_LOOPBACK);
1590