/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/ufs/ffs/ffs_vfsops.c 331017 2018-03-15 19:08:33Z kevans $");

#include "opt_quota.h"
#include "opt_ufs.h"
#include "opt_ffs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/gjournal.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/uma.h>
#include <vm/vm_page.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

#include <ddb/ddb.h>

static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static int	ffs_sync_lazy(struct mount *mp);

static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;

static struct vfsops ufs_vfsops = {
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
	.vfs_quotactl =		ufs_quotactl,
	.vfs_root =		ufs_root,
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
	.vfs_susp_clean =	process_deferred_inactive,
};

VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);

static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

static struct buf_ops ffs_ops = {
	.bop_name =	"FFS",
	.bop_write =	ffs_bufwrite,
	.bop_strategy =	ffs_geom_strategy,
	.bop_sync =	bufsync,
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush =	bufbdflush,
#else
	.bop_bdflush =	ffs_bdflush,
#endif
};
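
/*
 * ffs_bufwrite() extends the stock bwrite() with support for background
 * writes of dirty cylinder group buffers (see the debug.dobkgrdwrite
 * sysctl and ffs_backgroundwritedone() below), while ffs_geom_strategy()
 * hands buffer I/O to the GEOM consumer opened at mount time.
 */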

/*
 * Note that userquota and groupquota options are not currently used
 * by UFS/FFS code and generally mount(8) does not pass those options
 * from userland, but they can be passed by loader(8) via
 * vfs.root.mountfrom.options.
 */
static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "groupquota",
    "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
    "nosymfollow", "sync", "union", "userquota", NULL };

static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp;
	struct thread *td;
	struct ufsmount *ump = NULL;
	struct fs *fs;
	pid_t fsckpid = 0;
	int error, error1, flags;
	uint64_t mntorflags;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	vfs_deleteopt(mp->mnt_optnew, "groupquota");
	vfs_deleteopt(mp->mnt_optnew, "userquota");

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	mntorflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}
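
	/*
	 * A snapshot is normally requested via mksnap_ffs(8) or an
	 * equivalent update mount, e.g. (illustrative):
	 *
	 *	mount -u -o snapshot /var/.snap/snap1 /var
	 *
	 * and is taken by the ffs_snapshot() call near the end of the
	 * MNT_UPDATE handling below.
	 */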

	if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
	    vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
		/*
		 * Once we have set the restricted PID, do not
		 * persist "fsckpid" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "fsckpid");
		vfs_deleteopt(mp->mnt_opt, "fsckpid");
		if (mp->mnt_flag & MNT_UPDATE) {
			if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
			     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
				vfs_mount_error(mp,
				    "Checker enable: Must be read-only");
				return (EINVAL);
			}
		} else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
			vfs_mount_error(mp,
			    "Checker enable: Must be read-only");
			return (EINVAL);
		}
		/* Set to -1 if we are done */
		if (fsckpid == 0)
			fsckpid = -1;
	}
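
	/*
	 * The "fsckpid" option is meant for fsck_ffs(8): it registers the
	 * PID of a checker process that is granted write access to an
	 * otherwise read-only filesystem, and a zero PID (mapped to -1
	 * above) later revokes that access again.
	 */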

	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
		if (mntorflags & MNT_ACLS) {
			vfs_mount_error(mp,
			    "\"acls\" and \"nfsv4acls\" options "
			    "are mutually exclusive");
			return (EINVAL);
		}
		mntorflags |= MNT_NFS4ACLS;
	}

	MNT_ILOCK(mp);
	mp->mnt_flag |= mntorflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		if (fsckpid == -1 && ump->um_fsckpid > 0) {
			if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
			    (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0)
				return (error);
			g_topology_lock();
			/*
			 * Return to normal read-only mode.
			 */
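			/*
			 * The g_access() arguments are deltas against the
			 * consumer's (read, write, exclusive) access counts,
			 * so this gives back the write access granted when
			 * the checker was registered.
			 */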
			error = g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			ump->um_fsckpid = 0;
		}
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (MOUNTEDSOFTDEP(mp)) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("WARNING: %s: update error: blocks %jd "
				    "files %d\n", fs->fs_fsmnt,
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (MOUNTEDSOFTDEP(mp))
				softdep_unmount(mp);
			g_topology_lock();
			/*
			 * Drop our write and exclusive access.
			 */
			g_access(ump->um_cp, 0, -1, -1);
			g_topology_unlock();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow the writers to note that the filesystem
			 * is ro now.
			 */
			vfs_write_resume(mp, 0);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td, 0)) != 0)
			return (error);
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If we are running a checker, do not allow upgrade.
			 */
			if (ump->um_fsckpid > 0) {
				vfs_mount_error(mp,
				    "Active checker, cannot upgrade to write");
				return (EINVAL);
			}
			/*
			 * If upgrading to read-write by non-root, then verify
			 * that the user has the necessary permissions on the
			 * device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags &
				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not properly "
					   "dismounted\n", fs->fs_fsmnt);
				} else {
					vfs_mount_error(mp,
					   "R/W mount of %s denied. %s.%s",
					   fs->fs_fsmnt,
					   "Filesystem is not clean - run fsck",
					   (fs->fs_flags & FS_SUJ) == 0 ? "" :
					   " Forced mount will invalidate"
					   " journal contents");
					return (EPERM);
				}
			}
			g_topology_lock();
			/*
			 * Request exclusive write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 1);
			g_topology_unlock();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			fs->fs_mtime = time_second;
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
				vn_finished_write(mp);
				return (error);
			}
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vn_finished_write(mp);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates, stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (MOUNTEDSOFTDEP(mp)) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep the MNT_ACLS flag if it is stored in the superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}
		/*
		 * If this is a request from fsck to clean up the filesystem,
		 * then allow the specified pid to proceed.
		 */
		if (fsckpid > 0) {
			if (ump->um_fsckpid != 0) {
				vfs_mount_error(mp,
				    "Active checker already running on %s",
				    fs->fs_fsmnt);
				return (EINVAL);
			}
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
			    ("soft updates enabled on read-only file system"));
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			if (error) {
				vfs_mount_error(mp,
				    "Checker activation failed on %s",
				    fs->fs_fsmnt);
				return (error);
			}
			ump->um_fsckpid = fsckpid;
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			fs->fs_mtime = time_second;
			fs->fs_fmod = 1;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT, 0);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));

		/*
		 * Must not call namei() while owning busy ref.
		 */
		vfs_unbusy(mp);
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	error = namei(&ndp);
	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
		/*
		 * Unmount does not start if MNT_UPDATE is set.  Mount
		 * update busies mp before setting MNT_UPDATE.  We
		 * must be able to retain our busy ref successfully,
		 * without sleep.
		 */
		error1 = vfs_busy(mp, MBF_NOWAIT);
		MPASS(error1 == 0);
	}
	if (error != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mounting by non-root, then verify that the user has the
	 * necessary permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it's not the same vnode, or at least the same device,
		 * then it's not correct.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount_alloc() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			vrele(devvp);
			return (error);
		}
		if (fsckpid > 0) {
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
			    ("soft updates enabled on read-only file system"));
			ump = VFSTOUFS(mp);
			fs = ump->um_fs;
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			if (error) {
				printf("WARNING: %s: Checker activation "
				    "failed\n", fs->fs_fsmnt);
			} else {
				ump->um_fsckpid = fsckpid;
				if (fs->fs_snapinum[0] != 0)
					ffs_snapshot_mount(mp);
				fs->fs_mtime = time_second;
				fs->fs_clean = 0;
				(void) ffs_sbupdate(ump, MNT_WAIT, 0);
			}
		}
	}
	vfs_mountedfrom(mp, fspec);
	return (0);
}

/*
 * Compatibility with the old mount(2) system call.
 */

static int
ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
{
	struct ufs_args args;
	struct export_args exp;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);
	vfs_oexport_conv(&args.export, &exp);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &exp, sizeof(exp));
	error = kernel_mount(ma, flags);

	return (error);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). If the 'force' flag
 * is 0, the filesystem must be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
 *	   writers, if requested.
 *	6) invalidate all cached file data.
 *	7) re-read inode data for all active vnodes.
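 *
 * In practice this is reached via "mount -u -o reload" (e.g. on the
 * root filesystem after fsck(8) has repaired on-disk structures);
 * mount(8) sets MNT_RELOAD on the update mount handled in ffs_mount()
 * above.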
 */
int
ffs_reload(struct mount *mp, struct thread *td, int flags)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, error;
	u_long size;
	int32_t *lp;

	ump = VFSTOUFS(mp);

	MNT_ILOCK(mp);
	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
		MNT_IUNLOCK(mp);
		return (EINVAL);
	}
	MNT_IUNLOCK(mp);

	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	newfs = (struct fs *)bp->b_data;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (e.g. fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	newfs->fs_ronly = fs->fs_ronly;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: reload pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	free(fs->fs_csp, M_UFSMNT);
	space = malloc(size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	if ((flags & FFSR_UNSUSPEND) != 0) {
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
		wakeup(&mp->mnt_flag);
		MNT_IUNLOCK(mp);
	}

loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * Skip syncer vnode.
		 */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		/*
		 * Step 6: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 7: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}
	return (0);
}

/*
 * Possible superblock locations ordered from most to least likely.
 */
static int sblock_try[] = SBLOCKSEARCH;
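
/*
 * At this revision SBLOCKSEARCH (from <ufs/ffs/fs.h>) expands to
 * { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }, i.e.
 * byte offsets 65536, 8192, 0 and 262144, with -1 terminating the
 * search loop in ffs_mountfs() below.
 */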

/*
 * Common code for mount and mountroot
 */
static int
ffs_mountfs(devvp, mp, td)
	struct vnode *devvp;
	struct mount *mp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	struct cdev *dev;
	void *space;
	ufs2_daddr_t sblockloc;
	int error, i, blks, len, ronly;
	u_long size;
	int32_t *lp;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;

	bp = NULL;
	ump = NULL;
	cred = td ? td->td_ucred : NOCRED;
	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;

	KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
	dev = devvp->v_rdev;
	if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
	    (uintptr_t)mp) == 0) {
		VOP_UNLOCK(devvp, 0);
		return (EBUSY);
	}
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
	g_topology_unlock();
	if (error != 0) {
		atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
		VOP_UNLOCK(devvp, 0);
		return (error);
	}
	dev_ref(dev);
	devvp->v_bufobj.bo_ops = &ffs_ops;
	VOP_UNLOCK(devvp, 0);
	if (dev->si_iosize_max != 0)
		mp->mnt_iosize_max = dev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	fs = NULL;
	sblockloc = 0;
	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; sblock_try[i] != -1; i++) {
		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
			error = EINVAL;
			vfs_mount_error(mp,
			    "Invalid sectorsize %d for superblock size %d",
			    cp->provider->sectorsize, SBLOCKSIZE);
			goto out;
		}
		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
		    cred, &bp)) != 0)
			goto out;
		fs = (struct fs *)bp->b_data;
		sblockloc = sblock_try[i];
		if ((fs->fs_magic == FS_UFS1_MAGIC ||
		     (fs->fs_magic == FS_UFS2_MAGIC &&
		      (fs->fs_sblockloc == sblockloc ||
		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
		    fs->fs_bsize <= MAXBSIZE &&
		    fs->fs_bsize >= sizeof(struct fs))
			break;
		brelse(bp);
		bp = NULL;
	}
	if (sblock_try[i] == -1) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
		     (fs->fs_flags & FS_DOSOFTDEP))) {
			printf("WARNING: %s was not properly dismounted\n",
			    fs->fs_fsmnt);
		} else {
			vfs_mount_error(mp, "R/W mount of %s denied. %s%s",
			    fs->fs_fsmnt, "Filesystem is not clean - run fsck.",
			    (fs->fs_flags & FS_SUJ) == 0 ? "" :
			    " Forced mount will invalidate journal contents");
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("WARNING: %s: lost blocks %jd files %d\n",
			    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: mount pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		len = 1024;
		mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &len,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			printf("WARNING: %s: GJOURNAL flag on fs "
			    "but no gjournal provider below\n",
			    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf("WARNING: %s: GJOURNAL flag on fs but no "
		    "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	ump->um_rdonly = ffs_rdonly;
	ump->um_snapgone = ffs_snapgone;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBLOCKSIZE)
		bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	ffs_oldfscompat_read(fs, ump, sblockloc);
	fs->fs_ronly = ronly;
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = malloc(size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: multilabel flag on fs but "
		    "no MAC support\n", mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_NFS4ACLS)
			printf("WARNING: %s: ACLs flag on fs conflicts with "
			    "\"nfsv4acls\" mount option; option ignored\n",
			    mp->mnt_stat.f_mntonname);
		mp->mnt_flag &= ~MNT_NFS4ACLS;
		mp->mnt_flag |= MNT_ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_ACLS)
			printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
			    "with \"acls\" mount option; option ignored\n",
			    mp->mnt_stat.f_mntonname);
		mp->mnt_flag &= ~MNT_ACLS;
		mp->mnt_flag |= MNT_NFS4ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
		    "ACLs support\n", mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_TRIM) != 0) {
		len = sizeof(int);
		if (g_io_getattr("GEOM::candelete", cp, &len,
		    &ump->um_candelete) == 0) {
			if (!ump->um_candelete)
				printf("WARNING: %s: TRIM flag on fs but disk "
				    "does not support TRIM\n",
				    mp->mnt_stat.f_mntonname);
		} else {
			printf("WARNING: %s: TRIM flag on fs but disk does "
			    "not confirm that it supports TRIM\n",
			    mp->mnt_stat.f_mntonname);
			ump->um_candelete = 0;
		}
		if (ump->um_candelete) {
			ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
			    taskqueue_thread_enqueue, &ump->um_trim_tq);
			taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
			    "%s trim", mp->mnt_stat.f_mntonname);
		}
	}
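
	/*
	 * With um_candelete set, block frees can be queued to the "trim"
	 * taskqueue above as asynchronous BIO_DELETE requests instead of
	 * being issued synchronously; ffs_unmount() waits for in-flight
	 * trims and drains this queue before tearing the mount down.
	 */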

	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
	mp->mnt_stat.f_iosize = fs->fs_bsize;

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * This will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		fs->fs_mtime = time_second;
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			ffs_flushfiles(mp, FORCECLOSE, td);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem state information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
	    MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
	MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 *	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
	return (0);
out:
	if (bp)
		brelse(bp);
	if (cp != NULL) {
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
	}
	if (ump) {
		mtx_destroy(UFS_MTX(ump));
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
	dev_rel(dev);
	return (error);
}

#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
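
	/*
	 * For example, with a 16K block size the cap computed above is
	 * 2^31 * 16384 - 1 = 2^45 - 1 bytes (just under 32TB), while the
	 * pre-4.4BSD inode format above is limited to 2^31 - 1 bytes.
	 */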
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
void
ffs_oldfscompat_write(fs, ump)
	struct fs *fs;
	struct ufsmount *ump;
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
	}
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
		fs->fs_save_cgsize = 0;
	}
}

/*
 * unmount system call
 */
static int
ffs_unmount(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	struct thread *td;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags, susp;
#ifdef UFS_EXTATTR
	int e_restart;
#endif

	flags = 0;
	td = curthread;
	fs = ump->um_fs;
	susp = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		susp = fs->fs_ronly == 0;
	}
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("WARNING: unmount %s: ufs_extattr_stop "
			    "returned errno %d\n", mp->mnt_stat.f_mntonname,
			    error);
		e_restart = 0;
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
		e_restart = 1;
	}
#endif
	if (susp) {
		error = vfs_write_suspend_umnt(mp);
		if (error != 0)
			goto fail1;
	}
	if (MOUNTEDSOFTDEP(mp))
		error = softdep_flushfiles(mp, flags, td);
	else
		error = ffs_flushfiles(mp, flags, td);
	if (error != 0 && error != ENXIO)
		goto fail;

	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: unmount %s: pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	if (MOUNTEDSOFTDEP(mp))
		softdep_unmount(mp);
	if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (error && error != ENXIO) {
			fs->fs_clean = 0;
			goto fail;
		}
	}
	if (susp)
		vfs_write_resume(mp, VR_START_WRITE);
	if (ump->um_trim_tq != NULL) {
		while (ump->um_trim_inflight != 0)
			pause("ufsutr", hz);
		taskqueue_drain_all(ump->um_trim_tq);
		taskqueue_free(ump->um_trim_tq);
	}
	g_topology_lock();
	if (ump->um_fsckpid > 0) {
		/*
		 * Return to normal read-only mode.
		 */
		error = g_access(ump->um_cp, 0, -1, 0);
		ump->um_fsckpid = 0;
	}
	g_vfs_close(ump->um_cp);
	g_topology_unlock();
	atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
	vrele(ump->um_devvp);
	dev_rel(ump->um_dev);
	mtx_destroy(UFS_MTX(ump));
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if (td->td_su == mp) {
		td->td_su = NULL;
		vfs_rel(mp);
	}
	return (error);

fail:
	if (susp)
		vfs_write_resume(mp, VR_START_WRITE);
fail1:
#ifdef UFS_EXTATTR
	if (e_restart) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		(void) ufs_extattr_autostart(mp, td);
#endif
	}
#endif

	return (error);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int qerror, error;

	ump = VFSTOUFS(mp);
	qerror = 0;
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			error = quotaoff(td, mp, i);
			if (error != 0) {
				if ((flags & EARLYFLUSH) == 0)
					return (error);
				else
					qerror = error;
			}
		}

		/*
		 * Here we fall through to vflush again to ensure that
		 * we have gotten rid of all the system vnodes, unless
		 * quotas must not be closed.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}

	/*
	 * Do not close system files if quotas were not closed, to be
	 * able to sync the remaining dquots.  The freeblks softupdate
	 * workitems might hold a reference on a dquot, preventing
	 * quotaoff() from completing.  Next round of
	 * softdep_flushworklist() iteration should process the
	 * blockers, allowing the next run of quotaoff() to finally
	 * flush held dquots.
	 *
	 * Otherwise, flush all the files.
	 */
	if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
		return (error);

	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}

/*
 * Get filesystem statistics.
 */
static int
ffs_statfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = NAME_MAX;
	return (0);
}

static bool
sync_doupdate(struct inode *ip)
{

	return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
	    IN_UPDATE)) != 0);
}

/*
 * For a lazy sync, we only care about access times, quotas and the
 * superblock.  Other filesystem changes are already converted to
 * cylinder group block or inode block updates and are written to
 * disk by the syncer.
 */
static int
ffs_sync_lazy(mp)
	struct mount *mp;
{
	struct vnode *mvp, *vp;
	struct inode *ip;
	struct thread *td;
	int allerror, error;

	allerror = 0;
	td = curthread;
	if ((mp->mnt_flag & MNT_NOATIME) != 0)
		goto qupdate;
	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);

		/*
		 * The IN_ACCESS flag is converted to IN_MODIFIED by
		 * ufs_close() and ufs_getattr() by the calls to
		 * ufs_itimes_locked(), without a subsequent UFS_UPDATE().
		 * Also test all the other timestamp flags, to pick up
		 * any other cases that could be missed.
		 */
		if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
		    td)) != 0)
			continue;
		if (sync_doupdate(ip))
			error = ffs_update(vp, 0);
		if (error != 0)
			allerror = error;
		vput(vp);
	}

qupdate:
#ifdef QUOTA
	qsync(mp);
#endif

	if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0)
		allerror = error;
	return (allerror);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked busy using
 * vfs_busy().
 */
static int
ffs_sync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *mvp, *vp, *devvp;
	struct thread *td;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	suspend = 0;
	suspended = 0;
	td = curthread;
	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
		panic("%s: ffs_sync: modification on read-only filesystem",
		    fs->fs_fsmnt);
	if (waitfor == MNT_LAZY) {
		if (!rebooting)
			return (ffs_sync_lazy(mp));
		waitfor = MNT_NOWAIT;
	}

	/*
	 * Write back each (modified) inode.
	 */
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
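	/*
	 * MNT_SUSPEND is requested by vfs_write_suspend(): it is handled
	 * as a MNT_WAIT sync, and once the device has no dirty buffers
	 * and the softdep dependency and secondary-write snapshots are
	 * stable, MNTK_SUSPEND2 | MNTK_SUSPENDED are set further below.
	 */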
	if (waitfor == MNT_WAIT)
		lockreq = LK_EXCLUSIVE;
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
loop:
	/* Grab snapshot of secondary write counts */
	MNT_ILOCK(mp);
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;
	MNT_IUNLOCK(mp);

	/* Grab snapshot of softdep dependency counts */
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);

	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * Depend on the vnode interlock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		if ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0)
			allerror = error;
		vput(vp);
	}
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT || rebooting) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count)
			goto loop;
	}
#ifdef QUOTA
	qsync(mp);
#endif

	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(devvp, waitfor, td);
		VOP_UNLOCK(devvp, 0);
		if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
			error = ffs_sbupdate(ump, waitfor, 0);
		if (error != 0)
			allerror = error;
		if (allerror == 0 && waitfor == MNT_WAIT)
			goto loop;
	} else if (suspend != 0) {
		if (softdep_check_suspend(mp,
					  devvp,
					  softdep_deps,
					  softdep_accdeps,
					  secondary_writes,
					  secondary_accwrites) != 0) {
			MNT_IUNLOCK(mp);
			goto loop;	/* More work needed */
		}
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}

int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	return (ffs_vgetf(mp, ino, flags, vpp, 0));
}

int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	int error;

	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such a rare case as simultaneous creation of a
	 * vnode for the same ino by different processes. We just allow
	 * them to race and check later to decide who wins. Let the race
	 * begin!
	 */
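	/*
	 * The race is resolved by vfs_hash_insert() below: the loser gets
	 * the winner's vnode back in *vpp, and its own partially
	 * constructed vnode is released inside vfs_hash_insert().
	 */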
1701
1702	ump = VFSTOUFS(mp);
1703	fs = ump->um_fs;
1704	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1705
1706	/* Allocate a new vnode/inode. */
1707	error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
1708	    &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
1709	if (error) {
1710		*vpp = NULL;
1711		uma_zfree(uma_inode, ip);
1712		return (error);
1713	}
1714	/*
1715	 * FFS supports recursive locking.
1716	 */
1717	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1718	VN_LOCK_AREC(vp);
1719	vp->v_data = ip;
1720	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1721	ip->i_vnode = vp;
1722	ip->i_ump = ump;
1723	ip->i_number = ino;
1724	ip->i_ea_refs = 0;
1725	ip->i_nextclustercg = -1;
1726	ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
1727#ifdef QUOTA
1728	{
1729		int i;
1730		for (i = 0; i < MAXQUOTAS; i++)
1731			ip->i_dquot[i] = NODQUOT;
1732	}
1733#endif
1734
1735	if (ffs_flags & FFSV_FORCEINSMQ)
1736		vp->v_vflag |= VV_FORCEINSMQ;
1737	error = insmntque(vp, mp);
1738	if (error != 0) {
1739		uma_zfree(uma_inode, ip);
1740		*vpp = NULL;
1741		return (error);
1742	}
1743	vp->v_vflag &= ~VV_FORCEINSMQ;
1744	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1745	if (error || *vpp != NULL)
1746		return (error);
1747
1748	/* Read in the disk contents for the inode, copy into the inode. */
1749	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1750	    (int)fs->fs_bsize, NOCRED, &bp);
1751	if (error) {
1752		/*
1753		 * The inode does not contain anything useful, so it would
1754		 * be misleading to leave it on its hash chain. With mode
1755		 * still zero, it will be unlinked and returned to the free
1756		 * list by vput().
1757		 */
1758		brelse(bp);
1759		vput(vp);
1760		*vpp = NULL;
1761		return (error);
1762	}
1763	if (I_IS_UFS1(ip))
1764		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1765	else
1766		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1767	ffs_load_inode(bp, ip, fs, ino);
1768	if (DOINGSOFTDEP(vp))
1769		softdep_load_inodeblock(ip);
1770	else
1771		ip->i_effnlink = ip->i_nlink;
1772	bqrelse(bp);
1773
1774	/*
1775	 * Initialize the vnode from the inode, check for aliases.
1776	 * Note that the underlying vnode may have changed.
1777	 */
1778	error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2,
1779	    &vp);
1780	if (error) {
1781		vput(vp);
1782		*vpp = NULL;
1783		return (error);
1784	}
1785
1786	/*
1787	 * Finish inode initialization.
1788	 */
1789	if (vp->v_type != VFIFO) {
1790		/* FFS supports shared locking for all files except fifos. */
1791		VN_LOCK_ASHARE(vp);
1792	}
1793
1794	/*
1795	 * Set up a generation number for this inode if it does not
1796	 * already have one. This should only happen on old filesystems.
1797	 */
1798	if (ip->i_gen == 0) {
1799		while (ip->i_gen == 0)
1800			ip->i_gen = arc4random();
1801		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1802			ip->i_flag |= IN_MODIFIED;
1803			DIP_SET(ip, i_gen, ip->i_gen);
1804		}
1805	}
1806#ifdef MAC
1807	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1808		/*
1809		 * If this vnode is already allocated, and we're running
1810		 * multi-label, attempt to perform a label association
1811		 * from the extended attributes on the inode.
1812		 */
1813		error = mac_vnode_associate_extattr(mp, vp);
1814		if (error) {
1815			/* ufs_inactive will release ip->i_devvp ref. */
1816			vput(vp);
1817			*vpp = NULL;
1818			return (error);
1819		}
1820	}
1821#endif
1822
1823	*vpp = vp;
1824	return (0);
1825}
1826
1827/*
1828 * File handle to vnode
1829 *
1830 * Have to be really careful about stale file handles:
1831 * - check that the inode number is valid
1832 * - for UFS2 check that the inode number is initialized
1833 * - call ffs_vget() to get the locked inode
1834 * - check for an unallocated inode (i_mode == 0)
1835 * - check that the given client host has export rights and return
1836 *   those rights via. exflagsp and credanonp
1837 */
1838static int
1839ffs_fhtovp(mp, fhp, flags, vpp)
1840	struct mount *mp;
1841	struct fid *fhp;
1842	int flags;
1843	struct vnode **vpp;
1844{
1845	struct ufid *ufhp;
1846	struct ufsmount *ump;
1847	struct fs *fs;
1848	struct cg *cgp;
1849	struct buf *bp;
1850	ino_t ino;
1851	u_int cg;
1852	int error;
1853
1854	ufhp = (struct ufid *)fhp;
1855	ino = ufhp->ufid_ino;
1856	ump = VFSTOUFS(mp);
1857	fs = ump->um_fs;
1858	if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
1859		return (ESTALE);
1860	/*
1861	 * Need to check if inode is initialized because UFS2 does lazy
1862	 * initialization and nfs_fhtovp can offer arbitrary inode numbers.
1863	 */
1864	if (fs->fs_magic != FS_UFS2_MAGIC)
1865		return (ufs_fhtovp(mp, ufhp, flags, vpp));
1866	cg = ino_to_cg(fs, ino);
1867	error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)),
1868		(int)fs->fs_cgsize, NOCRED, &bp);
1869	if (error)
1870		return (error);
1871	cgp = (struct cg *)bp->b_data;
1872	if (!cg_chkmagic(cgp) || ino >= cg * fs->fs_ipg + cgp->cg_initediblk) {
1873		brelse(bp);
1874		return (ESTALE);
1875	}
1876	brelse(bp);
1877	return (ufs_fhtovp(mp, ufhp, flags, vpp));
1878}
1879
1880/*
1881 * Initialize the filesystem.
1882 */
1883static int
1884ffs_init(vfsp)
1885	struct vfsconf *vfsp;
1886{
1887
1888	ffs_susp_initialize();
1889	softdep_initialize();
1890	return (ufs_init(vfsp));
1891}
1892
1893/*
1894 * Undo the work of ffs_init().
1895 */
1896static int
1897ffs_uninit(vfsp)
1898	struct vfsconf *vfsp;
1899{
1900	int ret;
1901
1902	ret = ufs_uninit(vfsp);
1903	softdep_uninitialize();
1904	ffs_susp_uninitialize();
1905	return (ret);
1906}
1907
1908/*
1909 * Write a superblock and associated information back to disk.
1910 */
1911int
1912ffs_sbupdate(ump, waitfor, suspended)
1913	struct ufsmount *ump;
1914	int waitfor;
1915	int suspended;
1916{
1917	struct fs *fs = ump->um_fs;
1918	struct buf *sbbp;
1919	struct buf *bp;
1920	int blks;
1921	void *space;
1922	int i, size, error, allerror = 0;
1923
1924	if (fs->fs_ronly == 1 &&
1925	    (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1926	    (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0)
1927		panic("ffs_sbupdate: write read-only filesystem");
1928	/*
1929	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1930	 */
1931	sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
1932	    (int)fs->fs_sbsize, 0, 0, 0);
1933	/*
1934	 * First write back the summary information.
1935	 */
1936	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1937	space = fs->fs_csp;
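	/*
	 * Copy the summary data out one full block at a time; the
	 * final chunk may cover only part of a block.
	 */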
1938	for (i = 0; i < blks; i += fs->fs_frag) {
1939		size = fs->fs_bsize;
1940		if (i + fs->fs_frag > blks)
1941			size = (blks - i) * fs->fs_fsize;
1942		bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1943		    size, 0, 0, 0);
1944		bcopy(space, bp->b_data, (u_int)size);
1945		space = (char *)space + size;
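		/* Tag writes that remain legal while the fs is suspended. */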
1946		if (suspended)
1947			bp->b_flags |= B_VALIDSUSPWRT;
1948		if (waitfor != MNT_WAIT)
1949			bawrite(bp);
1950		else if ((error = bwrite(bp)) != 0)
1951			allerror = error;
1952	}
1953	/*
1954	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, fail so that the superblock is not
	 * written out marked clean.
1957	 */
1958	if (allerror) {
1959		brelse(sbbp);
1960		return (allerror);
1961	}
1962	bp = sbbp;
1963	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1964	    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS1);
1967		fs->fs_sblockloc = SBLOCK_UFS1;
1968	}
1969	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1970	    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS2);
1973		fs->fs_sblockloc = SBLOCK_UFS2;
1974	}
1975	fs->fs_fmod = 0;
1976	fs->fs_time = time_second;
1977	if (MOUNTEDSOFTDEP(ump->um_mountp))
1978		softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
1979	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1980	ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
1981	if (suspended)
1982		bp->b_flags |= B_VALIDSUSPWRT;
1983	if (waitfor != MNT_WAIT)
1984		bawrite(bp);
1985	else if ((error = bwrite(bp)) != 0)
1986		allerror = error;
1987	return (allerror);
1988}
1989
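/*
 * Control extended attribute backing stores: hand the request to the
 * UFS extended attribute code when UFS_EXTATTR is configured, or to
 * the generic VFS stub otherwise.
 */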
1990static int
1991ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
1992	int attrnamespace, const char *attrname)
1993{
1994
1995#ifdef UFS_EXTATTR
1996	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
1997	    attrname));
1998#else
1999	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
2000	    attrname));
2001#endif
2002}
2003
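/*
 * Return an inode and its dinode to their UMA zones.
 */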
2004static void
2005ffs_ifree(struct ufsmount *ump, struct inode *ip)
2006{
2007
2008	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
2009		uma_zfree(uma_ufs1, ip->i_din1);
2010	else if (ip->i_din2 != NULL)
2011		uma_zfree(uma_ufs2, ip->i_din2);
2012	uma_zfree(uma_inode, ip);
2013}
2014
2015static int dobkgrdwrite = 1;
2016SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
2017    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
2018
2019/*
2020 * Complete a background write started from bwrite.
2021 */
2022static void
2023ffs_backgroundwritedone(struct buf *bp)
2024{
2025	struct bufobj *bufobj;
2026	struct buf *origbp;
2027
2028	/*
2029	 * Find the original buffer that we are writing.
2030	 */
2031	bufobj = bp->b_bufobj;
2032	BO_LOCK(bufobj);
	if ((origbp = gbincore(bufobj, bp->b_lblkno)) == NULL)
2034		panic("backgroundwritedone: lost buffer");
2035
	/*
	 * If the write failed, mark the original cylinder group
	 * buffer origbp dirty so that the failed write is not lost.
	 */
2040	if ((bp->b_ioflags & BIO_ERROR) != 0)
2041		origbp->b_vflags |= BV_BKGRDERR;
2042	BO_UNLOCK(bufobj);
2043	/*
2044	 * Process dependencies then return any unfinished ones.
2045	 */
2046	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
2047		buf_complete(bp);
2048#ifdef SOFTUPDATES
2049	if (!LIST_EMPTY(&bp->b_dep))
2050		softdep_move_dependencies(bp, origbp);
2051#endif
	/*
	 * This buffer is marked B_NOCACHE so that it is tossed
	 * when released by bufdone() below.
	 */
2056	bp->b_flags |= B_NOCACHE;
2057	bp->b_flags &= ~B_CACHE;
2058	pbrelvp(bp);
2059
2060	/*
2061	 * Prevent brelse() from trying to keep and re-dirtying bp on
2062	 * errors. It causes b_bufobj dereference in
2063	 * bdirty()/reassignbuf(), and b_bufobj was cleared in
2064	 * pbrelvp() above.
2065	 */
2066	if ((bp->b_ioflags & BIO_ERROR) != 0)
2067		bp->b_flags |= B_INVAL;
2068	bufdone(bp);
2069	BO_LOCK(bufobj);
2070	/*
2071	 * Clear the BV_BKGRDINPROG flag in the original buffer
2072	 * and awaken it if it is waiting for the write to complete.
2073	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated, which is not legal.
2075	 */
2076	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
2077	    ("backgroundwritedone: lost buffer2"));
2078	origbp->b_vflags &= ~BV_BKGRDINPROG;
2079	if (origbp->b_vflags & BV_BKGRDWAIT) {
2080		origbp->b_vflags &= ~BV_BKGRDWAIT;
2081		wakeup(&origbp->b_xflags);
2082	}
2083	BO_UNLOCK(bufobj);
2084}
2085
/*
 * Write a buffer, releasing it on completion (done by iodone if the
 * write is asynchronous).  Do not bother writing anything if the
 * buffer is invalid.
 *
 * Note that we set B_CACHE here, indicating that the buffer is
 * fully valid and thus cacheable.  This is true even of NFS now,
 * so we set it generally.  It could be set either here or in
 * biodone() since the I/O is synchronous; we do it here.
 */
2098static int
2099ffs_bufwrite(struct buf *bp)
2100{
2101	struct buf *newbp;
2102
2103	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2104	if (bp->b_flags & B_INVAL) {
2105		brelse(bp);
2106		return (0);
2107	}
2108
2109	if (!BUF_ISLOCKED(bp))
2110		panic("bufwrite: buffer is not busy???");
2111	/*
2112	 * If a background write is already in progress, delay
2113	 * writing this block if it is asynchronous. Otherwise
2114	 * wait for the background write to complete.
2115	 */
2116	BO_LOCK(bp->b_bufobj);
2117	if (bp->b_vflags & BV_BKGRDINPROG) {
2118		if (bp->b_flags & B_ASYNC) {
2119			BO_UNLOCK(bp->b_bufobj);
2120			bdwrite(bp);
2121			return (0);
2122		}
2123		bp->b_vflags |= BV_BKGRDWAIT;
2124		msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
2125		    "bwrbg", 0);
2126		if (bp->b_vflags & BV_BKGRDINPROG)
2127			panic("bufwrite: still writing");
2128	}
2129	bp->b_vflags &= ~BV_BKGRDERR;
2130	BO_UNLOCK(bp->b_bufobj);
2131
2132	/*
2133	 * If this buffer is marked for background writing and we
2134	 * do not have to wait for it, make a copy and write the
2135	 * copy so as to leave this buffer ready for further use.
2136	 *
2137	 * This optimization eats a lot of memory.  If we have a page
2138	 * or buffer shortfall we can't do it.
2139	 */
2140	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
2141	    (bp->b_flags & B_ASYNC) &&
2142	    !vm_page_count_severe() &&
2143	    !buf_dirty_count_severe()) {
2144		KASSERT(bp->b_iodone == NULL,
2145		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
2146
		/*
		 * Get a buffer for the copy without sleeping; if none
		 * is available, fall back to a normal write.
		 */
2148		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
2149		if (newbp == NULL)
2150			goto normal_write;
2151
2152		KASSERT(buf_mapped(bp), ("Unmapped cg"));
2153		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
2154		BO_LOCK(bp->b_bufobj);
2155		bp->b_vflags |= BV_BKGRDINPROG;
2156		BO_UNLOCK(bp->b_bufobj);
2157		newbp->b_xflags |= BX_BKGRDMARKER;
2158		newbp->b_lblkno = bp->b_lblkno;
2159		newbp->b_blkno = bp->b_blkno;
2160		newbp->b_offset = bp->b_offset;
2161		newbp->b_iodone = ffs_backgroundwritedone;
2162		newbp->b_flags |= B_ASYNC;
2163		newbp->b_flags &= ~B_INVAL;
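		/*
		 * Associate the copy with the vnode; the completion
		 * handler drops the association again via pbrelvp().
		 */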
2164		pbgetvp(bp->b_vp, newbp);
2165
2166#ifdef SOFTUPDATES
2167		/*
2168		 * Move over the dependencies.  If there are rollbacks,
2169		 * leave the parent buffer dirtied as it will need to
2170		 * be written again.
2171		 */
2172		if (LIST_EMPTY(&bp->b_dep) ||
2173		    softdep_move_dependencies(bp, newbp) == 0)
2174			bundirty(bp);
2175#else
2176		bundirty(bp);
2177#endif
2178
2179		/*
2180		 * Initiate write on the copy, release the original.  The
2181		 * BKGRDINPROG flag prevents it from going away until
2182		 * the background write completes.
2183		 */
2184		bqrelse(bp);
2185		bp = newbp;
2186	} else
2187		/* Mark the buffer clean */
2188		bundirty(bp);
2189
2191	/* Let the normal bufwrite do the rest for us */
2192normal_write:
2193	return (bufwrite(bp));
2194}
2195
2197static void
2198ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
2199{
2200	struct vnode *vp;
2201	int error;
2202	struct buf *tbp;
2203	int nocopy;
2204
2205	vp = bo->__bo_vnode;
2206	if (bp->b_iocmd == BIO_WRITE) {
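		/*
		 * Writes to a suspended filesystem are legal only if
		 * they are explicitly marked B_VALIDSUSPWRT.
		 */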
2207		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
2208		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
2209		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
2210			panic("ffs_geom_strategy: bad I/O");
2211		nocopy = bp->b_flags & B_NOCOPY;
2212		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
2213		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
2214		    vp->v_rdev->si_snapdata != NULL) {
2215			if ((bp->b_flags & B_CLUSTER) != 0) {
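				/*
				 * Drop the running-buffer accounting while
				 * copy-on-write runs (it may sleep); it is
				 * re-added once the whole cluster has been
				 * processed.
				 */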
2216				runningbufwakeup(bp);
2217				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2218					      b_cluster.cluster_entry) {
2219					error = ffs_copyonwrite(vp, tbp);
2220					if (error != 0 &&
2221					    error != EOPNOTSUPP) {
2222						bp->b_error = error;
2223						bp->b_ioflags |= BIO_ERROR;
2224						bufdone(bp);
2225						return;
2226					}
2227				}
2228				bp->b_runningbufspace = bp->b_bufsize;
2229				atomic_add_long(&runningbufspace,
2230					       bp->b_runningbufspace);
2231			} else {
2232				error = ffs_copyonwrite(vp, bp);
2233				if (error != 0 && error != EOPNOTSUPP) {
2234					bp->b_error = error;
2235					bp->b_ioflags |= BIO_ERROR;
2236					bufdone(bp);
2237					return;
2238				}
2239			}
2240		}
2241#ifdef SOFTUPDATES
2242		if ((bp->b_flags & B_CLUSTER) != 0) {
2243			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2244				      b_cluster.cluster_entry) {
2245				if (!LIST_EMPTY(&tbp->b_dep))
2246					buf_start(tbp);
2247			}
2248		} else {
2249			if (!LIST_EMPTY(&bp->b_dep))
2250				buf_start(bp);
2251		}
2252
2253#endif
2254	}
2255	g_vfs_strategy(bo, bp);
2256}
2257
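/*
 * Report whether the given mount point is managed by FFS.
 */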
2258int
2259ffs_own_mount(const struct mount *mp)
2260{
2261
2262	if (mp->mnt_op == &ufs_vfsops)
2263		return (1);
2264	return (0);
2265}
2266
2267#ifdef	DDB
2268#ifdef SOFTUPDATES
2269
2270/* defined in ffs_softdep.c */
2271extern void db_print_ffs(struct ufsmount *ump);
2272
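/*
 * DDB "show ffs [<addr>]": dump the ufsmount at <addr>, or every
 * mounted UFS filesystem when no address is given.
 */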
2273DB_SHOW_COMMAND(ffs, db_show_ffs)
2274{
2275	struct mount *mp;
2276	struct ufsmount *ump;
2277
2278	if (have_addr) {
2279		ump = VFSTOUFS((struct mount *)addr);
2280		db_print_ffs(ump);
2281		return;
2282	}
2283
2284	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2285		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2286			db_print_ffs(VFSTOUFS(mp));
2287	}
2288}
2289
2290#endif	/* SOFTUPDATES */
2291#endif	/* DDB */
2292