/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>
#include <sys/stat.h>
#include <security/audit/audit.h>
#include <sys/priv.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap);
static int vop_stdstat(struct vop_stat_args *ap);
static int vop_stdvput_pair(struct vop_vput_pair_args *ap);
static int vop_stdgetlowvnode(struct vop_getlowvnode_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess(), etc.
 */
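/*
 * A filesystem typically picks up these defaults by pointing .vop_default
 * at this vector from its own vop_vector, overriding only the operations
 * it implements itself.  A minimal sketch (foofs and foofs_lookup are
 * hypothetical names):
 *
 *	static struct vop_vector foofs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	foofs_lookup,
 *	};
 *	VFS_VOP_VECTOR_REGISTER(foofs_vnodeops);
 */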

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_deallocate =	vop_stddeallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_stat =		vop_stdstat,
	.vop_fdatasync =	vop_stdfdatasync,
	.vop_getlowvnode =	vop_stdgetlowvnode,
	.vop_getpages =		vop_stdgetpages,
	.vop_getpages_async =	vop_stdgetpages_async,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_need_inactive =	vop_stdneed_inactive,
	.vop_ioctl =		vop_stdioctl,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_read_pgcache =	vop_stdread_pgcache,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_add_writecount =	vop_stdadd_writecount,
	.vop_copy_file_range =	vop_stdcopy_file_range,
	.vop_vput_pair =	vop_stdvput_pair,
};
VFS_VOP_VECTOR_REGISTER(default_vnodeops);

/*
 * Series of placeholder functions for various error returns from
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_eagain(struct vop_generic_args *ap)
{

	return (EAGAIN);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Documentation for the operations they implement, where it exists, can be
 * found in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(struct vop_lookup_args *ap)
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for the arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

251
252/*
253 *	vop_nostrategy:
254 *
255 *	Strategy routine for VFS devices that have none.
256 *
257 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
258 *	routine.  Typically this is done for a BIO_READ strategy call.
259 *	Typically B_INVAL is assumed to already be clear prior to a write
260 *	and should not be cleared manually unless you just made the buffer
261 *	invalid.  BIO_ERROR should be cleared either way.
262 */
263
264static int
265vop_nostrategy (struct vop_strategy_args *ap)
266{
267	printf("No strategy for buffer at %p\n", ap->a_bp);
268	vn_printf(ap->a_vp, "vnode ");
269	ap->a_bp->b_ioflags |= BIO_ERROR;
270	ap->a_bp->b_error = EOPNOTSUPP;
271	bufdone(ap->a_bp);
272	return (EOPNOTSUPP);
273}
274
275/*
276 * Check if a named file exists in a given directory vnode
277 *
278 * Returns 0 if the file exists, ENOENT if it doesn't, or errors returned by
279 * vn_dir_next_dirent().
280 */
281static int
282dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
283{
284	char *dirbuf;
285	int error, eofflag;
286	size_t dirbuflen, len;
287	off_t off;
288	struct dirent *dp;
289	struct vattr va;
290
291	ASSERT_VOP_LOCKED(vp, "vnode not locked");
292	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
293
294	error = VOP_GETATTR(vp, &va, td->td_ucred);
295	if (error != 0)
296		return (error);
297
298	dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ);
299	if (dirbuflen < va.va_blocksize)
300		dirbuflen = va.va_blocksize;
301	dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
302
303	len = 0;
304	off = 0;
305	eofflag = 0;
306
307	for (;;) {
308		error = vn_dir_next_dirent(vp, td, dirbuf, dirbuflen,
309		    &dp, &len, &off, &eofflag);
310		if (error != 0)
311			goto out;
312
313		if (len == 0)
314			break;
315
316		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
317		    strcmp(dp->d_name, dirname) == 0)
318			goto out;
319	}
320
321	error = ENOENT;
322
323out:
324	free(dirbuf, M_TEMP);
325	return (error);
326}
327
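/*
 * vop_stdaccess() and vop_stdaccessx() each forward to the other VOP, so
 * a filesystem using these defaults must supply at least one of its own
 * (see the recursion warning above default_vnodeops).
 */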
int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;

	/*
	 * Provide atomicity of open(O_CREAT | O_EXCL | O_EXLOCK) for
	 * local filesystems.  See vn_open_cred() for the reciprocal part.
	 */
	mp = vp->v_mount;
	if (mp != NULL && (mp->mnt_flag & MNT_LOCAL) != 0 &&
	    ap->a_op == F_SETLK && (ap->a_flags & F_FIRSTOPEN) == 0) {
		VI_LOCK(vp);
		while ((vp->v_iflag & VI_FOPENING) != 0)
			msleep(vp, VI_MTX(vp), PLOCK, "lockfo", 0);
		VI_UNLOCK(vp);
	}

	if (ap->a_fl->l_whence == SEEK_END) {
		/*
		 * The NFSv4 server must avoid doing a vn_lock() here, since it
		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
		 * the NFSv4 server always uses SEEK_SET and this code is
		 * only required for the SEEK_END case.
		 */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(struct vop_pathconf_args *ap)
{

	switch (ap->a_name) {
		case _PC_ASYNC_IO:
			*ap->a_retval = _POSIX_ASYNCHRONOUS_IO;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_ACL_EXTENDED:
		case _PC_ACL_NFS4:
		case _PC_CAP_PRESENT:
		case _PC_DEALLOC_PRESENT:
		case _PC_INF_PRESENT:
		case _PC_MAC_PRESENT:
			*ap->a_retval = 0;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(struct vop_lock1_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct mtx *ilk;

	ilk = VI_MTX(vp);
	return (lockmgr_lock_flags(vp->v_vnlock, ap->a_flags,
	    &ilk->lock_object, ap->a_file, ap->a_line));
}

/* See above. */
int
vop_stdunlock(struct vop_unlock_args *ap)
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr_unlock(vp->v_vnlock));
}

/* See above. */
int
vop_stdislocked(struct vop_islocked_args *ap)
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Variants of the above set.
 *
 * Differences are:
 * - shared locking disablement is not supported
 * - v_vnlock pointer is not honored
 */
int
vop_lock(struct vop_lock1_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int flags = ap->a_flags;
	struct mtx *ilk;

	MPASS(vp->v_vnlock == &vp->v_lock);

	if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0))
		goto other;

	switch (flags & LK_TYPE_MASK) {
	case LK_SHARED:
		return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line));
	case LK_EXCLUSIVE:
		return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line));
	}
other:
	ilk = VI_MTX(vp);
	return (lockmgr_lock_flags(&vp->v_lock, flags,
	    &ilk->lock_object, ap->a_file, ap->a_line));
}

int
vop_unlock(struct vop_unlock_args *ap)
{
	struct vnode *vp = ap->a_vp;

	MPASS(vp->v_vnlock == &vp->v_lock);

	return (lockmgr_unlock(&vp->v_lock));
}

int
vop_islocked(struct vop_islocked_args *ap)
{
	struct vnode *vp = ap->a_vp;

	MPASS(vp->v_vnlock == &vp->v_lock);

	return (lockstatus(&vp->v_lock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(struct vop_poll_args *ap)
{

	if (ap->a_events & ~POLLSTANDARD)
		return (POLLNVAL);
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(struct vop_poll_args *ap)
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(struct vop_getwritemount_args *ap)
{
	struct mount *mp;
	struct vnode *vp;

	/*
	 * Note that having a reference does not prevent forced unmount from
	 * setting ->v_mount to NULL after the lock gets released. This is of
	 * no consequence for typical consumers (most notably vn_start_write)
	 * since in this case the vnode is VIRF_DOOMED. Unmount might have
	 * progressed far enough that its completion is only delayed by the
	 * reference obtained here. The consumer only needs to concern itself
	 * with releasing it.
	 */
	vp = ap->a_vp;
	mp = vfs_ref_from_vp(vp);
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * If the file system doesn't implement VOP_BMAP, then return sensible defaults:
 * - Return the vnode's bufobj instead of any underlying device's bufobj
 * - Calculate the physical block number as if there were equal size
 *   consecutive blocks, but
 * - Report no contiguous runs of blocks.
 */
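/*
 * For example (illustrative numbers only): with f_iosize of 32768 and
 * DEV_BSIZE of 512, btodb(32768) == 64, so logical block N is reported
 * as physical block 64 * N, with zero-length runs on either side.
 */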
int
vop_stdbmap(struct vop_bmap_args *ap)
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

int
vop_stdfsync(struct vop_fsync_args *ap)
{

	return (vn_fsync_buf(ap->a_vp, ap->a_waitfor));
}

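/*
 * Default fdatasync: lacking a way to sync only the data, fall back to a
 * full synchronous VOP_FSYNC.
 */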
static int
vop_stdfdatasync(struct vop_fdatasync_args *ap)
{

	return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td));
}

int
vop_stdfdatasync_buf(struct vop_fdatasync_args *ap)
{

	return (vn_fsync_buf(ap->a_vp, MNT_WAIT));
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(struct vop_getpages_args *ap)
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL);
}

static int
vop_stdgetpages_async(struct vop_getpages_async_args *ap)
{
	int error;

	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead);
	if (ap->a_iodone != NULL)
		ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
	return (error);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(struct vop_putpages_args *ap)
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

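/*
 * Default vnode-to-component-name translation: open the parent directory
 * via "..", scan it for an entry whose d_fileno matches this vnode, and
 * copy that name to the tail of the caller's buffer, returning the new
 * prefix length in *a_buflen and the referenced parent in *a_vpp.
 */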
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *const vp = ap->a_vp;
	struct vnode **const dvp = ap->a_vpp;
	char *buf = ap->a_buf;
	size_t *buflen = ap->a_buflen;
	char *dirbuf;
	int i = *buflen;
	int error = 0, covered = 0;
	int eofflag, flags, locked;
	size_t dirbuflen, len;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *const td = curthread;
	struct ucred *const cred = td->td_ucred;
	struct dirent *dp;
	struct vnode *mvp;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE_PNBUF(&nd);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_SHARED | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ);
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	len = 0;
	off = 0;
	eofflag = 0;

	for (;;) {
		/* call VOP_READDIR of parent */
		error = vn_dir_next_dirent(*dvp, td,
		    dirbuf, dirbuflen, &dp, &len, &off, &eofflag);
		if (error != 0)
			goto out;

		if (len == 0) {
			error = ENOENT;
			goto out;
		}

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp);
				vn_lock(mvp, LK_SHARED | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td) == 0) {
					error = ENOENT;
					VOP_UNLOCK(mvp);
					vn_lock(*dvp, LK_SHARED | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp);
				vn_lock(*dvp, LK_SHARED | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	}

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

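/*
 * Default file space allocation (the posix_fallocate(2) path): force the
 * filesystem to allocate backing blocks by reading each region below the
 * current file size and writing it back, and by writing zeroes beyond it.
 */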
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs *sfs;
	off_t maxfilesize = 0;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, ap->a_cred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > maxphys)
		iosize = maxphys;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not, use
	 * VOP_SETATTR to perform the check.
	 */
	sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = VFS_STATFS(vp->v_mount, sfs, td);
	if (error == 0)
		maxfilesize = sfs->f_maxfilesize;
	free(sfs, M_STATFS);
	if (error != 0)
		goto out;
	if (maxfilesize) {
		if (offset > maxfilesize || len > maxfilesize ||
		    offset + len > maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, ap->a_cred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, ap->a_cred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, ap->a_ioflag, ap->a_cred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, ap->a_ioflag, ap->a_cred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

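/*
 * Write zeroes from the preallocated zero_region over
 * [*offsetp, *offsetp + *lenp), updating *offsetp and *lenp as it goes so
 * the caller can observe partial progress on error.
 */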
static int
vp_zerofill(struct vnode *vp, struct vattr *vap, off_t *offsetp, off_t *lenp,
    int ioflag, struct ucred *cred)
{
	int iosize;
	int error = 0;
	struct iovec aiov;
	struct uio auio;
	struct thread *td;
	off_t offset, len;

	iosize = vap->va_blocksize;
	td = curthread;
	offset = *offsetp;
	len = *lenp;

	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	/* If va_blocksize is 512 bytes, iosize will be 4 kilobytes */
	iosize = min(iosize * 8, ZERO_REGION_SIZE);

	while (len > 0) {
		int xfersize = iosize;
		if (offset % iosize != 0)
			xfersize -= offset % iosize;
		if (xfersize > len)
			xfersize = len;

		aiov.iov_base = __DECONST(void *, zero_region);
		aiov.iov_len = xfersize;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = xfersize;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, ioflag, cred);
		if (error != 0) {
			len -= xfersize - auio.uio_resid;
			offset += xfersize - auio.uio_resid;
			break;
		}

		len -= xfersize;
		offset += xfersize;
	}

	*offsetp = offset;
	*lenp = len;
	return (error);
}

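/*
 * Default hole punching (the fspacectl(2) SPACECTL_DEALLOC path): walk the
 * range with FIOSEEKDATA/FIOSEEKHOLE, skip over existing holes and
 * zero-fill the data regions via vp_zerofill().
 */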
int
vop_stddeallocate(struct vop_deallocate_args *ap)
{
	struct vnode *vp;
	off_t offset, len;
	struct ucred *cred;
	int error;
	struct vattr va;
	off_t noff, xfersize, rem;

	vp = ap->a_vp;
	offset = *ap->a_offset;
	cred = ap->a_cred;

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	len = omin((off_t)va.va_size - offset, *ap->a_len);
	while (len > 0) {
		noff = offset;
		error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred);
		if (error) {
			if (error != ENXIO)
				/* XXX: Is it okay to fall back further? */
				goto out;

			/*
			 * No more data region to be filled
			 */
			offset += len;
			len = 0;
			error = 0;
			break;
		}
		KASSERT(noff >= offset, ("FIOSEEKDATA going backward"));
		if (noff != offset) {
			xfersize = omin(noff - offset, len);
			len -= xfersize;
			offset += xfersize;
			if (len == 0)
				break;
		}
		error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred);
		if (error)
			goto out;

		/* Fill zeroes */
		xfersize = rem = omin(noff - offset, len);
		error = vp_zerofill(vp, &va, &offset, &rem, ap->a_ioflag, cred);
		if (error) {
			len -= xfersize - rem;
			goto out;
		}

		len -= xfersize;
		if (should_yield())
			break;
	}
	/* Handle the case when offset is beyond EOF */
	if (len < 0)
		len = 0;
out:
	*ap->a_offset = offset;
	*ap->a_len = len;
	return (error);
}

int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	daddr_t startn, endn;
	off_t bstart, bend, start, end;
	int bsize, error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (VN_IS_DOOMED(vp)) {
			VOP_UNLOCK(vp);
			break;
		}

		/*
		 * Round to block boundaries (and later possibly further to
		 * page boundaries).  Applications cannot reasonably be aware
		 * of the boundaries, and the rounding must be to expand at
		 * both extremities to cover enough.  It still doesn't cover
		 * read-ahead.  For partial blocks, this gives unnecessary
		 * discarding of buffers but is efficient enough since the
		 * pages usually remain in VMIO for some time.
		 */
		bsize = vp->v_bufobj.bo_bsize;
		bstart = rounddown(ap->a_start, bsize);
		bend = roundup(ap->a_end, bsize);

		/*
		 * Deactivate pages in the specified range from the backing VM
		 * object.  Pages that are resident in the buffer cache will
		 * remain wired until their corresponding buffers are released
		 * below.
		 */
		if (vp->v_object != NULL) {
			start = trunc_page(bstart);
			end = round_page(bend);
			VM_OBJECT_RLOCK(vp->v_object);
			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_RUNLOCK(vp->v_object);
		}

		bo = &vp->v_bufobj;
		BO_RLOCK(bo);
		startn = bstart / bsize;
		endn = bend / bsize;
		error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
		if (error == 0)
			error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
		BO_RUNLOCK(bo);
		VOP_UNLOCK(vp);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_unpcb = ap->a_unpcb;
	return (0);
}

int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_unpcb = ap->a_vp->v_unpcb;
	return (0);
}

int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_unpcb = NULL;
	return (0);
}

static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return (atomic_load_int(&ap->a_vp->v_writecount) < 0);
}

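/*
 * v_writecount doubles as the text reference count: positive values count
 * writers, negative values count executable (text) mappings, which is how
 * write opens and text mappings exclude each other (ETXTBSY).
 */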
int
vop_stdset_text(struct vop_set_text_args *ap)
{
	struct vnode *vp;
	int n;
	bool gotref;

	vp = ap->a_vp;

	n = atomic_load_int(&vp->v_writecount);
	for (;;) {
		if (__predict_false(n > 0)) {
			return (ETXTBSY);
		}

		/*
		 * Transition point; we may need to grab a reference on the
		 * vnode.
		 *
		 * Take the ref early as a safety measure against bogus calls
		 * to vop_stdunset_text.
		 */
		if (n == 0) {
			gotref = false;
			if ((vn_irflag_read(vp) & VIRF_TEXT_REF) != 0) {
				vref(vp);
				gotref = true;
			}
			if (atomic_fcmpset_int(&vp->v_writecount, &n, -1)) {
				return (0);
			}
			if (gotref) {
				vunref(vp);
			}
			continue;
		}

		MPASS(n < 0);
		if (atomic_fcmpset_int(&vp->v_writecount, &n, n - 1)) {
			return (0);
		}
	}
	__assert_unreachable();
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{
	struct vnode *vp;
	int n;

	vp = ap->a_vp;

	n = atomic_load_int(&vp->v_writecount);
	for (;;) {
		if (__predict_false(n >= 0)) {
			return (EINVAL);
		}

		/*
		 * Transition point; we may need to release a reference on
		 * the vnode.
		 */
		if (n == -1) {
			if (atomic_fcmpset_int(&vp->v_writecount, &n, 0)) {
				if ((vn_irflag_read(vp) & VIRF_TEXT_REF) != 0) {
					vunref(vp);
				}
				return (0);
			}
			continue;
		}

		MPASS(n < -1);
		if (atomic_fcmpset_int(&vp->v_writecount, &n, n + 1)) {
			return (0);
		}
	}
	__assert_unreachable();
}

static int __always_inline
vop_stdadd_writecount_impl(struct vop_add_writecount_args *ap, bool handle_msync)
{
	struct vnode *vp;
	struct mount *mp __diagused;
	int n;

	vp = ap->a_vp;

#ifdef INVARIANTS
	mp = vp->v_mount;
	if (mp != NULL) {
		if (handle_msync) {
			VNPASS((mp->mnt_kern_flag & MNTK_NOMSYNC) == 0, vp);
		} else {
			VNPASS((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0, vp);
		}
	}
#endif

	n = atomic_load_int(&vp->v_writecount);
	for (;;) {
		if (__predict_false(n < 0)) {
			return (ETXTBSY);
		}

		VNASSERT(n + ap->a_inc >= 0, vp,
		    ("neg writecount increment %d + %d = %d", n, ap->a_inc,
		    n + ap->a_inc));
		if (n == 0) {
			if (handle_msync) {
				vlazy(vp);
			}
		}

		if (atomic_fcmpset_int(&vp->v_writecount, &n, n + ap->a_inc)) {
			return (0);
		}
	}
	__assert_unreachable();
}

int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{

	return (vop_stdadd_writecount_impl(ap, true));
}

int
vop_stdadd_writecount_nomsync(struct vop_add_writecount_args *ap)
{

	return (vop_stdadd_writecount_impl(ap, false));
}

int
vop_stdneed_inactive(struct vop_need_inactive_args *ap)
{

	return (1);
}

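/*
 * Default FIOSEEKDATA/FIOSEEKHOLE: treat a regular file as fully dense,
 * so every in-range offset is data and the only hole begins at EOF.
 */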
int
vop_stdioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	struct vattr va;
	off_t *offp;
	int error;

	switch (ap->a_command) {
	case FIOSEEKDATA:
	case FIOSEEKHOLE:
		vp = ap->a_vp;
		error = vn_lock(vp, LK_SHARED);
		if (error != 0)
			return (EBADF);
		if (vp->v_type == VREG)
			error = VOP_GETATTR(vp, &va, ap->a_cred);
		else
			error = ENOTTY;
		if (error == 0) {
			offp = ap->a_data;
			if (*offp < 0 || *offp >= va.va_size)
				error = ENXIO;
			else if (ap->a_command == FIOSEEKHOLE)
				*offp = va.va_size;
		}
		VOP_UNLOCK(vp);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

/*
 * VFS default ops, used to fill the VFS function table with reasonable
 * default return values.
 */
int
vfs_stdroot(struct mount *mp, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs(struct mount *mp, struct statfs *sbp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl(struct mount *mp, int cmds, uid_t uid, void *arg, bool *mp_busy)
{
	return (EOPNOTSUPP);
}

int
vfs_stdsync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync(struct mount *mp, int waitfor)
{

	return (0);
}

static int
vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
{
	int error;

	error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred,
	    ap->a_outcred, ap->a_fsizetd);
	return (error);
}

int
vfs_stdvget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(struct vfsconf *vfsp)
{

	return (0);
}

int
vfs_stduninit(struct vfsconf *vfsp)
{

	return (0);
}

int
vfs_stdextattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
{

	return (EOPNOTSUPP);
}

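/*
 * Call the real VOP found at the given offset in the vector while stop
 * signals are deferred, restoring the previous stop state afterwards.
 */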
static vop_bypass_t *
bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
{

	return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
}

int
vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
{
	vop_bypass_t *bp;
	int prev_stops, rc;

	bp = bp_by_off(vop, a);
	MPASS(bp != NULL);

	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
	rc = bp(a);
	sigallowstop(prev_stops);
	return (rc);
}

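/*
 * Default VOP_STAT: fetch the attributes via VOP_GETATTR and translate
 * them into a struct stat, supplying conservative defaults for fields
 * (birthtime, fsid, gen, rdev) that a filesystem may not fill in.
 */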
static int
vop_stdstat(struct vop_stat_args *a)
{
	struct vattr vattr;
	struct vattr *vap;
	struct vnode *vp;
	struct stat *sb;
	int error;
	u_short mode;

	vp = a->a_vp;
	sb = a->a_sb;

	error = vop_stat_helper_pre(a);
	if (error != 0)
		return (error);

	vap = &vattr;

	/*
	 * Initialize defaults for new and unusual fields, so that file
	 * systems which don't support these fields don't need to know
	 * about them.
	 */
	vap->va_birthtime.tv_sec = -1;
	vap->va_birthtime.tv_nsec = 0;
	vap->va_fsid = VNOVAL;
	vap->va_gen = 0;
	vap->va_rdev = NODEV;

	error = VOP_GETATTR(vp, vap, a->a_active_cred);
	if (error)
		goto out;

	/*
	 * Zero the spare stat fields
	 */
	bzero(sb, sizeof *sb);

	/*
	 * Copy from vattr table
	 */
	if (vap->va_fsid != VNOVAL)
		sb->st_dev = vap->va_fsid;
	else
		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vap->va_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		error = EBADF;
		goto out;
	}
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	if (vap->va_size > OFF_MAX) {
		error = EOVERFLOW;
		goto out;
	}
	sb->st_size = vap->va_size;
	sb->st_atim.tv_sec = vap->va_atime.tv_sec;
	sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
	sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
	sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
	sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
	sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
	sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
	sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;

	/*
	 * According to www.opengroup.org, the meaning of st_blksize is
	 *   "a filesystem-specific preferred I/O block size for this
	 *    object.  In some filesystem types, this may vary from file
	 *    to file"
	 * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
	 */

	sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
	sb->st_flags = vap->va_flags;
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
	sb->st_gen = vap->va_gen;
out:
	return (vop_stat_helper_post(a, error));
}

static int
vop_stdread_pgcache(struct vop_read_pgcache_args *ap __unused)
{
	return (EJUSTRETURN);
}

static int
vop_stdvput_pair(struct vop_vput_pair_args *ap)
{
	struct vnode *dvp, *vp, **vpp;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vput(dvp);
	if (vpp != NULL && ap->a_unlock_vp && (vp = *vpp) != NULL)
		vput(vp);
	return (0);
}

static int
vop_stdgetlowvnode(struct vop_getlowvnode_args *ap)
{
	vref(ap->a_vp);
	*ap->a_vplp = ap->a_vp;
	return (0);
}