ufs_vnops.c revision 276648
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_vnops.c 276648 2015-01-04 00:46:06Z kib $");
39
40#include "opt_quota.h"
41#include "opt_suiddir.h"
42#include "opt_ufs.h"
43#include "opt_ffs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/malloc.h>
48#include <sys/namei.h>
49#include <sys/kernel.h>
50#include <sys/fcntl.h>
51#include <sys/filio.h>
52#include <sys/stat.h>
53#include <sys/bio.h>
54#include <sys/buf.h>
55#include <sys/mount.h>
56#include <sys/priv.h>
57#include <sys/refcount.h>
58#include <sys/unistd.h>
59#include <sys/vnode.h>
60#include <sys/dirent.h>
61#include <sys/lockf.h>
62#include <sys/conf.h>
63#include <sys/acl.h>
64
65#include <security/mac/mac_framework.h>
66
67#include <sys/file.h>		/* XXX */
68
69#include <vm/vm.h>
70#include <vm/vm_extern.h>
71
72#include <ufs/ufs/acl.h>
73#include <ufs/ufs/extattr.h>
74#include <ufs/ufs/quota.h>
75#include <ufs/ufs/inode.h>
76#include <ufs/ufs/dir.h>
77#include <ufs/ufs/ufsmount.h>
78#include <ufs/ufs/ufs_extern.h>
79#ifdef UFS_DIRHASH
80#include <ufs/ufs/dirhash.h>
81#endif
82#ifdef UFS_GJOURNAL
83#include <ufs/ufs/gjournal.h>
84FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS");
85#endif
86
87#ifdef QUOTA
88FEATURE(ufs_quota, "UFS disk quotas support");
89FEATURE(ufs_quota64, "64bit UFS disk quotas support");
90#endif
91
92#ifdef SUIDDIR
93FEATURE(suiddir,
94    "Give all new files in directory the same ownership as the directory");
95#endif
96
97
98#include <ufs/ffs/ffs_extern.h>
99
/*
 * Forward declarations of the file-local (static) vnode operations and
 * helper routines.  The vop_*_t typedefs supply the prototype for each
 * vnode operation; the helpers are declared with explicit prototypes.
 */
static vop_accessx_t	ufs_accessx;
static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
static vop_close_t	ufs_close;
static vop_create_t	ufs_create;
static vop_getattr_t	ufs_getattr;
static vop_ioctl_t	ufs_ioctl;
static vop_link_t	ufs_link;
static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
static vop_markatime_t	ufs_markatime;
static vop_mkdir_t	ufs_mkdir;
static vop_mknod_t	ufs_mknod;
static vop_open_t	ufs_open;
static vop_pathconf_t	ufs_pathconf;
static vop_print_t	ufs_print;
static vop_readlink_t	ufs_readlink;
static vop_remove_t	ufs_remove;
static vop_rename_t	ufs_rename;
static vop_rmdir_t	ufs_rmdir;
static vop_setattr_t	ufs_setattr;
static vop_strategy_t	ufs_strategy;
static vop_symlink_t	ufs_symlink;
static vop_whiteout_t	ufs_whiteout;
static vop_close_t	ufsfifo_close;
static vop_kqfilter_t	ufsfifo_kqfilter;
static vop_pathconf_t	ufsfifo_pathconf;

/* Read-only "ufs" node under the vfs sysctl tree. */
SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
128
/*
 * A virgin directory (no blushing please).
 *
 * Templates carrying the two initial entries, "." and "..".  In both
 * templates the second values of the two entries (12 and DIRBLKSIZ - 12)
 * add up to DIRBLKSIZ.
 * NOTE(review): the initializers are positional; presumably the fields are
 * inode number, record length, [entry type,] name length and name --
 * confirm against struct dirtemplate / struct odirtemplate, which are not
 * defined in this file.
 */
static struct dirtemplate mastertemplate = {
	0, 12, DT_DIR, 1, ".",
	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
};
/* Variant without the DT_DIR field (see NOTE above). */
static struct odirtemplate omastertemplate = {
	0, 12, 1, ".",
	0, DIRBLKSIZ - 12, 2, ".."
};
140
141static void
142ufs_itimes_locked(struct vnode *vp)
143{
144	struct inode *ip;
145	struct timespec ts;
146
147	ASSERT_VI_LOCKED(vp, __func__);
148
149	ip = VTOI(vp);
150	if (UFS_RDONLY(ip))
151		goto out;
152	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
153		return;
154
155	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
156		ip->i_flag |= IN_LAZYMOD;
157	else if (((vp->v_mount->mnt_kern_flag &
158		    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
159		    (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
160		ip->i_flag |= IN_MODIFIED;
161	else if (ip->i_flag & IN_ACCESS)
162		ip->i_flag |= IN_LAZYACCESS;
163	vfs_timestamp(&ts);
164	if (ip->i_flag & IN_ACCESS) {
165		DIP_SET(ip, i_atime, ts.tv_sec);
166		DIP_SET(ip, i_atimensec, ts.tv_nsec);
167	}
168	if (ip->i_flag & IN_UPDATE) {
169		DIP_SET(ip, i_mtime, ts.tv_sec);
170		DIP_SET(ip, i_mtimensec, ts.tv_nsec);
171	}
172	if (ip->i_flag & IN_CHANGE) {
173		DIP_SET(ip, i_ctime, ts.tv_sec);
174		DIP_SET(ip, i_ctimensec, ts.tv_nsec);
175		DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1);
176	}
177
178 out:
179	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
180}
181
/*
 * Locking wrapper around ufs_itimes_locked(): acquire the vnode
 * interlock, apply the pending timestamp marks, release the interlock.
 */
void
ufs_itimes(struct vnode *vp)
{
	VI_LOCK(vp);
	ufs_itimes_locked(vp);
	VI_UNLOCK(vp);
}
190
191/*
192 * Create a regular file
193 */
194static int
195ufs_create(ap)
196	struct vop_create_args /* {
197		struct vnode *a_dvp;
198		struct vnode **a_vpp;
199		struct componentname *a_cnp;
200		struct vattr *a_vap;
201	} */ *ap;
202{
203	int error;
204
205	error =
206	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
207	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
208	if (error != 0)
209		return (error);
210	if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
211		cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
212	return (0);
213}
214
215/*
216 * Mknod vnode call
217 */
218/* ARGSUSED */
219static int
220ufs_mknod(ap)
221	struct vop_mknod_args /* {
222		struct vnode *a_dvp;
223		struct vnode **a_vpp;
224		struct componentname *a_cnp;
225		struct vattr *a_vap;
226	} */ *ap;
227{
228	struct vattr *vap = ap->a_vap;
229	struct vnode **vpp = ap->a_vpp;
230	struct inode *ip;
231	ino_t ino;
232	int error;
233
234	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
235	    ap->a_dvp, vpp, ap->a_cnp);
236	if (error)
237		return (error);
238	ip = VTOI(*vpp);
239	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
240	if (vap->va_rdev != VNOVAL) {
241		/*
242		 * Want to be able to use this to make badblock
243		 * inodes, so don't truncate the dev number.
244		 */
245		DIP_SET(ip, i_rdev, vap->va_rdev);
246	}
247	/*
248	 * Remove inode, then reload it through VFS_VGET so it is
249	 * checked to see if it is an alias of an existing entry in
250	 * the inode cache.  XXX I don't believe this is necessary now.
251	 */
252	(*vpp)->v_type = VNON;
253	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
254	vgone(*vpp);
255	vput(*vpp);
256	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
257	if (error) {
258		*vpp = NULL;
259		return (error);
260	}
261	return (0);
262}
263
264/*
265 * Open called.
266 */
267/* ARGSUSED */
268static int
269ufs_open(struct vop_open_args *ap)
270{
271	struct vnode *vp = ap->a_vp;
272	struct inode *ip;
273
274	if (vp->v_type == VCHR || vp->v_type == VBLK)
275		return (EOPNOTSUPP);
276
277	ip = VTOI(vp);
278	/*
279	 * Files marked append-only must be opened for appending.
280	 */
281	if ((ip->i_flags & APPEND) &&
282	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
283		return (EPERM);
284	vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
285	return (0);
286}
287
288/*
289 * Close called.
290 *
291 * Update the times on the inode.
292 */
293/* ARGSUSED */
294static int
295ufs_close(ap)
296	struct vop_close_args /* {
297		struct vnode *a_vp;
298		int  a_fflag;
299		struct ucred *a_cred;
300		struct thread *a_td;
301	} */ *ap;
302{
303	struct vnode *vp = ap->a_vp;
304	int usecount;
305
306	VI_LOCK(vp);
307	usecount = vp->v_usecount;
308	if (usecount > 1)
309		ufs_itimes_locked(vp);
310	VI_UNLOCK(vp);
311	return (0);
312}
313
/*
 * VOP_ACCESSX handler: decide whether the credential a_cred may access
 * a_vp with the rights requested in a_accmode.
 *
 * Returns 0 when access is granted.  Otherwise returns EROFS for
 * modifying access to a directory, symlink or regular file on a
 * read-only mount; EPERM for modifying (non-admin) access to an
 * immutable or snapshot file; ENOENT (QUOTA kernels only) if the vnode
 * turned out to be doomed after its lock was upgraded; or the error
 * produced by getinoquota(), the ACL evaluation, vfs_unixify_accmode()
 * or vaccess().
 */
static int
ufs_accessx(ap)
	struct vop_accessx_args /* {
		struct vnode *a_vp;
		accmode_t a_accmode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	accmode_t accmode = ap->a_accmode;
	int error;
#ifdef QUOTA
	int relocked;
#endif
#ifdef UFS_ACL
	struct acl *acl;
	acl_type_t type;
#endif

	/*
	 * Disallow write attempts on read-only filesystems;
	 * unless the file is a socket, fifo, or a block or
	 * character device resident on the filesystem.
	 */
	if (accmode & VMODIFY_PERMS) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
#ifdef QUOTA
			/*
			 * Inode is accounted in the quotas only if struct
			 * dquot is attached to it. VOP_ACCESS() is called
			 * from vn_open_cred() and provides a convenient
			 * point to call getinoquota().
			 */
			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {

				/*
				 * Upgrade vnode lock, since getinoquota()
				 * requires exclusive lock to modify inode.
				 */
				relocked = 1;
				/*
				 * Hold the vnode across the upgrade, then
				 * check under the interlock whether it was
				 * doomed in the meantime; vdropl() releases
				 * the hold on both paths.
				 */
				vhold(vp);
				vn_lock(vp, LK_UPGRADE | LK_RETRY);
				VI_LOCK(vp);
				if (vp->v_iflag & VI_DOOMED) {
					vdropl(vp);
					error = ENOENT;
					goto relock;
				}
				vdropl(vp);
			} else
				relocked = 0;
			error = getinoquota(ip);
relock:
			/* Undo the upgrade so the caller's lock state is restored. */
			if (relocked)
				vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
			if (error != 0)
				return (error);
#endif
			break;
		default:
			break;
		}
	}

	/*
	 * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
	 * is here, because without it, it would be impossible for the owner
	 * to remove the IMMUTABLE flag.
	 */
	if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
	    (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
		return (EPERM);

#ifdef UFS_ACL
	/* ACL-enabled mount: evaluate the request against the file's ACL. */
	if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) {
		/* NFSv4 ACLs take precedence when both mount flags are set. */
		if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
			type = ACL_TYPE_NFS4;
		else
			type = ACL_TYPE_ACCESS;

		acl = acl_alloc(M_WAITOK);
		if (type == ACL_TYPE_NFS4)
			error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td);
		else
			error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td);
		switch (error) {
		case 0:
			if (type == ACL_TYPE_NFS4) {
				error = vaccess_acl_nfs4(vp->v_type, ip->i_uid,
				    ip->i_gid, acl, accmode, ap->a_cred, NULL);
			} else {
				/* POSIX.1e evaluation works on the unixified mode. */
				error = vfs_unixify_accmode(&accmode);
				if (error == 0)
					error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
					    ip->i_gid, acl, accmode, ap->a_cred, NULL);
			}
			break;
		default:
			/* EOPNOTSUPP is not reported; other errors are logged. */
			if (error != EOPNOTSUPP)
				printf(
"ufs_accessx(): Error retrieving ACL on object (%d).\n",
				    error);
			/*
			 * XXX: Fall back until debugged.  Should
			 * eventually possibly log an error, and return
			 * EPERM for safety.
			 */
			error = vfs_unixify_accmode(&accmode);
			if (error == 0)
				error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
				    ip->i_gid, accmode, ap->a_cred, NULL);
		}
		acl_free(acl);

		return (error);
	}
#endif /* UFS_ACL */
	/* No ACLs in effect: plain mode-bit check. */
	error = vfs_unixify_accmode(&accmode);
	if (error == 0)
		error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
		    accmode, ap->a_cred, NULL);
	return (error);
}
444
445/* ARGSUSED */
446static int
447ufs_getattr(ap)
448	struct vop_getattr_args /* {
449		struct vnode *a_vp;
450		struct vattr *a_vap;
451		struct ucred *a_cred;
452	} */ *ap;
453{
454	struct vnode *vp = ap->a_vp;
455	struct inode *ip = VTOI(vp);
456	struct vattr *vap = ap->a_vap;
457
458	VI_LOCK(vp);
459	ufs_itimes_locked(vp);
460	if (ip->i_ump->um_fstype == UFS1) {
461		vap->va_atime.tv_sec = ip->i_din1->di_atime;
462		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
463	} else {
464		vap->va_atime.tv_sec = ip->i_din2->di_atime;
465		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
466	}
467	VI_UNLOCK(vp);
468	/*
469	 * Copy from inode table
470	 */
471	vap->va_fsid = dev2udev(ip->i_dev);
472	vap->va_fileid = ip->i_number;
473	vap->va_mode = ip->i_mode & ~IFMT;
474	vap->va_nlink = ip->i_effnlink;
475	vap->va_uid = ip->i_uid;
476	vap->va_gid = ip->i_gid;
477	if (ip->i_ump->um_fstype == UFS1) {
478		vap->va_rdev = ip->i_din1->di_rdev;
479		vap->va_size = ip->i_din1->di_size;
480		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
481		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
482		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
483		vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
484		vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
485		vap->va_filerev = ip->i_din1->di_modrev;
486	} else {
487		vap->va_rdev = ip->i_din2->di_rdev;
488		vap->va_size = ip->i_din2->di_size;
489		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
490		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
491		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
492		vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
493		vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
494		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
495		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
496		vap->va_filerev = ip->i_din2->di_modrev;
497	}
498	vap->va_flags = ip->i_flags;
499	vap->va_gen = ip->i_gen;
500	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
501	vap->va_type = IFTOVT(ip->i_mode);
502	return (0);
503}
504
/*
 * Set attribute vnode op. called from several syscalls
 *
 * Every field of a_vap that is not VNOVAL is applied to the inode behind
 * a_vp, in this order: file flags, owner/group, size, access /
 * modification / birth times, and finally the mode.  The first failing
 * step aborts the operation; steps already applied are not undone.
 *
 * Returns 0 on success, EINVAL when an unsettable attribute is supplied,
 * EOPNOTSUPP for unknown flag bits, EROFS on a read-only mount, EPERM
 * when flags on the file forbid the change, EISDIR when a size is given
 * for a directory, or the error of the helper that failed.
 */
static int
ufs_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct ucred *cred = ap->a_cred;
	struct thread *td = curthread;
	int error;

	/*
	 * Check for unsettable attributes.
	 */
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}
	if (vap->va_flags != VNOVAL) {
		/* Reject any flag bit outside the supported set. */
		if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE |
		    SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE |
		    UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK |
		    UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
		    UF_SPARSE | UF_SYSTEM)) != 0)
			return (EOPNOTSUPP);
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/*
		 * Callers may only modify the file flags on objects they
		 * have VADMIN rights for.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
			return (error);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the security.jail.chflags_allowed sysctl
		 * is non-zero; otherwise, they behave like unprivileged
		 * processes.
		 */
		/* priv_check_cred() returns 0 when the privilege is held. */
		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
			if (ip->i_flags &
			    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
				error = securelevel_gt(cred, 0);
				if (error)
					return (error);
			}
			/* The snapshot flag cannot be toggled. */
			if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT)
				return (EPERM);
		} else {
			/*
			 * Unprivileged: refuse if a system flag is already
			 * set or if the request would change any bit in
			 * SF_SETTABLE.
			 */
			if (ip->i_flags &
			    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
				return (EPERM);
		}
		ip->i_flags = vap->va_flags;
		DIP_SET(ip, i_flags, vap->va_flags);
		ip->i_flag |= IN_CHANGE;
		error = UFS_UPDATE(vp, 0);
		/*
		 * If the new flags make the file immutable or append-only,
		 * stop here and report the update status instead of
		 * failing with EPERM below.
		 */
		if (ip->i_flags & (IMMUTABLE | APPEND))
			return (error);
	}
	/*
	 * If immutable or append, no one can change any of its attributes
	 * except the ones already handled (in some cases, file flags
	 * including the immutability flags themselves for the superuser).
	 */
	if (ip->i_flags & (IMMUTABLE | APPEND))
		return (EPERM);
	/*
	 * Go through the fields and update iff not VNOVAL.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
		    td)) != 0)
			return (error);
	}
	if (vap->va_size != VNOVAL) {
		/*
		 * XXX most of the following special cases should be in
		 * callers instead of in N filesystems.  The VDIR check
		 * mostly already is.
		 */
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VLNK:
		case VREG:
			/*
			 * Truncation should have an effect in these cases.
			 * Disallow it if the filesystem is read-only or
			 * the file is being snapshotted.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			if ((ip->i_flags & SF_SNAPSHOT) != 0)
				return (EPERM);
			break;
		default:
			/*
			 * According to POSIX, the result is unspecified
			 * for file types other than regular files,
			 * directories and shared memory objects.  We
			 * don't support shared memory objects in the file
			 * system, and have dubious support for truncating
			 * symlinks.  Just ignore the request in other cases.
			 *
			 * Note that this returns from the whole function,
			 * so time and mode changes carried by the same
			 * request are not applied either.
			 */
			return (0);
		}
		if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
		    cred)) != 0)
			return (error);
	}
	if (vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL ||
	    vap->va_birthtime.tv_sec != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if ((ip->i_flags & SF_SNAPSHOT) != 0)
			return (EPERM);
		error = vn_utimes_perm(vp, vap, cred, td);
		if (error != 0)
			return (error);
		/*
		 * Mark the affected times and let ufs_itimes() process
		 * the marks first; the caller-supplied values are then
		 * stored over whatever it wrote.
		 */
		if (vap->va_atime.tv_sec != VNOVAL)
			ip->i_flag |= IN_ACCESS;
		if (vap->va_mtime.tv_sec != VNOVAL)
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (vap->va_birthtime.tv_sec != VNOVAL &&
		    ip->i_ump->um_fstype == UFS2)
			ip->i_flag |= IN_MODIFIED;
		ufs_itimes(vp);
		if (vap->va_atime.tv_sec != VNOVAL) {
			DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
			DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
			DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
		}
		/* The birth time is stored only when the fstype is UFS2. */
		if (vap->va_birthtime.tv_sec != VNOVAL &&
		    ip->i_ump->um_fstype == UFS2) {
			ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
			ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
		}
		error = UFS_UPDATE(vp, 0);
		if (error)
			return (error);
	}
	error = 0;
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/* A snapshot file may not gain any write or execute bit. */
		if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
		   (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
			return (EPERM);
		error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
	}
	return (error);
}
679
#ifdef UFS_ACL
/*
 * Bring the NFSv4 ACL of vp in line with a new file mode: read the
 * current ACL, synchronise it from mode and file_owner_id, and write it
 * back.  The temporary ACL buffer is released on every path.  Returns 0
 * or the error from reading or writing the ACL.  The cred argument is
 * not used.
 */
static int
ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
    int file_owner_id, struct ucred *cred, struct thread *td)
{
	struct acl *nfs4acl;
	int error;

	nfs4acl = acl_alloc(M_WAITOK);

	/*
	 * We don't have to handle EOPNOTSUPP here, as the filesystem claims
	 * it supports ACLs.
	 */
	error = ufs_getacl_nfs4_internal(vp, nfs4acl, td);
	if (error == 0) {
		acl_nfs4_sync_acl_from_mode(nfs4acl, mode, file_owner_id);
		error = ufs_setacl_nfs4_internal(vp, nfs4acl, td);
	}

	acl_free(nfs4acl);
	return (error);
}
#endif /* UFS_ACL */
705
706/*
707 * Mark this file's access time for update for vfs_mark_atime().  This
708 * is called from execve() and mmap().
709 */
710static int
711ufs_markatime(ap)
712	struct vop_markatime_args /* {
713		struct vnode *a_vp;
714	} */ *ap;
715{
716	struct vnode *vp = ap->a_vp;
717	struct inode *ip = VTOI(vp);
718
719	VI_LOCK(vp);
720	ip->i_flag |= IN_ACCESS;
721	VI_UNLOCK(vp);
722	/*
723	 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there.
724	 */
725	return (0);
726}
727
728/*
729 * Change the mode on a file.
730 * Inode must be locked before calling.
731 */
732static int
733ufs_chmod(vp, mode, cred, td)
734	struct vnode *vp;
735	int mode;
736	struct ucred *cred;
737	struct thread *td;
738{
739	struct inode *ip = VTOI(vp);
740	int error;
741
742	/*
743	 * To modify the permissions on a file, must possess VADMIN
744	 * for that file.
745	 */
746	if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td)))
747		return (error);
748	/*
749	 * Privileged processes may set the sticky bit on non-directories,
750	 * as well as set the setgid bit on a file with a group that the
751	 * process is not a member of.  Both of these are allowed in
752	 * jail(8).
753	 */
754	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
755		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
756			return (EFTYPE);
757	}
758	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
759		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
760		if (error)
761			return (error);
762	}
763
764	/*
765	 * Deny setting setuid if we are not the file owner.
766	 */
767	if ((mode & ISUID) && ip->i_uid != cred->cr_uid) {
768		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
769		if (error)
770			return (error);
771	}
772
773	ip->i_mode &= ~ALLPERMS;
774	ip->i_mode |= (mode & ALLPERMS);
775	DIP_SET(ip, i_mode, ip->i_mode);
776	ip->i_flag |= IN_CHANGE;
777#ifdef UFS_ACL
778	if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
779		error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
780#endif
781	if (error == 0 && (ip->i_flag & IN_CHANGE) != 0)
782		error = UFS_UPDATE(vp, 0);
783
784	return (error);
785}
786
/*
 * Perform chown operation on inode ip;
 * inode must be locked prior to call.
 *
 * A uid or gid of VNOVAL means "leave unchanged".  On QUOTA kernels the
 * file is first taken out of the quota records attached for the old
 * ids, then charged against the records for the new ids; if that charge
 * fails the old ids are restored and the error is returned.  When the
 * owner or group actually changed, the set-user-id and set-group-id
 * bits are cleared unless the caller holds PRIV_VFS_RETAINSUGID.
 *
 * Returns 0 on success or the error of the failing access check,
 * privilege check, quota operation or inode update.
 *
 * NOTE(review): chkdq()/chkiq() presumably adjust the block and inode
 * usage of the attached dquots -- their definitions are not in this
 * file; confirm there.
 */
static int
ufs_chown(vp, uid, gid, cred, td)
	struct vnode *vp;
	uid_t uid;
	gid_t gid;
	struct ucred *cred;
	struct thread *td;
{
	struct inode *ip = VTOI(vp);
	uid_t ouid;
	gid_t ogid;
	int error = 0;
#ifdef QUOTA
	int i;
	ufs2_daddr_t change;
#endif

	/* VNOVAL selects the current value, i.e. no change for that id. */
	if (uid == (uid_t)VNOVAL)
		uid = ip->i_uid;
	if (gid == (gid_t)VNOVAL)
		gid = ip->i_gid;
	/*
	 * To modify the ownership of a file, must possess VADMIN for that
	 * file.
	 */
	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
		return (error);
	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	if (((uid != ip->i_uid && uid != cred->cr_uid) ||
	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
		return (error);
	/* Remember the old ids for the quota rollback and the SUID check. */
	ogid = ip->i_gid;
	ouid = ip->i_uid;
#ifdef QUOTA
	if ((error = getinoquota(ip)) != 0)
		return (error);
	/*
	 * Detach the dquot of any id that is not changing, so that the
	 * chkdq()/chkiq() calls below act only on the ids being replaced.
	 */
	if (ouid == uid) {
		dqrele(vp, ip->i_dquot[USRQUOTA]);
		ip->i_dquot[USRQUOTA] = NODQUOT;
	}
	if (ogid == gid) {
		dqrele(vp, ip->i_dquot[GRPQUOTA]);
		ip->i_dquot[GRPQUOTA] = NODQUOT;
	}
	/* Take the file's blocks and its one inode off the old ids. */
	change = DIP(ip, i_blocks);
	(void) chkdq(ip, -change, cred, CHOWN);
	(void) chkiq(ip, -1, cred, CHOWN);
	/* Drop every remaining dquot reference before switching ids. */
	for (i = 0; i < MAXQUOTAS; i++) {
		dqrele(vp, ip->i_dquot[i]);
		ip->i_dquot[i] = NODQUOT;
	}
#endif
	ip->i_gid = gid;
	DIP_SET(ip, i_gid, gid);
	ip->i_uid = uid;
	DIP_SET(ip, i_uid, uid);
#ifdef QUOTA
	/* Attach the dquots for the new ids and charge the file to them. */
	if ((error = getinoquota(ip)) == 0) {
		if (ouid == uid) {
			dqrele(vp, ip->i_dquot[USRQUOTA]);
			ip->i_dquot[USRQUOTA] = NODQUOT;
		}
		if (ogid == gid) {
			dqrele(vp, ip->i_dquot[GRPQUOTA]);
			ip->i_dquot[GRPQUOTA] = NODQUOT;
		}
		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
				goto good;
			else
				/* Inode charge failed: undo the block charge. */
				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
		}
		for (i = 0; i < MAXQUOTAS; i++) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
	/*
	 * Failure path: put the old ids back and, if their dquots can be
	 * attached again, force the usage back onto them.
	 */
	ip->i_gid = ogid;
	DIP_SET(ip, i_gid, ogid);
	ip->i_uid = ouid;
	DIP_SET(ip, i_uid, ouid);
	if (getinoquota(ip) == 0) {
		if (ouid == uid) {
			dqrele(vp, ip->i_dquot[USRQUOTA]);
			ip->i_dquot[USRQUOTA] = NODQUOT;
		}
		if (ogid == gid) {
			dqrele(vp, ip->i_dquot[GRPQUOTA]);
			ip->i_dquot[GRPQUOTA] = NODQUOT;
		}
		(void) chkdq(ip, change, cred, FORCE|CHOWN);
		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
		(void) getinoquota(ip);
	}
	return (error);
good:
	/* Re-attach any dquot detached above for an unchanged id. */
	if (getinoquota(ip))
		panic("ufs_chown: lost quota");
#endif /* QUOTA */
	ip->i_flag |= IN_CHANGE;
	/*
	 * An actual ownership change strips set-id bits unless the caller
	 * is privileged to keep them (priv_check_cred() returns non-zero
	 * when the privilege is missing).
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			DIP_SET(ip, i_mode, ip->i_mode);
		}
	}
	error = UFS_UPDATE(vp, 0);
	return (error);
}
905
906static int
907ufs_remove(ap)
908	struct vop_remove_args /* {
909		struct vnode *a_dvp;
910		struct vnode *a_vp;
911		struct componentname *a_cnp;
912	} */ *ap;
913{
914	struct inode *ip;
915	struct vnode *vp = ap->a_vp;
916	struct vnode *dvp = ap->a_dvp;
917	int error;
918	struct thread *td;
919
920	td = curthread;
921	ip = VTOI(vp);
922	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
923	    (VTOI(dvp)->i_flags & APPEND)) {
924		error = EPERM;
925		goto out;
926	}
927#ifdef UFS_GJOURNAL
928	ufs_gjournal_orphan(vp);
929#endif
930	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
931	if (ip->i_nlink <= 0)
932		vp->v_vflag |= VV_NOSYNC;
933	if ((ip->i_flags & SF_SNAPSHOT) != 0) {
934		/*
935		 * Avoid deadlock where another thread is trying to
936		 * update the inodeblock for dvp and is waiting on
937		 * snaplk.  Temporary unlock the vnode lock for the
938		 * unlinked file and sync the directory.  This should
939		 * allow vput() of the directory to not block later on
940		 * while holding the snapshot vnode locked, assuming
941		 * that the directory hasn't been unlinked too.
942		 */
943		VOP_UNLOCK(vp, 0);
944		(void) VOP_FSYNC(dvp, MNT_WAIT, td);
945		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
946	}
947out:
948	return (error);
949}
950
951/*
952 * link vnode call
953 */
954static int
955ufs_link(ap)
956	struct vop_link_args /* {
957		struct vnode *a_tdvp;
958		struct vnode *a_vp;
959		struct componentname *a_cnp;
960	} */ *ap;
961{
962	struct vnode *vp = ap->a_vp;
963	struct vnode *tdvp = ap->a_tdvp;
964	struct componentname *cnp = ap->a_cnp;
965	struct inode *ip;
966	struct direct newdir;
967	int error;
968
969#ifdef INVARIANTS
970	if ((cnp->cn_flags & HASBUF) == 0)
971		panic("ufs_link: no name");
972#endif
973	if (VTOI(tdvp)->i_effnlink < 2)
974		panic("ufs_link: Bad link count %d on parent",
975		    VTOI(tdvp)->i_effnlink);
976	ip = VTOI(vp);
977	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
978		error = EMLINK;
979		goto out;
980	}
981	/*
982	 * The file may have been removed after namei droped the original
983	 * lock.
984	 */
985	if (ip->i_effnlink == 0) {
986		error = ENOENT;
987		goto out;
988	}
989	if (ip->i_flags & (IMMUTABLE | APPEND)) {
990		error = EPERM;
991		goto out;
992	}
993	ip->i_effnlink++;
994	ip->i_nlink++;
995	DIP_SET(ip, i_nlink, ip->i_nlink);
996	ip->i_flag |= IN_CHANGE;
997	if (DOINGSOFTDEP(vp))
998		softdep_setup_link(VTOI(tdvp), ip);
999	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
1000	if (!error) {
1001		ufs_makedirentry(ip, cnp, &newdir);
1002		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
1003	}
1004
1005	if (error) {
1006		ip->i_effnlink--;
1007		ip->i_nlink--;
1008		DIP_SET(ip, i_nlink, ip->i_nlink);
1009		ip->i_flag |= IN_CHANGE;
1010		if (DOINGSOFTDEP(vp))
1011			softdep_revert_link(VTOI(tdvp), ip);
1012	}
1013out:
1014	return (error);
1015}
1016
1017/*
1018 * whiteout vnode call
1019 */
1020static int
1021ufs_whiteout(ap)
1022	struct vop_whiteout_args /* {
1023		struct vnode *a_dvp;
1024		struct componentname *a_cnp;
1025		int a_flags;
1026	} */ *ap;
1027{
1028	struct vnode *dvp = ap->a_dvp;
1029	struct componentname *cnp = ap->a_cnp;
1030	struct direct newdir;
1031	int error = 0;
1032
1033	switch (ap->a_flags) {
1034	case LOOKUP:
1035		/* 4.4 format directories support whiteout operations */
1036		if (dvp->v_mount->mnt_maxsymlinklen > 0)
1037			return (0);
1038		return (EOPNOTSUPP);
1039
1040	case CREATE:
1041		/* create a new directory whiteout */
1042#ifdef INVARIANTS
1043		if ((cnp->cn_flags & SAVENAME) == 0)
1044			panic("ufs_whiteout: missing name");
1045		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
1046			panic("ufs_whiteout: old format filesystem");
1047#endif
1048
1049		newdir.d_ino = WINO;
1050		newdir.d_namlen = cnp->cn_namelen;
1051		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
1052		newdir.d_type = DT_WHT;
1053		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
1054		break;
1055
1056	case DELETE:
1057		/* remove an existing directory whiteout */
1058#ifdef INVARIANTS
1059		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
1060			panic("ufs_whiteout: old format filesystem");
1061#endif
1062
1063		cnp->cn_flags &= ~DOWHITEOUT;
1064		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
1065		break;
1066	default:
1067		panic("ufs_whiteout: unknown op");
1068	}
1069	return (error);
1070}
1071
/*
 * Counter exported read-only as the vfs.ufs.rename_restarts sysctl.
 * The variable is volatile, so the qualifier is stripped with
 * __DEVOLATILE() to hand SYSCTL_INT() a plain int pointer.
 */
static volatile int rename_restarts;
SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
    __DEVOLATILE(int *, &rename_restarts), 0,
    "Times rename had to restart due to lock contention");
1076
1077/*
1078 * Rename system call.
1079 * 	rename("foo", "bar");
1080 * is essentially
1081 *	unlink("bar");
1082 *	link("foo", "bar");
1083 *	unlink("foo");
1084 * but ``atomically''.  Can't do full commit without saving state in the
1085 * inode on disk which isn't feasible at this time.  Best we can do is
1086 * always guarantee the target exists.
1087 *
1088 * Basic algorithm is:
1089 *
1090 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensures the inode won't be deleted out
1092 *    from underneath us while we work (it may be truncated by
1093 *    a concurrent `trunc' or `open' for creation).
1094 * 2) Link source to destination.  If destination already exists,
1095 *    delete it first.
1096 * 3) Unlink source reference to inode if still around. If a
1097 *    directory was moved and the parent of the destination
1098 *    is different from the source, patch the ".." entry in the
1099 *    directory.
1100 */
1101static int
1102ufs_rename(ap)
1103	struct vop_rename_args  /* {
1104		struct vnode *a_fdvp;
1105		struct vnode *a_fvp;
1106		struct componentname *a_fcnp;
1107		struct vnode *a_tdvp;
1108		struct vnode *a_tvp;
1109		struct componentname *a_tcnp;
1110	} */ *ap;
1111{
1112	struct vnode *tvp = ap->a_tvp;
1113	struct vnode *tdvp = ap->a_tdvp;
1114	struct vnode *fvp = ap->a_fvp;
1115	struct vnode *fdvp = ap->a_fdvp;
1116	struct vnode *nvp;
1117	struct componentname *tcnp = ap->a_tcnp;
1118	struct componentname *fcnp = ap->a_fcnp;
1119	struct thread *td = fcnp->cn_thread;
1120	struct inode *fip, *tip, *tdp, *fdp;
1121	struct direct newdir;
1122	off_t endoff;
1123	int doingdirectory, newparent;
1124	int error = 0;
1125	struct mount *mp;
1126	ino_t ino;
1127
1128#ifdef INVARIANTS
1129	if ((tcnp->cn_flags & HASBUF) == 0 ||
1130	    (fcnp->cn_flags & HASBUF) == 0)
1131		panic("ufs_rename: no name");
1132#endif
1133	endoff = 0;
1134	mp = tdvp->v_mount;
1135	VOP_UNLOCK(tdvp, 0);
1136	if (tvp && tvp != tdvp)
1137		VOP_UNLOCK(tvp, 0);
1138	/*
1139	 * Check for cross-device rename.
1140	 */
1141	if ((fvp->v_mount != tdvp->v_mount) ||
1142	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1143		error = EXDEV;
1144		mp = NULL;
1145		goto releout;
1146	}
1147relock:
1148	/*
1149	 * We need to acquire 2 to 4 locks depending on whether tvp is NULL
1150	 * and fdvp and tdvp are the same directory.  Subsequently we need
1151	 * to double-check all paths and in the directory rename case we
1152	 * need to verify that we are not creating a directory loop.  To
1153	 * handle this we acquire all but fdvp using non-blocking
1154	 * acquisitions.  If we fail to acquire any lock in the path we will
1155	 * drop all held locks, acquire the new lock in a blocking fashion,
1156	 * and then release it and restart the rename.  This acquire/release
1157	 * step ensures that we do not spin on a lock waiting for release.
1158	 */
1159	error = vn_lock(fdvp, LK_EXCLUSIVE);
1160	if (error)
1161		goto releout;
1162	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1163		VOP_UNLOCK(fdvp, 0);
1164		error = vn_lock(tdvp, LK_EXCLUSIVE);
1165		if (error)
1166			goto releout;
1167		VOP_UNLOCK(tdvp, 0);
1168		atomic_add_int(&rename_restarts, 1);
1169		goto relock;
1170	}
1171	/*
1172	 * Re-resolve fvp to be certain it still exists and fetch the
1173	 * correct vnode.
1174	 */
1175	error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
1176	if (error) {
1177		VOP_UNLOCK(fdvp, 0);
1178		VOP_UNLOCK(tdvp, 0);
1179		goto releout;
1180	}
1181	error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
1182	if (error) {
1183		VOP_UNLOCK(fdvp, 0);
1184		VOP_UNLOCK(tdvp, 0);
1185		if (error != EBUSY)
1186			goto releout;
1187		error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
1188		if (error != 0)
1189			goto releout;
1190		VOP_UNLOCK(nvp, 0);
1191		vrele(fvp);
1192		fvp = nvp;
1193		atomic_add_int(&rename_restarts, 1);
1194		goto relock;
1195	}
1196	vrele(fvp);
1197	fvp = nvp;
1198	/*
1199	 * Re-resolve tvp and acquire the vnode lock if present.
1200	 */
1201	error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
1202	if (error != 0 && error != EJUSTRETURN) {
1203		VOP_UNLOCK(fdvp, 0);
1204		VOP_UNLOCK(tdvp, 0);
1205		VOP_UNLOCK(fvp, 0);
1206		goto releout;
1207	}
1208	/*
1209	 * If tvp disappeared we just carry on.
1210	 */
1211	if (error == EJUSTRETURN && tvp != NULL) {
1212		vrele(tvp);
1213		tvp = NULL;
1214	}
1215	/*
1216	 * Get the tvp ino if the lookup succeeded.  We may have to restart
1217	 * if the non-blocking acquire fails.
1218	 */
1219	if (error == 0) {
1220		nvp = NULL;
1221		error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
1222		if (tvp)
1223			vrele(tvp);
1224		tvp = nvp;
1225		if (error) {
1226			VOP_UNLOCK(fdvp, 0);
1227			VOP_UNLOCK(tdvp, 0);
1228			VOP_UNLOCK(fvp, 0);
1229			if (error != EBUSY)
1230				goto releout;
1231			error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
1232			if (error != 0)
1233				goto releout;
1234			vput(nvp);
1235			atomic_add_int(&rename_restarts, 1);
1236			goto relock;
1237		}
1238	}
1239	fdp = VTOI(fdvp);
1240	fip = VTOI(fvp);
1241	tdp = VTOI(tdvp);
1242	tip = NULL;
1243	if (tvp)
1244		tip = VTOI(tvp);
1245	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1246	    (VTOI(tdvp)->i_flags & APPEND))) {
1247		error = EPERM;
1248		goto unlockout;
1249	}
1250	/*
1251	 * Renaming a file to itself has no effect.  The upper layers should
1252	 * not call us in that case.  However, things could change after
1253	 * we drop the locks above.
1254	 */
1255	if (fvp == tvp) {
1256		error = 0;
1257		goto unlockout;
1258	}
1259	doingdirectory = 0;
1260	newparent = 0;
1261	ino = fip->i_number;
1262	if (fip->i_nlink >= LINK_MAX) {
1263		error = EMLINK;
1264		goto unlockout;
1265	}
1266	if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
1267	    || (fdp->i_flags & APPEND)) {
1268		error = EPERM;
1269		goto unlockout;
1270	}
1271	if ((fip->i_mode & IFMT) == IFDIR) {
1272		/*
1273		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1274		 */
1275		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1276		    fdp == fip ||
1277		    (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
1278			error = EINVAL;
1279			goto unlockout;
1280		}
1281		if (fdp->i_number != tdp->i_number)
1282			newparent = tdp->i_number;
1283		doingdirectory = 1;
1284	}
1285	if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
1286	    (tvp != NULL && tvp->v_type == VDIR &&
1287	    tvp->v_mountedhere != NULL)) {
1288		error = EXDEV;
1289		goto unlockout;
1290	}
1291
1292	/*
1293	 * If ".." must be changed (ie the directory gets a new
1294	 * parent) then the source directory must not be in the
1295	 * directory hierarchy above the target, as this would
1296	 * orphan everything below the source directory. Also
1297	 * the user must have write permission in the source so
1298	 * as to be able to change "..".
1299	 */
1300	if (doingdirectory && newparent) {
1301		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
1302		if (error)
1303			goto unlockout;
1304		error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
1305		    &ino);
1306		/*
1307		 * We encountered a lock that we have to wait for.  Unlock
1308		 * everything else and VGET before restarting.
1309		 */
1310		if (ino) {
1311			VOP_UNLOCK(fdvp, 0);
1312			VOP_UNLOCK(fvp, 0);
1313			VOP_UNLOCK(tdvp, 0);
1314			if (tvp)
1315				VOP_UNLOCK(tvp, 0);
1316			error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
1317			if (error == 0)
1318				vput(nvp);
1319			atomic_add_int(&rename_restarts, 1);
1320			goto relock;
1321		}
1322		if (error)
1323			goto unlockout;
1324		if ((tcnp->cn_flags & SAVESTART) == 0)
1325			panic("ufs_rename: lost to startdir");
1326	}
1327	if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
1328	    tdp->i_effnlink == 0)
1329		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
1330
1331	/*
1332	 * 1) Bump link count while we're moving stuff
1333	 *    around.  If we crash somewhere before
1334	 *    completing our work, the link count
1335	 *    may be wrong, but correctable.
1336	 */
1337	fip->i_effnlink++;
1338	fip->i_nlink++;
1339	DIP_SET(fip, i_nlink, fip->i_nlink);
1340	fip->i_flag |= IN_CHANGE;
1341	if (DOINGSOFTDEP(fvp))
1342		softdep_setup_link(tdp, fip);
1343	error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
1344	if (error)
1345		goto bad;
1346
1347	/*
1348	 * 2) If target doesn't exist, link the target
1349	 *    to the source and unlink the source.
1350	 *    Otherwise, rewrite the target directory
1351	 *    entry to reference the source inode and
1352	 *    expunge the original entry's existence.
1353	 */
1354	if (tip == NULL) {
1355		if (tdp->i_dev != fip->i_dev)
1356			panic("ufs_rename: EXDEV");
1357		if (doingdirectory && newparent) {
1358			/*
1359			 * Account for ".." in new directory.
1360			 * When source and destination have the same
1361			 * parent we don't adjust the link count.  The
1362			 * actual link modification is completed when
1363			 * .. is rewritten below.
1364			 */
1365			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
1366				error = EMLINK;
1367				goto bad;
1368			}
1369		}
1370		ufs_makedirentry(fip, tcnp, &newdir);
1371		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
1372		if (error)
1373			goto bad;
1374		/* Setup tdvp for directory compaction if needed. */
1375		if (tdp->i_count && tdp->i_endoff &&
1376		    tdp->i_endoff < tdp->i_size)
1377			endoff = tdp->i_endoff;
1378	} else {
1379		if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
1380			panic("ufs_rename: EXDEV");
1381		/*
1382		 * Short circuit rename(foo, foo).
1383		 */
1384		if (tip->i_number == fip->i_number)
1385			panic("ufs_rename: same file");
1386		/*
1387		 * If the parent directory is "sticky", then the caller
1388		 * must possess VADMIN for the parent directory, or the
1389		 * destination of the rename.  This implements append-only
1390		 * directories.
1391		 */
1392		if ((tdp->i_mode & S_ISTXT) &&
1393		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
1394		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
1395			error = EPERM;
1396			goto bad;
1397		}
1398		/*
1399		 * Target must be empty if a directory and have no links
1400		 * to it. Also, ensure source and target are compatible
1401		 * (both directories, or both not directories).
1402		 */
1403		if ((tip->i_mode & IFMT) == IFDIR) {
1404			if ((tip->i_effnlink > 2) ||
1405			    !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
1406				error = ENOTEMPTY;
1407				goto bad;
1408			}
1409			if (!doingdirectory) {
1410				error = ENOTDIR;
1411				goto bad;
1412			}
1413			cache_purge(tdvp);
1414		} else if (doingdirectory) {
1415			error = EISDIR;
1416			goto bad;
1417		}
1418		if (doingdirectory) {
1419			if (!newparent) {
1420				tdp->i_effnlink--;
1421				if (DOINGSOFTDEP(tdvp))
1422					softdep_change_linkcnt(tdp);
1423			}
1424			tip->i_effnlink--;
1425			if (DOINGSOFTDEP(tvp))
1426				softdep_change_linkcnt(tip);
1427		}
1428		error = ufs_dirrewrite(tdp, tip, fip->i_number,
1429		    IFTODT(fip->i_mode),
1430		    (doingdirectory && newparent) ? newparent : doingdirectory);
1431		if (error) {
1432			if (doingdirectory) {
1433				if (!newparent) {
1434					tdp->i_effnlink++;
1435					if (DOINGSOFTDEP(tdvp))
1436						softdep_change_linkcnt(tdp);
1437				}
1438				tip->i_effnlink++;
1439				if (DOINGSOFTDEP(tvp))
1440					softdep_change_linkcnt(tip);
1441			}
1442		}
1443		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1444			/*
1445			 * The only stuff left in the directory is "."
1446			 * and "..". The "." reference is inconsequential
1447			 * since we are quashing it. We have removed the "."
1448			 * reference and the reference in the parent directory,
1449			 * but there may be other hard links. The soft
1450			 * dependency code will arrange to do these operations
1451			 * after the parent directory entry has been deleted on
1452			 * disk, so when running with that code we avoid doing
1453			 * them now.
1454			 */
1455			if (!newparent) {
1456				tdp->i_nlink--;
1457				DIP_SET(tdp, i_nlink, tdp->i_nlink);
1458				tdp->i_flag |= IN_CHANGE;
1459			}
1460			tip->i_nlink--;
1461			DIP_SET(tip, i_nlink, tip->i_nlink);
1462			tip->i_flag |= IN_CHANGE;
1463		}
1464	}
1465
1466	/*
1467	 * 3) Unlink the source.  We have to resolve the path again to
1468	 * fixup the directory offset and count for ufs_dirremove.
1469	 */
1470	if (fdvp == tdvp) {
1471		error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
1472		if (error)
1473			panic("ufs_rename: from entry went away!");
1474		if (ino != fip->i_number)
1475			panic("ufs_rename: ino mismatch %ju != %ju\n",
1476			    (uintmax_t)ino, (uintmax_t)fip->i_number);
1477	}
1478	/*
1479	 * If the source is a directory with a
1480	 * new parent, the link count of the old
1481	 * parent directory must be decremented
1482	 * and ".." set to point to the new parent.
1483	 */
1484	if (doingdirectory && newparent) {
1485		/*
1486		 * If tip exists we simply use its link, otherwise we must
1487		 * add a new one.
1488		 */
1489		if (tip == NULL) {
1490			tdp->i_effnlink++;
1491			tdp->i_nlink++;
1492			DIP_SET(tdp, i_nlink, tdp->i_nlink);
1493			tdp->i_flag |= IN_CHANGE;
1494			if (DOINGSOFTDEP(tdvp))
1495				softdep_setup_dotdot_link(tdp, fip);
1496			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1497						   DOINGASYNC(tdvp)));
1498			/* Don't go to bad here as the new link exists. */
1499			if (error)
1500				goto unlockout;
1501		} else if (DOINGSUJ(tdvp))
1502			/* Journal must account for each new link. */
1503			softdep_setup_dotdot_link(tdp, fip);
1504		fip->i_offset = mastertemplate.dot_reclen;
1505		ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
1506		cache_purge(fdvp);
1507	}
1508	error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
1509	/*
1510	 * The kern_renameat() looks up the fvp using the DELETE flag, which
1511	 * causes the removal of the name cache entry for fvp.
1512	 * As the relookup of the fvp is done in two steps:
1513	 * ufs_lookup_ino() and then VFS_VGET(), another thread might do a
1514	 * normal lookup of the from name just before the VFS_VGET() call,
1515	 * causing the cache entry to be re-instantiated.
1516	 *
1517	 * The same issue also applies to tvp if it exists as
1518	 * otherwise we may have a stale name cache entry for the new
1519	 * name that references the old i-node if it has other links
1520	 * or open file descriptors.
1521	 */
1522	cache_purge(fvp);
1523	if (tvp)
1524		cache_purge(tvp);
1525	cache_purge_negative(tdvp);
1526
1527unlockout:
1528	vput(fdvp);
1529	vput(fvp);
1530	if (tvp)
1531		vput(tvp);
1532	/*
1533	 * If compaction or fsync was requested do it now that other locks
1534	 * are no longer needed.
1535	 */
1536	if (error == 0 && endoff != 0) {
1537#ifdef UFS_DIRHASH
1538		if (tdp->i_dirhash != NULL)
1539			ufsdirhash_dirtrunc(tdp, endoff);
1540#endif
1541		UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred);
1542	}
1543	if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
1544		error = VOP_FSYNC(tdvp, MNT_WAIT, td);
1545	vput(tdvp);
1546	return (error);
1547
1548bad:
1549	fip->i_effnlink--;
1550	fip->i_nlink--;
1551	DIP_SET(fip, i_nlink, fip->i_nlink);
1552	fip->i_flag |= IN_CHANGE;
1553	if (DOINGSOFTDEP(fvp))
1554		softdep_revert_link(tdp, fip);
1555	goto unlockout;
1556
1557releout:
1558	vrele(fdvp);
1559	vrele(fvp);
1560	vrele(tdvp);
1561	if (tvp)
1562		vrele(tvp);
1563
1564	return (error);
1565}
1566
1567#ifdef UFS_ACL
1568static int
1569ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
1570    mode_t dmode, struct ucred *cred, struct thread *td)
1571{
1572	int error;
1573	struct inode *ip = VTOI(tvp);
1574	struct acl *dacl, *acl;
1575
1576	acl = acl_alloc(M_WAITOK);
1577	dacl = acl_alloc(M_WAITOK);
1578
1579	/*
1580	 * Retrieve default ACL from parent, if any.
1581	 */
1582	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
1583	switch (error) {
1584	case 0:
1585		/*
1586		 * Retrieved a default ACL, so merge mode and ACL if
1587		 * necessary.  If the ACL is empty, fall through to
1588		 * the "not defined or available" case.
1589		 */
1590		if (acl->acl_cnt != 0) {
1591			dmode = acl_posix1e_newfilemode(dmode, acl);
1592			ip->i_mode = dmode;
1593			DIP_SET(ip, i_mode, dmode);
1594			*dacl = *acl;
1595			ufs_sync_acl_from_inode(ip, acl);
1596			break;
1597		}
1598		/* FALLTHROUGH */
1599
1600	case EOPNOTSUPP:
1601		/*
1602		 * Just use the mode as-is.
1603		 */
1604		ip->i_mode = dmode;
1605		DIP_SET(ip, i_mode, dmode);
1606		error = 0;
1607		goto out;
1608
1609	default:
1610		goto out;
1611	}
1612
1613	/*
1614	 * XXX: If we abort now, will Soft Updates notify the extattr
1615	 * code that the EAs for the file need to be released?
1616	 */
1617	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
1618	if (error == 0)
1619		error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
1620	switch (error) {
1621	case 0:
1622		break;
1623
1624	case EOPNOTSUPP:
1625		/*
1626		 * XXX: This should not happen, as EOPNOTSUPP above
1627		 * was supposed to free acl.
1628		 */
1629		printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
1630		/*
1631		panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
1632		 */
1633		break;
1634
1635	default:
1636		goto out;
1637	}
1638
1639out:
1640	acl_free(acl);
1641	acl_free(dacl);
1642
1643	return (error);
1644}
1645
1646static int
1647ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
1648    mode_t mode, struct ucred *cred, struct thread *td)
1649{
1650	int error;
1651	struct inode *ip = VTOI(tvp);
1652	struct acl *acl;
1653
1654	acl = acl_alloc(M_WAITOK);
1655
1656	/*
1657	 * Retrieve default ACL for parent, if any.
1658	 */
1659	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
1660	switch (error) {
1661	case 0:
1662		/*
1663		 * Retrieved a default ACL, so merge mode and ACL if
1664		 * necessary.
1665		 */
1666		if (acl->acl_cnt != 0) {
1667			/*
1668			 * Two possible ways for default ACL to not
1669			 * be present.  First, the EA can be
1670			 * undefined, or second, the default ACL can
1671			 * be blank.  If it's blank, fall through to
1672			 * the it's not defined case.
1673			 */
1674			mode = acl_posix1e_newfilemode(mode, acl);
1675			ip->i_mode = mode;
1676			DIP_SET(ip, i_mode, mode);
1677			ufs_sync_acl_from_inode(ip, acl);
1678			break;
1679		}
1680		/* FALLTHROUGH */
1681
1682	case EOPNOTSUPP:
1683		/*
1684		 * Just use the mode as-is.
1685		 */
1686		ip->i_mode = mode;
1687		DIP_SET(ip, i_mode, mode);
1688		error = 0;
1689		goto out;
1690
1691	default:
1692		goto out;
1693	}
1694
1695	/*
1696	 * XXX: If we abort now, will Soft Updates notify the extattr
1697	 * code that the EAs for the file need to be released?
1698	 */
1699	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
1700	switch (error) {
1701	case 0:
1702		break;
1703
1704	case EOPNOTSUPP:
1705		/*
1706		 * XXX: This should not happen, as EOPNOTSUPP above was
1707		 * supposed to free acl.
1708		 */
1709		printf("ufs_makeinode: VOP_GETACL() but no "
1710		    "VOP_SETACL()\n");
1711		/* panic("ufs_makeinode: VOP_GETACL() but no "
1712		    "VOP_SETACL()"); */
1713		break;
1714
1715	default:
1716		goto out;
1717	}
1718
1719out:
1720	acl_free(acl);
1721
1722	return (error);
1723}
1724
1725static int
1726ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
1727    mode_t child_mode, struct ucred *cred, struct thread *td)
1728{
1729	int error;
1730	struct acl *parent_aclp, *child_aclp;
1731
1732	parent_aclp = acl_alloc(M_WAITOK);
1733	child_aclp = acl_alloc(M_WAITOK | M_ZERO);
1734
1735	error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td);
1736	if (error)
1737		goto out;
1738	acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
1739	    child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
1740	error = ufs_setacl_nfs4_internal(tvp, child_aclp, td);
1741	if (error)
1742		goto out;
1743out:
1744	acl_free(parent_aclp);
1745	acl_free(child_aclp);
1746
1747	return (error);
1748}
1749#endif
1750
1751/*
1752 * Mkdir system call
1753 */
1754static int
1755ufs_mkdir(ap)
1756	struct vop_mkdir_args /* {
1757		struct vnode *a_dvp;
1758		struct vnode **a_vpp;
1759		struct componentname *a_cnp;
1760		struct vattr *a_vap;
1761	} */ *ap;
1762{
1763	struct vnode *dvp = ap->a_dvp;
1764	struct vattr *vap = ap->a_vap;
1765	struct componentname *cnp = ap->a_cnp;
1766	struct inode *ip, *dp;
1767	struct vnode *tvp;
1768	struct buf *bp;
1769	struct dirtemplate dirtemplate, *dtp;
1770	struct direct newdir;
1771	int error, dmode;
1772	long blkoff;
1773
1774#ifdef INVARIANTS
1775	if ((cnp->cn_flags & HASBUF) == 0)
1776		panic("ufs_mkdir: no name");
1777#endif
1778	dp = VTOI(dvp);
1779	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1780		error = EMLINK;
1781		goto out;
1782	}
1783	dmode = vap->va_mode & 0777;
1784	dmode |= IFDIR;
1785	/*
1786	 * Must simulate part of ufs_makeinode here to acquire the inode,
1787	 * but not have it entered in the parent directory. The entry is
1788	 * made later after writing "." and ".." entries.
1789	 */
1790	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1791	if (error)
1792		goto out;
1793	ip = VTOI(tvp);
1794	ip->i_gid = dp->i_gid;
1795	DIP_SET(ip, i_gid, dp->i_gid);
1796#ifdef SUIDDIR
1797	{
1798#ifdef QUOTA
1799		struct ucred ucred, *ucp;
1800		gid_t ucred_group;
1801		ucp = cnp->cn_cred;
1802#endif
1803		/*
1804		 * If we are hacking owners here, (only do this where told to)
1805		 * and we are not giving it TO root, (would subvert quotas)
1806		 * then go ahead and give it to the other user.
1807		 * The new directory also inherits the SUID bit.
1808		 * If user's UID and dir UID are the same,
1809		 * 'give it away' so that the SUID is still forced on.
1810		 */
1811		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1812		    (dp->i_mode & ISUID) && dp->i_uid) {
1813			dmode |= ISUID;
1814			ip->i_uid = dp->i_uid;
1815			DIP_SET(ip, i_uid, dp->i_uid);
1816#ifdef QUOTA
1817			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1818				/*
1819				 * Make sure the correct user gets charged
1820				 * for the space.
1821				 * Make a dummy credential for the victim.
1822				 * XXX This seems to never be accessed out of
1823				 * our context so a stack variable is ok.
1824				 */
1825				refcount_init(&ucred.cr_ref, 1);
1826				ucred.cr_uid = ip->i_uid;
1827				ucred.cr_ngroups = 1;
1828				ucred.cr_groups = &ucred_group;
1829				ucred.cr_groups[0] = dp->i_gid;
1830				ucp = &ucred;
1831			}
1832#endif
1833		} else {
1834			ip->i_uid = cnp->cn_cred->cr_uid;
1835			DIP_SET(ip, i_uid, ip->i_uid);
1836		}
1837#ifdef QUOTA
1838		if ((error = getinoquota(ip)) ||
1839	    	    (error = chkiq(ip, 1, ucp, 0))) {
1840			if (DOINGSOFTDEP(tvp))
1841				softdep_revert_link(dp, ip);
1842			UFS_VFREE(tvp, ip->i_number, dmode);
1843			vput(tvp);
1844			return (error);
1845		}
1846#endif
1847	}
1848#else	/* !SUIDDIR */
1849	ip->i_uid = cnp->cn_cred->cr_uid;
1850	DIP_SET(ip, i_uid, ip->i_uid);
1851#ifdef QUOTA
1852	if ((error = getinoquota(ip)) ||
1853	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1854		if (DOINGSOFTDEP(tvp))
1855			softdep_revert_link(dp, ip);
1856		UFS_VFREE(tvp, ip->i_number, dmode);
1857		vput(tvp);
1858		return (error);
1859	}
1860#endif
1861#endif	/* !SUIDDIR */
1862	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1863	ip->i_mode = dmode;
1864	DIP_SET(ip, i_mode, dmode);
1865	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1866	ip->i_effnlink = 2;
1867	ip->i_nlink = 2;
1868	DIP_SET(ip, i_nlink, 2);
1869
1870	if (cnp->cn_flags & ISWHITEOUT) {
1871		ip->i_flags |= UF_OPAQUE;
1872		DIP_SET(ip, i_flags, ip->i_flags);
1873	}
1874
1875	/*
1876	 * Bump link count in parent directory to reflect work done below.
1877	 * Should be done before reference is created so cleanup is
1878	 * possible if we crash.
1879	 */
1880	dp->i_effnlink++;
1881	dp->i_nlink++;
1882	DIP_SET(dp, i_nlink, dp->i_nlink);
1883	dp->i_flag |= IN_CHANGE;
1884	if (DOINGSOFTDEP(dvp))
1885		softdep_setup_mkdir(dp, ip);
1886	error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1887	if (error)
1888		goto bad;
1889#ifdef MAC
1890	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
1891		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
1892		    dvp, tvp, cnp);
1893		if (error)
1894			goto bad;
1895	}
1896#endif
1897#ifdef UFS_ACL
1898	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
1899		error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
1900		    cnp->cn_cred, cnp->cn_thread);
1901		if (error)
1902			goto bad;
1903	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
1904		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
1905		    cnp->cn_cred, cnp->cn_thread);
1906		if (error)
1907			goto bad;
1908	}
1909#endif /* !UFS_ACL */
1910
1911	/*
1912	 * Initialize directory with "." and ".." from static template.
1913	 */
1914	if (dvp->v_mount->mnt_maxsymlinklen > 0)
1915		dtp = &mastertemplate;
1916	else
1917		dtp = (struct dirtemplate *)&omastertemplate;
1918	dirtemplate = *dtp;
1919	dirtemplate.dot_ino = ip->i_number;
1920	dirtemplate.dotdot_ino = dp->i_number;
1921	if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1922	    BA_CLRBUF, &bp)) != 0)
1923		goto bad;
1924	ip->i_size = DIRBLKSIZ;
1925	DIP_SET(ip, i_size, DIRBLKSIZ);
1926	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1927	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1928	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1929	if (DOINGSOFTDEP(tvp)) {
1930		/*
1931		 * Ensure that the entire newly allocated block is a
1932		 * valid directory so that future growth within the
1933		 * block does not have to ensure that the block is
1934		 * written before the inode.
1935		 */
1936		blkoff = DIRBLKSIZ;
1937		while (blkoff < bp->b_bcount) {
1938			((struct direct *)
1939			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1940			blkoff += DIRBLKSIZ;
1941		}
1942	}
1943	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1944				       DOINGASYNC(tvp)))) != 0) {
1945		(void)bwrite(bp);
1946		goto bad;
1947	}
1948	/*
1949	 * Directory set up, now install its entry in the parent directory.
1950	 *
1951	 * If we are not doing soft dependencies, then we must write out the
1952	 * buffer containing the new directory body before entering the new
1953	 * name in the parent. If we are doing soft dependencies, then the
1954	 * buffer containing the new directory body will be passed to and
1955	 * released in the soft dependency code after the code has attached
1956	 * an appropriate ordering dependency to the buffer which ensures that
1957	 * the buffer is written before the new name is written in the parent.
1958	 */
1959	if (DOINGASYNC(dvp))
1960		bdwrite(bp);
1961	else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
1962		goto bad;
1963	ufs_makedirentry(ip, cnp, &newdir);
1964	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0);
1965
1966bad:
1967	if (error == 0) {
1968		*ap->a_vpp = tvp;
1969	} else {
1970		dp->i_effnlink--;
1971		dp->i_nlink--;
1972		DIP_SET(dp, i_nlink, dp->i_nlink);
1973		dp->i_flag |= IN_CHANGE;
1974		/*
1975		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1976		 * do this for us because we set the link count to 0.
1977		 */
1978		ip->i_effnlink = 0;
1979		ip->i_nlink = 0;
1980		DIP_SET(ip, i_nlink, 0);
1981		ip->i_flag |= IN_CHANGE;
1982		if (DOINGSOFTDEP(tvp))
1983			softdep_revert_mkdir(dp, ip);
1984
1985		vput(tvp);
1986	}
1987out:
1988	return (error);
1989}
1990
1991/*
1992 * Rmdir system call.
1993 */
1994static int
1995ufs_rmdir(ap)
1996	struct vop_rmdir_args /* {
1997		struct vnode *a_dvp;
1998		struct vnode *a_vp;
1999		struct componentname *a_cnp;
2000	} */ *ap;
2001{
2002	struct vnode *vp = ap->a_vp;
2003	struct vnode *dvp = ap->a_dvp;
2004	struct componentname *cnp = ap->a_cnp;
2005	struct inode *ip, *dp;
2006	int error;
2007
2008	ip = VTOI(vp);
2009	dp = VTOI(dvp);
2010
2011	/*
2012	 * Do not remove a directory that is in the process of being renamed.
2013	 * Verify the directory is empty (and valid). Rmdir ".." will not be
2014	 * valid since ".." will contain a reference to the current directory
2015	 * and thus be non-empty. Do not allow the removal of mounted on
2016	 * directories (this can happen when an NFS exported filesystem
2017	 * tries to remove a locally mounted on directory).
2018	 */
2019	error = 0;
2020	if (ip->i_effnlink < 2) {
2021		error = EINVAL;
2022		goto out;
2023	}
2024	if (dp->i_effnlink < 3)
2025		panic("ufs_dirrem: Bad link count %d on parent",
2026		    dp->i_effnlink);
2027	if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
2028		error = ENOTEMPTY;
2029		goto out;
2030	}
2031	if ((dp->i_flags & APPEND)
2032	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
2033		error = EPERM;
2034		goto out;
2035	}
2036	if (vp->v_mountedhere != 0) {
2037		error = EINVAL;
2038		goto out;
2039	}
2040#ifdef UFS_GJOURNAL
2041	ufs_gjournal_orphan(vp);
2042#endif
2043	/*
2044	 * Delete reference to directory before purging
2045	 * inode.  If we crash in between, the directory
2046	 * will be reattached to lost+found,
2047	 */
2048	dp->i_effnlink--;
2049	ip->i_effnlink--;
2050	if (DOINGSOFTDEP(vp))
2051		softdep_setup_rmdir(dp, ip);
2052	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
2053	if (error) {
2054		dp->i_effnlink++;
2055		ip->i_effnlink++;
2056		if (DOINGSOFTDEP(vp))
2057			softdep_revert_rmdir(dp, ip);
2058		goto out;
2059	}
2060	cache_purge(dvp);
2061	/*
2062	 * The only stuff left in the directory is "." and "..". The "."
2063	 * reference is inconsequential since we are quashing it. The soft
2064	 * dependency code will arrange to do these operations after
2065	 * the parent directory entry has been deleted on disk, so
2066	 * when running with that code we avoid doing them now.
2067	 */
2068	if (!DOINGSOFTDEP(vp)) {
2069		dp->i_nlink--;
2070		DIP_SET(dp, i_nlink, dp->i_nlink);
2071		dp->i_flag |= IN_CHANGE;
2072		error = UFS_UPDATE(dvp, 0);
2073		ip->i_nlink--;
2074		DIP_SET(ip, i_nlink, ip->i_nlink);
2075		ip->i_flag |= IN_CHANGE;
2076	}
2077	cache_purge(vp);
2078#ifdef UFS_DIRHASH
2079	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
2080	if (ip->i_dirhash != NULL)
2081		ufsdirhash_free(ip);
2082#endif
2083out:
2084	return (error);
2085}
2086
2087/*
2088 * symlink -- make a symbolic link
2089 */
2090static int
2091ufs_symlink(ap)
2092	struct vop_symlink_args /* {
2093		struct vnode *a_dvp;
2094		struct vnode **a_vpp;
2095		struct componentname *a_cnp;
2096		struct vattr *a_vap;
2097		char *a_target;
2098	} */ *ap;
2099{
2100	struct vnode *vp, **vpp = ap->a_vpp;
2101	struct inode *ip;
2102	int len, error;
2103
2104	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
2105	    vpp, ap->a_cnp);
2106	if (error)
2107		return (error);
2108	vp = *vpp;
2109	len = strlen(ap->a_target);
2110	if (len < vp->v_mount->mnt_maxsymlinklen) {
2111		ip = VTOI(vp);
2112		bcopy(ap->a_target, SHORTLINK(ip), len);
2113		ip->i_size = len;
2114		DIP_SET(ip, i_size, len);
2115		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2116		error = UFS_UPDATE(vp, 0);
2117	} else
2118		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
2119		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
2120		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
2121	if (error)
2122		vput(vp);
2123	return (error);
2124}
2125
2126/*
2127 * Vnode op for reading directories.
2128 */
2129int
2130ufs_readdir(ap)
2131	struct vop_readdir_args /* {
2132		struct vnode *a_vp;
2133		struct uio *a_uio;
2134		struct ucred *a_cred;
2135		int *a_eofflag;
2136		int *a_ncookies;
2137		u_long **a_cookies;
2138	} */ *ap;
2139{
2140	struct vnode *vp = ap->a_vp;
2141	struct uio *uio = ap->a_uio;
2142	struct buf *bp;
2143	struct inode *ip;
2144	struct direct *dp, *edp;
2145	u_long *cookies;
2146	struct dirent dstdp;
2147	off_t offset, startoffset;
2148	size_t readcnt, skipcnt;
2149	ssize_t startresid;
2150	int ncookies;
2151	int error;
2152
2153	if (uio->uio_offset < 0)
2154		return (EINVAL);
2155	ip = VTOI(vp);
2156	if (ip->i_effnlink == 0)
2157		return (0);
2158	if (ap->a_ncookies != NULL) {
2159		ncookies = uio->uio_resid;
2160		if (uio->uio_offset >= ip->i_size)
2161			ncookies = 0;
2162		else if (ip->i_size - uio->uio_offset < ncookies)
2163			ncookies = ip->i_size - uio->uio_offset;
2164		ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1;
2165		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
2166		*ap->a_ncookies = ncookies;
2167		*ap->a_cookies = cookies;
2168	} else {
2169		ncookies = 0;
2170		cookies = NULL;
2171	}
2172	offset = startoffset = uio->uio_offset;
2173	startresid = uio->uio_resid;
2174	error = 0;
2175	while (error == 0 && uio->uio_resid > 0 &&
2176	    uio->uio_offset < ip->i_size) {
2177		error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp);
2178		if (error)
2179			break;
2180		if (bp->b_offset + bp->b_bcount > ip->i_size)
2181			readcnt = ip->i_size - bp->b_offset;
2182		else
2183			readcnt = bp->b_bcount;
2184		skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
2185		    ~(size_t)(DIRBLKSIZ - 1);
2186		offset = bp->b_offset + skipcnt;
2187		dp = (struct direct *)&bp->b_data[skipcnt];
2188		edp = (struct direct *)&bp->b_data[readcnt];
2189		while (error == 0 && uio->uio_resid > 0 && dp < edp) {
2190			if (dp->d_reclen <= offsetof(struct direct, d_name) ||
2191			    (caddr_t)dp + dp->d_reclen > (caddr_t)edp) {
2192				error = EIO;
2193				break;
2194			}
2195#if BYTE_ORDER == LITTLE_ENDIAN
2196			/* Old filesystem format. */
2197			if (vp->v_mount->mnt_maxsymlinklen <= 0) {
2198				dstdp.d_namlen = dp->d_type;
2199				dstdp.d_type = dp->d_namlen;
2200			} else
2201#endif
2202			{
2203				dstdp.d_namlen = dp->d_namlen;
2204				dstdp.d_type = dp->d_type;
2205			}
2206			if (offsetof(struct direct, d_name) + dstdp.d_namlen >
2207			    dp->d_reclen) {
2208				error = EIO;
2209				break;
2210			}
2211			if (offset < startoffset || dp->d_ino == 0)
2212				goto nextentry;
2213			dstdp.d_fileno = dp->d_ino;
2214			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
2215			bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
2216			dstdp.d_name[dstdp.d_namlen] = '\0';
2217			if (dstdp.d_reclen > uio->uio_resid) {
2218				if (uio->uio_resid == startresid)
2219					error = EINVAL;
2220				else
2221					error = EJUSTRETURN;
2222				break;
2223			}
2224			/* Advance dp. */
2225			error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
2226			if (error)
2227				break;
2228			if (cookies != NULL) {
2229				KASSERT(ncookies > 0,
2230				    ("ufs_readdir: cookies buffer too small"));
2231				*cookies = offset + dp->d_reclen;
2232				cookies++;
2233				ncookies--;
2234			}
2235nextentry:
2236			offset += dp->d_reclen;
2237			dp = (struct direct *)((caddr_t)dp + dp->d_reclen);
2238		}
2239		bqrelse(bp);
2240		uio->uio_offset = offset;
2241	}
2242	/* We need to correct uio_offset. */
2243	uio->uio_offset = offset;
2244	if (error == EJUSTRETURN)
2245		error = 0;
2246	if (ap->a_ncookies != NULL) {
2247		if (error == 0) {
2248			ap->a_ncookies -= ncookies;
2249		} else {
2250			free(*ap->a_cookies, M_TEMP);
2251			*ap->a_ncookies = 0;
2252			*ap->a_cookies = NULL;
2253		}
2254	}
2255	if (error == 0 && ap->a_eofflag)
2256		*ap->a_eofflag = ip->i_size <= uio->uio_offset;
2257	return (error);
2258}
2259
2260/*
2261 * Return target name of a symbolic link
2262 */
2263static int
2264ufs_readlink(ap)
2265	struct vop_readlink_args /* {
2266		struct vnode *a_vp;
2267		struct uio *a_uio;
2268		struct ucred *a_cred;
2269	} */ *ap;
2270{
2271	struct vnode *vp = ap->a_vp;
2272	struct inode *ip = VTOI(vp);
2273	doff_t isize;
2274
2275	isize = ip->i_size;
2276	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
2277	    DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
2278		return (uiomove(SHORTLINK(ip), isize, ap->a_uio));
2279	}
2280	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
2281}
2282
2283/*
2284 * Calculate the logical to physical mapping if not done already,
2285 * then call the device strategy routine.
2286 *
2287 * In order to be able to swap to a file, the ufs_bmaparray() operation may not
2288 * deadlock on memory.  See ufs_bmap() for details.
2289 */
2290static int
2291ufs_strategy(ap)
2292	struct vop_strategy_args /* {
2293		struct vnode *a_vp;
2294		struct buf *a_bp;
2295	} */ *ap;
2296{
2297	struct buf *bp = ap->a_bp;
2298	struct vnode *vp = ap->a_vp;
2299	struct bufobj *bo;
2300	struct inode *ip;
2301	ufs2_daddr_t blkno;
2302	int error;
2303
2304	ip = VTOI(vp);
2305	if (bp->b_blkno == bp->b_lblkno) {
2306		error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
2307		bp->b_blkno = blkno;
2308		if (error) {
2309			bp->b_error = error;
2310			bp->b_ioflags |= BIO_ERROR;
2311			bufdone(bp);
2312			return (0);
2313		}
2314		if ((long)bp->b_blkno == -1)
2315			vfs_bio_clrbuf(bp);
2316	}
2317	if ((long)bp->b_blkno == -1) {
2318		bufdone(bp);
2319		return (0);
2320	}
2321	bp->b_iooffset = dbtob(bp->b_blkno);
2322	bo = ip->i_umbufobj;
2323	BO_STRATEGY(bo, bp);
2324	return (0);
2325}
2326
2327/*
2328 * Print out the contents of an inode.
2329 */
2330static int
2331ufs_print(ap)
2332	struct vop_print_args /* {
2333		struct vnode *a_vp;
2334	} */ *ap;
2335{
2336	struct vnode *vp = ap->a_vp;
2337	struct inode *ip = VTOI(vp);
2338
2339	printf("\tino %lu, on dev %s", (u_long)ip->i_number,
2340	    devtoname(ip->i_dev));
2341	if (vp->v_type == VFIFO)
2342		fifo_printinfo(vp);
2343	printf("\n");
2344	return (0);
2345}
2346
2347/*
2348 * Close wrapper for fifos.
2349 *
2350 * Update the times on the inode then do device close.
2351 */
2352static int
2353ufsfifo_close(ap)
2354	struct vop_close_args /* {
2355		struct vnode *a_vp;
2356		int  a_fflag;
2357		struct ucred *a_cred;
2358		struct thread *a_td;
2359	} */ *ap;
2360{
2361	struct vnode *vp = ap->a_vp;
2362	int usecount;
2363
2364	VI_LOCK(vp);
2365	usecount = vp->v_usecount;
2366	if (usecount > 1)
2367		ufs_itimes_locked(vp);
2368	VI_UNLOCK(vp);
2369	return (fifo_specops.vop_close(ap));
2370}
2371
2372/*
2373 * Kqfilter wrapper for fifos.
2374 *
2375 * Fall through to ufs kqfilter routines if needed
2376 */
2377static int
2378ufsfifo_kqfilter(ap)
2379	struct vop_kqfilter_args *ap;
2380{
2381	int error;
2382
2383	error = fifo_specops.vop_kqfilter(ap);
2384	if (error)
2385		error = vfs_kqfilter(ap);
2386	return (error);
2387}
2388
2389/*
2390 * Return POSIX pathconf information applicable to fifos.
2391 */
2392static int
2393ufsfifo_pathconf(ap)
2394	struct vop_pathconf_args /* {
2395		struct vnode *a_vp;
2396		int a_name;
2397		int *a_retval;
2398	} */ *ap;
2399{
2400
2401	switch (ap->a_name) {
2402	case _PC_ACL_EXTENDED:
2403	case _PC_ACL_NFS4:
2404	case _PC_ACL_PATH_MAX:
2405	case _PC_MAC_PRESENT:
2406		return (ufs_pathconf(ap));
2407	default:
2408		return (fifo_specops.vop_pathconf(ap));
2409	}
2410	/* NOTREACHED */
2411}
2412
2413/*
2414 * Return POSIX pathconf information applicable to ufs filesystems.
2415 */
2416static int
2417ufs_pathconf(ap)
2418	struct vop_pathconf_args /* {
2419		struct vnode *a_vp;
2420		int a_name;
2421		int *a_retval;
2422	} */ *ap;
2423{
2424	int error;
2425
2426	error = 0;
2427	switch (ap->a_name) {
2428	case _PC_LINK_MAX:
2429		*ap->a_retval = LINK_MAX;
2430		break;
2431	case _PC_NAME_MAX:
2432		*ap->a_retval = NAME_MAX;
2433		break;
2434	case _PC_PATH_MAX:
2435		*ap->a_retval = PATH_MAX;
2436		break;
2437	case _PC_PIPE_BUF:
2438		*ap->a_retval = PIPE_BUF;
2439		break;
2440	case _PC_CHOWN_RESTRICTED:
2441		*ap->a_retval = 1;
2442		break;
2443	case _PC_NO_TRUNC:
2444		*ap->a_retval = 1;
2445		break;
2446	case _PC_ACL_EXTENDED:
2447#ifdef UFS_ACL
2448		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
2449			*ap->a_retval = 1;
2450		else
2451			*ap->a_retval = 0;
2452#else
2453		*ap->a_retval = 0;
2454#endif
2455		break;
2456
2457	case _PC_ACL_NFS4:
2458#ifdef UFS_ACL
2459		if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
2460			*ap->a_retval = 1;
2461		else
2462			*ap->a_retval = 0;
2463#else
2464		*ap->a_retval = 0;
2465#endif
2466		break;
2467
2468	case _PC_ACL_PATH_MAX:
2469#ifdef UFS_ACL
2470		if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS))
2471			*ap->a_retval = ACL_MAX_ENTRIES;
2472		else
2473			*ap->a_retval = 3;
2474#else
2475		*ap->a_retval = 3;
2476#endif
2477		break;
2478	case _PC_MAC_PRESENT:
2479#ifdef MAC
2480		if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
2481			*ap->a_retval = 1;
2482		else
2483			*ap->a_retval = 0;
2484#else
2485		*ap->a_retval = 0;
2486#endif
2487		break;
2488	case _PC_MIN_HOLE_SIZE:
2489		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2490		break;
2491	case _PC_ASYNC_IO:
2492		/* _PC_ASYNC_IO should have been handled by upper layers. */
2493		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
2494		error = EINVAL;
2495		break;
2496	case _PC_PRIO_IO:
2497		*ap->a_retval = 0;
2498		break;
2499	case _PC_SYNC_IO:
2500		*ap->a_retval = 0;
2501		break;
2502	case _PC_ALLOC_SIZE_MIN:
2503		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2504		break;
2505	case _PC_FILESIZEBITS:
2506		*ap->a_retval = 64;
2507		break;
2508	case _PC_REC_INCR_XFER_SIZE:
2509		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2510		break;
2511	case _PC_REC_MAX_XFER_SIZE:
2512		*ap->a_retval = -1; /* means ``unlimited'' */
2513		break;
2514	case _PC_REC_MIN_XFER_SIZE:
2515		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2516		break;
2517	case _PC_REC_XFER_ALIGN:
2518		*ap->a_retval = PAGE_SIZE;
2519		break;
2520	case _PC_SYMLINK_MAX:
2521		*ap->a_retval = MAXPATHLEN;
2522		break;
2523
2524	default:
2525		error = EINVAL;
2526		break;
2527	}
2528	return (error);
2529}
2530
2531/*
2532 * Initialize the vnode associated with a new inode, handle aliased
2533 * vnodes.
2534 */
2535int
2536ufs_vinit(mntp, fifoops, vpp)
2537	struct mount *mntp;
2538	struct vop_vector *fifoops;
2539	struct vnode **vpp;
2540{
2541	struct inode *ip;
2542	struct vnode *vp;
2543
2544	vp = *vpp;
2545	ip = VTOI(vp);
2546	vp->v_type = IFTOVT(ip->i_mode);
2547	if (vp->v_type == VFIFO)
2548		vp->v_op = fifoops;
2549	ASSERT_VOP_LOCKED(vp, "ufs_vinit");
2550	if (ip->i_number == ROOTINO)
2551		vp->v_vflag |= VV_ROOT;
2552	*vpp = vp;
2553	return (0);
2554}
2555
2556/*
2557 * Allocate a new inode.
2558 * Vnode dvp must be locked.
2559 */
2560static int
2561ufs_makeinode(mode, dvp, vpp, cnp)
2562	int mode;
2563	struct vnode *dvp;
2564	struct vnode **vpp;
2565	struct componentname *cnp;
2566{
2567	struct inode *ip, *pdir;
2568	struct direct newdir;
2569	struct vnode *tvp;
2570	int error;
2571
2572	pdir = VTOI(dvp);
2573#ifdef INVARIANTS
2574	if ((cnp->cn_flags & HASBUF) == 0)
2575		panic("ufs_makeinode: no name");
2576#endif
2577	*vpp = NULL;
2578	if ((mode & IFMT) == 0)
2579		mode |= IFREG;
2580
2581	if (VTOI(dvp)->i_effnlink < 2)
2582		panic("ufs_makeinode: Bad link count %d on parent",
2583		    VTOI(dvp)->i_effnlink);
2584	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2585	if (error)
2586		return (error);
2587	ip = VTOI(tvp);
2588	ip->i_gid = pdir->i_gid;
2589	DIP_SET(ip, i_gid, pdir->i_gid);
2590#ifdef SUIDDIR
2591	{
2592#ifdef QUOTA
2593		struct ucred ucred, *ucp;
2594		gid_t ucred_group;
2595		ucp = cnp->cn_cred;
2596#endif
2597		/*
2598		 * If we are not the owner of the directory,
2599		 * and we are hacking owners here, (only do this where told to)
2600		 * and we are not giving it TO root, (would subvert quotas)
2601		 * then go ahead and give it to the other user.
2602		 * Note that this drops off the execute bits for security.
2603		 */
2604		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2605		    (pdir->i_mode & ISUID) &&
2606		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2607			ip->i_uid = pdir->i_uid;
2608			DIP_SET(ip, i_uid, ip->i_uid);
2609			mode &= ~07111;
2610#ifdef QUOTA
2611			/*
2612			 * Make sure the correct user gets charged
2613			 * for the space.
2614			 * Quickly knock up a dummy credential for the victim.
2615			 * XXX This seems to never be accessed out of our
2616			 * context so a stack variable is ok.
2617			 */
2618			refcount_init(&ucred.cr_ref, 1);
2619			ucred.cr_uid = ip->i_uid;
2620			ucred.cr_ngroups = 1;
2621			ucred.cr_groups = &ucred_group;
2622			ucred.cr_groups[0] = pdir->i_gid;
2623			ucp = &ucred;
2624#endif
2625		} else {
2626			ip->i_uid = cnp->cn_cred->cr_uid;
2627			DIP_SET(ip, i_uid, ip->i_uid);
2628		}
2629
2630#ifdef QUOTA
2631		if ((error = getinoquota(ip)) ||
2632	    	    (error = chkiq(ip, 1, ucp, 0))) {
2633			if (DOINGSOFTDEP(tvp))
2634				softdep_revert_link(pdir, ip);
2635			UFS_VFREE(tvp, ip->i_number, mode);
2636			vput(tvp);
2637			return (error);
2638		}
2639#endif
2640	}
2641#else	/* !SUIDDIR */
2642	ip->i_uid = cnp->cn_cred->cr_uid;
2643	DIP_SET(ip, i_uid, ip->i_uid);
2644#ifdef QUOTA
2645	if ((error = getinoquota(ip)) ||
2646	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2647		if (DOINGSOFTDEP(tvp))
2648			softdep_revert_link(pdir, ip);
2649		UFS_VFREE(tvp, ip->i_number, mode);
2650		vput(tvp);
2651		return (error);
2652	}
2653#endif
2654#endif	/* !SUIDDIR */
2655	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2656	ip->i_mode = mode;
2657	DIP_SET(ip, i_mode, mode);
2658	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2659	ip->i_effnlink = 1;
2660	ip->i_nlink = 1;
2661	DIP_SET(ip, i_nlink, 1);
2662	if (DOINGSOFTDEP(tvp))
2663		softdep_setup_create(VTOI(dvp), ip);
2664	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2665	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
2666		ip->i_mode &= ~ISGID;
2667		DIP_SET(ip, i_mode, ip->i_mode);
2668	}
2669
2670	if (cnp->cn_flags & ISWHITEOUT) {
2671		ip->i_flags |= UF_OPAQUE;
2672		DIP_SET(ip, i_flags, ip->i_flags);
2673	}
2674
2675	/*
2676	 * Make sure inode goes to disk before directory entry.
2677	 */
2678	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2679	if (error)
2680		goto bad;
2681#ifdef MAC
2682	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
2683		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
2684		    dvp, tvp, cnp);
2685		if (error)
2686			goto bad;
2687	}
2688#endif
2689#ifdef UFS_ACL
2690	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
2691		error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
2692		    cnp->cn_cred, cnp->cn_thread);
2693		if (error)
2694			goto bad;
2695	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
2696		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
2697		    cnp->cn_cred, cnp->cn_thread);
2698		if (error)
2699			goto bad;
2700	}
2701#endif /* !UFS_ACL */
2702	ufs_makedirentry(ip, cnp, &newdir);
2703	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
2704	if (error)
2705		goto bad;
2706	*vpp = tvp;
2707	return (0);
2708
2709bad:
2710	/*
2711	 * Write error occurred trying to update the inode
2712	 * or the directory so must deallocate the inode.
2713	 */
2714	ip->i_effnlink = 0;
2715	ip->i_nlink = 0;
2716	DIP_SET(ip, i_nlink, 0);
2717	ip->i_flag |= IN_CHANGE;
2718	if (DOINGSOFTDEP(tvp))
2719		softdep_revert_create(VTOI(dvp), ip);
2720	vput(tvp);
2721	return (error);
2722}
2723
2724static int
2725ufs_ioctl(struct vop_ioctl_args *ap)
2726{
2727
2728	switch (ap->a_command) {
2729	case FIOSEEKDATA:
2730	case FIOSEEKHOLE:
2731		return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
2732		    (off_t *)ap->a_data, ap->a_cred));
2733	default:
2734		return (ENOTTY);
2735	}
2736}
2737
2738/* Global vfs data structures for ufs. */
2739struct vop_vector ufs_vnodeops = {
2740	.vop_default =		&default_vnodeops,
2741	.vop_fsync =		VOP_PANIC,
2742	.vop_read =		VOP_PANIC,
2743	.vop_reallocblks =	VOP_PANIC,
2744	.vop_write =		VOP_PANIC,
2745	.vop_accessx =		ufs_accessx,
2746	.vop_bmap =		ufs_bmap,
2747	.vop_cachedlookup =	ufs_lookup,
2748	.vop_close =		ufs_close,
2749	.vop_create =		ufs_create,
2750	.vop_getattr =		ufs_getattr,
2751	.vop_inactive =		ufs_inactive,
2752	.vop_ioctl =		ufs_ioctl,
2753	.vop_link =		ufs_link,
2754	.vop_lookup =		vfs_cache_lookup,
2755	.vop_markatime =	ufs_markatime,
2756	.vop_mkdir =		ufs_mkdir,
2757	.vop_mknod =		ufs_mknod,
2758	.vop_open =		ufs_open,
2759	.vop_pathconf =		ufs_pathconf,
2760	.vop_poll =		vop_stdpoll,
2761	.vop_print =		ufs_print,
2762	.vop_readdir =		ufs_readdir,
2763	.vop_readlink =		ufs_readlink,
2764	.vop_reclaim =		ufs_reclaim,
2765	.vop_remove =		ufs_remove,
2766	.vop_rename =		ufs_rename,
2767	.vop_rmdir =		ufs_rmdir,
2768	.vop_setattr =		ufs_setattr,
2769#ifdef MAC
2770	.vop_setlabel =		vop_stdsetlabel_ea,
2771#endif
2772	.vop_strategy =		ufs_strategy,
2773	.vop_symlink =		ufs_symlink,
2774	.vop_whiteout =		ufs_whiteout,
2775#ifdef UFS_EXTATTR
2776	.vop_getextattr =	ufs_getextattr,
2777	.vop_deleteextattr =	ufs_deleteextattr,
2778	.vop_setextattr =	ufs_setextattr,
2779#endif
2780#ifdef UFS_ACL
2781	.vop_getacl =		ufs_getacl,
2782	.vop_setacl =		ufs_setacl,
2783	.vop_aclcheck =		ufs_aclcheck,
2784#endif
2785};
2786
2787struct vop_vector ufs_fifoops = {
2788	.vop_default =		&fifo_specops,
2789	.vop_fsync =		VOP_PANIC,
2790	.vop_accessx =		ufs_accessx,
2791	.vop_close =		ufsfifo_close,
2792	.vop_getattr =		ufs_getattr,
2793	.vop_inactive =		ufs_inactive,
2794	.vop_kqfilter =		ufsfifo_kqfilter,
2795	.vop_markatime =	ufs_markatime,
2796	.vop_pathconf = 	ufsfifo_pathconf,
2797	.vop_print =		ufs_print,
2798	.vop_read =		VOP_PANIC,
2799	.vop_reclaim =		ufs_reclaim,
2800	.vop_setattr =		ufs_setattr,
2801#ifdef MAC
2802	.vop_setlabel =		vop_stdsetlabel_ea,
2803#endif
2804	.vop_write =		VOP_PANIC,
2805#ifdef UFS_EXTATTR
2806	.vop_getextattr =	ufs_getextattr,
2807	.vop_deleteextattr =	ufs_deleteextattr,
2808	.vop_setextattr =	ufs_setextattr,
2809#endif
2810#ifdef UFS_ACL
2811	.vop_getacl =		ufs_getacl,
2812	.vop_setacl =		ufs_setacl,
2813	.vop_aclcheck =		ufs_aclcheck,
2814#endif
2815};
2816