1139778Simp/*-
212115Sdyson *  modified for EXT2FS support in Lites 1.1
312115Sdyson *
412115Sdyson *  Aug 1995, Godmar Back (gback@cs.utah.edu)
512115Sdyson *  University of Utah, Department of Computer Science
612115Sdyson */
7139778Simp/*-
812115Sdyson * Copyright (c) 1982, 1986, 1989, 1993
912115Sdyson *	The Regents of the University of California.  All rights reserved.
1031495Sphk * (c) UNIX System Laboratories, Inc.
1131495Sphk * All or some portions of this file are derived from material licensed
1231495Sphk * to the University of California by American Telephone and Telegraph
1331495Sphk * Co. or Unix System Laboratories, Inc. and are reproduced herein with
1431495Sphk * the permission of UNIX System Laboratories, Inc.
1512115Sdyson *
1612115Sdyson * Redistribution and use in source and binary forms, with or without
1712115Sdyson * modification, are permitted provided that the following conditions
1812115Sdyson * are met:
1912115Sdyson * 1. Redistributions of source code must retain the above copyright
2012115Sdyson *    notice, this list of conditions and the following disclaimer.
2112115Sdyson * 2. Redistributions in binary form must reproduce the above copyright
2212115Sdyson *    notice, this list of conditions and the following disclaimer in the
2312115Sdyson *    documentation and/or other materials provided with the distribution.
2412115Sdyson * 4. Neither the name of the University nor the names of its contributors
2512115Sdyson *    may be used to endorse or promote products derived from this software
2612115Sdyson *    without specific prior written permission.
2712115Sdyson *
2812115Sdyson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2912115Sdyson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
3012115Sdyson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3112115Sdyson * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3212115Sdyson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3312115Sdyson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3412115Sdyson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3512115Sdyson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3612115Sdyson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3712115Sdyson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3812115Sdyson * SUCH DAMAGE.
3912115Sdyson *
4093015Sbde *	@(#)ufs_vnops.c	8.7 (Berkeley) 2/3/94
4131495Sphk *	@(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
4253101Seivind * $FreeBSD$
4312115Sdyson */
4412115Sdyson
4531749Seivind#include "opt_suiddir.h"
4631398Sbde
4712115Sdyson#include <sys/param.h>
4812115Sdyson#include <sys/systm.h>
4912115Sdyson#include <sys/kernel.h>
5096749Siedowse#include <sys/fcntl.h>
51252956Spfg#include <sys/filio.h>
5212115Sdyson#include <sys/stat.h>
5360041Sphk#include <sys/bio.h>
5412115Sdyson#include <sys/buf.h>
55193377Sstas#include <sys/endian.h>
56164033Srwatson#include <sys/priv.h>
5712115Sdyson#include <sys/mount.h>
5896749Siedowse#include <sys/unistd.h>
5926641Sbde#include <sys/time.h>
6012115Sdyson#include <sys/vnode.h>
6131268Sphk#include <sys/namei.h>
6296749Siedowse#include <sys/lockf.h>
6396749Siedowse#include <sys/event.h>
6496749Siedowse#include <sys/conf.h>
6596749Siedowse#include <sys/file.h>
6612115Sdyson
6712115Sdyson#include <vm/vm.h>
68228507Spfg#include <vm/vm_page.h>
69228507Spfg#include <vm/vm_object.h>
7012726Sbde#include <vm/vm_extern.h>
7133933Smsmith#include <vm/vnode_pager.h>
7212115Sdyson
73228507Spfg#include "opt_directio.h"
74228507Spfg
7531268Sphk#include <ufs/ufs/dir.h>
7612115Sdyson
77221128Sjhb#include <fs/ext2fs/fs.h>
78202283Slulf#include <fs/ext2fs/inode.h>
79202283Slulf#include <fs/ext2fs/ext2_extern.h>
80202283Slulf#include <fs/ext2fs/ext2fs.h>
81221128Sjhb#include <fs/ext2fs/ext2_dinode.h>
82202283Slulf#include <fs/ext2fs/ext2_dir.h>
83221128Sjhb#include <fs/ext2fs/ext2_mount.h>
8412115Sdyson
8592728Salfredstatic int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
86202283Slulfstatic void ext2_itimes_locked(struct vnode *);
87254260Spfgstatic int ext4_ext_read(struct vop_read_args *);
88254260Spfgstatic int ext2_ind_read(struct vop_read_args *);
8931268Sphk
90138270Sphkstatic vop_access_t	ext2_access;
9196749Siedowsestatic int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *);
9296749Siedowsestatic int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *,
9396749Siedowse    struct thread *);
94138270Sphkstatic vop_close_t	ext2_close;
95138270Sphkstatic vop_create_t	ext2_create;
96138270Sphkstatic vop_fsync_t	ext2_fsync;
97138270Sphkstatic vop_getattr_t	ext2_getattr;
98252956Spfgstatic vop_ioctl_t	ext2_ioctl;
99138270Sphkstatic vop_link_t	ext2_link;
100138270Sphkstatic vop_mkdir_t	ext2_mkdir;
101138270Sphkstatic vop_mknod_t	ext2_mknod;
102138270Sphkstatic vop_open_t	ext2_open;
103138270Sphkstatic vop_pathconf_t	ext2_pathconf;
104138270Sphkstatic vop_print_t	ext2_print;
105138270Sphkstatic vop_read_t	ext2_read;
106138270Sphkstatic vop_readlink_t	ext2_readlink;
107138270Sphkstatic vop_remove_t	ext2_remove;
108138270Sphkstatic vop_rename_t	ext2_rename;
109138270Sphkstatic vop_rmdir_t	ext2_rmdir;
110138270Sphkstatic vop_setattr_t	ext2_setattr;
111138270Sphkstatic vop_strategy_t	ext2_strategy;
112138270Sphkstatic vop_symlink_t	ext2_symlink;
113138270Sphkstatic vop_write_t	ext2_write;
114166774Spjdstatic vop_vptofh_t	ext2_vptofh;
115138270Sphkstatic vop_close_t	ext2fifo_close;
116138270Sphkstatic vop_kqfilter_t	ext2fifo_kqfilter;
11712911Sphk
11896749Siedowse/* Global vfs data structures for ext2. */
119138290Sphkstruct vop_vector ext2_vnodeops = {
120138290Sphk	.vop_default =		&default_vnodeops,
121138290Sphk	.vop_access =		ext2_access,
122138290Sphk	.vop_bmap =		ext2_bmap,
123138290Sphk	.vop_cachedlookup =	ext2_lookup,
124138290Sphk	.vop_close =		ext2_close,
125138290Sphk	.vop_create =		ext2_create,
126138290Sphk	.vop_fsync =		ext2_fsync,
127138290Sphk	.vop_getattr =		ext2_getattr,
128138290Sphk	.vop_inactive =		ext2_inactive,
129252956Spfg	.vop_ioctl =		ext2_ioctl,
130138290Sphk	.vop_link =		ext2_link,
131138290Sphk	.vop_lookup =		vfs_cache_lookup,
132138290Sphk	.vop_mkdir =		ext2_mkdir,
133138290Sphk	.vop_mknod =		ext2_mknod,
134138290Sphk	.vop_open =		ext2_open,
135138290Sphk	.vop_pathconf =		ext2_pathconf,
136138290Sphk	.vop_poll =		vop_stdpoll,
137138290Sphk	.vop_print =		ext2_print,
138138290Sphk	.vop_read =		ext2_read,
139138290Sphk	.vop_readdir =		ext2_readdir,
140138290Sphk	.vop_readlink =		ext2_readlink,
141138290Sphk	.vop_reallocblks =	ext2_reallocblks,
142138290Sphk	.vop_reclaim =		ext2_reclaim,
143138290Sphk	.vop_remove =		ext2_remove,
144138290Sphk	.vop_rename =		ext2_rename,
145138290Sphk	.vop_rmdir =		ext2_rmdir,
146138290Sphk	.vop_setattr =		ext2_setattr,
147138290Sphk	.vop_strategy =		ext2_strategy,
148138290Sphk	.vop_symlink =		ext2_symlink,
149138290Sphk	.vop_write =		ext2_write,
150166774Spjd	.vop_vptofh =		ext2_vptofh,
15112115Sdyson};
15212115Sdyson
153138290Sphkstruct vop_vector ext2_fifoops = {
154138290Sphk	.vop_default =		&fifo_specops,
155138290Sphk	.vop_access =		ext2_access,
156138290Sphk	.vop_close =		ext2fifo_close,
157138290Sphk	.vop_fsync =		ext2_fsync,
158138290Sphk	.vop_getattr =		ext2_getattr,
159138290Sphk	.vop_inactive =		ext2_inactive,
160138290Sphk	.vop_kqfilter =		ext2fifo_kqfilter,
161138290Sphk	.vop_print =		ext2_print,
162138868Sphk	.vop_read =		VOP_PANIC,
163138290Sphk	.vop_reclaim =		ext2_reclaim,
164138290Sphk	.vop_setattr =		ext2_setattr,
165138868Sphk	.vop_write =		VOP_PANIC,
166166774Spjd	.vop_vptofh =		ext2_vptofh,
16712115Sdyson};
16812115Sdyson
16912115Sdyson/*
17057710Sbde * A virgin directory (no blushing please).
17196749Siedowse * Note that the type and namlen fields are reversed relative to ext2.
17257710Sbde * Also, we don't use `struct odirtemplate', since it would just cause
17357710Sbde * endianness problems.
17457710Sbde */
17557710Sbdestatic struct dirtemplate mastertemplate = {
17657710Sbde	0, 12, 1, EXT2_FT_DIR, ".",
17757710Sbde	0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".."
17857710Sbde};
17957710Sbdestatic struct dirtemplate omastertemplate = {
18057710Sbde	0, 12, 1, EXT2_FT_UNKNOWN, ".",
18157710Sbde	0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".."
18257710Sbde};
18357710Sbde
184202283Slulfstatic void
185202283Slulfext2_itimes_locked(struct vnode *vp)
18696749Siedowse{
18796749Siedowse	struct inode *ip;
18896749Siedowse	struct timespec ts;
18996749Siedowse
190202283Slulf	ASSERT_VI_LOCKED(vp, __func__);
191202283Slulf
19296749Siedowse	ip = VTOI(vp);
19396749Siedowse	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
19496749Siedowse		return;
19596749Siedowse	if ((vp->v_type == VBLK || vp->v_type == VCHR))
19696749Siedowse		ip->i_flag |= IN_LAZYMOD;
19796749Siedowse	else
19896749Siedowse		ip->i_flag |= IN_MODIFIED;
19996749Siedowse	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
20096749Siedowse		vfs_timestamp(&ts);
20196749Siedowse		if (ip->i_flag & IN_ACCESS) {
20296749Siedowse			ip->i_atime = ts.tv_sec;
20396749Siedowse			ip->i_atimensec = ts.tv_nsec;
20496749Siedowse		}
20596749Siedowse		if (ip->i_flag & IN_UPDATE) {
20696749Siedowse			ip->i_mtime = ts.tv_sec;
20796749Siedowse			ip->i_mtimensec = ts.tv_nsec;
20896749Siedowse			ip->i_modrev++;
20996749Siedowse		}
21096749Siedowse		if (ip->i_flag & IN_CHANGE) {
21196749Siedowse			ip->i_ctime = ts.tv_sec;
21296749Siedowse			ip->i_ctimensec = ts.tv_nsec;
21396749Siedowse		}
21496749Siedowse	}
21596749Siedowse	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
21696749Siedowse}
21796749Siedowse
/*
 * Convenience wrapper: take the vnode interlock, apply the pending
 * timestamp marks via ext2_itimes_locked(), and drop the interlock.
 */
void
ext2_itimes(struct vnode *vp)
{
	VI_LOCK(vp);
	ext2_itimes_locked(vp);
	VI_UNLOCK(vp);
}
226202283Slulf
22757710Sbde/*
22831268Sphk * Create a regular file
22931268Sphk */
23031268Sphkstatic int
231246634Spfgext2_create(struct vop_create_args *ap)
23231268Sphk{
23331268Sphk	int error;
23431268Sphk
23531268Sphk	error =
23631268Sphk	    ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
23731268Sphk	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
23831268Sphk	if (error)
23931268Sphk		return (error);
24031268Sphk	return (0);
24131268Sphk}
24231268Sphk
243105223Sphkstatic int
244246634Spfgext2_open(struct vop_open_args *ap)
24596749Siedowse{
24696749Siedowse
247135864Sphk	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR)
248135864Sphk		return (EOPNOTSUPP);
249135864Sphk
25096749Siedowse	/*
25196749Siedowse	 * Files marked append-only must be opened for appending.
25296749Siedowse	 */
25396749Siedowse	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
25496749Siedowse	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
25596749Siedowse		return (EPERM);
256151811Scracauer
257153858Scracauer	vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td);
258151811Scracauer
25996749Siedowse	return (0);
26096749Siedowse}
26196749Siedowse
26296749Siedowse/*
26396749Siedowse * Close called.
26496749Siedowse *
26596749Siedowse * Update the times on the inode.
26696749Siedowse */
26796749Siedowsestatic int
268246634Spfgext2_close(struct vop_close_args *ap)
26996749Siedowse{
27096749Siedowse	struct vnode *vp = ap->a_vp;
27196749Siedowse
272103938Sjeff	VI_LOCK(vp);
273143509Sjeff	if (vp->v_usecount > 1)
274202283Slulf		ext2_itimes_locked(vp);
275143509Sjeff	VI_UNLOCK(vp);
27696749Siedowse	return (0);
27796749Siedowse}
27896749Siedowse
27996749Siedowsestatic int
280246634Spfgext2_access(struct vop_access_args *ap)
28196749Siedowse{
28296749Siedowse	struct vnode *vp = ap->a_vp;
28396749Siedowse	struct inode *ip = VTOI(vp);
284184413Strasz	accmode_t accmode = ap->a_accmode;
28596749Siedowse	int error;
28696749Siedowse
287135864Sphk	if (vp->v_type == VBLK || vp->v_type == VCHR)
288135864Sphk		return (EOPNOTSUPP);
289135864Sphk
29096749Siedowse	/*
29196749Siedowse	 * Disallow write attempts on read-only file systems;
29296749Siedowse	 * unless the file is a socket, fifo, or a block or
29396749Siedowse	 * character device resident on the file system.
29496749Siedowse	 */
295184413Strasz	if (accmode & VWRITE) {
29696749Siedowse		switch (vp->v_type) {
29796749Siedowse		case VDIR:
29896749Siedowse		case VLNK:
29996749Siedowse		case VREG:
30096749Siedowse			if (vp->v_mount->mnt_flag & MNT_RDONLY)
30196749Siedowse				return (EROFS);
30296749Siedowse			break;
30396749Siedowse		default:
30496749Siedowse			break;
30596749Siedowse		}
30696749Siedowse	}
30796749Siedowse
30896749Siedowse	/* If immutable bit set, nobody gets to write it. */
309202283Slulf	if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT)))
31096749Siedowse		return (EPERM);
31196749Siedowse
31296749Siedowse	error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
313184413Strasz	    ap->a_accmode, ap->a_cred, NULL);
31496749Siedowse	return (error);
31596749Siedowse}
31696749Siedowse
31796749Siedowsestatic int
318246634Spfgext2_getattr(struct vop_getattr_args *ap)
31996749Siedowse{
32096749Siedowse	struct vnode *vp = ap->a_vp;
32196749Siedowse	struct inode *ip = VTOI(vp);
32296749Siedowse	struct vattr *vap = ap->a_vap;
32396749Siedowse
32496749Siedowse	ext2_itimes(vp);
32596749Siedowse	/*
32696749Siedowse	 * Copy from inode table
32796749Siedowse	 */
328147868Scracauer	vap->va_fsid = dev2udev(ip->i_devvp->v_rdev);
32996749Siedowse	vap->va_fileid = ip->i_number;
33096749Siedowse	vap->va_mode = ip->i_mode & ~IFMT;
33196749Siedowse	vap->va_nlink = ip->i_nlink;
33296749Siedowse	vap->va_uid = ip->i_uid;
33396749Siedowse	vap->va_gid = ip->i_gid;
33496749Siedowse	vap->va_rdev = ip->i_rdev;
33596749Siedowse	vap->va_size = ip->i_size;
33696749Siedowse	vap->va_atime.tv_sec = ip->i_atime;
337232703Spfg	vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0;
33896749Siedowse	vap->va_mtime.tv_sec = ip->i_mtime;
339232703Spfg	vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0;
34096749Siedowse	vap->va_ctime.tv_sec = ip->i_ctime;
341232703Spfg	vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0;
342232703Spfg	if E2DI_HAS_XTIME(ip) {
343232703Spfg		vap->va_birthtime.tv_sec = ip->i_birthtime;
344232703Spfg		vap->va_birthtime.tv_nsec = ip->i_birthnsec;
345232703Spfg	}
34696749Siedowse	vap->va_flags = ip->i_flags;
34796749Siedowse	vap->va_gen = ip->i_gen;
34896749Siedowse	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
34996749Siedowse	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
35096749Siedowse	vap->va_type = IFTOVT(ip->i_mode);
35196749Siedowse	vap->va_filerev = ip->i_modrev;
35296749Siedowse	return (0);
35396749Siedowse}
35496749Siedowse
35596749Siedowse/*
35696749Siedowse * Set attribute vnode op. called from several syscalls
35796749Siedowse */
358105223Sphkstatic int
359246634Spfgext2_setattr(struct vop_setattr_args *ap)
36096749Siedowse{
36196749Siedowse	struct vattr *vap = ap->a_vap;
36296749Siedowse	struct vnode *vp = ap->a_vp;
36396749Siedowse	struct inode *ip = VTOI(vp);
36496749Siedowse	struct ucred *cred = ap->a_cred;
365182371Sattilio	struct thread *td = curthread;
36696749Siedowse	int error;
36796749Siedowse
36896749Siedowse	/*
36996749Siedowse	 * Check for unsettable attributes.
37096749Siedowse	 */
37196749Siedowse	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
37296749Siedowse	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
37396749Siedowse	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
37496749Siedowse	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
37596749Siedowse		return (EINVAL);
37696749Siedowse	}
37796749Siedowse	if (vap->va_flags != VNOVAL) {
378202584Slulf		/* Disallow flags not supported by ext2fs. */
379202584Slulf		if(vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP))
380202584Slulf			return (EOPNOTSUPP);
381234139Sjh
38296749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
38396749Siedowse			return (EROFS);
38496749Siedowse		/*
38596749Siedowse		 * Callers may only modify the file flags on objects they
38696749Siedowse		 * have VADMIN rights for.
38796749Siedowse		 */
38896749Siedowse		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
38996749Siedowse			return (error);
39096749Siedowse		/*
39196749Siedowse		 * Unprivileged processes and privileged processes in
39296749Siedowse		 * jail() are not permitted to unset system flags, or
39396749Siedowse		 * modify flags if any system flags are set.
39496749Siedowse		 * Privileged non-jail processes may not modify system flags
39596749Siedowse		 * if securelevel > 0 and any existing system flags are set.
39696749Siedowse		 */
397170587Srwatson		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
398234203Sjh			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) {
39996749Siedowse				error = securelevel_gt(cred, 0);
40096749Siedowse				if (error)
40196749Siedowse					return (error);
40296749Siedowse			}
40396749Siedowse		} else {
404234203Sjh			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) ||
405234203Sjh			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
40696749Siedowse				return (EPERM);
40796749Siedowse		}
408234203Sjh		ip->i_flags = vap->va_flags;
40996749Siedowse		ip->i_flag |= IN_CHANGE;
410234104Sjh		if (ip->i_flags & (IMMUTABLE | APPEND))
41196749Siedowse			return (0);
41296749Siedowse	}
41396749Siedowse	if (ip->i_flags & (IMMUTABLE | APPEND))
41496749Siedowse		return (EPERM);
41596749Siedowse	/*
41696749Siedowse	 * Go through the fields and update iff not VNOVAL.
41796749Siedowse	 */
41896749Siedowse	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
41996749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
42096749Siedowse			return (EROFS);
42196749Siedowse		if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred,
42296749Siedowse		    td)) != 0)
42396749Siedowse			return (error);
42496749Siedowse	}
42596749Siedowse	if (vap->va_size != VNOVAL) {
42696749Siedowse		/*
42796749Siedowse		 * Disallow write attempts on read-only file systems;
42896749Siedowse		 * unless the file is a socket, fifo, or a block or
42996749Siedowse		 * character device resident on the file system.
43096749Siedowse		 */
43196749Siedowse		switch (vp->v_type) {
43296749Siedowse		case VDIR:
43396749Siedowse			return (EISDIR);
43496749Siedowse		case VLNK:
43596749Siedowse		case VREG:
43696749Siedowse			if (vp->v_mount->mnt_flag & MNT_RDONLY)
43796749Siedowse				return (EROFS);
43896749Siedowse			break;
43996749Siedowse		default:
44096749Siedowse			break;
44196749Siedowse		}
44296749Siedowse		if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0)
44396749Siedowse			return (error);
44496749Siedowse	}
44596749Siedowse	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
44696749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
44796749Siedowse			return (EROFS);
44896749Siedowse		/*
44996749Siedowse		 * From utimes(2):
45096749Siedowse		 * If times is NULL, ... The caller must be the owner of
45196749Siedowse		 * the file, have permission to write the file, or be the
45296749Siedowse		 * super-user.
45396749Siedowse		 * If times is non-NULL, ... The caller must be the owner of
45496749Siedowse		 * the file or be the super-user.
45596749Siedowse		 */
45696749Siedowse		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) &&
45796749Siedowse		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
45896749Siedowse		    (error = VOP_ACCESS(vp, VWRITE, cred, td))))
45996749Siedowse			return (error);
46096749Siedowse		if (vap->va_atime.tv_sec != VNOVAL)
46196749Siedowse			ip->i_flag |= IN_ACCESS;
46296749Siedowse		if (vap->va_mtime.tv_sec != VNOVAL)
46396749Siedowse			ip->i_flag |= IN_CHANGE | IN_UPDATE;
46496749Siedowse		ext2_itimes(vp);
46596749Siedowse		if (vap->va_atime.tv_sec != VNOVAL) {
46696749Siedowse			ip->i_atime = vap->va_atime.tv_sec;
46796749Siedowse			ip->i_atimensec = vap->va_atime.tv_nsec;
46896749Siedowse		}
46996749Siedowse		if (vap->va_mtime.tv_sec != VNOVAL) {
47096749Siedowse			ip->i_mtime = vap->va_mtime.tv_sec;
47196749Siedowse			ip->i_mtimensec = vap->va_mtime.tv_nsec;
47296749Siedowse		}
473232703Spfg		ip->i_birthtime = vap->va_birthtime.tv_sec;
474232703Spfg		ip->i_birthnsec = vap->va_birthtime.tv_nsec;
47596749Siedowse		error = ext2_update(vp, 0);
47696749Siedowse		if (error)
47796749Siedowse			return (error);
47896749Siedowse	}
47996749Siedowse	error = 0;
48096749Siedowse	if (vap->va_mode != (mode_t)VNOVAL) {
48196749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
48296749Siedowse			return (EROFS);
48396749Siedowse		error = ext2_chmod(vp, (int)vap->va_mode, cred, td);
48496749Siedowse	}
48596749Siedowse	return (error);
48696749Siedowse}
48796749Siedowse
48896749Siedowse/*
48996749Siedowse * Change the mode on a file.
49096749Siedowse * Inode must be locked before calling.
49196749Siedowse */
49296749Siedowsestatic int
493246634Spfgext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
49496749Siedowse{
49596749Siedowse	struct inode *ip = VTOI(vp);
49696749Siedowse	int error;
49796749Siedowse
49896749Siedowse	/*
49996749Siedowse	 * To modify the permissions on a file, must possess VADMIN
50096749Siedowse	 * for that file.
50196749Siedowse	 */
50296749Siedowse	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
50396749Siedowse		return (error);
50496749Siedowse	/*
50596749Siedowse	 * Privileged processes may set the sticky bit on non-directories,
50696749Siedowse	 * as well as set the setgid bit on a file with a group that the
50796749Siedowse	 * process is not a member of.
50896749Siedowse	 */
509164033Srwatson	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
510170587Srwatson		error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0);
511164033Srwatson		if (error)
51296749Siedowse			return (EFTYPE);
51396749Siedowse	}
514164033Srwatson	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
515170587Srwatson		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
516164033Srwatson		if (error)
517164033Srwatson			return (error);
518164033Srwatson	}
51996749Siedowse	ip->i_mode &= ~ALLPERMS;
52096749Siedowse	ip->i_mode |= (mode & ALLPERMS);
52196749Siedowse	ip->i_flag |= IN_CHANGE;
52296749Siedowse	return (0);
52396749Siedowse}
52496749Siedowse
52596749Siedowse/*
52696749Siedowse * Perform chown operation on inode ip;
52796749Siedowse * inode must be locked prior to call.
52896749Siedowse */
52996749Siedowsestatic int
530246634Spfgext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
531246634Spfg    struct thread *td)
53296749Siedowse{
53396749Siedowse	struct inode *ip = VTOI(vp);
53496749Siedowse	uid_t ouid;
53596749Siedowse	gid_t ogid;
53696749Siedowse	int error = 0;
53796749Siedowse
53896749Siedowse	if (uid == (uid_t)VNOVAL)
53996749Siedowse		uid = ip->i_uid;
54096749Siedowse	if (gid == (gid_t)VNOVAL)
54196749Siedowse		gid = ip->i_gid;
54296749Siedowse	/*
54396749Siedowse	 * To modify the ownership of a file, must possess VADMIN
54496749Siedowse	 * for that file.
54596749Siedowse	 */
54696749Siedowse	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
54796749Siedowse		return (error);
54896749Siedowse	/*
54996749Siedowse	 * To change the owner of a file, or change the group of a file
55096749Siedowse	 * to a group of which we are not a member, the caller must
55196749Siedowse	 * have privilege.
55296749Siedowse	 */
553164033Srwatson	if (uid != ip->i_uid || (gid != ip->i_gid &&
554164033Srwatson	    !groupmember(gid, cred))) {
555170587Srwatson		error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
556164033Srwatson		if (error)
557164033Srwatson			return (error);
558164033Srwatson	}
55996749Siedowse	ogid = ip->i_gid;
56096749Siedowse	ouid = ip->i_uid;
56196749Siedowse	ip->i_gid = gid;
56296749Siedowse	ip->i_uid = uid;
56396749Siedowse	ip->i_flag |= IN_CHANGE;
564167151Spjd	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
565170587Srwatson		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0)
566164033Srwatson			ip->i_mode &= ~(ISUID | ISGID);
567164033Srwatson	}
56896749Siedowse	return (0);
56996749Siedowse}
57096749Siedowse
57196749Siedowse/*
57231398Sbde * Synch an open file.
57331398Sbde */
57431398Sbde/* ARGSUSED */
57531398Sbdestatic int
576246634Spfgext2_fsync(struct vop_fsync_args *ap)
57731398Sbde{
57831398Sbde	/*
57931398Sbde	 * Flush all dirty buffers associated with a vnode.
58031398Sbde	 */
58131398Sbde
582110587Sjeff	vop_stdfsync(ap);
583110587Sjeff
58496749Siedowse	return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT));
58531398Sbde}
58631398Sbde
58731398Sbde/*
58831268Sphk * Mknod vnode call
58931268Sphk */
59031268Sphk/* ARGSUSED */
59131268Sphkstatic int
592246634Spfgext2_mknod(struct vop_mknod_args *ap)
59331268Sphk{
59431268Sphk	struct vattr *vap = ap->a_vap;
59531268Sphk	struct vnode **vpp = ap->a_vpp;
59631268Sphk	struct inode *ip;
59768307Sbde	ino_t ino;
59831268Sphk	int error;
59931268Sphk
60031268Sphk	error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
60131268Sphk	    ap->a_dvp, vpp, ap->a_cnp);
60231268Sphk	if (error)
60331268Sphk		return (error);
60431268Sphk	ip = VTOI(*vpp);
60531268Sphk	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
60631268Sphk	if (vap->va_rdev != VNOVAL) {
60731268Sphk		/*
60831268Sphk		 * Want to be able to use this to make badblock
60931268Sphk		 * inodes, so don't truncate the dev number.
61031268Sphk		 */
61131268Sphk		ip->i_rdev = vap->va_rdev;
61231268Sphk	}
61331268Sphk	/*
61453101Seivind	 * Remove inode, then reload it through VFS_VGET so it is
61531268Sphk	 * checked to see if it is an alias of an existing entry in
616143509Sjeff	 * the inode cache.	 XXX I don't believe this is necessary now.
61731268Sphk	 */
61831268Sphk	(*vpp)->v_type = VNON;
61968307Sbde	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
62031268Sphk	vgone(*vpp);
621143509Sjeff	vput(*vpp);
62292462Smckusick	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
62353101Seivind	if (error) {
62453101Seivind		*vpp = NULL;
62553101Seivind		return (error);
62653101Seivind	}
62731268Sphk	return (0);
62831268Sphk}
62931268Sphk
63031268Sphkstatic int
631246634Spfgext2_remove(struct vop_remove_args *ap)
63231268Sphk{
63331398Sbde	struct inode *ip;
63431398Sbde	struct vnode *vp = ap->a_vp;
63531398Sbde	struct vnode *dvp = ap->a_dvp;
63631398Sbde	int error;
63731268Sphk
63831398Sbde	ip = VTOI(vp);
63931398Sbde	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
64031398Sbde	    (VTOI(dvp)->i_flags & APPEND)) {
64131398Sbde		error = EPERM;
64231268Sphk		goto out;
64331268Sphk	}
64431398Sbde	error = ext2_dirremove(dvp, ap->a_cnp);
64531398Sbde	if (error == 0) {
64631398Sbde		ip->i_nlink--;
64731268Sphk		ip->i_flag |= IN_CHANGE;
64831268Sphk	}
64931268Sphkout:
65031268Sphk	return (error);
65131268Sphk}
65231268Sphk
65331268Sphk/*
65431398Sbde * link vnode call
65531268Sphk */
65631268Sphkstatic int
657246634Spfgext2_link(struct vop_link_args *ap)
65831268Sphk{
65931268Sphk	struct vnode *vp = ap->a_vp;
66031398Sbde	struct vnode *tdvp = ap->a_tdvp;
66131268Sphk	struct componentname *cnp = ap->a_cnp;
66231398Sbde	struct inode *ip;
66331268Sphk	int error;
66431268Sphk
665251658Spfg#ifdef INVARIANTS
66631398Sbde	if ((cnp->cn_flags & HASBUF) == 0)
66796749Siedowse		panic("ext2_link: no name");
66831398Sbde#endif
66931268Sphk	ip = VTOI(vp);
670246347Spfg	if ((nlink_t)ip->i_nlink >= EXT2_LINK_MAX) {
67131398Sbde		error = EMLINK;
672103636Struckman		goto out;
67331268Sphk	}
67431398Sbde	if (ip->i_flags & (IMMUTABLE | APPEND)) {
67531268Sphk		error = EPERM;
676103636Struckman		goto out;
67731268Sphk	}
67831398Sbde	ip->i_nlink++;
67931398Sbde	ip->i_flag |= IN_CHANGE;
680221166Sjhb	error = ext2_update(vp, !DOINGASYNC(vp));
68131398Sbde	if (!error)
68231398Sbde		error = ext2_direnter(ip, tdvp, cnp);
68331398Sbde	if (error) {
68431398Sbde		ip->i_nlink--;
68531398Sbde		ip->i_flag |= IN_CHANGE;
68631398Sbde	}
687103636Struckmanout:
68831268Sphk	return (error);
68931268Sphk}
69031268Sphk
69131268Sphk/*
69231268Sphk * Rename system call.
693202283Slulf * 	rename("foo", "bar");
694202283Slulf * is essentially
695202283Slulf *	unlink("bar");
696202283Slulf *	link("foo", "bar");
697202283Slulf *	unlink("foo");
698202283Slulf * but ``atomically''.  Can't do full commit without saving state in the
699202283Slulf * inode on disk which isn't feasible at this time.  Best we can do is
700202283Slulf * always guarantee the target exists.
701202283Slulf *
702202283Slulf * Basic algorithm is:
703202283Slulf *
 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensures the inode won't be deleted out
 *    from underneath us while we work (it may be truncated by
 *    a concurrent `trunc' or `open' for creation).
708202283Slulf * 2) Link source to destination.  If destination already exists,
709202283Slulf *    delete it first.
710202283Slulf * 3) Unlink source reference to inode if still around. If a
711202283Slulf *    directory was moved and the parent of the destination
712202283Slulf *    is different from the source, patch the ".." entry in the
713202283Slulf *    directory.
71431268Sphk */
71531268Sphkstatic int
716246634Spfgext2_rename(struct vop_rename_args *ap)
71731268Sphk{
71831268Sphk	struct vnode *tvp = ap->a_tvp;
71996752Siedowse	struct vnode *tdvp = ap->a_tdvp;
72031268Sphk	struct vnode *fvp = ap->a_fvp;
72131268Sphk	struct vnode *fdvp = ap->a_fdvp;
72231268Sphk	struct componentname *tcnp = ap->a_tcnp;
72331268Sphk	struct componentname *fcnp = ap->a_fcnp;
72431268Sphk	struct inode *ip, *xp, *dp;
72531268Sphk	struct dirtemplate dirbuf;
72631268Sphk	int doingdirectory = 0, oldparent = 0, newparent = 0;
72731268Sphk	int error = 0;
72831268Sphk	u_char namlen;
72931268Sphk
730251658Spfg#ifdef INVARIANTS
73131268Sphk	if ((tcnp->cn_flags & HASBUF) == 0 ||
73231268Sphk	    (fcnp->cn_flags & HASBUF) == 0)
73396749Siedowse		panic("ext2_rename: no name");
73431268Sphk#endif
73531268Sphk	/*
73631268Sphk	 * Check for cross-device rename.
73731268Sphk	 */
73831268Sphk	if ((fvp->v_mount != tdvp->v_mount) ||
73931268Sphk	    (tvp && (fvp->v_mount != tvp->v_mount))) {
74031268Sphk		error = EXDEV;
74131268Sphkabortit:
74231268Sphk		if (tdvp == tvp)
74331268Sphk			vrele(tdvp);
74431268Sphk		else
74531268Sphk			vput(tdvp);
74631268Sphk		if (tvp)
74731268Sphk			vput(tvp);
74831268Sphk		vrele(fdvp);
74931268Sphk		vrele(fvp);
75031268Sphk		return (error);
75131268Sphk	}
75231268Sphk
75331268Sphk	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
75431268Sphk	    (VTOI(tdvp)->i_flags & APPEND))) {
75531268Sphk		error = EPERM;
75631268Sphk		goto abortit;
75731268Sphk	}
75831268Sphk
75931268Sphk	/*
760103180Sbde	 * Renaming a file to itself has no effect.  The upper layers should
761103180Sbde	 * not call us in that case.  Temporarily just warn if they do.
76231268Sphk	 */
76331268Sphk	if (fvp == tvp) {
764103180Sbde		printf("ext2_rename: fvp == tvp (can't happen)\n");
765103180Sbde		error = 0;
766103180Sbde		goto abortit;
767103180Sbde	}
76831268Sphk
769175202Sattilio	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
77031268Sphk		goto abortit;
77131268Sphk	dp = VTOI(fdvp);
77231268Sphk	ip = VTOI(fvp);
773246347Spfg	if (ip->i_nlink >= EXT2_LINK_MAX) {
774262723Spfg		VOP_UNLOCK(fvp, 0);
775262723Spfg		error = EMLINK;
776262723Spfg		goto abortit;
777262723Spfg	}
77831268Sphk	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
77931268Sphk	    || (dp->i_flags & APPEND)) {
780175294Sattilio		VOP_UNLOCK(fvp, 0);
78131268Sphk		error = EPERM;
78231268Sphk		goto abortit;
78331268Sphk	}
78431268Sphk	if ((ip->i_mode & IFMT) == IFDIR) {
78531268Sphk		/*
78631268Sphk		 * Avoid ".", "..", and aliases of "." for obvious reasons.
78731268Sphk		 */
78831268Sphk		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
78931268Sphk		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
79031268Sphk		    (ip->i_flag & IN_RENAME)) {
791175294Sattilio			VOP_UNLOCK(fvp, 0);
79231268Sphk			error = EINVAL;
79331268Sphk			goto abortit;
79431268Sphk		}
79531268Sphk		ip->i_flag |= IN_RENAME;
79631268Sphk		oldparent = dp->i_number;
79731268Sphk		doingdirectory++;
79831268Sphk	}
79931268Sphk	vrele(fdvp);
80031268Sphk
80131268Sphk	/*
80231268Sphk	 * When the target exists, both the directory
80331268Sphk	 * and target vnodes are returned locked.
80431268Sphk	 */
80531268Sphk	dp = VTOI(tdvp);
80631268Sphk	xp = NULL;
80731268Sphk	if (tvp)
80831268Sphk		xp = VTOI(tvp);
80931268Sphk
81031268Sphk	/*
81131268Sphk	 * 1) Bump link count while we're moving stuff
81231268Sphk	 *    around.  If we crash somewhere before
81331268Sphk	 *    completing our work, the link count
81431268Sphk	 *    may be wrong, but correctable.
81531268Sphk	 */
81631268Sphk	ip->i_nlink++;
81731268Sphk	ip->i_flag |= IN_CHANGE;
818221166Sjhb	if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) {
819175294Sattilio		VOP_UNLOCK(fvp, 0);
82031268Sphk		goto bad;
82131268Sphk	}
82231268Sphk
82331268Sphk	/*
82431268Sphk	 * If ".." must be changed (ie the directory gets a new
82531268Sphk	 * parent) then the source directory must not be in the
826204111Suqs	 * directory hierarchy above the target, as this would
82731268Sphk	 * orphan everything below the source directory. Also
82831268Sphk	 * the user must have write permission in the source so
82931268Sphk	 * as to be able to change "..". We must repeat the call
83031268Sphk	 * to namei, as the parent directory is unlocked by the
83131268Sphk	 * call to checkpath().
83231268Sphk	 */
83383366Sjulian	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
834175294Sattilio	VOP_UNLOCK(fvp, 0);
83531268Sphk	if (oldparent != dp->i_number)
83631268Sphk		newparent = dp->i_number;
83731268Sphk	if (doingdirectory && newparent) {
83831268Sphk		if (error)	/* write access check above */
83931268Sphk			goto bad;
84031268Sphk		if (xp != NULL)
84131268Sphk			vput(tvp);
84231268Sphk		error = ext2_checkpath(ip, dp, tcnp->cn_cred);
84331268Sphk		if (error)
84431268Sphk			goto out;
84531268Sphk		VREF(tdvp);
84631268Sphk		error = relookup(tdvp, &tvp, tcnp);
84731268Sphk		if (error)
84831268Sphk			goto out;
84931268Sphk		vrele(tdvp);
85031268Sphk		dp = VTOI(tdvp);
85131268Sphk		xp = NULL;
85231268Sphk		if (tvp)
85331268Sphk			xp = VTOI(tvp);
85431268Sphk	}
85531268Sphk	/*
85631268Sphk	 * 2) If target doesn't exist, link the target
85731268Sphk	 *    to the source and unlink the source.
85831268Sphk	 *    Otherwise, rewrite the target directory
85931268Sphk	 *    entry to reference the source inode and
86031268Sphk	 *    expunge the original entry's existence.
86131268Sphk	 */
86231268Sphk	if (xp == NULL) {
863143677Sphk		if (dp->i_devvp != ip->i_devvp)
86496749Siedowse			panic("ext2_rename: EXDEV");
86531268Sphk		/*
86631268Sphk		 * Account for ".." in new directory.
86731268Sphk		 * When source and destination have the same
86831268Sphk		 * parent we don't fool with the link count.
86931268Sphk		 */
87031268Sphk		if (doingdirectory && newparent) {
871246347Spfg			if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
87231268Sphk				error = EMLINK;
87331268Sphk				goto bad;
87431268Sphk			}
87531268Sphk			dp->i_nlink++;
87631268Sphk			dp->i_flag |= IN_CHANGE;
877221166Sjhb			error = ext2_update(tdvp, !DOINGASYNC(tdvp));
87831268Sphk			if (error)
87931268Sphk				goto bad;
88031268Sphk		}
88131268Sphk		error = ext2_direnter(ip, tdvp, tcnp);
88231268Sphk		if (error) {
88331268Sphk			if (doingdirectory && newparent) {
88431268Sphk				dp->i_nlink--;
88531268Sphk				dp->i_flag |= IN_CHANGE;
88696749Siedowse				(void)ext2_update(tdvp, 1);
88731268Sphk			}
88831268Sphk			goto bad;
88931268Sphk		}
89031268Sphk		vput(tdvp);
89131268Sphk	} else {
892143677Sphk		if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp)
893105223Sphk		       panic("ext2_rename: EXDEV");
89431268Sphk		/*
89531268Sphk		 * Short circuit rename(foo, foo).
89631268Sphk		 */
89731268Sphk		if (xp->i_number == ip->i_number)
89896749Siedowse			panic("ext2_rename: same file");
89931268Sphk		/*
90031268Sphk		 * If the parent directory is "sticky", then the user must
90131268Sphk		 * own the parent directory, or the destination of the rename,
90231268Sphk		 * otherwise the destination may not be changed (except by
90331268Sphk		 * root). This implements append-only directories.
90431268Sphk		 */
90531268Sphk		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
90631268Sphk		    tcnp->cn_cred->cr_uid != dp->i_uid &&
90731268Sphk		    xp->i_uid != tcnp->cn_cred->cr_uid) {
90831268Sphk			error = EPERM;
90931268Sphk			goto bad;
91031268Sphk		}
91131268Sphk		/*
91231268Sphk		 * Target must be empty if a directory and have no links
91331268Sphk		 * to it. Also, ensure source and target are compatible
91431268Sphk		 * (both directories, or both not directories).
91531268Sphk		 */
91631268Sphk		if ((xp->i_mode&IFMT) == IFDIR) {
91731268Sphk			if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
91831268Sphk			    xp->i_nlink > 2) {
91931268Sphk				error = ENOTEMPTY;
92031268Sphk				goto bad;
92131268Sphk			}
92231268Sphk			if (!doingdirectory) {
92331268Sphk				error = ENOTDIR;
92431268Sphk				goto bad;
92531268Sphk			}
92631268Sphk			cache_purge(tdvp);
92731268Sphk		} else if (doingdirectory) {
92831268Sphk			error = EISDIR;
92931268Sphk			goto bad;
93031268Sphk		}
93131268Sphk		error = ext2_dirrewrite(dp, ip, tcnp);
93231268Sphk		if (error)
93331268Sphk			goto bad;
93431268Sphk		/*
93531268Sphk		 * If the target directory is in the same
93631268Sphk		 * directory as the source directory,
93731268Sphk		 * decrement the link count on the parent
93831268Sphk		 * of the target directory.
93931268Sphk		 */
940105223Sphk		if (doingdirectory && !newparent) {
941262723Spfg			dp->i_nlink--;
942262723Spfg			dp->i_flag |= IN_CHANGE;
94331268Sphk		}
94431268Sphk		vput(tdvp);
94531268Sphk		/*
94631268Sphk		 * Adjust the link count of the target to
94731268Sphk		 * reflect the dirrewrite above.  If this is
94831268Sphk		 * a directory it is empty and there are
94931268Sphk		 * no links to it, so we can squash the inode and
95031268Sphk		 * any space associated with it.  We disallowed
95131268Sphk		 * renaming over top of a directory with links to
95231268Sphk		 * it above, as the remaining link would point to
95331268Sphk		 * a directory without "." or ".." entries.
95431268Sphk		 */
95531268Sphk		xp->i_nlink--;
95631268Sphk		if (doingdirectory) {
95731268Sphk			if (--xp->i_nlink != 0)
95896749Siedowse				panic("ext2_rename: linked directory");
95996749Siedowse			error = ext2_truncate(tvp, (off_t)0, IO_SYNC,
96083366Sjulian			    tcnp->cn_cred, tcnp->cn_thread);
96131268Sphk		}
96231268Sphk		xp->i_flag |= IN_CHANGE;
96331268Sphk		vput(tvp);
96431268Sphk		xp = NULL;
96531268Sphk	}
96631268Sphk
96731268Sphk	/*
96831268Sphk	 * 3) Unlink the source.
96931268Sphk	 */
97031268Sphk	fcnp->cn_flags &= ~MODMASK;
97131268Sphk	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
97231268Sphk	VREF(fdvp);
97331268Sphk	error = relookup(fdvp, &fvp, fcnp);
97431268Sphk	if (error == 0)
97531268Sphk		vrele(fdvp);
97631268Sphk	if (fvp != NULL) {
97731268Sphk		xp = VTOI(fvp);
97831268Sphk		dp = VTOI(fdvp);
97931268Sphk	} else {
98031268Sphk		/*
98131268Sphk		 * From name has disappeared.
98231268Sphk		 */
98331268Sphk		if (doingdirectory)
98496749Siedowse			panic("ext2_rename: lost dir entry");
98531268Sphk		vrele(ap->a_fvp);
98631268Sphk		return (0);
98731268Sphk	}
98831268Sphk	/*
98931268Sphk	 * Ensure that the directory entry still exists and has not
99031268Sphk	 * changed while the new name has been entered. If the source is
99131268Sphk	 * a file then the entry may have been unlinked or renamed. In
99231268Sphk	 * either case there is no further work to be done. If the source
99331268Sphk	 * is a directory then it cannot have been rmdir'ed; its link
99431268Sphk	 * count of three would cause a rmdir to fail with ENOTEMPTY.
99531268Sphk	 * The IN_RENAME flag ensures that it cannot be moved by another
99631268Sphk	 * rename.
99731268Sphk	 */
99831268Sphk	if (xp != ip) {
99931268Sphk		if (doingdirectory)
100096749Siedowse			panic("ext2_rename: lost dir entry");
100131268Sphk	} else {
100231268Sphk		/*
100331268Sphk		 * If the source is a directory with a
100431268Sphk		 * new parent, the link count of the old
100531268Sphk		 * parent directory must be decremented
100631268Sphk		 * and ".." set to point to the new parent.
100731268Sphk		 */
100831268Sphk		if (doingdirectory && newparent) {
100931268Sphk			dp->i_nlink--;
101031268Sphk			dp->i_flag |= IN_CHANGE;
101131268Sphk			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
1012228583Spfg				sizeof(struct dirtemplate), (off_t)0,
1013101744Srwatson				UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1014194296Skib				tcnp->cn_cred, NOCRED, NULL, NULL);
101531268Sphk			if (error == 0) {
101657710Sbde				/* Like ufs little-endian: */
101757710Sbde				namlen = dirbuf.dotdot_type;
101831268Sphk				if (namlen != 2 ||
101931268Sphk				    dirbuf.dotdot_name[0] != '.' ||
102031268Sphk				    dirbuf.dotdot_name[1] != '.') {
102196749Siedowse					ext2_dirbad(xp, (doff_t)12,
102231268Sphk					    "rename: mangled dir");
102331268Sphk				} else {
102431268Sphk					dirbuf.dotdot_ino = newparent;
102531268Sphk					(void) vn_rdwr(UIO_WRITE, fvp,
102631268Sphk					    (caddr_t)&dirbuf,
1027228583Spfg					    sizeof(struct dirtemplate),
102831268Sphk					    (off_t)0, UIO_SYSSPACE,
1029101744Srwatson					    IO_NODELOCKED | IO_SYNC |
1030101744Srwatson					    IO_NOMACCHECK, tcnp->cn_cred,
1031194296Skib					    NOCRED, NULL, NULL);
103231268Sphk					cache_purge(fdvp);
103331268Sphk				}
103431268Sphk			}
103531268Sphk		}
103631268Sphk		error = ext2_dirremove(fdvp, fcnp);
103731268Sphk		if (!error) {
103831268Sphk			xp->i_nlink--;
103931268Sphk			xp->i_flag |= IN_CHANGE;
104031268Sphk		}
104131268Sphk		xp->i_flag &= ~IN_RENAME;
104231268Sphk	}
104331268Sphk	if (dp)
104431268Sphk		vput(fdvp);
104531268Sphk	if (xp)
104631268Sphk		vput(fvp);
104731268Sphk	vrele(ap->a_fvp);
104831268Sphk	return (error);
104931268Sphk
105031268Sphkbad:
105131268Sphk	if (xp)
105231268Sphk		vput(ITOV(xp));
105331268Sphk	vput(ITOV(dp));
105431268Sphkout:
105531268Sphk	if (doingdirectory)
105631268Sphk		ip->i_flag &= ~IN_RENAME;
1057175202Sattilio	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
105831268Sphk		ip->i_nlink--;
105931268Sphk		ip->i_flag |= IN_CHANGE;
106031268Sphk		ip->i_flag &= ~IN_RENAME;
106131268Sphk		vput(fvp);
106231268Sphk	} else
106331268Sphk		vrele(fvp);
106431268Sphk	return (error);
106531268Sphk}
106631268Sphk
106731268Sphk/*
106831398Sbde * Mkdir system call
106931398Sbde */
107031268Sphkstatic int
1071246634Spfgext2_mkdir(struct vop_mkdir_args *ap)
107231268Sphk{
107396752Siedowse	struct vnode *dvp = ap->a_dvp;
107496752Siedowse	struct vattr *vap = ap->a_vap;
107596752Siedowse	struct componentname *cnp = ap->a_cnp;
107696752Siedowse	struct inode *ip, *dp;
107731398Sbde	struct vnode *tvp;
107831398Sbde	struct dirtemplate dirtemplate, *dtp;
107931398Sbde	int error, dmode;
108031268Sphk
1081251658Spfg#ifdef INVARIANTS
108231268Sphk	if ((cnp->cn_flags & HASBUF) == 0)
108396749Siedowse		panic("ext2_mkdir: no name");
108431268Sphk#endif
108531398Sbde	dp = VTOI(dvp);
1086246347Spfg	if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
108731398Sbde		error = EMLINK;
108831398Sbde		goto out;
108931268Sphk	}
109031398Sbde	dmode = vap->va_mode & 0777;
109131398Sbde	dmode |= IFDIR;
109231398Sbde	/*
109331398Sbde	 * Must simulate part of ext2_makeinode here to acquire the inode,
109431398Sbde	 * but not have it entered in the parent directory. The entry is
109531398Sbde	 * made later after writing "." and ".." entries.
109631398Sbde	 */
109796749Siedowse	error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp);
109831398Sbde	if (error)
109931398Sbde		goto out;
110031398Sbde	ip = VTOI(tvp);
110131398Sbde	ip->i_gid = dp->i_gid;
110231398Sbde#ifdef SUIDDIR
110331398Sbde	{
110431398Sbde		/*
110531398Sbde		 * if we are hacking owners here, (only do this where told to)
110631398Sbde		 * and we are not giving it TOO root, (would subvert quotas)
110731398Sbde		 * then go ahead and give it to the other user.
110831398Sbde		 * The new directory also inherits the SUID bit.
110933064Seivind		 * If user's UID and dir UID are the same,
111031398Sbde		 * 'give it away' so that the SUID is still forced on.
111131398Sbde		 */
111231398Sbde		if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
111331398Sbde		   (dp->i_mode & ISUID) && dp->i_uid) {
111431398Sbde			dmode |= ISUID;
111531398Sbde			ip->i_uid = dp->i_uid;
111631398Sbde		} else {
111731398Sbde			ip->i_uid = cnp->cn_cred->cr_uid;
111831398Sbde		}
111931268Sphk	}
112031398Sbde#else
112131398Sbde	ip->i_uid = cnp->cn_cred->cr_uid;
112231398Sbde#endif
112331398Sbde	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
112431398Sbde	ip->i_mode = dmode;
112531398Sbde	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
112631398Sbde	ip->i_nlink = 2;
112731398Sbde	if (cnp->cn_flags & ISWHITEOUT)
112831398Sbde		ip->i_flags |= UF_OPAQUE;
112996749Siedowse	error = ext2_update(tvp, 1);
113031398Sbde
113131398Sbde	/*
113231398Sbde	 * Bump link count in parent directory
113331398Sbde	 * to reflect work done below.  Should
113431398Sbde	 * be done before reference is created
113531398Sbde	 * so reparation is possible if we crash.
113631398Sbde	 */
113731398Sbde	dp->i_nlink++;
113831398Sbde	dp->i_flag |= IN_CHANGE;
1139221166Sjhb	error = ext2_update(dvp, !DOINGASYNC(dvp));
114031398Sbde	if (error)
114131398Sbde		goto bad;
114231398Sbde
114331398Sbde	/* Initialize directory with "." and ".." from static template. */
1144193377Sstas	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
1145202283Slulf	    EXT2F_INCOMPAT_FTYPE))
114657710Sbde		dtp = &mastertemplate;
114757710Sbde	else
114857710Sbde		dtp = &omastertemplate;
114931398Sbde	dirtemplate = *dtp;
115031398Sbde	dirtemplate.dot_ino = ip->i_number;
115131398Sbde	dirtemplate.dotdot_ino = dp->i_number;
115231398Sbde	/* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE
115331398Sbde	 * so let's just redefine it - for this function only
115431398Sbde	 */
115531398Sbde#undef  DIRBLKSIZ
1156202283Slulf#define DIRBLKSIZ  VTOI(dvp)->i_e2fs->e2fs_bsize
115731398Sbde	dirtemplate.dotdot_reclen = DIRBLKSIZ - 12;
115831398Sbde	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
1159228583Spfg	    sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE,
1160101941Srwatson	    IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED,
1161194296Skib	    NULL, NULL);
116231398Sbde	if (error) {
116331398Sbde		dp->i_nlink--;
116431398Sbde		dp->i_flag |= IN_CHANGE;
116531398Sbde		goto bad;
116631268Sphk	}
116796749Siedowse	if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
116896749Siedowse		/* XXX should grow with balloc() */
116996749Siedowse		panic("ext2_mkdir: blksize");
117031398Sbde	else {
117131398Sbde		ip->i_size = DIRBLKSIZ;
117231398Sbde		ip->i_flag |= IN_CHANGE;
117331398Sbde	}
117431268Sphk
117535256Sdes	/* Directory set up, now install its entry in the parent directory. */
117631398Sbde	error = ext2_direnter(ip, dvp, cnp);
117731268Sphk	if (error) {
117831398Sbde		dp->i_nlink--;
117931398Sbde		dp->i_flag |= IN_CHANGE;
118031398Sbde	}
118131398Sbdebad:
118231398Sbde	/*
118331398Sbde	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
118431398Sbde	 * for us because we set the link count to 0.
118531398Sbde	 */
118631398Sbde	if (error) {
118731398Sbde		ip->i_nlink = 0;
118831268Sphk		ip->i_flag |= IN_CHANGE;
118931398Sbde		vput(tvp);
119031398Sbde	} else
119131398Sbde		*ap->a_vpp = tvp;
119231398Sbdeout:
119331268Sphk	return (error);
119431398Sbde#undef  DIRBLKSIZ
119531398Sbde#define DIRBLKSIZ  DEV_BSIZE
119631268Sphk}
119731268Sphk
119831398Sbde/*
119931398Sbde * Rmdir system call.
120031398Sbde */
120131268Sphkstatic int
1202246634Spfgext2_rmdir(struct vop_rmdir_args *ap)
120331268Sphk{
120431268Sphk	struct vnode *vp = ap->a_vp;
120531268Sphk	struct vnode *dvp = ap->a_dvp;
120631398Sbde	struct componentname *cnp = ap->a_cnp;
120731398Sbde	struct inode *ip, *dp;
120831268Sphk	int error;
120931268Sphk
121031268Sphk	ip = VTOI(vp);
121131398Sbde	dp = VTOI(dvp);
121231398Sbde
121331398Sbde	/*
121431398Sbde	 * Verify the directory is empty (and valid).
121531398Sbde	 * (Rmdir ".." won't be valid since
121631398Sbde	 *  ".." will contain a reference to
121731398Sbde	 *  the current directory and thus be
121831398Sbde	 *  non-empty.)
121931398Sbde	 */
122031398Sbde	error = 0;
122131398Sbde	if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) {
122231398Sbde		error = ENOTEMPTY;
122331398Sbde		goto out;
122431398Sbde	}
122531398Sbde	if ((dp->i_flags & APPEND)
122631398Sbde	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
122731268Sphk		error = EPERM;
122831268Sphk		goto out;
122931268Sphk	}
123031398Sbde	/*
123131398Sbde	 * Delete reference to directory before purging
123231398Sbde	 * inode.  If we crash in between, the directory
123331398Sbde	 * will be reattached to lost+found,
123431398Sbde	 */
123531398Sbde	error = ext2_dirremove(dvp, cnp);
123631398Sbde	if (error)
123731398Sbde		goto out;
123831398Sbde	dp->i_nlink--;
123931398Sbde	dp->i_flag |= IN_CHANGE;
124031398Sbde	cache_purge(dvp);
1241175294Sattilio	VOP_UNLOCK(dvp, 0);
124231398Sbde	/*
124331398Sbde	 * Truncate inode.  The only stuff left
124431398Sbde	 * in the directory is "." and "..".  The
124531398Sbde	 * "." reference is inconsequential since
124631398Sbde	 * we're quashing it.  The ".." reference
124731398Sbde	 * has already been adjusted above.  We've
124831398Sbde	 * removed the "." reference and the reference
124931398Sbde	 * in the parent directory, but there may be
125031398Sbde	 * other hard links so decrement by 2 and
125131398Sbde	 * worry about them later.
125231398Sbde	 */
125331398Sbde	ip->i_nlink -= 2;
1254175294Sattilio	error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
1255175294Sattilio	    cnp->cn_thread);
125631398Sbde	cache_purge(ITOV(ip));
1257235508Spfg	if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1258235508Spfg		VOP_UNLOCK(vp, 0);
1259235508Spfg		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1260235508Spfg		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1261235508Spfg	}
126231268Sphkout:
126331268Sphk	return (error);
126431268Sphk}
126531268Sphk
126631268Sphk/*
126731398Sbde * symlink -- make a symbolic link
126812115Sdyson */
126912911Sphkstatic int
1270246634Spfgext2_symlink(struct vop_symlink_args *ap)
127112115Sdyson{
127296752Siedowse	struct vnode *vp, **vpp = ap->a_vpp;
127396752Siedowse	struct inode *ip;
127431398Sbde	int len, error;
127531398Sbde
127631398Sbde	error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
127731398Sbde	    vpp, ap->a_cnp);
127831398Sbde	if (error)
127931398Sbde		return (error);
128031398Sbde	vp = *vpp;
128131398Sbde	len = strlen(ap->a_target);
128231398Sbde	if (len < vp->v_mount->mnt_maxsymlinklen) {
128331398Sbde		ip = VTOI(vp);
128431398Sbde		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
128531398Sbde		ip->i_size = len;
128631398Sbde		ip->i_flag |= IN_CHANGE | IN_UPDATE;
128731398Sbde	} else
128831398Sbde		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1289101744Srwatson		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1290194296Skib		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
129153131Seivind	if (error)
129253131Seivind		vput(vp);
129331398Sbde	return (error);
129431398Sbde}
129531398Sbde
129631398Sbde/*
129796749Siedowse * Return target name of a symbolic link
129896749Siedowse */
129996749Siedowsestatic int
1300246634Spfgext2_readlink(struct vop_readlink_args *ap)
130196749Siedowse{
130296749Siedowse	struct vnode *vp = ap->a_vp;
130396749Siedowse	struct inode *ip = VTOI(vp);
130496749Siedowse	int isize;
130596749Siedowse
130696749Siedowse	isize = ip->i_size;
130796749Siedowse	if (isize < vp->v_mount->mnt_maxsymlinklen) {
130896749Siedowse		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
130996749Siedowse		return (0);
131096749Siedowse	}
131196749Siedowse	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
131296749Siedowse}
131396749Siedowse
131496749Siedowse/*
131596749Siedowse * Calculate the logical to physical mapping if not done already,
131696749Siedowse * then call the device strategy routine.
131796749Siedowse *
131896749Siedowse * In order to be able to swap to a file, the ext2_bmaparray() operation may not
131996749Siedowse * deadlock on memory.  See ext2_bmap() for details.
132096749Siedowse */
1321105223Sphkstatic int
1322246634Spfgext2_strategy(struct vop_strategy_args *ap)
132396749Siedowse{
132496749Siedowse	struct buf *bp = ap->a_bp;
132596749Siedowse	struct vnode *vp = ap->a_vp;
132696749Siedowse	struct inode *ip;
1327137039Sphk	struct bufobj *bo;
1328254283Spfg	daddr_t blkno;
132996749Siedowse	int error;
133096749Siedowse
133196749Siedowse	ip = VTOI(vp);
133296749Siedowse	if (vp->v_type == VBLK || vp->v_type == VCHR)
133396749Siedowse		panic("ext2_strategy: spec");
133496749Siedowse	if (bp->b_blkno == bp->b_lblkno) {
133596749Siedowse		error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL);
133696749Siedowse		bp->b_blkno = blkno;
133796749Siedowse		if (error) {
133896749Siedowse			bp->b_error = error;
133996749Siedowse			bp->b_ioflags |= BIO_ERROR;
134096749Siedowse			bufdone(bp);
1341186194Strasz			return (0);
134296749Siedowse		}
134396749Siedowse		if ((long)bp->b_blkno == -1)
134496749Siedowse			vfs_bio_clrbuf(bp);
134596749Siedowse	}
134696749Siedowse	if ((long)bp->b_blkno == -1) {
134796749Siedowse		bufdone(bp);
134896749Siedowse		return (0);
134996749Siedowse	}
1350121205Sphk	bp->b_iooffset = dbtob(bp->b_blkno);
1351137039Sphk	bo = VFSTOEXT2(vp->v_mount)->um_bo;
1352140051Sphk	BO_STRATEGY(bo, bp);
135396749Siedowse	return (0);
135496749Siedowse}
135596749Siedowse
135696749Siedowse/*
135796749Siedowse * Print out the contents of an inode.
135896749Siedowse */
1359105223Sphkstatic int
1360246634Spfgext2_print(struct vop_print_args *ap)
136196749Siedowse{
136296749Siedowse	struct vnode *vp = ap->a_vp;
136396749Siedowse	struct inode *ip = VTOI(vp);
136496749Siedowse
1365143677Sphk	vn_printf(ip->i_devvp, "\tino %lu", (u_long)ip->i_number);
136696749Siedowse	if (vp->v_type == VFIFO)
136796749Siedowse		fifo_printinfo(vp);
136896749Siedowse	printf("\n");
136996749Siedowse	return (0);
137096749Siedowse}
137196749Siedowse
137296749Siedowse/*
137396749Siedowse * Close wrapper for fifos.
137496749Siedowse *
137596749Siedowse * Update the times on the inode then do device close.
137696749Siedowse */
1377105223Sphkstatic int
1378246634Spfgext2fifo_close(struct vop_close_args *ap)
137996749Siedowse{
138096749Siedowse	struct vnode *vp = ap->a_vp;
138196749Siedowse
1382103938Sjeff	VI_LOCK(vp);
138396749Siedowse	if (vp->v_usecount > 1)
1384202283Slulf		ext2_itimes_locked(vp);
1385103938Sjeff	VI_UNLOCK(vp);
1386138693Smarcel	return (fifo_specops.vop_close(ap));
138796749Siedowse}
138896749Siedowse
138996749Siedowse/*
139096749Siedowse * Kqfilter wrapper for fifos.
139196749Siedowse *
139296749Siedowse * Fall through to ext2 kqfilter routines if needed
139396749Siedowse */
1394105223Sphkstatic int
1395246634Spfgext2fifo_kqfilter(struct vop_kqfilter_args *ap)
139696749Siedowse{
139796749Siedowse	int error;
139896749Siedowse
1399138693Smarcel	error = fifo_specops.vop_kqfilter(ap);
140096749Siedowse	if (error)
1401184410Skib		error = vfs_kqfilter(ap);
140296749Siedowse	return (error);
140396749Siedowse}
140496749Siedowse
140596749Siedowse/*
140696749Siedowse * Return POSIX pathconf information applicable to ext2 filesystems.
140796749Siedowse */
1408105223Sphkstatic int
1409246634Spfgext2_pathconf(struct vop_pathconf_args *ap)
141096749Siedowse{
1411253173Spfg	int error = 0;
141296749Siedowse
141396749Siedowse	switch (ap->a_name) {
141496749Siedowse	case _PC_LINK_MAX:
1415246347Spfg		*ap->a_retval = EXT2_LINK_MAX;
1416253173Spfg		break;
141796749Siedowse	case _PC_NAME_MAX:
141896749Siedowse		*ap->a_retval = NAME_MAX;
1419253173Spfg		break;
142096749Siedowse	case _PC_PATH_MAX:
142196749Siedowse		*ap->a_retval = PATH_MAX;
1422253173Spfg		break;
142396749Siedowse	case _PC_PIPE_BUF:
142496749Siedowse		*ap->a_retval = PIPE_BUF;
1425253173Spfg		break;
142696749Siedowse	case _PC_CHOWN_RESTRICTED:
142796749Siedowse		*ap->a_retval = 1;
1428253173Spfg		break;
142996749Siedowse	case _PC_NO_TRUNC:
143096749Siedowse		*ap->a_retval = 1;
1431253173Spfg		break;
1432252956Spfg	case _PC_MIN_HOLE_SIZE:
1433252956Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1434253173Spfg		break;
1435253173Spfg	case _PC_ASYNC_IO:
1436253173Spfg		/* _PC_ASYNC_IO should have been handled by upper layers. */
1437253173Spfg		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
1438253173Spfg		error = EINVAL;
1439253173Spfg		break;
1440253173Spfg	case _PC_PRIO_IO:
1441253173Spfg		*ap->a_retval = 0;
1442253173Spfg		break;
1443253173Spfg	case _PC_SYNC_IO:
1444253173Spfg		*ap->a_retval = 0;
1445253173Spfg		break;
1446253173Spfg	case _PC_ALLOC_SIZE_MIN:
1447253173Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
1448253173Spfg		break;
1449253173Spfg	case _PC_FILESIZEBITS:
1450253173Spfg		*ap->a_retval = 64;
1451253173Spfg		break;
1452253173Spfg	case _PC_REC_INCR_XFER_SIZE:
1453253173Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1454253173Spfg		break;
1455253173Spfg	case _PC_REC_MAX_XFER_SIZE:
1456253173Spfg		*ap->a_retval = -1; /* means ``unlimited'' */
1457253173Spfg		break;
1458253173Spfg	case _PC_REC_MIN_XFER_SIZE:
1459253173Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1460253173Spfg		break;
1461253173Spfg	case _PC_REC_XFER_ALIGN:
1462253173Spfg		*ap->a_retval = PAGE_SIZE;
1463253173Spfg		break;
1464253173Spfg	case _PC_SYMLINK_MAX:
1465253173Spfg		*ap->a_retval = MAXPATHLEN;
1466253173Spfg		break;
1467253173Spfg
146896749Siedowse	default:
1469253173Spfg		error = EINVAL;
1470253173Spfg		break;
147196749Siedowse	}
1472253173Spfg	return (error);
147396749Siedowse}
147496749Siedowse
147596749Siedowse/*
1476166774Spjd * Vnode pointer to File handle
1477166774Spjd */
1478166774Spjd/* ARGSUSED */
1479166774Spjdstatic int
1480246634Spfgext2_vptofh(struct vop_vptofh_args *ap)
1481166774Spjd{
1482166774Spjd	struct inode *ip;
1483166774Spjd	struct ufid *ufhp;
1484166774Spjd
1485166774Spjd	ip = VTOI(ap->a_vp);
1486166774Spjd	ufhp = (struct ufid *)ap->a_fhp;
1487166774Spjd	ufhp->ufid_len = sizeof(struct ufid);
1488166774Spjd	ufhp->ufid_ino = ip->i_number;
1489166774Spjd	ufhp->ufid_gen = ip->i_gen;
1490166774Spjd	return (0);
1491166774Spjd}
1492166774Spjd
1493166774Spjd/*
149496749Siedowse * Initialize the vnode associated with a new inode, handle aliased
149596749Siedowse * vnodes.
149696749Siedowse */
149796749Siedowseint
1498246634Spfgext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp)
149996749Siedowse{
150096749Siedowse	struct inode *ip;
150196749Siedowse	struct vnode *vp;
150296749Siedowse
150396749Siedowse	vp = *vpp;
150496749Siedowse	ip = VTOI(vp);
1505135864Sphk	vp->v_type = IFTOVT(ip->i_mode);
1506135864Sphk	if (vp->v_type == VFIFO)
150796749Siedowse		vp->v_op = fifoops;
1508137039Sphk
1509221128Sjhb	if (ip->i_number == EXT2_ROOTINO)
1510101308Sjeff		vp->v_vflag |= VV_ROOT;
1511134899Sphk	ip->i_modrev = init_va_filerev();
151296749Siedowse	*vpp = vp;
151396749Siedowse	return (0);
151496749Siedowse}
151596749Siedowse
151696749Siedowse/*
151731398Sbde * Allocate a new inode.
151831398Sbde */
151931398Sbdestatic int
1520246634Spfgext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
1521246634Spfg    struct componentname *cnp)
152231398Sbde{
152396752Siedowse	struct inode *ip, *pdir;
152431398Sbde	struct vnode *tvp;
152531398Sbde	int error;
152612115Sdyson
152731398Sbde	pdir = VTOI(dvp);
1528251658Spfg#ifdef INVARIANTS
152931398Sbde	if ((cnp->cn_flags & HASBUF) == 0)
153031398Sbde		panic("ext2_makeinode: no name");
153112115Sdyson#endif
153231398Sbde	*vpp = NULL;
153331398Sbde	if ((mode & IFMT) == 0)
153431398Sbde		mode |= IFREG;
153512115Sdyson
153696749Siedowse	error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp);
153731398Sbde	if (error) {
153831398Sbde		return (error);
153931398Sbde	}
154031398Sbde	ip = VTOI(tvp);
154131398Sbde	ip->i_gid = pdir->i_gid;
154231398Sbde#ifdef SUIDDIR
154331398Sbde	{
154412115Sdyson		/*
154531398Sbde		 * if we are
154631398Sbde		 * not the owner of the directory,
154731398Sbde		 * and we are hacking owners here, (only do this where told to)
154831398Sbde		 * and we are not giving it TOO root, (would subvert quotas)
154931398Sbde		 * then go ahead and give it to the other user.
155031398Sbde		 * Note that this drops off the execute bits for security.
155112115Sdyson		 */
155231398Sbde		if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
155331398Sbde		     (pdir->i_mode & ISUID) &&
155431398Sbde		     (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
155531398Sbde			ip->i_uid = pdir->i_uid;
155631398Sbde			mode &= ~07111;
155731398Sbde		} else {
155831398Sbde			ip->i_uid = cnp->cn_cred->cr_uid;
155931398Sbde		}
156012115Sdyson	}
156112115Sdyson#else
156231398Sbde	ip->i_uid = cnp->cn_cred->cr_uid;
156312115Sdyson#endif
156431398Sbde	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
156531398Sbde	ip->i_mode = mode;
156631398Sbde	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
156731398Sbde	ip->i_nlink = 1;
1568164033Srwatson	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) {
1569170587Srwatson		if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0))
1570164033Srwatson			ip->i_mode &= ~ISGID;
1571164033Srwatson	}
157231398Sbde
157331398Sbde	if (cnp->cn_flags & ISWHITEOUT)
157431398Sbde		ip->i_flags |= UF_OPAQUE;
157531398Sbde
157631398Sbde	/*
157731398Sbde	 * Make sure inode goes to disk before directory entry.
157831398Sbde	 */
1579221166Sjhb	error = ext2_update(tvp, !DOINGASYNC(tvp));
158031398Sbde	if (error)
158131398Sbde		goto bad;
158231398Sbde	error = ext2_direnter(ip, dvp, cnp);
158331398Sbde	if (error)
158431398Sbde		goto bad;
158531398Sbde
158631398Sbde	*vpp = tvp;
158731398Sbde	return (0);
158831398Sbde
158931398Sbdebad:
159031398Sbde	/*
159131398Sbde	 * Write error occurred trying to update the inode
159231398Sbde	 * or the directory so must deallocate the inode.
159331398Sbde	 */
159431398Sbde	ip->i_nlink = 0;
159531398Sbde	ip->i_flag |= IN_CHANGE;
159631398Sbde	vput(tvp);
159731398Sbde	return (error);
159812115Sdyson}
1599228507Spfg
1600228507Spfg/*
1601228507Spfg * Vnode op for reading.
1602228507Spfg */
1603228507Spfgstatic int
1604246634Spfgext2_read(struct vop_read_args *ap)
1605228507Spfg{
1606228507Spfg	struct vnode *vp;
1607228507Spfg	struct inode *ip;
1608254260Spfg	int error;
1609254260Spfg
1610254260Spfg	vp = ap->a_vp;
1611254260Spfg	ip = VTOI(vp);
1612254260Spfg
1613254260Spfg	/*EXT4_EXT_LOCK(ip);*/
1614261311Spfg	if (ip->i_flag & IN_E4EXTENTS)
1615254260Spfg		error = ext4_ext_read(ap);
1616254260Spfg	else
1617254260Spfg		error = ext2_ind_read(ap);
1618254260Spfg	/*EXT4_EXT_UNLOCK(ip);*/
1619254260Spfg	return (error);
1620254260Spfg}
1621254260Spfg
1622254260Spfg/*
1623254260Spfg * Vnode op for reading.
1624254260Spfg */
1625254260Spfgstatic int
1626254260Spfgext2_ind_read(struct vop_read_args *ap)
1627254260Spfg{
1628254260Spfg	struct vnode *vp;
1629254260Spfg	struct inode *ip;
1630228507Spfg	struct uio *uio;
1631228507Spfg	struct m_ext2fs *fs;
1632228507Spfg	struct buf *bp;
1633228507Spfg	daddr_t lbn, nextlbn;
1634228507Spfg	off_t bytesinfile;
1635228507Spfg	long size, xfersize, blkoffset;
1636228507Spfg	int error, orig_resid, seqcount;
1637228507Spfg	int ioflag;
1638228507Spfg
1639228507Spfg	vp = ap->a_vp;
1640228507Spfg	uio = ap->a_uio;
1641228507Spfg	ioflag = ap->a_ioflag;
1642228507Spfg
1643228507Spfg	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
1644228507Spfg	ip = VTOI(vp);
1645228507Spfg
1646228507Spfg#ifdef INVARIANTS
1647228507Spfg	if (uio->uio_rw != UIO_READ)
1648228507Spfg		panic("%s: mode", "ext2_read");
1649228507Spfg
1650228507Spfg	if (vp->v_type == VLNK) {
1651228507Spfg		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
1652228507Spfg			panic("%s: short symlink", "ext2_read");
1653228507Spfg	} else if (vp->v_type != VREG && vp->v_type != VDIR)
1654228507Spfg		panic("%s: type %d", "ext2_read", vp->v_type);
1655228507Spfg#endif
1656228507Spfg	orig_resid = uio->uio_resid;
1657253098Spfg	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
1658228507Spfg	if (orig_resid == 0)
1659228507Spfg		return (0);
1660253098Spfg	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
1661228507Spfg	fs = ip->i_e2fs;
1662228507Spfg	if (uio->uio_offset < ip->i_size &&
1663228507Spfg	    uio->uio_offset >= fs->e2fs_maxfilesize)
1664228507Spfg	    	return (EOVERFLOW);
1665228507Spfg
1666228507Spfg	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
1667228507Spfg		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
1668228507Spfg			break;
1669228507Spfg		lbn = lblkno(fs, uio->uio_offset);
1670228507Spfg		nextlbn = lbn + 1;
1671228507Spfg		size = blksize(fs, ip, lbn);
1672228507Spfg		blkoffset = blkoff(fs, uio->uio_offset);
1673228507Spfg
1674228507Spfg		xfersize = fs->e2fs_fsize - blkoffset;
1675228507Spfg		if (uio->uio_resid < xfersize)
1676228507Spfg			xfersize = uio->uio_resid;
1677228507Spfg		if (bytesinfile < xfersize)
1678228507Spfg			xfersize = bytesinfile;
1679228507Spfg
1680228507Spfg		if (lblktosize(fs, nextlbn) >= ip->i_size)
1681228507Spfg			error = bread(vp, lbn, size, NOCRED, &bp);
1682248282Skib		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
1683228507Spfg			error = cluster_read(vp, ip->i_size, lbn, size,
1684248282Skib			    NOCRED, blkoffset + uio->uio_resid, seqcount,
1685248282Skib			    0, &bp);
1686248282Skib		} else if (seqcount > 1) {
1687259223Spfg			u_int nextsize = blksize(fs, ip, nextlbn);
1688228507Spfg			error = breadn(vp, lbn,
1689228507Spfg			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
1690228507Spfg		} else
1691228507Spfg			error = bread(vp, lbn, size, NOCRED, &bp);
1692228507Spfg		if (error) {
1693228507Spfg			brelse(bp);
1694228507Spfg			bp = NULL;
1695228507Spfg			break;
1696228507Spfg		}
1697228507Spfg
1698228507Spfg		/*
1699228507Spfg		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
1700228507Spfg		 * will cause us to attempt to release the buffer later on
1701228507Spfg		 * and will cause the buffer cache to attempt to free the
1702228507Spfg		 * underlying pages.
1703228507Spfg		 */
1704228507Spfg		if (ioflag & IO_DIRECT)
1705228507Spfg			bp->b_flags |= B_DIRECT;
1706228507Spfg
1707228507Spfg		/*
1708228507Spfg		 * We should only get non-zero b_resid when an I/O error
1709228507Spfg		 * has occurred, which should cause us to break above.
1710228507Spfg		 * However, if the short read did not cause an error,
1711228507Spfg		 * then we want to ensure that we do not uiomove bad
1712228507Spfg		 * or uninitialized data.
1713228507Spfg		 */
1714228507Spfg		size -= bp->b_resid;
1715228507Spfg		if (size < xfersize) {
1716228507Spfg			if (size == 0)
1717228507Spfg				break;
1718228507Spfg			xfersize = size;
1719228507Spfg		}
1720228507Spfg		error = uiomove((char *)bp->b_data + blkoffset,
1721262723Spfg			(int)xfersize, uio);
1722228507Spfg		if (error)
1723228507Spfg			break;
1724228507Spfg
1725228507Spfg		if (ioflag & (IO_VMIO|IO_DIRECT)) {
1726228507Spfg			/*
1727228507Spfg			 * If it's VMIO or direct I/O, then we don't
1728228507Spfg			 * need the buf, mark it available for
1729228507Spfg			 * freeing. If it's non-direct VMIO, the VM has
1730228507Spfg			 * the data.
1731228507Spfg			 */
1732228507Spfg			bp->b_flags |= B_RELBUF;
1733228507Spfg			brelse(bp);
1734228507Spfg		} else {
1735228507Spfg			/*
1736228507Spfg			 * Otherwise let whoever
1737228507Spfg			 * made the request take care of
1738228507Spfg			 * freeing it. We just queue
1739228507Spfg			 * it onto another list.
1740228507Spfg			 */
1741228507Spfg			bqrelse(bp);
1742228507Spfg		}
1743228507Spfg	}
1744228507Spfg
1745228507Spfg	/*
1746228507Spfg	 * This can only happen in the case of an error
1747228507Spfg	 * because the loop above resets bp to NULL on each iteration
1748228507Spfg	 * and on normal completion has not set a new value into it.
1749228507Spfg	 * so it must have come from a 'break' statement
1750228507Spfg	 */
1751228507Spfg	if (bp != NULL) {
1752228507Spfg		if (ioflag & (IO_VMIO|IO_DIRECT)) {
1753228507Spfg			bp->b_flags |= B_RELBUF;
1754228507Spfg			brelse(bp);
1755228507Spfg		} else {
1756228507Spfg			bqrelse(bp);
1757228507Spfg		}
1758228507Spfg	}
1759228507Spfg
1760228507Spfg	if ((error == 0 || uio->uio_resid != orig_resid) &&
1761228507Spfg	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1762228507Spfg		ip->i_flag |= IN_ACCESS;
1763228507Spfg	return (error);
1764228507Spfg}
1765228507Spfg
1766252956Spfgstatic int
1767252956Spfgext2_ioctl(struct vop_ioctl_args *ap)
1768252956Spfg{
1769252956Spfg
1770252956Spfg	switch (ap->a_command) {
1771252956Spfg	case FIOSEEKDATA:
1772252956Spfg	case FIOSEEKHOLE:
1773252956Spfg		return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
1774252956Spfg		    (off_t *)ap->a_data, ap->a_cred));
1775252956Spfg	default:
1776252956Spfg		return (ENOTTY);
1777252956Spfg	}
1778252956Spfg}
1779252956Spfg
1780228507Spfg/*
1781254260Spfg * this function handles ext4 extents block mapping
1782254260Spfg */
1783254260Spfgstatic int
1784254260Spfgext4_ext_read(struct vop_read_args *ap)
1785254260Spfg{
1786254260Spfg	struct vnode *vp;
1787254260Spfg	struct inode *ip;
1788254260Spfg	struct uio *uio;
1789254260Spfg	struct m_ext2fs *fs;
1790254260Spfg	struct buf *bp;
1791254260Spfg	struct ext4_extent nex, *ep;
1792254260Spfg	struct ext4_extent_path path;
1793254260Spfg	daddr_t lbn, newblk;
1794254260Spfg	off_t bytesinfile;
1795254260Spfg	int cache_type;
1796254260Spfg	ssize_t orig_resid;
1797254260Spfg	int error;
1798254260Spfg	long size, xfersize, blkoffset;
1799254260Spfg
1800254260Spfg	vp = ap->a_vp;
1801254260Spfg	ip = VTOI(vp);
1802254260Spfg	uio = ap->a_uio;
1803254260Spfg	memset(&path, 0, sizeof(path));
1804254260Spfg
1805254260Spfg	orig_resid = uio->uio_resid;
1806254260Spfg	KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__));
1807254260Spfg	if (orig_resid == 0)
1808254260Spfg		return (0);
1809254260Spfg	KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__));
1810254260Spfg	fs = ip->i_e2fs;
1811254260Spfg	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize)
1812254260Spfg		return (EOVERFLOW);
1813254260Spfg
1814254260Spfg	while (uio->uio_resid > 0) {
1815254260Spfg		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
1816254260Spfg			break;
1817254260Spfg		lbn = lblkno(fs, uio->uio_offset);
1818254260Spfg		size = blksize(fs, ip, lbn);
1819254260Spfg		blkoffset = blkoff(fs, uio->uio_offset);
1820254260Spfg
1821254260Spfg		xfersize = fs->e2fs_fsize - blkoffset;
1822254260Spfg		xfersize = MIN(xfersize, uio->uio_resid);
1823254260Spfg		xfersize = MIN(xfersize, bytesinfile);
1824254260Spfg
1825254260Spfg		/* get block from ext4 extent cache */
1826254260Spfg		cache_type = ext4_ext_in_cache(ip, lbn, &nex);
1827254260Spfg		switch (cache_type) {
1828254260Spfg		case EXT4_EXT_CACHE_NO:
1829254260Spfg			ext4_ext_find_extent(fs, ip, lbn, &path);
1830254260Spfg			ep = path.ep_ext;
1831254260Spfg			if (ep == NULL)
1832254260Spfg				return (EIO);
1833254260Spfg
1834254260Spfg			ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN);
1835254260Spfg
1836254260Spfg			newblk = lbn - ep->e_blk + (ep->e_start_lo |
1837254260Spfg			    (daddr_t)ep->e_start_hi << 32);
1838254260Spfg
1839254260Spfg			if (path.ep_bp != NULL) {
1840254260Spfg				brelse(path.ep_bp);
1841254260Spfg				path.ep_bp = NULL;
1842254260Spfg			}
1843254260Spfg			break;
1844254260Spfg
1845254260Spfg		case EXT4_EXT_CACHE_GAP:
1846254260Spfg			/* block has not been allocated yet */
1847254260Spfg			return (0);
1848254260Spfg
1849254260Spfg		case EXT4_EXT_CACHE_IN:
1850254260Spfg			newblk = lbn - nex.e_blk + (nex.e_start_lo |
1851254260Spfg			    (daddr_t)nex.e_start_hi << 32);
1852254260Spfg			break;
1853254260Spfg
1854254260Spfg		default:
1855254260Spfg			panic("%s: invalid cache type", __func__);
1856254260Spfg		}
1857254260Spfg
1858254260Spfg		error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp);
1859254260Spfg		if (error) {
1860254260Spfg			brelse(bp);
1861254260Spfg			return (error);
1862254260Spfg		}
1863254260Spfg
1864254260Spfg		size -= bp->b_resid;
1865254260Spfg		if (size < xfersize) {
1866254260Spfg			if (size == 0) {
1867254260Spfg				bqrelse(bp);
1868254260Spfg				break;
1869254260Spfg			}
1870254260Spfg			xfersize = size;
1871254260Spfg		}
1872254260Spfg		error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio);
1873254260Spfg		bqrelse(bp);
1874254260Spfg		if (error)
1875254260Spfg			return (error);
1876254260Spfg	}
1877254260Spfg
1878254260Spfg	return (0);
1879254260Spfg}
1880254260Spfg
1881254260Spfg/*
1882228507Spfg * Vnode op for writing.
1883228507Spfg */
1884228507Spfgstatic int
1885246634Spfgext2_write(struct vop_write_args *ap)
1886228507Spfg{
1887228507Spfg	struct vnode *vp;
1888228507Spfg	struct uio *uio;
1889228507Spfg	struct inode *ip;
1890228507Spfg	struct m_ext2fs *fs;
1891228507Spfg	struct buf *bp;
1892228507Spfg	daddr_t lbn;
1893228507Spfg	off_t osize;
1894228507Spfg	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
1895228507Spfg
1896228507Spfg	ioflag = ap->a_ioflag;
1897228507Spfg	uio = ap->a_uio;
1898228507Spfg	vp = ap->a_vp;
1899228507Spfg
1900228507Spfg	seqcount = ioflag >> IO_SEQSHIFT;
1901228507Spfg	ip = VTOI(vp);
1902228507Spfg
1903228507Spfg#ifdef INVARIANTS
1904228507Spfg	if (uio->uio_rw != UIO_WRITE)
1905228507Spfg		panic("%s: mode", "ext2_write");
1906228507Spfg#endif
1907228507Spfg
1908228507Spfg	switch (vp->v_type) {
1909228507Spfg	case VREG:
1910228507Spfg		if (ioflag & IO_APPEND)
1911228507Spfg			uio->uio_offset = ip->i_size;
1912228507Spfg		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
1913228507Spfg			return (EPERM);
1914228507Spfg		/* FALLTHROUGH */
1915228507Spfg	case VLNK:
1916228507Spfg		break;
1917228507Spfg	case VDIR:
1918228507Spfg		/* XXX differs from ffs -- this is called from ext2_mkdir(). */
1919228507Spfg		if ((ioflag & IO_SYNC) == 0)
1920228507Spfg		panic("ext2_write: nonsync dir write");
1921228507Spfg		break;
1922228507Spfg	default:
1923228507Spfg		panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
1924228507Spfg		    vp->v_type, (intmax_t)uio->uio_offset,
1925228507Spfg		    (intmax_t)uio->uio_resid);
1926228507Spfg	}
1927228507Spfg
1928228507Spfg	KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
1929228507Spfg	KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
1930228507Spfg	fs = ip->i_e2fs;
1931228507Spfg	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
1932228507Spfg		return (EFBIG);
1933228507Spfg	/*
1934228507Spfg	 * Maybe this should be above the vnode op call, but so long as
1935228507Spfg	 * file servers have no limits, I don't think it matters.
1936228507Spfg	 */
1937228507Spfg	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
1938228507Spfg		return (EFBIG);
1939228507Spfg
1940228507Spfg	resid = uio->uio_resid;
1941228507Spfg	osize = ip->i_size;
1942228507Spfg	if (seqcount > BA_SEQMAX)
1943228507Spfg		flags = BA_SEQMAX << BA_SEQSHIFT;
1944228507Spfg	else
1945228507Spfg		flags = seqcount << BA_SEQSHIFT;
1946228507Spfg	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1947228507Spfg		flags |= IO_SYNC;
1948228507Spfg
1949228507Spfg	for (error = 0; uio->uio_resid > 0;) {
1950228507Spfg		lbn = lblkno(fs, uio->uio_offset);
1951228507Spfg		blkoffset = blkoff(fs, uio->uio_offset);
1952228507Spfg		xfersize = fs->e2fs_fsize - blkoffset;
1953228507Spfg		if (uio->uio_resid < xfersize)
1954228507Spfg			xfersize = uio->uio_resid;
1955228507Spfg		if (uio->uio_offset + xfersize > ip->i_size)
1956228507Spfg			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
1957228507Spfg
1958262723Spfg		/*
1959228507Spfg		 * We must perform a read-before-write if the transfer size
1960228507Spfg		 * does not cover the entire buffer.
1961262723Spfg		 */
1962228507Spfg		if (fs->e2fs_bsize > xfersize)
1963228507Spfg			flags |= BA_CLRBUF;
1964228507Spfg		else
1965228507Spfg			flags &= ~BA_CLRBUF;
1966228507Spfg		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
1967228507Spfg		    ap->a_cred, &bp, flags);
1968228507Spfg		if (error != 0)
1969228507Spfg			break;
1970228507Spfg
1971228507Spfg		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
1972228507Spfg			bp->b_flags |= B_NOCACHE;
1973228507Spfg		if (uio->uio_offset + xfersize > ip->i_size)
1974228507Spfg			ip->i_size = uio->uio_offset + xfersize;
1975228507Spfg		size = blksize(fs, ip, lbn) - bp->b_resid;
1976228507Spfg		if (size < xfersize)
1977228507Spfg			xfersize = size;
1978228507Spfg
1979228507Spfg		error =
1980228507Spfg		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1981253050Spfg		/*
1982253050Spfg		 * If the buffer is not already filled and we encounter an
1983253050Spfg		 * error while trying to fill it, we have to clear out any
1984253050Spfg		 * garbage data from the pages instantiated for the buffer.
1985253050Spfg		 * If we do not, a failed uiomove() during a write can leave
1986253050Spfg		 * the prior contents of the pages exposed to a userland mmap.
1987253050Spfg		 *
1988253050Spfg		 * Note that we need only clear buffers with a transfer size
1989253050Spfg		 * equal to the block size because buffers with a shorter
1990253050Spfg		 * transfer size were cleared above by the call to ext2_balloc()
1991253050Spfg		 * with the BA_CLRBUF flag set.
1992253050Spfg		 *
1993253050Spfg		 * If the source region for uiomove identically mmaps the
1994253050Spfg		 * buffer, uiomove() performed the NOP copy, and the buffer
1995253050Spfg		 * content remains valid because the page fault handler
1996253050Spfg		 * validated the pages.
1997253050Spfg		 */
1998253050Spfg		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
1999253050Spfg		    fs->e2fs_bsize == xfersize)
2000253050Spfg			vfs_bio_clrbuf(bp);
2001228507Spfg		if (ioflag & (IO_VMIO|IO_DIRECT)) {
2002228507Spfg			bp->b_flags |= B_RELBUF;
2003228507Spfg		}
2004228507Spfg
2005228507Spfg		/*
2006228507Spfg		 * If IO_SYNC each buffer is written synchronously.  Otherwise
2007228507Spfg		 * if we have a severe page deficiency write the buffer
2008228507Spfg		 * asynchronously.  Otherwise try to cluster, and if that
2009228507Spfg		 * doesn't do it then either do an async write (if O_DIRECT),
2010228507Spfg		 * or a delayed write (if not).
2011228507Spfg		 */
2012228507Spfg		if (ioflag & IO_SYNC) {
2013228507Spfg			(void)bwrite(bp);
2014228507Spfg		} else if (vm_page_count_severe() ||
2015228507Spfg		    buf_dirty_count_severe() ||
2016228507Spfg		    (ioflag & IO_ASYNC)) {
2017228507Spfg			bp->b_flags |= B_CLUSTEROK;
2018228507Spfg			bawrite(bp);
2019228507Spfg		} else if (xfersize + blkoffset == fs->e2fs_fsize) {
2020228507Spfg			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
2021228507Spfg				bp->b_flags |= B_CLUSTEROK;
2022248282Skib				cluster_write(vp, bp, ip->i_size, seqcount, 0);
2023228507Spfg			} else {
2024228507Spfg				bawrite(bp);
2025228507Spfg			}
2026228507Spfg		} else if (ioflag & IO_DIRECT) {
2027228507Spfg			bp->b_flags |= B_CLUSTEROK;
2028228507Spfg			bawrite(bp);
2029228507Spfg		} else {
2030228507Spfg			bp->b_flags |= B_CLUSTEROK;
2031228507Spfg			bdwrite(bp);
2032228507Spfg		}
2033228507Spfg		if (error || xfersize == 0)
2034228507Spfg			break;
2035228507Spfg	}
2036228507Spfg	/*
2037228507Spfg	 * If we successfully wrote any data, and we are not the superuser
2038228507Spfg	 * we clear the setuid and setgid bits as a precaution against
2039228507Spfg	 * tampering.
2040228507Spfg	 */
2041228507Spfg	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
2042228507Spfg	    ap->a_cred) {
2043228507Spfg		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
2044228507Spfg			ip->i_mode &= ~(ISUID | ISGID);
2045228507Spfg	}
2046228507Spfg	if (error) {
2047228507Spfg		if (ioflag & IO_UNIT) {
2048228507Spfg			(void)ext2_truncate(vp, osize,
2049228507Spfg			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
2050228507Spfg			uio->uio_offset -= resid - uio->uio_resid;
2051228507Spfg			uio->uio_resid = resid;
2052228507Spfg		}
2053228507Spfg	}
2054228507Spfg	if (uio->uio_resid != resid) {
2055262723Spfg		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2056262723Spfg		if (ioflag & IO_SYNC)
2057262723Spfg			error = ext2_update(vp, 1);
2058262723Spfg	}
2059228507Spfg	return (error);
2060228507Spfg}
2061