zfs_znode.c revision 273736
1121098Swollman/*
264499Swollman * CDDL HEADER START
32742Swollman *
42742Swollman * The contents of this file are subject to the terms of the
52742Swollman * Common Development and Distribution License (the "License").
62742Swollman * You may not use this file except in compliance with the License.
758787Sru *
82742Swollman * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
958787Sru * or http://www.opensolaris.org/os/licensing.
1058787Sru * See the License for the specific language governing permissions
112742Swollman * and limitations under the License.
1286222Swollman *
1320094Swollman * When distributing Covered Code, include this CDDL HEADER in each
1420094Swollman * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1520094Swollman * If applicable, add the following below this CDDL HEADER, with the
1620094Swollman * fields enclosed by brackets "[]" replaced with your own identifying
1720094Swollman * information: Portions Copyright [yyyy] [name of copyright owner]
1858787Sru *
1958787Sru * CDDL HEADER END
2020094Swollman */
2143543Swollman/*
222742Swollman * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2343543Swollman * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2443543Swollman */
2543543Swollman
2643543Swollman/* Portions Copyright 2007 Jeremy Teo */
27121098Swollman/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
28121098Swollman
29121098Swollman#ifdef _KERNEL
30121098Swollman#include <sys/types.h>
3143543Swollman#include <sys/param.h>
3243543Swollman#include <sys/time.h>
3343543Swollman#include <sys/systm.h>
3443543Swollman#include <sys/sysmacros.h>
3543543Swollman#include <sys/resource.h>
3643543Swollman#include <sys/mntent.h>
372742Swollman#include <sys/u8_textprep.h>
382742Swollman#include <sys/dsl_dataset.h>
3919878Swollman#include <sys/vfs.h>
40114173Swollman#include <sys/vnode.h>
41114173Swollman#include <sys/file.h>
42114173Swollman#include <sys/kmem.h>
43114173Swollman#include <sys/errno.h>
44114173Swollman#include <sys/unistd.h>
45114173Swollman#include <sys/atomic.h>
46114173Swollman#include <sys/zfs_dir.h>
47114173Swollman#include <sys/zfs_acl.h>
48114173Swollman#include <sys/zfs_ioctl.h>
49114173Swollman#include <sys/zfs_rlock.h>
50114173Swollman#include <sys/zfs_fuid.h>
51114173Swollman#include <sys/dnode.h>
52114173Swollman#include <sys/fs/zfs.h>
53114173Swollman#include <sys/kidmap.h>
542742Swollman#endif /* _KERNEL */
552742Swollman
5658787Sru#include <sys/dmu.h>
572742Swollman#include <sys/refcount.h>
5819878Swollman#include <sys/stat.h>
5914343Swollman#include <sys/zap.h>
609908Swollman#include <sys/zfs_znode.h>
619908Swollman#include <sys/sa.h>
629908Swollman#include <sys/zfs_sa.h>
639908Swollman#include <sys/zfs_stat.h>
649908Swollman#include <sys/refcount.h>
659908Swollman
669908Swollman#include "zfs_prop.h"
679908Swollman#include "zfs_comutil.h"
689908Swollman
/*
 * Read-only (CTLFLAG_RD) integer sysctl node named "znode" under
 * _debug_sizeof, reporting the constant sizeof(znode_t).  No backing
 * variable is supplied (SYSCTL_NULL_INT_PTR).
 */
699908Swollman/* Used by fstat(1). */
7014343SwollmanSYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD,
7114343Swollman    SYSCTL_NULL_INT_PTR, sizeof(znode_t), "sizeof(znode_t)");
729908Swollman
739908Swollman/*
749908Swollman * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
759908Swollman * turned on when DEBUG is also defined.
769908Swollman */
779908Swollman#ifdef	DEBUG
789908Swollman#define	ZNODE_STATS
7919878Swollman#endif	/* DEBUG */
802742Swollman
/*
 * ZNODE_STAT_ADD(stat) increments the given counter when ZNODE_STATS is
 * defined and expands to nothing otherwise, so call sites need no #ifdef.
 * The increment is a plain (non-atomic) post-increment.
 */
812742Swollman#ifdef	ZNODE_STATS
8243014Swollman#define	ZNODE_STAT_ADD(stat)			((stat)++)
832742Swollman#else
8419878Swollman#define	ZNODE_STAT_ADD(stat)			/* nothing */
852742Swollman#endif	/* ZNODE_STATS */
862742Swollman
872742Swollman/*
882742Swollman * Functions needed for userland (ie: libzpool) are not put under
892742Swollman * #ifdef _KERNEL; the rest of the functions have dependencies
902742Swollman * (such as VFS logic) that will not compile easily in userland.
912742Swollman */
9221217Swollman#ifdef _KERNEL
932742Swollman/*
942742Swollman * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
952742Swollman * be freed before it can be safely accessed.
962742Swollman */
/* Initialized in zfs_znode_init() and destroyed in zfs_znode_fini(). */
972742Swollmankrwlock_t zfsvfs_lock;
982742Swollman
/*
 * kmem cache from which all znode_t objects in this file are allocated.
 * Created by zfs_znode_init(); destroyed and reset to NULL by
 * zfs_znode_fini().
 */
992742Swollmanstatic kmem_cache_t *znode_cache = NULL;
10021217Swollman
1012742Swollman/*ARGSUSED*/
1022742Swollmanstatic void
1032742Swollmanznode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
1042742Swollman{
10519878Swollman	/*
1062742Swollman	 * We should never drop all dbuf refs without first clearing
1072742Swollman	 * the eviction callback.
1082742Swollman	 */
1092742Swollman	panic("evicting znode %p\n", user_ptr);
11058787Sru}
11158787Sru
/*
 * Vnode operation vectors defined outside this file.  zfs_znode_alloc()
 * creates every vnode with zfs_vnodeops, then switches FIFOs to zfs_fifoops
 * and regular files whose parent is the shares directory to zfs_shareops.
 */
1122742Swollmanextern struct vop_vector zfs_vnodeops;
1132742Swollmanextern struct vop_vector zfs_fifoops;
11458787Sruextern struct vop_vector zfs_shareops;
1152742Swollman
11658787Srustatic int
11758787Sruzfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
11858787Sru{
1192742Swollman	znode_t *zp = buf;
12058787Sru
12158787Sru	POINTER_INVALIDATE(&zp->z_zfsvfs);
1222742Swollman
1232742Swollman	list_link_init(&zp->z_link_node);
1242742Swollman
1252742Swollman	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
12658787Sru	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
1272742Swollman	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
1282742Swollman	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
12958787Sru
13058787Sru	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
1312742Swollman	avl_create(&zp->z_range_avl, zfs_range_compare,
132121098Swollman	    sizeof (rl_t), offsetof(rl_t, r_node));
13317200Swollman
13443543Swollman	zp->z_dirlocks = NULL;
13558787Sru	zp->z_acl_cached = NULL;
13617200Swollman	zp->z_vnode = NULL;
13717200Swollman	zp->z_moved = 0;
13817200Swollman	return (0);
13917200Swollman}
140121098Swollman
141121098Swollman/*ARGSUSED*/
142121098Swollmanstatic void
143121098Swollmanzfs_znode_cache_destructor(void *buf, void *arg)
144121098Swollman{
145121098Swollman	znode_t *zp = buf;
146121098Swollman
147121098Swollman	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
14817200Swollman	ASSERT(ZTOV(zp) == NULL);
149121098Swollman	vn_free(ZTOV(zp));
150121098Swollman	ASSERT(!list_link_active(&zp->z_link_node));
151121098Swollman	mutex_destroy(&zp->z_lock);
152121098Swollman	rw_destroy(&zp->z_parent_lock);
153121098Swollman	rw_destroy(&zp->z_name_lock);
154121098Swollman	mutex_destroy(&zp->z_acl_lock);
155121098Swollman	avl_destroy(&zp->z_range_avl);
156121098Swollman	mutex_destroy(&zp->z_range_lock);
157121098Swollman
158121098Swollman	ASSERT(zp->z_dirlocks == NULL);
15919878Swollman	ASSERT(zp->z_acl_cached == NULL);
16019878Swollman}
16119878Swollman
/*
 * Counters recording why zfs_znode_move() declined to move a znode.  Each
 * is bumped through ZNODE_STAT_ADD() on the corresponding early-return
 * path; the structure exists only when ZNODE_STATS is defined.
 */
16219878Swollman#ifdef	ZNODE_STATS
16319878Swollmanstatic struct {
16419878Swollman	uint64_t zms_zfsvfs_invalid;	/* vfs pointer invalid on entry */
16519878Swollman	uint64_t zms_zfsvfs_recheck1;	/* vfs ptr changed under zfsvfs_lock */
1662742Swollman	uint64_t zms_zfsvfs_unmounted;	/* file system already unmounted */
1672742Swollman	uint64_t zms_zfsvfs_recheck2;	/* vfs ptr changed under z_znodes_lock */
1682742Swollman	uint64_t zms_obj_held;		/* ZFS_OBJ_HOLD_TRYENTER() failed */
1692742Swollman	uint64_t zms_vnode_locked;	/* mutex_tryenter(v_lock) failed */
1702742Swollman	uint64_t zms_not_only_dnlc;	/* v_count != 1 or vnode not in DNLC */
1712742Swollman} znode_move_stats;
1722742Swollman#endif	/* ZNODE_STATS */
1732742Swollman
1742742Swollman#ifdef sun
1752742Swollmanstatic void
17643014Swollmanzfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
17743014Swollman{
17843014Swollman	vnode_t *vp;
17943014Swollman
18043014Swollman	/* Copy fields. */
18143014Swollman	nzp->z_zfsvfs = ozp->z_zfsvfs;
18258787Sru
18358787Sru	/* Swap vnodes. */
18458787Sru	vp = nzp->z_vnode;
18558787Sru	nzp->z_vnode = ozp->z_vnode;
1862742Swollman	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
18775267Swollman	ZTOV(ozp)->v_data = ozp;
18867578Swollman	ZTOV(nzp)->v_data = nzp;
18967578Swollman
19067578Swollman	nzp->z_id = ozp->z_id;
19167578Swollman	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
19267578Swollman	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
19375267Swollman	nzp->z_unlinked = ozp->z_unlinked;
19467578Swollman	nzp->z_atime_dirty = ozp->z_atime_dirty;
19558787Sru	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
19667578Swollman	nzp->z_blksz = ozp->z_blksz;
19767578Swollman	nzp->z_seq = ozp->z_seq;
19867578Swollman	nzp->z_mapcnt = ozp->z_mapcnt;
19967578Swollman	nzp->z_gen = ozp->z_gen;
20067578Swollman	nzp->z_sync_cnt = ozp->z_sync_cnt;
20167578Swollman	nzp->z_is_sa = ozp->z_is_sa;
20267578Swollman	nzp->z_sa_hdl = ozp->z_sa_hdl;
20367578Swollman	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
20467578Swollman	nzp->z_links = ozp->z_links;
20567578Swollman	nzp->z_size = ozp->z_size;
20667578Swollman	nzp->z_pflags = ozp->z_pflags;
20758787Sru	nzp->z_uid = ozp->z_uid;
20858787Sru	nzp->z_gid = ozp->z_gid;
20958787Sru	nzp->z_mode = ozp->z_mode;
21058787Sru
21158787Sru	/*
21258787Sru	 * Since this is just an idle znode and kmem is already dealing with
21393799Swollman	 * memory pressure, release any cached ACL.
21458787Sru	 */
21558787Sru	if (ozp->z_acl_cached) {
21643014Swollman		zfs_acl_free(ozp->z_acl_cached);
21743014Swollman		ozp->z_acl_cached = NULL;
21843014Swollman	}
21943014Swollman
22043014Swollman	sa_set_userp(nzp->z_sa_hdl, nzp);
22143014Swollman
2222742Swollman	/*
22386222Swollman	 * Invalidate the original znode by clearing fields that provide a
2242742Swollman	 * pointer back to the znode. Set the low bit of the vfs pointer to
22558787Sru	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
2262742Swollman	 * subsequent callback.
22758787Sru	 */
22830711Swollman	ozp->z_sa_hdl = NULL;
2292742Swollman	POINTER_INVALIDATE(&ozp->z_zfsvfs);
2302742Swollman
2312742Swollman	/*
2322742Swollman	 * Mark the znode.
23330711Swollman	 */
2342742Swollman	nzp->z_moved = 1;
23517200Swollman	ozp->z_moved = (uint8_t)-1;
23617200Swollman}
23717200Swollman
/*
 * kmem move callback for the znode cache (compiled only under "sun").
 *
 * Tries to relocate the znode in 'buf' (ozp) into the buffer 'newbuf'
 * (nzp).  'size' and 'arg' are not used.
 *
 * Returns:
 *	KMEM_CBRC_DONT_KNOW	the vfs pointer is invalid, changed while the
 *				locks were being acquired, or the file system
 *				is unmounted.
 *	KMEM_CBRC_LATER		the try-lock on the object hold or on the
 *				vnode's v_lock failed, or the vnode is not
 *				referenced solely by the DNLC.
 *	KMEM_CBRC_YES		the znode was moved into nzp and nzp replaced
 *				ozp on the file system's znode list.
 *
 * Every early-return path releases exactly the locks taken so far, in
 * reverse order of acquisition.
 */
2382742Swollman/*ARGSUSED*/
23975267Swollmanstatic kmem_cbrc_t
2402742Swollmanzfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
24130711Swollman{
2422742Swollman	znode_t *ozp = buf, *nzp = newbuf;
24358787Sru	zfsvfs_t *zfsvfs;
24458787Sru	vnode_t *vp;
24558787Sru
24658787Sru	/*
24717200Swollman	 * The znode is on the file system's list of known znodes if the vfs
2482742Swollman	 * pointer is valid. We set the low bit of the vfs pointer when freeing
24917200Swollman	 * the znode to invalidate it, and the memory patterns written by kmem
2502742Swollman	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
25186222Swollman	 * created znode sets the vfs pointer last of all to indicate that the
25286222Swollman	 * znode is known and in a valid state to be moved by this function.
25386222Swollman	 */
25486222Swollman	zfsvfs = ozp->z_zfsvfs;
25586222Swollman	if (!POINTER_IS_VALID(zfsvfs)) {
25686222Swollman		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
2572742Swollman		return (KMEM_CBRC_DONT_KNOW);
25858787Sru	}
25958787Sru
26058787Sru	/*
26158787Sru	 * Close a small window in which it's possible that the filesystem could
26258787Sru	 * be unmounted and freed, and zfsvfs, though valid in the previous
26358787Sru	 * statement, could point to unrelated memory by the time we try to
26458787Sru	 * prevent the filesystem from being unmounted.
26558787Sru	 */
26658787Sru	rw_enter(&zfsvfs_lock, RW_WRITER);
26758787Sru	if (zfsvfs != ozp->z_zfsvfs) {
26858787Sru		rw_exit(&zfsvfs_lock);
26958787Sru		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
27058787Sru		return (KMEM_CBRC_DONT_KNOW);
27158787Sru	}
27217200Swollman
27317200Swollman	/*
27417200Swollman	 * If the znode is still valid, then so is the file system. We know that
27517200Swollman	 * no valid file system can be freed while we hold zfsvfs_lock, so we
27617200Swollman	 * can safely ensure that the filesystem is not and will not be
27717200Swollman	 * unmounted. The next statement is equivalent to ZFS_ENTER().
27817200Swollman	 */
27917200Swollman	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
2802742Swollman	if (zfsvfs->z_unmounted) {
28143014Swollman		ZFS_EXIT(zfsvfs);
2822742Swollman		rw_exit(&zfsvfs_lock);
2832742Swollman		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
28443014Swollman		return (KMEM_CBRC_DONT_KNOW);
2852742Swollman	}
	/* The teardown lock is held from here on, so zfsvfs_lock can go. */
2862742Swollman	rw_exit(&zfsvfs_lock);
28743014Swollman
2882742Swollman	mutex_enter(&zfsvfs->z_znodes_lock);
2892742Swollman	/*
29043014Swollman	 * Recheck the vfs pointer in case the znode was removed just before
2912742Swollman	 * acquiring the lock.
2922742Swollman	 */
29343014Swollman	if (zfsvfs != ozp->z_zfsvfs) {
2942742Swollman		mutex_exit(&zfsvfs->z_znodes_lock);
29543014Swollman		ZFS_EXIT(zfsvfs);
2962742Swollman		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
29743014Swollman		return (KMEM_CBRC_DONT_KNOW);
2982742Swollman	}
2992742Swollman
30043014Swollman	/*
3012742Swollman	 * At this point we know that as long as we hold z_znodes_lock, the
30258787Sru	 * znode cannot be freed and fields within the znode can be safely
30343014Swollman	 * accessed. Now, prevent a race with zfs_zget().
3042742Swollman	 */
3052742Swollman	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
30643014Swollman		mutex_exit(&zfsvfs->z_znodes_lock);
3072742Swollman		ZFS_EXIT(zfsvfs);
30843014Swollman		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
3092742Swollman		return (KMEM_CBRC_LATER);
31043014Swollman	}
3112742Swollman
31243014Swollman	vp = ZTOV(ozp);
	/* Only a try-lock: give up and retry later rather than block here. */
3132742Swollman	if (mutex_tryenter(&vp->v_lock) == 0) {
31443014Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
3152742Swollman		mutex_exit(&zfsvfs->z_znodes_lock);
31643014Swollman		ZFS_EXIT(zfsvfs);
3172742Swollman		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
31843014Swollman		return (KMEM_CBRC_LATER);
3192742Swollman	}
32043014Swollman
32143014Swollman	/* Only move znodes that are referenced _only_ by the DNLC. */
32243014Swollman	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
32343014Swollman		mutex_exit(&vp->v_lock);
32443014Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
32519878Swollman		mutex_exit(&zfsvfs->z_znodes_lock);
3262742Swollman		ZFS_EXIT(zfsvfs);
32743014Swollman		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
32819878Swollman		return (KMEM_CBRC_LATER);
32943014Swollman	}
3302742Swollman
33143014Swollman	/*
33243014Swollman	 * The znode is known and in a valid state to move. We're holding the
33343014Swollman	 * locks needed to execute the critical section.
33443014Swollman	 */
33543014Swollman	zfs_znode_move_impl(ozp, nzp);
	/*
	 * vp was captured before the move; zfs_znode_move_impl() does not
	 * modify ozp->z_id, so it still names the held object.
	 */
33643014Swollman	mutex_exit(&vp->v_lock);
33743014Swollman	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
3382742Swollman
	/* Put nzp in ozp's place on the znode list (z_znodes_lock held). */
33919878Swollman	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
3402742Swollman	mutex_exit(&zfsvfs->z_znodes_lock);
3412742Swollman	ZFS_EXIT(zfsvfs);
34243014Swollman
3432742Swollman	return (KMEM_CBRC_YES);
34443014Swollman}
34543014Swollman#endif /* sun */
3462742Swollman
34743014Swollmanvoid
34843014Swollmanzfs_znode_init(void)
34943014Swollman{
35043014Swollman	/*
35143014Swollman	 * Initialize zcache
3522742Swollman	 */
35343014Swollman	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
35443014Swollman	ASSERT(znode_cache == NULL);
35543014Swollman	znode_cache = kmem_cache_create("zfs_znode_cache",
35643014Swollman	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
3572742Swollman	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
35843014Swollman	kmem_cache_set_move(znode_cache, zfs_znode_move);
3592742Swollman}
36043014Swollman
36143014Swollmanvoid
36243014Swollmanzfs_znode_fini(void)
36343014Swollman{
3642742Swollman#ifdef sun
36543014Swollman	/*
36643014Swollman	 * Cleanup vfs & vnode ops
36743014Swollman	 */
36843014Swollman	zfs_remove_op_tables();
3692742Swollman#endif	/* sun */
37043014Swollman
37117200Swollman	/*
37243014Swollman	 * Cleanup zcache
37343014Swollman	 */
37443014Swollman	if (znode_cache)
3752742Swollman		kmem_cache_destroy(znode_cache);
3762742Swollman	znode_cache = NULL;
37743014Swollman	rw_destroy(&zfsvfs_lock);
37843014Swollman}
37943014Swollman
38043014Swollman#ifdef sun
/*
 * Vnode operation vectors (sun only).  Each is created from the matching
 * *_template by zfs_create_op_tables() and freed and reset to NULL by
 * zfs_remove_op_tables().  A non-NULL zfs_dvnodeops is what
 * zfs_create_op_tables() uses to detect that the tables already exist.
 */
3819908Swollmanstruct vnodeops *zfs_dvnodeops;
3829908Swollmanstruct vnodeops *zfs_fvnodeops;
38343014Swollmanstruct vnodeops *zfs_symvnodeops;
38443014Swollmanstruct vnodeops *zfs_xdvnodeops;
38543014Swollmanstruct vnodeops *zfs_evnodeops;
3869908Swollmanstruct vnodeops *zfs_sharevnodeops;
38743014Swollman
38817200Swollmanvoid
3892742Swollmanzfs_remove_op_tables()
3902742Swollman{
39158787Sru	/*
39217200Swollman	 * Remove vfs ops
39317200Swollman	 */
3949908Swollman	ASSERT(zfsfstype);
39517200Swollman	(void) vfs_freevfsops_by_type(zfsfstype);
3962742Swollman	zfsfstype = 0;
39786222Swollman
39843014Swollman	/*
39917200Swollman	 * Remove vnode ops
40017200Swollman	 */
4019908Swollman	if (zfs_dvnodeops)
40217200Swollman		vn_freevnodeops(zfs_dvnodeops);
40386222Swollman	if (zfs_fvnodeops)
40486222Swollman		vn_freevnodeops(zfs_fvnodeops);
40543014Swollman	if (zfs_symvnodeops)
40619878Swollman		vn_freevnodeops(zfs_symvnodeops);
40717200Swollman	if (zfs_xdvnodeops)
40817200Swollman		vn_freevnodeops(zfs_xdvnodeops);
4092742Swollman	if (zfs_evnodeops)
41017200Swollman		vn_freevnodeops(zfs_evnodeops);
4112742Swollman	if (zfs_sharevnodeops)
41217200Swollman		vn_freevnodeops(zfs_sharevnodeops);
41317200Swollman
41417200Swollman	zfs_dvnodeops = NULL;
41517200Swollman	zfs_fvnodeops = NULL;
4162742Swollman	zfs_symvnodeops = NULL;
4172742Swollman	zfs_xdvnodeops = NULL;
4182742Swollman	zfs_evnodeops = NULL;
4192742Swollman	zfs_sharevnodeops = NULL;
4202742Swollman}
42117200Swollman
/*
 * Operation templates defined outside this file.  zfs_create_op_tables()
 * passes each one to vn_make_ops() to build the corresponding vnodeops
 * vector.
 */
42217200Swollmanextern const fs_operation_def_t zfs_dvnodeops_template[];
4239908Swollmanextern const fs_operation_def_t zfs_fvnodeops_template[];
4249908Swollmanextern const fs_operation_def_t zfs_xdvnodeops_template[];
42519878Swollmanextern const fs_operation_def_t zfs_symvnodeops_template[];
42617200Swollmanextern const fs_operation_def_t zfs_evnodeops_template[];
42717200Swollmanextern const fs_operation_def_t zfs_sharevnodeops_template[];
42817200Swollman
42919878Swollmanint
43017200Swollmanzfs_create_op_tables()
43175267Swollman{
43275267Swollman	int error;
43375267Swollman
43475267Swollman	/*
43575267Swollman	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
4369908Swollman	 * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
43717200Swollman	 * In this case we just return as the ops vectors are already set up.
43819878Swollman	 */
4392742Swollman	if (zfs_dvnodeops)
4402742Swollman		return (0);
4419908Swollman
44219878Swollman	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
4439908Swollman	    &zfs_dvnodeops);
4442742Swollman	if (error)
44519878Swollman		return (error);
44619878Swollman
44719878Swollman	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
44819878Swollman	    &zfs_fvnodeops);
44919878Swollman	if (error)
45019878Swollman		return (error);
45119878Swollman
45219878Swollman	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
45319878Swollman	    &zfs_symvnodeops);
45419878Swollman	if (error)
45519878Swollman		return (error);
4562742Swollman
45719878Swollman	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
45819878Swollman	    &zfs_xdvnodeops);
45919878Swollman	if (error)
46019878Swollman		return (error);
46119878Swollman
46219878Swollman	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
46319878Swollman	    &zfs_evnodeops);
4642742Swollman	if (error)
46520094Swollman		return (error);
46620094Swollman
46720094Swollman	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
46820094Swollman	    &zfs_sharevnodeops);
46920094Swollman
47020094Swollman	return (error);
47120094Swollman}
47220094Swollman#endif	/* sun */
4732742Swollman
47420094Swollmanint
47520094Swollmanzfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
47620094Swollman{
47758787Sru	zfs_acl_ids_t acl_ids;
47820094Swollman	vattr_t vattr;
47920094Swollman	znode_t *sharezp;
48020094Swollman	znode_t *zp;
48120094Swollman	int error;
48243014Swollman
48320094Swollman	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
48420094Swollman	vattr.va_type = VDIR;
48543014Swollman	vattr.va_mode = S_IFDIR|0555;
48620094Swollman	vattr.va_uid = crgetuid(kcred);
48720094Swollman	vattr.va_gid = crgetgid(kcred);
48843014Swollman
48943014Swollman	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
49020094Swollman	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
49120094Swollman	sharezp->z_moved = 0;
49243014Swollman	sharezp->z_unlinked = 0;
49320094Swollman	sharezp->z_atime_dirty = 0;
49420094Swollman	sharezp->z_zfsvfs = zfsvfs;
49520094Swollman	sharezp->z_is_sa = zfsvfs->z_use_sa;
49620094Swollman
49720094Swollman	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
4982742Swollman	    kcred, NULL, &acl_ids));
4992742Swollman	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
5002742Swollman	ASSERT3P(zp, ==, sharezp);
5012742Swollman	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
50219878Swollman	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
50319878Swollman	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
50419878Swollman	zfsvfs->z_shares_dir = sharezp->z_id;
50519878Swollman
5062742Swollman	zfs_acl_ids_free(&acl_ids);
50719878Swollman	sa_handle_destroy(sharezp->z_sa_hdl);
50819878Swollman	kmem_cache_free(znode_cache, sharezp);
5092742Swollman
51019878Swollman	return (error);
51119878Swollman}
51219878Swollman
51319878Swollman/*
51419878Swollman * define a couple of values we need available
51519878Swollman * for both 64 and 32 bit environments.
51619878Swollman */
/*
 * Each value is defined only if the platform headers have not already
 * provided it.
 */
/* Shift applied to the major number by zfs_expldev() / zfs_cmpldev(). */
51719878Swollman#ifndef NBITSMINOR64
51819878Swollman#define	NBITSMINOR64	32
51919878Swollman#endif
/* 32-bit mask for the major number; not referenced in the code shown here. */
52019878Swollman#ifndef MAXMAJ64
52119878Swollman#define	MAXMAJ64	0xffffffffUL
52219878Swollman#endif
/* 32-bit mask zfs_cmpldev() uses to extract the minor number. */
52319878Swollman#ifndef	MAXMIN64
52419878Swollman#define	MAXMIN64	0xffffffffUL
52519878Swollman#endif
52619878Swollman
52719878Swollman/*
52819878Swollman * Create special expldev for ZFS private use.
52919878Swollman * Can't use standard expldev since it doesn't do
53019878Swollman * what we want.  The standard expldev() takes a
53119878Swollman * dev32_t in LP64 and expands it to a long dev_t.
5322742Swollman * We need an interface that takes a dev32_t in ILP32
53319878Swollman * and expands it to a long dev_t.
5342742Swollman */
5352742Swollmanstatic uint64_t
53619878Swollmanzfs_expldev(dev_t dev)
5372742Swollman{
53819878Swollman	return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev));
5392742Swollman}
54019878Swollman/*
5412742Swollman * Special cmpldev for ZFS private use.
54219878Swollman * Can't use standard cmpldev since it takes
5432742Swollman * a long dev_t and compresses it to dev32_t in
54419878Swollman * LP64.  We need to do a compaction of a long dev_t
5452742Swollman * to a dev32_t in ILP32.
54619878Swollman */
5472742Swollmandev_t
54819878Swollmanzfs_cmpldev(uint64_t dev)
5492742Swollman{
55019878Swollman	return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
5512742Swollman}
55219878Swollman
5532742Swollmanstatic void
55419878Swollmanzfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
5552742Swollman    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
55619878Swollman{
5572742Swollman	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
55819878Swollman	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
5592742Swollman
56019878Swollman	mutex_enter(&zp->z_lock);
5612742Swollman
5622742Swollman	ASSERT(zp->z_sa_hdl == NULL);
56319878Swollman	ASSERT(zp->z_acl_cached == NULL);
56430711Swollman	if (sa_hdl == NULL) {
56520094Swollman		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
5662742Swollman		    SA_HDL_SHARED, &zp->z_sa_hdl));
5672742Swollman	} else {
5682742Swollman		zp->z_sa_hdl = sa_hdl;
5692742Swollman		sa_set_userp(sa_hdl, zp);
5702742Swollman	}
57119878Swollman
57219878Swollman	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
5732742Swollman
5742742Swollman	/*
575114173Swollman	 * Slap on VROOT if we are the root znode
576114173Swollman	 */
577114173Swollman	if (zp->z_id == zfsvfs->z_root)
578114173Swollman		ZTOV(zp)->v_flag |= VROOT;
579114173Swollman
580114173Swollman	mutex_exit(&zp->z_lock);
581114173Swollman	vn_exists(ZTOV(zp));
582114173Swollman}
5832742Swollman
58419878Swollmanvoid
5852742Swollmanzfs_znode_dmu_fini(znode_t *zp)
58619878Swollman{
5872742Swollman	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
58819878Swollman	    zp->z_unlinked ||
58919878Swollman	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
590114173Swollman
591114173Swollman	sa_handle_destroy(zp->z_sa_hdl);
5922742Swollman	zp->z_sa_hdl = NULL;
5932742Swollman}
594114173Swollman
595114173Swollmanstatic void
596114173Swollmanzfs_vnode_forget(vnode_t *vp)
597114173Swollman{
598114173Swollman
59919878Swollman	/* copied from insmntque_stddtr */
60019878Swollman	vp->v_data = NULL;
6012742Swollman	vp->v_op = &dead_vnodeops;
6022742Swollman	vgone(vp);
6032742Swollman	vput(vp);
6042742Swollman}
60543014Swollman
6062742Swollman/*
60743014Swollman * Construct a new znode/vnode and intialize.
60843014Swollman *
60958787Sru * This does not do a call to dmu_set_user() that is
61058787Sru * up to the caller to do, in case you don't want to
61119878Swollman * return the znode
61258787Sru */
61358787Srustatic znode_t *
61420094Swollmanzfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
6152742Swollman    dmu_object_type_t obj_type, sa_handle_t *hdl)
6162742Swollman{
61730711Swollman	znode_t	*zp;
61830711Swollman	vnode_t *vp;
61930711Swollman	uint64_t mode;
62030711Swollman	uint64_t parent;
62130711Swollman	sa_bulk_attr_t bulk[9];
62230711Swollman	int count = 0;
62330711Swollman	int error;
62430711Swollman
62530711Swollman	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
62630711Swollman
62730711Swollman	KASSERT(curthread->td_vp_reserv > 0,
62830711Swollman	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
62975267Swollman	error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
6302742Swollman	if (error != 0) {
63130711Swollman		kmem_cache_free(znode_cache, zp);
63230711Swollman		return (NULL);
63319878Swollman	}
63430711Swollman	zp->z_vnode = vp;
6352742Swollman	vp->v_data = zp;
63630711Swollman
6372742Swollman	ASSERT(zp->z_dirlocks == NULL);
63819878Swollman	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
63930711Swollman	zp->z_moved = 0;
64019878Swollman
64119878Swollman	/*
64219878Swollman	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
64343543Swollman	 * the zfs_znode_move() callback.
64443543Swollman	 */
64543543Swollman	zp->z_sa_hdl = NULL;
64643543Swollman	zp->z_unlinked = 0;
64719878Swollman	zp->z_atime_dirty = 0;
64819878Swollman	zp->z_mapcnt = 0;
64919878Swollman	zp->z_id = db->db_object;
65030711Swollman	zp->z_blksz = blksz;
65119878Swollman	zp->z_seq = 0x7A4653;
65219878Swollman	zp->z_sync_cnt = 0;
65319878Swollman
65430711Swollman	vp = ZTOV(zp);
65519878Swollman
65619878Swollman	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
65719878Swollman
65819878Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
65919878Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
66019878Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
66119878Swollman	    &zp->z_size, 8);
6622742Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
66330711Swollman	    &zp->z_links, 8);
66430711Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
66519878Swollman	    &zp->z_pflags, 8);
6662742Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
66719878Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
6682742Swollman	    &zp->z_atime, 16);
6692742Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
67030711Swollman	    &zp->z_uid, 8);
67130711Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
67230711Swollman	    &zp->z_gid, 8);
67330711Swollman
67430711Swollman	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
67530711Swollman		if (hdl == NULL)
67630711Swollman			sa_handle_destroy(zp->z_sa_hdl);
67719878Swollman		zfs_vnode_forget(vp);
67819878Swollman		zp->z_vnode = NULL;
6792742Swollman		kmem_cache_free(znode_cache, zp);
6802742Swollman		return (NULL);
681114173Swollman	}
6822742Swollman
6832742Swollman	zp->z_mode = mode;
68458787Sru
68558787Sru	vp->v_type = IFTOVT((mode_t)mode);
68658787Sru
68758787Sru	switch (vp->v_type) {
68858787Sru	case VDIR:
68958787Sru		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
6902742Swollman		break;
69119878Swollman#ifdef sun
6922742Swollman	case VBLK:
69319878Swollman	case VCHR:
6942742Swollman		{
6952742Swollman			uint64_t rdev;
6962742Swollman			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
6972742Swollman			    &rdev, sizeof (rdev)) == 0);
69819878Swollman
6992742Swollman			vp->v_rdev = zfs_cmpldev(rdev);
70019878Swollman		}
7012742Swollman		break;
70219878Swollman#endif	/* sun */
70320094Swollman	case VFIFO:
70458787Sru#ifdef sun
70558787Sru	case VSOCK:
7062742Swollman	case VDOOR:
7072742Swollman#endif	/* sun */
708114173Swollman		vp->v_op = &zfs_fifoops;
7092742Swollman		break;
7102742Swollman	case VREG:
7112742Swollman		if (parent == zfsvfs->z_shares_dir) {
71219878Swollman			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
7132742Swollman			vp->v_op = &zfs_shareops;
71419878Swollman		}
7152742Swollman		break;
71619878Swollman#ifdef sun
71719878Swollman	case VLNK:
71819878Swollman		vn_setops(vp, zfs_symvnodeops);
7192742Swollman		break;
7202742Swollman	default:
72119878Swollman		vn_setops(vp, zfs_evnodeops);
72219878Swollman		break;
72319878Swollman#endif	/* sun */
72419878Swollman	}
7252742Swollman
72643014Swollman	mutex_enter(&zfsvfs->z_znodes_lock);
7272742Swollman	list_insert_tail(&zfsvfs->z_all_znodes, zp);
72819878Swollman	membar_producer();
7292742Swollman	/*
73019878Swollman	 * Everything else must be valid before assigning z_zfsvfs makes the
73119878Swollman	 * znode eligible for zfs_znode_move().
7322742Swollman	 */
73319878Swollman	zp->z_zfsvfs = zfsvfs;
7342742Swollman	mutex_exit(&zfsvfs->z_znodes_lock);
73519878Swollman
7362742Swollman	/*
73719878Swollman	 * Acquire vnode lock before making it available to the world.
7382742Swollman	 */
7392742Swollman	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
7402742Swollman	VN_LOCK_AREC(vp);
7412742Swollman	if (vp->v_type != VFIFO)
7422742Swollman		VN_LOCK_ASHARE(vp);
7432742Swollman
7442742Swollman	VFS_HOLD(zfsvfs->z_vfs);
74519878Swollman	return (zp);
74619878Swollman}
74719878Swollman
/*
 * File-scope objects with static storage, hence zero-initialized.
 * NOTE(review): presumably zero-filled sources for attributes written by
 * zfs_mknode(); their users are not visible in this part of the file --
 * confirm.
 */
74819878Swollmanstatic uint64_t empty_xattr;
74919878Swollmanstatic uint64_t pad[4];
7502742Swollmanstatic zfs_acl_phys_t acl_phys;
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		acl_ids	- supplies the initial mode (z_mode), the initial
 *			  ACL (z_aclp) and the owner/group ids
 *			  (z_fuid/z_fgid) for the new znode
 *
 *	OUT:	zpp	- allocated znode
 *
 * Allocation, buffer-hold and attribute-write failures are not returned to
 * the caller; they are checked with VERIFY()/VERIFY0().
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		err;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	sa_attrs[ZPL_END];
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/*
	 * During replay the object number, timestamp and generation are
	 * taken from the vattr instead of being generated here.
	 */
	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	/* The bonus buffer type and length depend on whether SA is in use. */
	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx));
		} else {
			obj = zap_create_norm(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			VERIFY0(dmu_object_claim(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx));
		} else {
			obj = dmu_object_alloc(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	/* Held until the ZFS_OBJ_HOLD_EXIT() at the end of this function. */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	/* For the root node dzp->z_id was just set to obj above. */
	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	/* Caller-supplied atime/mtime take precedence over "now". */
	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	/*
	 * The old-format object always carries xattr, rdev, flags, uid, gid,
	 * pad and the embedded ACL; an SA object only gets rdev for device
	 * nodes.
	 */
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);

	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		/*
		 * NOTE(review): mode and pflags were registered above by
		 * address, so presumably the values recomputed here are the
		 * ones written by sa_replace_all_by_template() below --
		 * confirm against SA_ADD_BULK_ATTR().
		 */
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT(*zpp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);

	/*
	 * This is the complement of the "else if" branch above: the ACL was
	 * not added to the bulk attribute array, so it is set here instead.
	 */
	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	if (!(flag & IS_ROOT_NODE)) {
		vnode_t *vp;

		vp = ZTOV(*zpp);
		/*
		 * VV_FORCEINSMQ is set only around the insmntque() call;
		 * err is consumed solely by the KASSERT below.
		 */
		vp->v_vflag |= VV_FORCEINSMQ;
		err = insmntque(vp, zfsvfs->z_vfs);
		vp->v_vflag &= ~VV_FORCEINSMQ;
		KASSERT(err == 0, ("insmntque() failed: error %d", err));
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}
1021114173Swollman
1022114173Swollman/*
1023114173Swollman * Update in-core attributes.  It is assumed the caller will be doing an
1024114173Swollman * sa_bulk_update to push the changes out.
1025114173Swollman */
1026114173Swollmanvoid
1027114173Swollmanzfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
1028114173Swollman{
1029114173Swollman	xoptattr_t *xoap;
1030114173Swollman
1031114173Swollman	xoap = xva_getxoptattr(xvap);
10322742Swollman	ASSERT(xoap);
103319878Swollman
1034114173Swollman	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
1035114173Swollman		uint64_t times[2];
10362742Swollman		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
103719878Swollman		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
103843014Swollman		    &times, sizeof (times), tx);
103943014Swollman		XVA_SET_RTN(xvap, XAT_CREATETIME);
104019878Swollman	}
104143014Swollman	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
104219878Swollman		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
104319878Swollman		    zp->z_pflags, tx);
104419878Swollman		XVA_SET_RTN(xvap, XAT_READONLY);
10452742Swollman	}
10462742Swollman	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
104719878Swollman		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
104819878Swollman		    zp->z_pflags, tx);
104919878Swollman		XVA_SET_RTN(xvap, XAT_HIDDEN);
10502742Swollman	}
10512742Swollman	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
10522742Swollman		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
10532742Swollman		    zp->z_pflags, tx);
10542742Swollman		XVA_SET_RTN(xvap, XAT_SYSTEM);
105519878Swollman	}
105619878Swollman	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
10572742Swollman		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
10582742Swollman		    zp->z_pflags, tx);
10592742Swollman		XVA_SET_RTN(xvap, XAT_ARCHIVE);
10602742Swollman	}
106119878Swollman	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
10622742Swollman		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
10632742Swollman		    zp->z_pflags, tx);
106419878Swollman		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
10652742Swollman	}
10662742Swollman	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
106719878Swollman		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
10682742Swollman		    zp->z_pflags, tx);
10692742Swollman		XVA_SET_RTN(xvap, XAT_NOUNLINK);
107019878Swollman	}
10712742Swollman	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
107219878Swollman		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
10732742Swollman		    zp->z_pflags, tx);
107419878Swollman		XVA_SET_RTN(xvap, XAT_APPENDONLY);
10752742Swollman	}
107619878Swollman	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
10772742Swollman		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
10782742Swollman		    zp->z_pflags, tx);
107919878Swollman		XVA_SET_RTN(xvap, XAT_NODUMP);
10802742Swollman	}
108119878Swollman	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
10822742Swollman		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
10832742Swollman		    zp->z_pflags, tx);
10842742Swollman		XVA_SET_RTN(xvap, XAT_OPAQUE);
108519878Swollman	}
108619878Swollman	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
108719878Swollman		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
108819878Swollman		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
108919878Swollman		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
109021217Swollman	}
109119878Swollman	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
10922742Swollman		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
10932742Swollman		    zp->z_pflags, tx);
10942742Swollman		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
109519878Swollman	}
109617200Swollman	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
109719878Swollman		zfs_sa_set_scanstamp(zp, xvap, tx);
10982742Swollman		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
109919878Swollman	}
11002742Swollman	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
110119878Swollman		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
11022742Swollman		    zp->z_pflags, tx);
110319878Swollman		XVA_SET_RTN(xvap, XAT_REPARSE);
11042742Swollman	}
110519878Swollman	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
110619878Swollman		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
11072742Swollman		    zp->z_pflags, tx);
110819878Swollman		XVA_SET_RTN(xvap, XAT_OFFLINE);
11092742Swollman	}
111019878Swollman	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
11112742Swollman		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
111219878Swollman		    zp->z_pflags, tx);
11132742Swollman		XVA_SET_RTN(xvap, XAT_SPARSE);
111419878Swollman	}
11152742Swollman}
11162742Swollman
111719878Swollmanint
111819878Swollmanzfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
111919878Swollman{
112019878Swollman	dmu_object_info_t doi;
112119878Swollman	dmu_buf_t	*db;
11222742Swollman	znode_t		*zp;
11232742Swollman	vnode_t		*vp;
11242742Swollman	sa_handle_t	*hdl;
112519878Swollman	struct thread	*td;
11262742Swollman	int locked;
11272742Swollman	int err;
112819878Swollman
11292742Swollman	td = curthread;
11302742Swollman	getnewvnode_reserve(1);
11312742Swollmanagain:
11322742Swollman	*zpp = NULL;
11332742Swollman	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
11342742Swollman
11352742Swollman	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
11362742Swollman	if (err) {
11372742Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
11382742Swollman		getnewvnode_drop_reserve();
11392742Swollman		return (err);
11402742Swollman	}
11412742Swollman
114219878Swollman	dmu_object_info_from_db(db, &doi);
11432742Swollman	if (doi.doi_bonus_type != DMU_OT_SA &&
11442742Swollman	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
11452742Swollman	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
11462742Swollman	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
11472742Swollman		sa_buf_rele(db, NULL);
11482742Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
11492742Swollman#ifdef __FreeBSD__
115019878Swollman		getnewvnode_drop_reserve();
11512742Swollman#endif
115258787Sru		return (SET_ERROR(EINVAL));
11532742Swollman	}
11542742Swollman
115558787Sru	hdl = dmu_buf_get_user(db);
11562742Swollman	if (hdl != NULL) {
11572742Swollman		zp  = sa_get_userdata(hdl);
115819878Swollman
115919878Swollman
116019878Swollman		/*
116119878Swollman		 * Since "SA" does immediate eviction we
116219878Swollman		 * should never find a sa handle that doesn't
116319878Swollman		 * know about the znode.
116419878Swollman		 */
116519878Swollman
116619878Swollman		ASSERT3P(zp, !=, NULL);
116719878Swollman
116819878Swollman		mutex_enter(&zp->z_lock);
11692742Swollman		ASSERT3U(zp->z_id, ==, obj_num);
117019878Swollman		if (zp->z_unlinked) {
117119878Swollman			err = SET_ERROR(ENOENT);
11722742Swollman		} else {
117319878Swollman			vp = ZTOV(zp);
11742742Swollman			*zpp = zp;
117519878Swollman			err = 0;
117619878Swollman		}
117719878Swollman		sa_buf_rele(db, NULL);
11782742Swollman
11792742Swollman		/* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
118019878Swollman		if (err == 0)
118119878Swollman			VN_HOLD(vp);
11822742Swollman
11832742Swollman		mutex_exit(&zp->z_lock);
11842742Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
118519878Swollman
118675267Swollman		if (err == 0) {
118775267Swollman			locked = VOP_ISLOCKED(vp);
118886222Swollman			VI_LOCK(vp);
118975267Swollman			if ((vp->v_iflag & VI_DOOMED) != 0 &&
119075267Swollman			    locked != LK_EXCLUSIVE) {
119175267Swollman				/*
119275267Swollman				 * The vnode is doomed and this thread doesn't
119317200Swollman				 * hold the exclusive lock on it, so the vnode
119417200Swollman				 * must be being reclaimed by another thread.
119575267Swollman				 * Otherwise the doomed vnode is being reclaimed
119643014Swollman				 * by this thread and zfs_zget is called from
119743014Swollman				 * ZIL internals.
119843014Swollman				 */
119943014Swollman				VI_UNLOCK(vp);
120017200Swollman				VN_RELE(vp);
120117200Swollman				goto again;
120217200Swollman			}
120317200Swollman			VI_UNLOCK(vp);
120417200Swollman		}
120517200Swollman		getnewvnode_drop_reserve();
120617200Swollman		return (err);
120717200Swollman	}
120817200Swollman
120917200Swollman	/*
121017200Swollman	 * Not found create new znode/vnode
121117200Swollman	 * but only if file exists.
121219878Swollman	 *
121317200Swollman	 * There is a small window where zfs_vget() could
121417200Swollman	 * find this object while a file create is still in
121517200Swollman	 * progress.  This is checked for in zfs_znode_alloc()
121617200Swollman	 *
121717200Swollman	 * if zfs_znode_alloc() fails it will drop the hold on the
12182742Swollman	 * bonus buffer.
121919878Swollman	 */
122017200Swollman	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
122119878Swollman	    doi.doi_bonus_type, NULL);
12222742Swollman	if (zp == NULL) {
122319878Swollman		err = SET_ERROR(ENOENT);
12242742Swollman	} else {
122519878Swollman		*zpp = zp;
122619878Swollman	}
122717200Swollman	if (err == 0) {
122819878Swollman		vnode_t *vp = ZTOV(zp);
122917200Swollman
123019878Swollman		err = insmntque(vp, zfsvfs->z_vfs);
123117200Swollman		if (err == 0) {
123219878Swollman			vp->v_hash = obj_num;
12332742Swollman			VOP_UNLOCK(vp, 0);
123419878Swollman		} else {
12352742Swollman			zp->z_vnode = NULL;
123619878Swollman			zfs_znode_dmu_fini(zp);
12372742Swollman			zfs_znode_free(zp);
123819878Swollman			*zpp = NULL;
12392742Swollman		}
124019878Swollman	}
124119878Swollman	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
12422742Swollman	getnewvnode_drop_reserve();
124319878Swollman	return (err);
12442742Swollman}
12452742Swollman
124619878Swollmanint
12472742Swollmanzfs_rezget(znode_t *zp)
124819878Swollman{
124919878Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
12502742Swollman	dmu_object_info_t doi;
125119878Swollman	dmu_buf_t *db;
125219878Swollman	vnode_t *vp;
12532742Swollman	uint64_t obj_num = zp->z_id;
12542742Swollman	uint64_t mode, size;
12552742Swollman	sa_bulk_attr_t bulk[8];
12562742Swollman	int err;
125719878Swollman	int count = 0;
125819878Swollman	uint64_t gen;
125919878Swollman
126019878Swollman	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
126119878Swollman
12622742Swollman	mutex_enter(&zp->z_acl_lock);
12639908Swollman	if (zp->z_acl_cached) {
12649908Swollman		zfs_acl_free(zp->z_acl_cached);
12659908Swollman		zp->z_acl_cached = NULL;
12662742Swollman	}
126743014Swollman
126843014Swollman	mutex_exit(&zp->z_acl_lock);
126943014Swollman	ASSERT(zp->z_sa_hdl == NULL);
127043014Swollman	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
127143014Swollman	if (err) {
127243014Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
127343014Swollman		return (err);
127443014Swollman	}
127543014Swollman
127643014Swollman	dmu_object_info_from_db(db, &doi);
127743014Swollman	if (doi.doi_bonus_type != DMU_OT_SA &&
127843014Swollman	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
127943014Swollman	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
128043014Swollman	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
128143014Swollman		sa_buf_rele(db, NULL);
128243014Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
128343014Swollman		return (SET_ERROR(EINVAL));
128443014Swollman	}
128543014Swollman
128643014Swollman	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
128743014Swollman	size = zp->z_size;
128843014Swollman
128943014Swollman	/* reload cached values */
129043014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
129143014Swollman	    &gen, sizeof (gen));
129243014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
129343014Swollman	    &zp->z_size, sizeof (zp->z_size));
129443014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
129543014Swollman	    &zp->z_links, sizeof (zp->z_links));
129643014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
129743014Swollman	    &zp->z_pflags, sizeof (zp->z_pflags));
129843014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
129943014Swollman	    &zp->z_atime, sizeof (zp->z_atime));
130043014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
130143014Swollman	    &zp->z_uid, sizeof (zp->z_uid));
130243014Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
130358787Sru	    &zp->z_gid, sizeof (zp->z_gid));
130458787Sru	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
130558787Sru	    &mode, sizeof (mode));
130658787Sru
130758787Sru	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
130858787Sru		zfs_znode_dmu_fini(zp);
130975267Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
131075267Swollman		return (SET_ERROR(EIO));
131175267Swollman	}
131275267Swollman
131375267Swollman	zp->z_mode = mode;
131475267Swollman
131575267Swollman	if (gen != zp->z_gen) {
131675267Swollman		zfs_znode_dmu_fini(zp);
131775267Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
131875267Swollman		return (SET_ERROR(EIO));
131975267Swollman	}
132075267Swollman
132175267Swollman	/*
132275267Swollman	 * XXXPJD: Not sure how is that possible, but under heavy
132320094Swollman	 * zfs recv -F load it happens that z_gen is the same, but
132443014Swollman	 * vnode type is different than znode type. This would mean
132543014Swollman	 * that for example regular file was replaced with directory
13262742Swollman	 * which has the same object number.
13272742Swollman	 */
132819878Swollman	vp = ZTOV(zp);
132919878Swollman	if (vp != NULL &&
133019878Swollman	    vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
133119878Swollman		zfs_znode_dmu_fini(zp);
133219878Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
13332742Swollman		return (EIO);
133420094Swollman	}
133543014Swollman
133643014Swollman	zp->z_unlinked = (zp->z_links == 0);
133743014Swollman	zp->z_blksz = doi.doi_data_block_size;
133843014Swollman	if (vp != NULL) {
133958787Sru		vn_pages_remove(vp, 0, 0);
1340121098Swollman		if (zp->z_size != size)
134175267Swollman			vnode_pager_setsize(vp, zp->z_size);
13422742Swollman	}
13432742Swollman
13442742Swollman	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
13452742Swollman
134619878Swollman	return (0);
134719878Swollman}
13482742Swollman
13492742Swollmanvoid
135058787Sruzfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
135158787Sru{
135258787Sru	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
135358787Sru	objset_t *os = zfsvfs->z_os;
135458787Sru	uint64_t obj = zp->z_id;
135558787Sru	uint64_t acl_obj = zfs_external_acl(zp);
135658787Sru
135758787Sru	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
135858787Sru	if (acl_obj) {
135958787Sru		VERIFY(!zp->z_is_sa);
136058787Sru		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
136158787Sru	}
136258787Sru	VERIFY(0 == dmu_object_free(os, obj, tx));
136358787Sru	zfs_znode_dmu_fini(zp);
136458787Sru	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
136558787Sru	zfs_znode_free(zp);
136675267Swollman}
136758787Sru
136864499Swollmanvoid
136964499Swollmanzfs_zinactive(znode_t *zp)
137064499Swollman{
137164499Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1372114173Swollman	uint64_t z_id = zp->z_id;
1373114173Swollman
1374114173Swollman	ASSERT(zp->z_sa_hdl);
1375114173Swollman
1376114173Swollman	/*
1377114173Swollman	 * Don't allow a zfs_zget() while were trying to release this znode
1378114173Swollman	 */
1379114173Swollman	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
1380114173Swollman
13812742Swollman	mutex_enter(&zp->z_lock);
13822742Swollman
138319878Swollman	/*
138419878Swollman	 * If this was the last reference to a file with no links,
138519878Swollman	 * remove the file from the file system.
13862742Swollman	 */
138719878Swollman	if (zp->z_unlinked) {
138820094Swollman		mutex_exit(&zp->z_lock);
138919878Swollman		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
139020094Swollman		zfs_rmnode(zp);
139119878Swollman		return;
139243014Swollman	}
139343014Swollman
139458787Sru	mutex_exit(&zp->z_lock);
1395114173Swollman	zfs_znode_dmu_fini(zp);
1396114173Swollman	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
13972742Swollman	zfs_znode_free(zp);
13982742Swollman}
13992742Swollman
14002742Swollmanvoid
140119878Swollmanzfs_znode_free(znode_t *zp)
14022742Swollman{
140319878Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
14042742Swollman
140519878Swollman	ASSERT(zp->z_sa_hdl == NULL);
14062742Swollman	zp->z_vnode = NULL;
140719878Swollman	mutex_enter(&zfsvfs->z_znodes_lock);
14082742Swollman	POINTER_INVALIDATE(&zp->z_zfsvfs);
140919878Swollman	list_remove(&zfsvfs->z_all_znodes, zp);
14102742Swollman	mutex_exit(&zfsvfs->z_znodes_lock);
141119878Swollman
14122742Swollman	if (zp->z_acl_cached) {
141319878Swollman		zfs_acl_free(zp->z_acl_cached);
14142742Swollman		zp->z_acl_cached = NULL;
141519878Swollman	}
14162742Swollman
141719878Swollman	kmem_cache_free(znode_cache, zp);
14182742Swollman
141919878Swollman	VFS_RELE(zfsvfs->z_vfs);
142019878Swollman}
142119878Swollman
142219878Swollmanvoid
142319878Swollmanzfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
14242742Swollman    uint64_t ctime[2], boolean_t have_tx)
14252742Swollman{
142619878Swollman	timestruc_t	now;
142719878Swollman
142819878Swollman	gethrestime(&now);
142919878Swollman
143019878Swollman	if (have_tx) {	/* will sa_bulk_update happen really soon? */
143119878Swollman		zp->z_atime_dirty = 0;
14322742Swollman		zp->z_seq++;
14332742Swollman	} else {
1434114173Swollman		zp->z_atime_dirty = 1;
14352742Swollman	}
14362742Swollman
14372742Swollman	if (flag & AT_ATIME) {
143819878Swollman		ZFS_TIME_ENCODE(&now, zp->z_atime);
14392742Swollman	}
144019878Swollman
14412742Swollman	if (flag & AT_MTIME) {
144219878Swollman		ZFS_TIME_ENCODE(&now, mtime);
14432742Swollman		if (zp->z_zfsvfs->z_use_fuids) {
144419878Swollman			zp->z_pflags |= (ZFS_ARCHIVE |
14452742Swollman			    ZFS_AV_MODIFIED);
14462742Swollman		}
144719878Swollman	}
144819878Swollman
144919878Swollman	if (flag & AT_CTIME) {
145019878Swollman		ZFS_TIME_ENCODE(&now, ctime);
145119878Swollman		if (zp->z_zfsvfs->z_use_fuids)
14522742Swollman			zp->z_pflags |= ZFS_ARCHIVE;
14532742Swollman	}
145475267Swollman}
145575267Swollman
145675267Swollman/*
145775267Swollman * Grow the block size for a file.
145875267Swollman *
145975267Swollman *	IN:	zp	- znode of file to free data in.
146075267Swollman *		size	- requested block size
146175267Swollman *		tx	- open transaction.
146275267Swollman *
146375267Swollman * NOTE: this function assumes that the znode is write locked.
146475267Swollman */
146575267Swollmanvoid
14662742Swollmanzfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
146758787Sru{
146858787Sru	int		error;
146958787Sru	u_longlong_t	dummy;
147058787Sru
147158787Sru	if (size <= zp->z_blksz)
147258787Sru		return;
147358787Sru	/*
147458787Sru	 * If the file size is already greater than the current blocksize,
147558787Sru	 * we will not grow.  If there is more than one block in a file,
147658787Sru	 * the blocksize cannot change.
147758787Sru	 */
147858787Sru	if (zp->z_blksz && zp->z_size > zp->z_blksz)
147958787Sru		return;
14802742Swollman
14812742Swollman	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
148214343Swollman	    size, 0, tx);
14832742Swollman
14842742Swollman	if (error == ENOTSUP)
148514343Swollman		return;
148619878Swollman	ASSERT0(error);
148719878Swollman
148819878Swollman	/* What blocksize did we actually get? */
14892742Swollman	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
14902742Swollman}
149186222Swollman
#ifdef sun
/*
 * This is a dummy interface used when pvn_vplist_dirty() should *not*
 * be calling back into the fs for a putpage().  E.g.: when truncating
 * a file, the pages being "thrown away" don't need to be written out.
 *
 * It is never expected to run: the body asserts unconditionally and
 * otherwise returns 0.  Compiled only when "sun" is defined.
 */
/* ARGSUSED */
static int
zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	ASSERT(0);
	return (0);
}
#endif	/* sun */
150786222Swollman
150886222Swollman/*
150986222Swollman * Increase the file length
151086222Swollman *
151186222Swollman *	IN:	zp	- znode of file to free data in.
151286222Swollman *		end	- new end-of-file
151386222Swollman *
151486222Swollman *	RETURN:	0 on success, error code on failure
151586222Swollman */
151686222Swollmanstatic int
151786222Swollmanzfs_extend(znode_t *zp, uint64_t end)
151886222Swollman{
151986222Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
152086222Swollman	dmu_tx_t *tx;
152186222Swollman	rl_t *rl;
152286222Swollman	uint64_t newblksz;
152386222Swollman	int error;
152486222Swollman
152586222Swollman	/*
152686222Swollman	 * We will change zp_size, lock the whole file.
152786222Swollman	 */
152875267Swollman	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
152986222Swollman
15302742Swollman	/*
153175267Swollman	 * Nothing to do if file already at desired length.
153275267Swollman	 */
15332742Swollman	if (end <= zp->z_size) {
15342742Swollman		zfs_range_unlock(rl);
15352742Swollman		return (0);
153686222Swollman	}
153775267Swollman	tx = dmu_tx_create(zfsvfs->z_os);
153819878Swollman	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
153975267Swollman	zfs_sa_upgrade_txholds(tx, zp);
154075267Swollman	if (end > zp->z_blksz &&
154175267Swollman	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
154286222Swollman		/*
154386222Swollman		 * We are growing the file past the current block size.
15442742Swollman		 */
15452742Swollman		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
15462742Swollman			ASSERT(!ISP2(zp->z_blksz));
15472742Swollman			newblksz = MIN(end, SPA_MAXBLOCKSIZE);
154819878Swollman		} else {
154919878Swollman			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
155019878Swollman		}
155119878Swollman		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
155275267Swollman	} else {
155386222Swollman		newblksz = 0;
155486222Swollman	}
155586222Swollman
15562742Swollman	error = dmu_tx_assign(tx, TXG_WAIT);
155786222Swollman	if (error) {
155886222Swollman		dmu_tx_abort(tx);
155986222Swollman		zfs_range_unlock(rl);
156019878Swollman		return (error);
156119878Swollman	}
156219878Swollman
15632742Swollman	if (newblksz)
15642742Swollman		zfs_grow_blocksize(zp, newblksz, tx);
15652742Swollman
15662742Swollman	zp->z_size = end;
156719878Swollman
15682742Swollman	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
156943543Swollman	    &zp->z_size, sizeof (zp->z_size), tx));
157043543Swollman
157119878Swollman	vnode_pager_setsize(ZTOV(zp), end);
15722742Swollman
157319878Swollman	zfs_range_unlock(rl);
15742742Swollman
157519878Swollman	dmu_tx_commit(tx);
15762742Swollman
157786222Swollman	return (0);
157819878Swollman}
157919878Swollman
158019878Swollman/*
158119878Swollman * Free space in a file.
158243014Swollman *
158343014Swollman *	IN:	zp	- znode of file to free data in.
158486222Swollman *		off	- start of section to free.
158586222Swollman *		len	- length of section to free.
158686222Swollman *
158786222Swollman *	RETURN:	0 on success, error code on failure
158886222Swollman */
158986222Swollmanstatic int
159086222Swollmanzfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
159186222Swollman{
159286222Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
159386222Swollman	rl_t *rl;
159486222Swollman	int error;
159586222Swollman
159686222Swollman	/*
159786222Swollman	 * Lock the range being freed.
159886222Swollman	 */
159986222Swollman	rl = zfs_range_lock(zp, off, len, RL_WRITER);
160086222Swollman
160186222Swollman	/*
160286222Swollman	 * Nothing to do if file already at desired length.
160386222Swollman	 */
160486222Swollman	if (off >= zp->z_size) {
160586222Swollman		zfs_range_unlock(rl);
160686222Swollman		return (0);
160786222Swollman	}
160886222Swollman
160986222Swollman	if (off + len > zp->z_size)
161086222Swollman		len = zp->z_size - off;
161186222Swollman
161286222Swollman	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
161386222Swollman
161486222Swollman	if (error == 0) {
161586222Swollman		/*
161686222Swollman		 * In FreeBSD we cannot free block in the middle of a file,
161786222Swollman		 * but only at the end of a file, so this code path should
161886222Swollman		 * never happen.
161986222Swollman		 */
162086222Swollman		vnode_pager_setsize(ZTOV(zp), off);
162186222Swollman	}
162286222Swollman
162386222Swollman	zfs_range_unlock(rl);
162486222Swollman
162586222Swollman	return (error);
162686222Swollman}
162786222Swollman
162886222Swollman/*
162986222Swollman * Truncate a file
163086222Swollman *
163119878Swollman *	IN:	zp	- znode of file to free data in.
163286222Swollman *		end	- new end-of-file.
16332742Swollman *
16342742Swollman *	RETURN:	0 on success, error code on failure
16352742Swollman */
16362742Swollmanstatic int
163719878Swollmanzfs_trunc(znode_t *zp, uint64_t end)
16382742Swollman{
16392742Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
16402742Swollman	vnode_t *vp = ZTOV(zp);
164119878Swollman	dmu_tx_t *tx;
16422742Swollman	rl_t *rl;
164319878Swollman	int error;
16442742Swollman	sa_bulk_attr_t bulk[2];
164519878Swollman	int count = 0;
16462742Swollman
164719878Swollman	/*
16482742Swollman	 * We will change zp_size, lock the whole file.
164919878Swollman	 */
16502742Swollman	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
165119878Swollman
165219878Swollman	/*
16532742Swollman	 * Nothing to do if file already at desired length.
165419878Swollman	 */
165519878Swollman	if (end >= zp->z_size) {
16562742Swollman		zfs_range_unlock(rl);
16572742Swollman		return (0);
16582742Swollman	}
165919878Swollman
166019878Swollman	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,  -1);
166119878Swollman	if (error) {
166219878Swollman		zfs_range_unlock(rl);
166319878Swollman		return (error);
166419878Swollman	}
166558787Sru	tx = dmu_tx_create(zfsvfs->z_os);
166620094Swollman	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
166758787Sru	zfs_sa_upgrade_txholds(tx, zp);
166858787Sru	dmu_tx_mark_netfree(tx);
166943014Swollman	error = dmu_tx_assign(tx, TXG_WAIT);
167043014Swollman	if (error) {
167120094Swollman		dmu_tx_abort(tx);
167258787Sru		zfs_range_unlock(rl);
167358787Sru		return (error);
167458787Sru	}
167558787Sru
167658787Sru	zp->z_size = end;
167758787Sru	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
167858787Sru	    NULL, &zp->z_size, sizeof (zp->z_size));
167958787Sru
168058787Sru	if (end == 0) {
168158787Sru		zp->z_pflags &= ~ZFS_SPARSE;
168258787Sru		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
16832742Swollman		    NULL, &zp->z_pflags, 8);
16842742Swollman	}
168520094Swollman	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
168620094Swollman
168720094Swollman	dmu_tx_commit(tx);
168820094Swollman
168920094Swollman	/*
169020094Swollman	 * Clear any mapped pages in the truncated region.  This has to
169120094Swollman	 * happen outside of the transaction to avoid the possibility of
169220094Swollman	 * a deadlock with someone trying to push a page that we are
169320094Swollman	 * about to invalidate.
169420094Swollman	 */
169520094Swollman	vnode_pager_setsize(vp, end);
169620094Swollman
169720094Swollman	zfs_range_unlock(rl);
169820094Swollman
169920094Swollman	return (0);
170020094Swollman}
170120094Swollman
17022742Swollman/*
170343543Swollman * Free space in a file
170443543Swollman *
170543543Swollman *	IN:	zp	- znode of file to free data in.
170619878Swollman *		off	- start of range
17072742Swollman *		len	- end of range (0 => EOF)
17082742Swollman *		flag	- current file open mode flags.
170919878Swollman *		log	- TRUE if this action should be logged
17102742Swollman *
171119878Swollman *	RETURN:	0 on success, error code on failure
171219878Swollman */
171319878Swollmanint
171419878Swollmanzfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
171519878Swollman{
17162742Swollman	vnode_t *vp = ZTOV(zp);
171719878Swollman	dmu_tx_t *tx;
17182742Swollman	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
171919878Swollman	zilog_t *zilog = zfsvfs->z_log;
172019878Swollman	uint64_t mode;
172119878Swollman	uint64_t mtime[2], ctime[2];
172219878Swollman	sa_bulk_attr_t bulk[3];
17232742Swollman	int count = 0;
17242742Swollman	int error;
172519878Swollman
17262742Swollman	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
172719878Swollman	    sizeof (mode))) != 0)
17282742Swollman		return (error);
17292742Swollman
17302742Swollman	if (off > zp->z_size) {
173119878Swollman		error =  zfs_extend(zp, off+len);
173219878Swollman		if (error == 0 && log)
17332742Swollman			goto log;
173419878Swollman		else
173519878Swollman			return (error);
173619878Swollman	}
17372742Swollman
17382742Swollman	/*
173919878Swollman	 * Check for any locks in the region to be freed.
17402742Swollman	 */
17412742Swollman
174219878Swollman	if (MANDLOCK(vp, (mode_t)mode)) {
174319878Swollman		uint64_t length = (len ? len : zp->z_size - off);
174443014Swollman		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
174519878Swollman			return (error);
17462742Swollman	}
174743014Swollman
174819878Swollman	if (len == 0) {
174943014Swollman		error = zfs_trunc(zp, off);
175019878Swollman	} else {
17512742Swollman		if ((error = zfs_free_range(zp, off, len)) == 0 &&
175219878Swollman		    off + len > zp->z_size)
17532742Swollman			error = zfs_extend(zp, off+len);
17542742Swollman	}
17552742Swollman	if (error || !log)
175619878Swollman		return (error);
17572742Swollmanlog:
175819878Swollman	tx = dmu_tx_create(zfsvfs->z_os);
17592742Swollman	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
176019878Swollman	zfs_sa_upgrade_txholds(tx, zp);
17612742Swollman	error = dmu_tx_assign(tx, TXG_WAIT);
17622742Swollman	if (error) {
176319878Swollman		dmu_tx_abort(tx);
176419878Swollman		return (error);
176519878Swollman	}
17662742Swollman
1767121098Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
1768121098Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
17692742Swollman	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1770121098Swollman	    NULL, &zp->z_pflags, 8);
177119878Swollman	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
177219878Swollman	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
177319878Swollman	ASSERT(error == 0);
177420094Swollman
177519878Swollman	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
177619878Swollman
17772742Swollman	dmu_tx_commit(tx);
177886222Swollman	return (0);
177919878Swollman}
178019878Swollman
178120094Swollmanvoid
178220094Swollmanzfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
178320094Swollman{
17842742Swollman	zfsvfs_t	zfsvfs;
178586222Swollman	uint64_t	moid, obj, sa_obj, version;
178619878Swollman	uint64_t	sense = ZFS_CASE_SENSITIVE;
178719878Swollman	uint64_t	norm = 0;
178819878Swollman	nvpair_t	*elem;
17892742Swollman	int		error;
17902742Swollman	int		i;
179158787Sru	znode_t		*rootzp = NULL;
179258787Sru	vattr_t		vattr;
179358787Sru	znode_t		*zp;
179458787Sru	zfs_acl_ids_t	acl_ids;
179558787Sru
179658787Sru	/*
179758787Sru	 * First attempt to create master node.
179858787Sru	 */
17992742Swollman	/*
180019878Swollman	 * In an empty objset, there are no blocks to read and thus
18012742Swollman	 * there can be no i/o errors (which we assert below).
180219878Swollman	 */
180319878Swollman	moid = MASTER_NODE_OBJ;
18042742Swollman	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
180519878Swollman	    DMU_OT_NONE, 0, tx);
18062742Swollman	ASSERT(error == 0);
180720094Swollman
180820094Swollman	/*
18092742Swollman	 * Set starting attributes.
18102742Swollman	 */
181119878Swollman	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
181219878Swollman	elem = NULL;
181320094Swollman	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
181420094Swollman		/* For the moment we expect all zpl props to be uint64_ts */
181558787Sru		uint64_t val;
181658787Sru		char *name;
18172742Swollman
18182742Swollman		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
181921217Swollman		VERIFY(nvpair_value_uint64(elem, &val) == 0);
182058787Sru		name = nvpair_name(elem);
182158787Sru		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
182220094Swollman			if (val < version)
182358787Sru				version = val;
182458787Sru		} else {
18252742Swollman			error = zap_update(os, moid, name, 8, 1, &val, tx);
182686222Swollman		}
182786222Swollman		ASSERT(error == 0);
182886222Swollman		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
182986222Swollman			norm = val;
183086222Swollman		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
183186222Swollman			sense = val;
183220094Swollman	}
183320094Swollman	ASSERT(version != 0);
183420094Swollman	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
183520094Swollman
183620094Swollman	/*
183720094Swollman	 * Create zap object used for SA attribute registration
183820094Swollman	 */
183920094Swollman
184020094Swollman	if (version >= ZPL_VERSION_SA) {
184120094Swollman		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
184220094Swollman		    DMU_OT_NONE, 0, tx);
184375267Swollman		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
184475267Swollman		ASSERT(error == 0);
184575267Swollman	} else {
184675267Swollman		sa_obj = 0;
184775267Swollman	}
184875267Swollman	/*
184975267Swollman	 * Create a delete queue.
185086222Swollman	 */
185186222Swollman	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
185286222Swollman
185386222Swollman	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
185486222Swollman	ASSERT(error == 0);
185586222Swollman
185686222Swollman	/*
185786222Swollman	 * Create root znode.  Create minimal znode/vnode/zfsvfs
185886222Swollman	 * to allow zfs_mknode to work.
18592742Swollman	 */
186086222Swollman	VATTR_NULL(&vattr);
186186222Swollman	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
186286222Swollman	vattr.va_type = VDIR;
186320094Swollman	vattr.va_mode = S_IFDIR|0755;
186458787Sru	vattr.va_uid = crgetuid(cr);
186520094Swollman	vattr.va_gid = crgetgid(cr);
186620094Swollman
186786222Swollman	bzero(&zfsvfs, sizeof (zfsvfs_t));
186886222Swollman
186986222Swollman	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
187086222Swollman	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
187186222Swollman	rootzp->z_moved = 0;
187286222Swollman	rootzp->z_unlinked = 0;
187386222Swollman	rootzp->z_atime_dirty = 0;
187486222Swollman	rootzp->z_is_sa = USE_SA(version, os);
187586222Swollman
187686222Swollman	zfsvfs.z_os = os;
187786222Swollman	zfsvfs.z_parent = &zfsvfs;
187886222Swollman	zfsvfs.z_version = version;
187986222Swollman	zfsvfs.z_use_fuids = USE_FUIDS(version, os);
188086222Swollman	zfsvfs.z_use_sa = USE_SA(version, os);
188186222Swollman	zfsvfs.z_norm = norm;
188286222Swollman
188386222Swollman	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
188486222Swollman	    &zfsvfs.z_attr_table);
188586222Swollman
188686222Swollman	ASSERT(error == 0);
188786222Swollman
18882742Swollman	/*
188958787Sru	 * Fold case on file systems that are always or sometimes case
189058787Sru	 * insensitive.
189120094Swollman	 */
18922742Swollman	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
189320094Swollman		zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER;
189458787Sru
189520094Swollman	mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
189686222Swollman	list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
189786222Swollman	    offsetof(znode_t, z_link_node));
189886222Swollman
189958787Sru	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
190058787Sru		mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
190158787Sru
190258787Sru	rootzp->z_zfsvfs = &zfsvfs;
190358787Sru	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
190458787Sru	    cr, NULL, &acl_ids));
190558787Sru	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
190686222Swollman	ASSERT3P(zp, ==, rootzp);
190786222Swollman	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
190886222Swollman	ASSERT(error == 0);
190986222Swollman	zfs_acl_ids_free(&acl_ids);
191086222Swollman	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
191186222Swollman
191286222Swollman	sa_handle_destroy(rootzp->z_sa_hdl);
191358787Sru	kmem_cache_free(znode_cache, rootzp);
191458787Sru
191543766Sbde	/*
191658787Sru	 * Create shares directory
191743766Sbde	 */
191886222Swollman
191986222Swollman	error = zfs_create_share_dir(&zfsvfs, tx);
192086222Swollman
192158787Sru	ASSERT(error == 0);
192258787Sru
192343766Sbde	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
192458787Sru		mutex_destroy(&zfsvfs.z_hold_mtx[i]);
192543766Sbde}
192686222Swollman
192786222Swollman#endif /* _KERNEL */
192858787Sru
192958787Srustatic int
193043766Sbdezfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
193158787Sru{
193258787Sru	uint64_t sa_obj = 0;
193343766Sbde	int error;
193486222Swollman
193586222Swollman	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
193686222Swollman	if (error != 0 && error != ENOENT)
193786222Swollman		return (error);
193886222Swollman
193958787Sru	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
194058787Sru	return (error);
194143766Sbde}
194258787Sru
194343766Sbdestatic int
194486222Swollmanzfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
194586222Swollman    dmu_buf_t **db, void *tag)
194686222Swollman{
194786222Swollman	dmu_object_info_t doi;
19482742Swollman	int error;
194958787Sru
195058787Sru	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
195143766Sbde		return (error);
195258787Sru
195343766Sbde	dmu_object_info_from_db(*db, &doi);
195486222Swollman	if ((doi.doi_bonus_type != DMU_OT_SA &&
195586222Swollman	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
195686222Swollman	    doi.doi_bonus_type == DMU_OT_ZNODE &&
195786222Swollman	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
195886222Swollman		sa_buf_rele(*db, tag);
195986222Swollman		return (SET_ERROR(ENOTSUP));
196086222Swollman	}
196186222Swollman
196286222Swollman	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
196386222Swollman	if (error != 0) {
196486222Swollman		sa_buf_rele(*db, tag);
196558787Sru		return (error);
196658787Sru	}
196743766Sbde
196858787Sru	return (0);
196943766Sbde}
197086222Swollman
197186222Swollmanvoid
197286222Swollmanzfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
197386222Swollman{
197486222Swollman	sa_handle_destroy(hdl);
197586222Swollman	sa_buf_rele(db, tag);
197686222Swollman}
197758787Sru
197858787Sru/*
197943766Sbde * Given an object number, return its parent object number and whether
198058787Sru * or not the object is an extended attribute directory.
198143766Sbde */
198286222Swollmanstatic int
198386222Swollmanzfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
198486222Swollman    uint64_t *pobjp, int *is_xattrdir)
198586222Swollman{
198686222Swollman	uint64_t parent;
198786222Swollman	uint64_t pflags;
198886222Swollman	uint64_t mode;
198986222Swollman	uint64_t parent_mode;
199086222Swollman	sa_bulk_attr_t bulk[3];
199186222Swollman	sa_handle_t *sa_hdl;
199286222Swollman	dmu_buf_t *sa_db;
199386222Swollman	int count = 0;
199486222Swollman	int error;
199586222Swollman
199686222Swollman	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
199786222Swollman	    &parent, sizeof (parent));
19982742Swollman	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
199958787Sru	    &pflags, sizeof (pflags));
200043766Sbde	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
200158787Sru	    &mode, sizeof (mode));
200243766Sbde
200386222Swollman	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
200486222Swollman		return (error);
200586222Swollman
200686222Swollman	/*
200786222Swollman	 * When a link is removed its parent pointer is not changed and will
200858787Sru	 * be invalid.  There are two cases where a link is removed but the
200958787Sru	 * file stays around, when it goes to the delete queue and when there
201043766Sbde	 * are additional links.
201158787Sru	 */
201243766Sbde	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
201386222Swollman	if (error != 0)
201486222Swollman		return (error);
20152742Swollman
201658787Sru	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
201758787Sru	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
201858787Sru	if (error != 0)
201958787Sru		return (error);
202058787Sru
20212742Swollman	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
2022114173Swollman
2023114173Swollman	/*
2024114173Swollman	 * Extended attributes can be applied to files, directories, etc.
2025114173Swollman	 * Otherwise the parent must be a directory.
2026114173Swollman	 */
2027114173Swollman	if (!*is_xattrdir && !S_ISDIR(parent_mode))
2028114173Swollman		return (SET_ERROR(EINVAL));
2029114173Swollman
2030114173Swollman	*pobjp = parent;
2031114173Swollman
2032114173Swollman	return (0);
2033114173Swollman}
2034114173Swollman
2035114173Swollman/*
2036114173Swollman * Given an object number, return some zpl level statistics
2037114173Swollman */
203814343Swollmanstatic int
203914343Swollmanzfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
20409908Swollman    zfs_stat_t *sb)
20412742Swollman{
2042114173Swollman	sa_bulk_attr_t bulk[4];
20432742Swollman	int count = 0;
20442742Swollman
20452742Swollman	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
20462742Swollman	    &sb->zs_mode, sizeof (sb->zs_mode));
204719878Swollman	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
20482742Swollman	    &sb->zs_gen, sizeof (sb->zs_gen));
204919878Swollman	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
205019878Swollman	    &sb->zs_links, sizeof (sb->zs_links));
20512742Swollman	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
205219878Swollman	    &sb->zs_ctime, sizeof (sb->zs_ctime));
20532742Swollman
20542742Swollman	return (sa_bulk_lookup(hdl, bulk, count));
205519878Swollman}
20562742Swollman
20572742Swollmanstatic int
205819878Swollmanzfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
205919878Swollman    sa_attr_type_t *sa_table, char *buf, int len)
206019878Swollman{
20612742Swollman	sa_handle_t *sa_hdl;
206219878Swollman	sa_handle_t *prevhdl = NULL;
20632742Swollman	dmu_buf_t *prevdb = NULL;
206419878Swollman	dmu_buf_t *sa_db = NULL;
206519878Swollman	char *path = buf + len - 1;
206619878Swollman	int error;
20672742Swollman
206843014Swollman	*path = '\0';
206919878Swollman	sa_hdl = hdl;
207043014Swollman
207119878Swollman	for (;;) {
207219878Swollman		uint64_t pobj;
207319878Swollman		char component[MAXNAMELEN + 2];
207414343Swollman		size_t complen;
207519878Swollman		int is_xattrdir;
20762742Swollman
207719878Swollman		if (prevdb)
20782742Swollman			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
207919878Swollman
20802742Swollman		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
208119878Swollman		    &is_xattrdir)) != 0)
20822742Swollman			break;
208319878Swollman
208419878Swollman		if (pobj == obj) {
208519878Swollman			if (path[0] != '/')
208619878Swollman				*--path = '/';
208719878Swollman			break;
208819878Swollman		}
208919878Swollman
209019878Swollman		component[0] = '/';
209119878Swollman		if (is_xattrdir) {
209219878Swollman			(void) sprintf(component + 1, "<xattrdir>");
20932742Swollman		} else {
20942742Swollman			error = zap_value_search(osp, pobj, obj,
209519878Swollman			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
209619878Swollman			if (error != 0)
209719878Swollman				break;
209819878Swollman		}
209919878Swollman
210019878Swollman		complen = strlen(component);
210119878Swollman		path -= complen;
210219878Swollman		ASSERT(path >= buf);
210319878Swollman		bcopy(component, path, complen);
210419878Swollman		obj = pobj;
210519878Swollman
21062742Swollman		if (sa_hdl != hdl) {
210719878Swollman			prevhdl = sa_hdl;
21082742Swollman			prevdb = sa_db;
210919878Swollman		}
211019878Swollman		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
211120094Swollman		if (error != 0) {
211220094Swollman			sa_hdl = prevhdl;
21132742Swollman			sa_db = prevdb;
211419878Swollman			break;
211586222Swollman		}
211686222Swollman	}
211786222Swollman
211886222Swollman	if (sa_hdl != NULL && sa_hdl != hdl) {
211986222Swollman		ASSERT(sa_db != NULL);
212086222Swollman		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
212186222Swollman	}
212286222Swollman
212386222Swollman	if (error == 0)
212486222Swollman		(void) memmove(buf, path, buf + len - path);
212586222Swollman
212686222Swollman	return (error);
212786222Swollman}
212886222Swollman
212986222Swollmanint
213086222Swollmanzfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
213186222Swollman{
213286222Swollman	sa_attr_type_t *sa_table;
213386222Swollman	sa_handle_t *hdl;
213486222Swollman	dmu_buf_t *db;
213586222Swollman	int error;
213686222Swollman
213786222Swollman	error = zfs_sa_setup(osp, &sa_table);
213886222Swollman	if (error != 0)
213986222Swollman		return (error);
214086222Swollman
214186222Swollman	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
214286222Swollman	if (error != 0)
214386222Swollman		return (error);
214486222Swollman
214586222Swollman	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
214686222Swollman
214786222Swollman	zfs_release_sa_handle(hdl, db, FTAG);
214886222Swollman	return (error);
214986222Swollman}
215086222Swollman
215186222Swollmanint
215286222Swollmanzfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
215386222Swollman    char *buf, int len)
215486222Swollman{
215586222Swollman	char *path = buf + len - 1;
215686222Swollman	sa_attr_type_t *sa_table;
215786222Swollman	sa_handle_t *hdl;
215886222Swollman	dmu_buf_t *db;
215986222Swollman	int error;
21602742Swollman
216186222Swollman	*path = '\0';
216286222Swollman
216386222Swollman	error = zfs_sa_setup(osp, &sa_table);
216486222Swollman	if (error != 0)
216519878Swollman		return (error);
216619878Swollman
21672742Swollman	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
21682742Swollman	if (error != 0)
216958787Sru		return (error);
21702742Swollman
21712742Swollman	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
21722742Swollman	if (error != 0) {
21732742Swollman		zfs_release_sa_handle(hdl, db, FTAG);
21742742Swollman		return (error);
217519878Swollman	}
21769908Swollman
217758787Sru	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
217819878Swollman
21799908Swollman	zfs_release_sa_handle(hdl, db, FTAG);
21802742Swollman	return (error);
21812742Swollman}
218219878Swollman