zfs_znode.c revision 273736
1121098Swollman/* 264499Swollman * CDDL HEADER START 32742Swollman * 42742Swollman * The contents of this file are subject to the terms of the 52742Swollman * Common Development and Distribution License (the "License"). 62742Swollman * You may not use this file except in compliance with the License. 758787Sru * 82742Swollman * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 958787Sru * or http://www.opensolaris.org/os/licensing. 1058787Sru * See the License for the specific language governing permissions 112742Swollman * and limitations under the License. 1286222Swollman * 1320094Swollman * When distributing Covered Code, include this CDDL HEADER in each 1420094Swollman * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1520094Swollman * If applicable, add the following below this CDDL HEADER, with the 1620094Swollman * fields enclosed by brackets "[]" replaced with your own identifying 1720094Swollman * information: Portions Copyright [yyyy] [name of copyright owner] 1858787Sru * 1958787Sru * CDDL HEADER END 2020094Swollman */ 2143543Swollman/* 222742Swollman * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 2343543Swollman * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 2443543Swollman */ 2543543Swollman 2643543Swollman/* Portions Copyright 2007 Jeremy Teo */ 27121098Swollman/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 28121098Swollman 29121098Swollman#ifdef _KERNEL 30121098Swollman#include <sys/types.h> 3143543Swollman#include <sys/param.h> 3243543Swollman#include <sys/time.h> 3343543Swollman#include <sys/systm.h> 3443543Swollman#include <sys/sysmacros.h> 3543543Swollman#include <sys/resource.h> 3643543Swollman#include <sys/mntent.h> 372742Swollman#include <sys/u8_textprep.h> 382742Swollman#include <sys/dsl_dataset.h> 3919878Swollman#include <sys/vfs.h> 40114173Swollman#include <sys/vnode.h> 41114173Swollman#include <sys/file.h> 42114173Swollman#include <sys/kmem.h> 43114173Swollman#include <sys/errno.h> 44114173Swollman#include <sys/unistd.h> 45114173Swollman#include <sys/atomic.h> 46114173Swollman#include <sys/zfs_dir.h> 47114173Swollman#include <sys/zfs_acl.h> 48114173Swollman#include <sys/zfs_ioctl.h> 49114173Swollman#include <sys/zfs_rlock.h> 50114173Swollman#include <sys/zfs_fuid.h> 51114173Swollman#include <sys/dnode.h> 52114173Swollman#include <sys/fs/zfs.h> 53114173Swollman#include <sys/kidmap.h> 542742Swollman#endif /* _KERNEL */ 552742Swollman 5658787Sru#include <sys/dmu.h> 572742Swollman#include <sys/refcount.h> 5819878Swollman#include <sys/stat.h> 5914343Swollman#include <sys/zap.h> 609908Swollman#include <sys/zfs_znode.h> 619908Swollman#include <sys/sa.h> 629908Swollman#include <sys/zfs_sa.h> 639908Swollman#include <sys/zfs_stat.h> 649908Swollman#include <sys/refcount.h> 659908Swollman 669908Swollman#include "zfs_prop.h" 679908Swollman#include "zfs_comutil.h" 689908Swollman 699908Swollman/* Used by fstat(1). */ 7014343SwollmanSYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 7114343Swollman SYSCTL_NULL_INT_PTR, sizeof(znode_t), "sizeof(znode_t)"); 729908Swollman 739908Swollman/* 749908Swollman * Define ZNODE_STATS to turn on statistic gathering. By default, it is only 759908Swollman * turned on when DEBUG is also defined. 769908Swollman */ 779908Swollman#ifdef DEBUG 789908Swollman#define ZNODE_STATS 7919878Swollman#endif /* DEBUG */ 802742Swollman 812742Swollman#ifdef ZNODE_STATS 8243014Swollman#define ZNODE_STAT_ADD(stat) ((stat)++) 832742Swollman#else 8419878Swollman#define ZNODE_STAT_ADD(stat) /* nothing */ 852742Swollman#endif /* ZNODE_STATS */ 862742Swollman 872742Swollman/* 882742Swollman * Functions needed for userland (ie: libzpool) are not put under 892742Swollman * #ifdef_KERNEL; the rest of the functions have dependencies 902742Swollman * (such as VFS logic) that will not compile easily in userland. 912742Swollman */ 9221217Swollman#ifdef _KERNEL 932742Swollman/* 942742Swollman * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to 952742Swollman * be freed before it can be safely accessed. 962742Swollman */ 972742Swollmankrwlock_t zfsvfs_lock; 982742Swollman 992742Swollmanstatic kmem_cache_t *znode_cache = NULL; 10021217Swollman 1012742Swollman/*ARGSUSED*/ 1022742Swollmanstatic void 1032742Swollmanznode_evict_error(dmu_buf_t *dbuf, void *user_ptr) 1042742Swollman{ 10519878Swollman /* 1062742Swollman * We should never drop all dbuf refs without first clearing 1072742Swollman * the eviction callback. 1082742Swollman */ 1092742Swollman panic("evicting znode %p\n", user_ptr); 11058787Sru} 11158787Sru 1122742Swollmanextern struct vop_vector zfs_vnodeops; 1132742Swollmanextern struct vop_vector zfs_fifoops; 11458787Sruextern struct vop_vector zfs_shareops; 1152742Swollman 11658787Srustatic int 11758787Sruzfs_znode_cache_constructor(void *buf, void *arg, int kmflags) 11858787Sru{ 1192742Swollman znode_t *zp = buf; 12058787Sru 12158787Sru POINTER_INVALIDATE(&zp->z_zfsvfs); 1222742Swollman 1232742Swollman list_link_init(&zp->z_link_node); 1242742Swollman 1252742Swollman mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 12658787Sru rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); 1272742Swollman rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); 1282742Swollman mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 12958787Sru 13058787Sru mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); 1312742Swollman avl_create(&zp->z_range_avl, zfs_range_compare, 132121098Swollman sizeof (rl_t), offsetof(rl_t, r_node)); 13317200Swollman 13443543Swollman zp->z_dirlocks = NULL; 13558787Sru zp->z_acl_cached = NULL; 13617200Swollman zp->z_vnode = NULL; 13717200Swollman zp->z_moved = 0; 13817200Swollman return (0); 13917200Swollman} 140121098Swollman 141121098Swollman/*ARGSUSED*/ 142121098Swollmanstatic void 143121098Swollmanzfs_znode_cache_destructor(void *buf, void *arg) 144121098Swollman{ 145121098Swollman znode_t *zp = buf; 146121098Swollman 147121098Swollman ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 14817200Swollman ASSERT(ZTOV(zp) == NULL); 149121098Swollman vn_free(ZTOV(zp)); 150121098Swollman ASSERT(!list_link_active(&zp->z_link_node)); 151121098Swollman mutex_destroy(&zp->z_lock); 152121098Swollman rw_destroy(&zp->z_parent_lock); 153121098Swollman rw_destroy(&zp->z_name_lock); 154121098Swollman mutex_destroy(&zp->z_acl_lock); 155121098Swollman avl_destroy(&zp->z_range_avl); 156121098Swollman mutex_destroy(&zp->z_range_lock); 157121098Swollman 158121098Swollman ASSERT(zp->z_dirlocks == NULL); 15919878Swollman ASSERT(zp->z_acl_cached == NULL); 16019878Swollman} 16119878Swollman 16219878Swollman#ifdef ZNODE_STATS 16319878Swollmanstatic struct { 16419878Swollman uint64_t zms_zfsvfs_invalid; 16519878Swollman uint64_t zms_zfsvfs_recheck1; 1662742Swollman uint64_t zms_zfsvfs_unmounted; 1672742Swollman uint64_t zms_zfsvfs_recheck2; 1682742Swollman uint64_t zms_obj_held; 1692742Swollman uint64_t zms_vnode_locked; 1702742Swollman uint64_t zms_not_only_dnlc; 1712742Swollman} znode_move_stats; 1722742Swollman#endif /* ZNODE_STATS */ 1732742Swollman 1742742Swollman#ifdef sun 1752742Swollmanstatic void 17643014Swollmanzfs_znode_move_impl(znode_t *ozp, znode_t *nzp) 17743014Swollman{ 17843014Swollman vnode_t *vp; 17943014Swollman 18043014Swollman /* Copy fields. */ 18143014Swollman nzp->z_zfsvfs = ozp->z_zfsvfs; 18258787Sru 18358787Sru /* Swap vnodes. */ 18458787Sru vp = nzp->z_vnode; 18558787Sru nzp->z_vnode = ozp->z_vnode; 1862742Swollman ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ 18775267Swollman ZTOV(ozp)->v_data = ozp; 18867578Swollman ZTOV(nzp)->v_data = nzp; 18967578Swollman 19067578Swollman nzp->z_id = ozp->z_id; 19167578Swollman ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ 19267578Swollman ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); 19375267Swollman nzp->z_unlinked = ozp->z_unlinked; 19467578Swollman nzp->z_atime_dirty = ozp->z_atime_dirty; 19558787Sru nzp->z_zn_prefetch = ozp->z_zn_prefetch; 19667578Swollman nzp->z_blksz = ozp->z_blksz; 19767578Swollman nzp->z_seq = ozp->z_seq; 19867578Swollman nzp->z_mapcnt = ozp->z_mapcnt; 19967578Swollman nzp->z_gen = ozp->z_gen; 20067578Swollman nzp->z_sync_cnt = ozp->z_sync_cnt; 20167578Swollman nzp->z_is_sa = ozp->z_is_sa; 20267578Swollman nzp->z_sa_hdl = ozp->z_sa_hdl; 20367578Swollman bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2); 20467578Swollman nzp->z_links = ozp->z_links; 20567578Swollman nzp->z_size = ozp->z_size; 20667578Swollman nzp->z_pflags = ozp->z_pflags; 20758787Sru nzp->z_uid = ozp->z_uid; 20858787Sru nzp->z_gid = ozp->z_gid; 20958787Sru nzp->z_mode = ozp->z_mode; 21058787Sru 21158787Sru /* 21258787Sru * Since this is just an idle znode and kmem is already dealing with 21393799Swollman * memory pressure, release any cached ACL. 21458787Sru */ 21558787Sru if (ozp->z_acl_cached) { 21643014Swollman zfs_acl_free(ozp->z_acl_cached); 21743014Swollman ozp->z_acl_cached = NULL; 21843014Swollman } 21943014Swollman 22043014Swollman sa_set_userp(nzp->z_sa_hdl, nzp); 22143014Swollman 2222742Swollman /* 22386222Swollman * Invalidate the original znode by clearing fields that provide a 2242742Swollman * pointer back to the znode. Set the low bit of the vfs pointer to 22558787Sru * ensure that zfs_znode_move() recognizes the znode as invalid in any 2262742Swollman * subsequent callback. 22758787Sru */ 22830711Swollman ozp->z_sa_hdl = NULL; 2292742Swollman POINTER_INVALIDATE(&ozp->z_zfsvfs); 2302742Swollman 2312742Swollman /* 2322742Swollman * Mark the znode. 23330711Swollman */ 2342742Swollman nzp->z_moved = 1; 23517200Swollman ozp->z_moved = (uint8_t)-1; 23617200Swollman} 23717200Swollman 2382742Swollman/*ARGSUSED*/ 23975267Swollmanstatic kmem_cbrc_t 2402742Swollmanzfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) 24130711Swollman{ 2422742Swollman znode_t *ozp = buf, *nzp = newbuf; 24358787Sru zfsvfs_t *zfsvfs; 24458787Sru vnode_t *vp; 24558787Sru 24658787Sru /* 24717200Swollman * The znode is on the file system's list of known znodes if the vfs 2482742Swollman * pointer is valid. We set the low bit of the vfs pointer when freeing 24917200Swollman * the znode to invalidate it, and the memory patterns written by kmem 2502742Swollman * (baddcafe and deadbeef) set at least one of the two low bits. A newly 25186222Swollman * created znode sets the vfs pointer last of all to indicate that the 25286222Swollman * znode is known and in a valid state to be moved by this function. 25386222Swollman */ 25486222Swollman zfsvfs = ozp->z_zfsvfs; 25586222Swollman if (!POINTER_IS_VALID(zfsvfs)) { 25686222Swollman ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); 2572742Swollman return (KMEM_CBRC_DONT_KNOW); 25858787Sru } 25958787Sru 26058787Sru /* 26158787Sru * Close a small window in which it's possible that the filesystem could 26258787Sru * be unmounted and freed, and zfsvfs, though valid in the previous 26358787Sru * statement, could point to unrelated memory by the time we try to 26458787Sru * prevent the filesystem from being unmounted. 26558787Sru */ 26658787Sru rw_enter(&zfsvfs_lock, RW_WRITER); 26758787Sru if (zfsvfs != ozp->z_zfsvfs) { 26858787Sru rw_exit(&zfsvfs_lock); 26958787Sru ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); 27058787Sru return (KMEM_CBRC_DONT_KNOW); 27158787Sru } 27217200Swollman 27317200Swollman /* 27417200Swollman * If the znode is still valid, then so is the file system. We know that 27517200Swollman * no valid file system can be freed while we hold zfsvfs_lock, so we 27617200Swollman * can safely ensure that the filesystem is not and will not be 27717200Swollman * unmounted. The next statement is equivalent to ZFS_ENTER(). 27817200Swollman */ 27917200Swollman rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 2802742Swollman if (zfsvfs->z_unmounted) { 28143014Swollman ZFS_EXIT(zfsvfs); 2822742Swollman rw_exit(&zfsvfs_lock); 2832742Swollman ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); 28443014Swollman return (KMEM_CBRC_DONT_KNOW); 2852742Swollman } 2862742Swollman rw_exit(&zfsvfs_lock); 28743014Swollman 2882742Swollman mutex_enter(&zfsvfs->z_znodes_lock); 2892742Swollman /* 29043014Swollman * Recheck the vfs pointer in case the znode was removed just before 2912742Swollman * acquiring the lock. 2922742Swollman */ 29343014Swollman if (zfsvfs != ozp->z_zfsvfs) { 2942742Swollman mutex_exit(&zfsvfs->z_znodes_lock); 29543014Swollman ZFS_EXIT(zfsvfs); 2962742Swollman ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); 29743014Swollman return (KMEM_CBRC_DONT_KNOW); 2982742Swollman } 2992742Swollman 30043014Swollman /* 3012742Swollman * At this point we know that as long as we hold z_znodes_lock, the 30258787Sru * znode cannot be freed and fields within the znode can be safely 30343014Swollman * accessed. Now, prevent a race with zfs_zget(). 3042742Swollman */ 3052742Swollman if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { 30643014Swollman mutex_exit(&zfsvfs->z_znodes_lock); 3072742Swollman ZFS_EXIT(zfsvfs); 30843014Swollman ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); 3092742Swollman return (KMEM_CBRC_LATER); 31043014Swollman } 3112742Swollman 31243014Swollman vp = ZTOV(ozp); 3132742Swollman if (mutex_tryenter(&vp->v_lock) == 0) { 31443014Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 3152742Swollman mutex_exit(&zfsvfs->z_znodes_lock); 31643014Swollman ZFS_EXIT(zfsvfs); 3172742Swollman ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); 31843014Swollman return (KMEM_CBRC_LATER); 3192742Swollman } 32043014Swollman 32143014Swollman /* Only move znodes that are referenced _only_ by the DNLC. */ 32243014Swollman if (vp->v_count != 1 || !vn_in_dnlc(vp)) { 32343014Swollman mutex_exit(&vp->v_lock); 32443014Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 32519878Swollman mutex_exit(&zfsvfs->z_znodes_lock); 3262742Swollman ZFS_EXIT(zfsvfs); 32743014Swollman ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); 32819878Swollman return (KMEM_CBRC_LATER); 32943014Swollman } 3302742Swollman 33143014Swollman /* 33243014Swollman * The znode is known and in a valid state to move. We're holding the 33343014Swollman * locks needed to execute the critical section. 33443014Swollman */ 33543014Swollman zfs_znode_move_impl(ozp, nzp); 33643014Swollman mutex_exit(&vp->v_lock); 33743014Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 3382742Swollman 33919878Swollman list_link_replace(&ozp->z_link_node, &nzp->z_link_node); 3402742Swollman mutex_exit(&zfsvfs->z_znodes_lock); 3412742Swollman ZFS_EXIT(zfsvfs); 34243014Swollman 3432742Swollman return (KMEM_CBRC_YES); 34443014Swollman} 34543014Swollman#endif /* sun */ 3462742Swollman 34743014Swollmanvoid 34843014Swollmanzfs_znode_init(void) 34943014Swollman{ 35043014Swollman /* 35143014Swollman * Initialize zcache 3522742Swollman */ 35343014Swollman rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); 35443014Swollman ASSERT(znode_cache == NULL); 35543014Swollman znode_cache = kmem_cache_create("zfs_znode_cache", 35643014Swollman sizeof (znode_t), 0, zfs_znode_cache_constructor, 3572742Swollman zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 35843014Swollman kmem_cache_set_move(znode_cache, zfs_znode_move); 3592742Swollman} 36043014Swollman 36143014Swollmanvoid 36243014Swollmanzfs_znode_fini(void) 36343014Swollman{ 3642742Swollman#ifdef sun 36543014Swollman /* 36643014Swollman * Cleanup vfs & vnode ops 36743014Swollman */ 36843014Swollman zfs_remove_op_tables(); 3692742Swollman#endif /* sun */ 37043014Swollman 37117200Swollman /* 37243014Swollman * Cleanup zcache 37343014Swollman */ 37443014Swollman if (znode_cache) 3752742Swollman kmem_cache_destroy(znode_cache); 3762742Swollman znode_cache = NULL; 37743014Swollman rw_destroy(&zfsvfs_lock); 37843014Swollman} 37943014Swollman 38043014Swollman#ifdef sun 3819908Swollmanstruct vnodeops *zfs_dvnodeops; 3829908Swollmanstruct vnodeops *zfs_fvnodeops; 38343014Swollmanstruct vnodeops *zfs_symvnodeops; 38443014Swollmanstruct vnodeops *zfs_xdvnodeops; 38543014Swollmanstruct vnodeops *zfs_evnodeops; 3869908Swollmanstruct vnodeops *zfs_sharevnodeops; 38743014Swollman 38817200Swollmanvoid 3892742Swollmanzfs_remove_op_tables() 3902742Swollman{ 39158787Sru /* 39217200Swollman * Remove vfs ops 39317200Swollman */ 3949908Swollman ASSERT(zfsfstype); 39517200Swollman (void) vfs_freevfsops_by_type(zfsfstype); 3962742Swollman zfsfstype = 0; 39786222Swollman 39843014Swollman /* 39917200Swollman * Remove vnode ops 40017200Swollman */ 4019908Swollman if (zfs_dvnodeops) 40217200Swollman vn_freevnodeops(zfs_dvnodeops); 40386222Swollman if (zfs_fvnodeops) 40486222Swollman vn_freevnodeops(zfs_fvnodeops); 40543014Swollman if (zfs_symvnodeops) 40619878Swollman vn_freevnodeops(zfs_symvnodeops); 40717200Swollman if (zfs_xdvnodeops) 40817200Swollman vn_freevnodeops(zfs_xdvnodeops); 4092742Swollman if (zfs_evnodeops) 41017200Swollman vn_freevnodeops(zfs_evnodeops); 4112742Swollman if (zfs_sharevnodeops) 41217200Swollman vn_freevnodeops(zfs_sharevnodeops); 41317200Swollman 41417200Swollman zfs_dvnodeops = NULL; 41517200Swollman zfs_fvnodeops = NULL; 4162742Swollman zfs_symvnodeops = NULL; 4172742Swollman zfs_xdvnodeops = NULL; 4182742Swollman zfs_evnodeops = NULL; 4192742Swollman zfs_sharevnodeops = NULL; 4202742Swollman} 42117200Swollman 42217200Swollmanextern const fs_operation_def_t zfs_dvnodeops_template[]; 4239908Swollmanextern const fs_operation_def_t zfs_fvnodeops_template[]; 4249908Swollmanextern const fs_operation_def_t zfs_xdvnodeops_template[]; 42519878Swollmanextern const fs_operation_def_t zfs_symvnodeops_template[]; 42617200Swollmanextern const fs_operation_def_t zfs_evnodeops_template[]; 42717200Swollmanextern const fs_operation_def_t zfs_sharevnodeops_template[]; 42817200Swollman 42919878Swollmanint 43017200Swollmanzfs_create_op_tables() 43175267Swollman{ 43275267Swollman int error; 43375267Swollman 43475267Swollman /* 43575267Swollman * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() 4369908Swollman * due to a failure to remove the the 2nd modlinkage (zfs_modldrv). 43717200Swollman * In this case we just return as the ops vectors are already set up. 43819878Swollman */ 4392742Swollman if (zfs_dvnodeops) 4402742Swollman return (0); 4419908Swollman 44219878Swollman error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, 4439908Swollman &zfs_dvnodeops); 4442742Swollman if (error) 44519878Swollman return (error); 44619878Swollman 44719878Swollman error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, 44819878Swollman &zfs_fvnodeops); 44919878Swollman if (error) 45019878Swollman return (error); 45119878Swollman 45219878Swollman error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, 45319878Swollman &zfs_symvnodeops); 45419878Swollman if (error) 45519878Swollman return (error); 4562742Swollman 45719878Swollman error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, 45819878Swollman &zfs_xdvnodeops); 45919878Swollman if (error) 46019878Swollman return (error); 46119878Swollman 46219878Swollman error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, 46319878Swollman &zfs_evnodeops); 4642742Swollman if (error) 46520094Swollman return (error); 46620094Swollman 46720094Swollman error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, 46820094Swollman &zfs_sharevnodeops); 46920094Swollman 47020094Swollman return (error); 47120094Swollman} 47220094Swollman#endif /* sun */ 4732742Swollman 47420094Swollmanint 47520094Swollmanzfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) 47620094Swollman{ 47758787Sru zfs_acl_ids_t acl_ids; 47820094Swollman vattr_t vattr; 47920094Swollman znode_t *sharezp; 48020094Swollman znode_t *zp; 48120094Swollman int error; 48243014Swollman 48320094Swollman vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 48420094Swollman vattr.va_type = VDIR; 48543014Swollman vattr.va_mode = S_IFDIR|0555; 48620094Swollman vattr.va_uid = crgetuid(kcred); 48720094Swollman vattr.va_gid = crgetgid(kcred); 48843014Swollman 48943014Swollman sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); 49020094Swollman ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs)); 49120094Swollman sharezp->z_moved = 0; 49243014Swollman sharezp->z_unlinked = 0; 49320094Swollman sharezp->z_atime_dirty = 0; 49420094Swollman sharezp->z_zfsvfs = zfsvfs; 49520094Swollman sharezp->z_is_sa = zfsvfs->z_use_sa; 49620094Swollman 49720094Swollman VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, 4982742Swollman kcred, NULL, &acl_ids)); 4992742Swollman zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); 5002742Swollman ASSERT3P(zp, ==, sharezp); 5012742Swollman POINTER_INVALIDATE(&sharezp->z_zfsvfs); 50219878Swollman error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 50319878Swollman ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); 50419878Swollman zfsvfs->z_shares_dir = sharezp->z_id; 50519878Swollman 5062742Swollman zfs_acl_ids_free(&acl_ids); 50719878Swollman sa_handle_destroy(sharezp->z_sa_hdl); 50819878Swollman kmem_cache_free(znode_cache, sharezp); 5092742Swollman 51019878Swollman return (error); 51119878Swollman} 51219878Swollman 51319878Swollman/* 51419878Swollman * define a couple of values we need available 51519878Swollman * for both 64 and 32 bit environments. 51619878Swollman */ 51719878Swollman#ifndef NBITSMINOR64 51819878Swollman#define NBITSMINOR64 32 51919878Swollman#endif 52019878Swollman#ifndef MAXMAJ64 52119878Swollman#define MAXMAJ64 0xffffffffUL 52219878Swollman#endif 52319878Swollman#ifndef MAXMIN64 52419878Swollman#define MAXMIN64 0xffffffffUL 52519878Swollman#endif 52619878Swollman 52719878Swollman/* 52819878Swollman * Create special expldev for ZFS private use. 52919878Swollman * Can't use standard expldev since it doesn't do 53019878Swollman * what we want. The standard expldev() takes a 53119878Swollman * dev32_t in LP64 and expands it to a long dev_t. 5322742Swollman * We need an interface that takes a dev32_t in ILP32 53319878Swollman * and expands it to a long dev_t. 5342742Swollman */ 5352742Swollmanstatic uint64_t 53619878Swollmanzfs_expldev(dev_t dev) 5372742Swollman{ 53819878Swollman return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev)); 5392742Swollman} 54019878Swollman/* 5412742Swollman * Special cmpldev for ZFS private use. 54219878Swollman * Can't use standard cmpldev since it takes 5432742Swollman * a long dev_t and compresses it to dev32_t in 54419878Swollman * LP64. We need to do a compaction of a long dev_t 5452742Swollman * to a dev32_t in ILP32. 54619878Swollman */ 5472742Swollmandev_t 54819878Swollmanzfs_cmpldev(uint64_t dev) 5492742Swollman{ 55019878Swollman return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64))); 5512742Swollman} 55219878Swollman 5532742Swollmanstatic void 55419878Swollmanzfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, 5552742Swollman dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) 55619878Swollman{ 5572742Swollman ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); 55819878Swollman ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); 5592742Swollman 56019878Swollman mutex_enter(&zp->z_lock); 5612742Swollman 5622742Swollman ASSERT(zp->z_sa_hdl == NULL); 56319878Swollman ASSERT(zp->z_acl_cached == NULL); 56430711Swollman if (sa_hdl == NULL) { 56520094Swollman VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, 5662742Swollman SA_HDL_SHARED, &zp->z_sa_hdl)); 5672742Swollman } else { 5682742Swollman zp->z_sa_hdl = sa_hdl; 5692742Swollman sa_set_userp(sa_hdl, zp); 5702742Swollman } 57119878Swollman 57219878Swollman zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; 5732742Swollman 5742742Swollman /* 575114173Swollman * Slap on VROOT if we are the root znode 576114173Swollman */ 577114173Swollman if (zp->z_id == zfsvfs->z_root) 578114173Swollman ZTOV(zp)->v_flag |= VROOT; 579114173Swollman 580114173Swollman mutex_exit(&zp->z_lock); 581114173Swollman vn_exists(ZTOV(zp)); 582114173Swollman} 5832742Swollman 58419878Swollmanvoid 5852742Swollmanzfs_znode_dmu_fini(znode_t *zp) 58619878Swollman{ 5872742Swollman ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || 58819878Swollman zp->z_unlinked || 58919878Swollman RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); 590114173Swollman 591114173Swollman sa_handle_destroy(zp->z_sa_hdl); 5922742Swollman zp->z_sa_hdl = NULL; 5932742Swollman} 594114173Swollman 595114173Swollmanstatic void 596114173Swollmanzfs_vnode_forget(vnode_t *vp) 597114173Swollman{ 598114173Swollman 59919878Swollman /* copied from insmntque_stddtr */ 60019878Swollman vp->v_data = NULL; 6012742Swollman vp->v_op = &dead_vnodeops; 6022742Swollman vgone(vp); 6032742Swollman vput(vp); 6042742Swollman} 60543014Swollman 6062742Swollman/* 60743014Swollman * Construct a new znode/vnode and intialize. 60843014Swollman * 60958787Sru * This does not do a call to dmu_set_user() that is 61058787Sru * up to the caller to do, in case you don't want to 61119878Swollman * return the znode 61258787Sru */ 61358787Srustatic znode_t * 61420094Swollmanzfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, 6152742Swollman dmu_object_type_t obj_type, sa_handle_t *hdl) 6162742Swollman{ 61730711Swollman znode_t *zp; 61830711Swollman vnode_t *vp; 61930711Swollman uint64_t mode; 62030711Swollman uint64_t parent; 62130711Swollman sa_bulk_attr_t bulk[9]; 62230711Swollman int count = 0; 62330711Swollman int error; 62430711Swollman 62530711Swollman zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 62630711Swollman 62730711Swollman KASSERT(curthread->td_vp_reserv > 0, 62830711Swollman ("zfs_znode_alloc: getnewvnode without any vnodes reserved")); 62975267Swollman error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp); 6302742Swollman if (error != 0) { 63130711Swollman kmem_cache_free(znode_cache, zp); 63230711Swollman return (NULL); 63319878Swollman } 63430711Swollman zp->z_vnode = vp; 6352742Swollman vp->v_data = zp; 63630711Swollman 6372742Swollman ASSERT(zp->z_dirlocks == NULL); 63819878Swollman ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 63930711Swollman zp->z_moved = 0; 64019878Swollman 64119878Swollman /* 64219878Swollman * Defer setting z_zfsvfs until the znode is ready to be a candidate for 64343543Swollman * the zfs_znode_move() callback. 64443543Swollman */ 64543543Swollman zp->z_sa_hdl = NULL; 64643543Swollman zp->z_unlinked = 0; 64719878Swollman zp->z_atime_dirty = 0; 64819878Swollman zp->z_mapcnt = 0; 64919878Swollman zp->z_id = db->db_object; 65030711Swollman zp->z_blksz = blksz; 65119878Swollman zp->z_seq = 0x7A4653; 65219878Swollman zp->z_sync_cnt = 0; 65319878Swollman 65430711Swollman vp = ZTOV(zp); 65519878Swollman 65619878Swollman zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); 65719878Swollman 65819878Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); 65919878Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8); 66019878Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 66119878Swollman &zp->z_size, 8); 6622742Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 66330711Swollman &zp->z_links, 8); 66430711Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 66519878Swollman &zp->z_pflags, 8); 6662742Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); 66719878Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 6682742Swollman &zp->z_atime, 16); 6692742Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 67030711Swollman &zp->z_uid, 8); 67130711Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, 67230711Swollman &zp->z_gid, 8); 67330711Swollman 67430711Swollman if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) { 67530711Swollman if (hdl == NULL) 67630711Swollman sa_handle_destroy(zp->z_sa_hdl); 67719878Swollman zfs_vnode_forget(vp); 67819878Swollman zp->z_vnode = NULL; 6792742Swollman kmem_cache_free(znode_cache, zp); 6802742Swollman return (NULL); 681114173Swollman } 6822742Swollman 6832742Swollman zp->z_mode = mode; 68458787Sru 68558787Sru vp->v_type = IFTOVT((mode_t)mode); 68658787Sru 68758787Sru switch (vp->v_type) { 68858787Sru case VDIR: 68958787Sru zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 6902742Swollman break; 69119878Swollman#ifdef sun 6922742Swollman case VBLK: 69319878Swollman case VCHR: 6942742Swollman { 6952742Swollman uint64_t rdev; 6962742Swollman VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), 6972742Swollman &rdev, sizeof (rdev)) == 0); 69819878Swollman 6992742Swollman vp->v_rdev = zfs_cmpldev(rdev); 70019878Swollman } 7012742Swollman break; 70219878Swollman#endif /* sun */ 70320094Swollman case VFIFO: 70458787Sru#ifdef sun 70558787Sru case VSOCK: 7062742Swollman case VDOOR: 7072742Swollman#endif /* sun */ 708114173Swollman vp->v_op = &zfs_fifoops; 7092742Swollman break; 7102742Swollman case VREG: 7112742Swollman if (parent == zfsvfs->z_shares_dir) { 71219878Swollman ASSERT(zp->z_uid == 0 && zp->z_gid == 0); 7132742Swollman vp->v_op = &zfs_shareops; 71419878Swollman } 7152742Swollman break; 71619878Swollman#ifdef sun 71719878Swollman case VLNK: 71819878Swollman vn_setops(vp, zfs_symvnodeops); 7192742Swollman break; 7202742Swollman default: 72119878Swollman vn_setops(vp, zfs_evnodeops); 72219878Swollman break; 72319878Swollman#endif /* sun */ 72419878Swollman } 7252742Swollman 72643014Swollman mutex_enter(&zfsvfs->z_znodes_lock); 7272742Swollman list_insert_tail(&zfsvfs->z_all_znodes, zp); 72819878Swollman membar_producer(); 7292742Swollman /* 73019878Swollman * Everything else must be valid before assigning z_zfsvfs makes the 73119878Swollman * znode eligible for zfs_znode_move(). 7322742Swollman */ 73319878Swollman zp->z_zfsvfs = zfsvfs; 7342742Swollman mutex_exit(&zfsvfs->z_znodes_lock); 73519878Swollman 7362742Swollman /* 73719878Swollman * Acquire vnode lock before making it available to the world. 7382742Swollman */ 7392742Swollman vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 7402742Swollman VN_LOCK_AREC(vp); 7412742Swollman if (vp->v_type != VFIFO) 7422742Swollman VN_LOCK_ASHARE(vp); 7432742Swollman 7442742Swollman VFS_HOLD(zfsvfs->z_vfs); 74519878Swollman return (zp); 74619878Swollman} 74719878Swollman 74819878Swollmanstatic uint64_t empty_xattr; 74919878Swollmanstatic uint64_t pad[4]; 7502742Swollmanstatic zfs_acl_phys_t acl_phys; 7519908Swollman/* 75219878Swollman * Create a new DMU object to hold a zfs znode. 75320094Swollman * 75420094Swollman * IN: dzp - parent directory for new znode 75520094Swollman * vap - file attributes for new znode 75620094Swollman * tx - dmu transaction id for zap operations 75720094Swollman * cr - credentials of caller 75820094Swollman * flag - flags: 75920094Swollman * IS_ROOT_NODE - new object will be root 76020094Swollman * IS_XATTR - new object is an attribute 76186222Swollman * bonuslen - length of bonus buffer 76286222Swollman * setaclp - File/Dir initial ACL 76386222Swollman * fuidp - Tracks fuid allocation. 76420094Swollman * 76586222Swollman * OUT: zpp - allocated znode 76686222Swollman * 76786222Swollman */ 76886222Swollmanvoid 76986222Swollmanzfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, 77086222Swollman uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) 77186222Swollman{ 77286222Swollman uint64_t crtime[2], atime[2], mtime[2], ctime[2]; 77386222Swollman uint64_t mode, size, links, parent, pflags; 77486222Swollman uint64_t dzp_pflags = 0; 77586222Swollman uint64_t rdev = 0; 77693799Swollman zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 777114173Swollman dmu_buf_t *db; 77893799Swollman timestruc_t now; 77993799Swollman uint64_t gen, obj; 78093799Swollman int err; 78193799Swollman int bonuslen; 78293799Swollman sa_handle_t *sa_hdl; 78393799Swollman dmu_object_type_t obj_type; 78493799Swollman sa_bulk_attr_t sa_attrs[ZPL_END]; 78593799Swollman int cnt = 0; 78693799Swollman zfs_acl_locator_cb_t locate = { 0 }; 78793799Swollman 78893799Swollman ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 78993799Swollman 79093799Swollman if (zfsvfs->z_replay) { 79193799Swollman obj = vap->va_nodeid; 79293799Swollman now = vap->va_ctime; /* see zfs_replay_create() */ 79393799Swollman gen = vap->va_nblocks; /* ditto */ 79493799Swollman } else { 79593799Swollman obj = 0; 79693799Swollman gethrestime(&now); 79793799Swollman gen = dmu_tx_get_txg(tx); 79893799Swollman } 79993799Swollman 80093799Swollman obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; 80193799Swollman bonuslen = (obj_type == DMU_OT_SA) ? 80293799Swollman DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE; 80393799Swollman 80493799Swollman /* 80593799Swollman * Create a new DMU object. 80693799Swollman */ 80793799Swollman /* 80893799Swollman * There's currently no mechanism for pre-reading the blocks that will 80993799Swollman * be needed to allocate a new object, so we accept the small chance 81093799Swollman * that there will be an i/o error and we will fail one of the 81193799Swollman * assertions below. 81293799Swollman */ 81386222Swollman if (vap->va_type == VDIR) { 81486222Swollman if (zfsvfs->z_replay) { 81593799Swollman VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj, 81693799Swollman zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 81786222Swollman obj_type, bonuslen, tx)); 81820094Swollman } else { 81993799Swollman obj = zap_create_norm(zfsvfs->z_os, 82093799Swollman zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 82193799Swollman obj_type, bonuslen, tx); 82293799Swollman } 82320094Swollman } else { 82420094Swollman if (zfsvfs->z_replay) { 82586222Swollman VERIFY0(dmu_object_claim(zfsvfs->z_os, obj, 82686222Swollman DMU_OT_PLAIN_FILE_CONTENTS, 0, 82786222Swollman obj_type, bonuslen, tx)); 82886222Swollman } else { 82967578Swollman obj = dmu_object_alloc(zfsvfs->z_os, 83019878Swollman DMU_OT_PLAIN_FILE_CONTENTS, 0, 83119878Swollman obj_type, bonuslen, tx); 83220094Swollman } 83320094Swollman } 8342742Swollman 83520094Swollman ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 83667578Swollman VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); 83720094Swollman 8382742Swollman /* 8392742Swollman * If this is the root, fix up the half-initialized parent pointer 8409908Swollman * to reference the just-allocated physical data area. 8419908Swollman */ 8429908Swollman if (flag & IS_ROOT_NODE) { 8439908Swollman dzp->z_id = obj; 84420094Swollman } else { 84520094Swollman dzp_pflags = dzp->z_pflags; 84620094Swollman } 84720094Swollman 84820094Swollman /* 84920094Swollman * If parent is an xattr, so am I. 85020094Swollman */ 85120094Swollman if (dzp_pflags & ZFS_XATTR) { 85220094Swollman flag |= IS_XATTR; 85320094Swollman } 85443543Swollman 85543543Swollman if (zfsvfs->z_use_fuids) 85643543Swollman pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; 85743543Swollman else 85843543Swollman pflags = 0; 85943543Swollman 86043543Swollman if (vap->va_type == VDIR) { 86143543Swollman size = 2; /* contents ("." and "..") */ 86243543Swollman links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; 86343543Swollman } else { 86443543Swollman size = links = 0; 86543543Swollman } 86658787Sru 86758787Sru if (vap->va_type == VBLK || vap->va_type == VCHR) { 86858787Sru rdev = zfs_expldev(vap->va_rdev); 86958787Sru } 87058787Sru 87158787Sru parent = dzp->z_id; 87258787Sru mode = acl_ids->z_mode; 87375267Swollman if (flag & IS_XATTR) 87458787Sru pflags |= ZFS_XATTR; 87558787Sru 87658787Sru /* 87758787Sru * No execs denied will be deterimed when zfs_mode_compute() is called. 87858787Sru */ 87958787Sru pflags |= acl_ids->z_aclp->z_hints & 88093799Swollman (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| 88193799Swollman ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); 88293799Swollman 88393799Swollman ZFS_TIME_ENCODE(&now, crtime); 88493799Swollman ZFS_TIME_ENCODE(&now, ctime); 88593799Swollman 88693799Swollman if (vap->va_mask & AT_ATIME) { 8872742Swollman ZFS_TIME_ENCODE(&vap->va_atime, atime); 8882742Swollman } else { 88919878Swollman ZFS_TIME_ENCODE(&now, atime); 89019878Swollman } 89119878Swollman 8922742Swollman if (vap->va_mask & AT_MTIME) { 89320094Swollman ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 89419878Swollman } else { 89520094Swollman ZFS_TIME_ENCODE(&now, mtime); 89619878Swollman } 89743543Swollman 89858787Sru /* Now add in all of the "SA" attributes */ 89993799Swollman VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, 90093799Swollman &sa_hdl)); 9012742Swollman 9022742Swollman /* 9039908Swollman * Setup the array of attributes to be replaced/set on the new file 9049908Swollman * 9059908Swollman * order for DMU_OT_ZNODE is critical since it needs to be constructed 9069908Swollman * in the old znode_phys_t format. Don't change this ordering 9079908Swollman */ 9089908Swollman 9099908Swollman if (obj_type == DMU_OT_ZNODE) { 9109908Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 9119908Swollman NULL, &atime, 16); 9122742Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 91319878Swollman NULL, &mtime, 16); 9142742Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 9152742Swollman NULL, &ctime, 16); 9162742Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 91719878Swollman NULL, &crtime, 16); 91819878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 91919878Swollman NULL, &gen, 8); 9202742Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 9212742Swollman NULL, &mode, 8); 92275267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 92375267Swollman NULL, &size, 8); 92419878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 92575267Swollman NULL, &parent, 8); 92675267Swollman } else { 92775267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 92875267Swollman NULL, &mode, 8); 92975267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 93075267Swollman NULL, &size, 8); 93175267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 93275267Swollman NULL, &gen, 8); 93375267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, 93475267Swollman &acl_ids->z_fuid, 8); 93575267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, 9362742Swollman &acl_ids->z_fgid, 8); 9372742Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 93819878Swollman NULL, &parent, 8); 93943014Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 94019878Swollman NULL, &pflags, 8); 94119878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 94219878Swollman NULL, &atime, 16); 94319878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 9442742Swollman NULL, &mtime, 16); 94519878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 9462742Swollman NULL, &ctime, 16); 94719878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 94843543Swollman NULL, &crtime, 16); 94943543Swollman } 95043543Swollman 9512742Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); 95219878Swollman 95319878Swollman if (obj_type == DMU_OT_ZNODE) { 95419878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, 95519878Swollman &empty_xattr, 8); 95619878Swollman } 95719878Swollman if (obj_type == DMU_OT_ZNODE || 95819878Swollman (vap->va_type == VBLK || vap->va_type == VCHR)) { 95919878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), 96019878Swollman NULL, &rdev, 8); 96119878Swollman 96219878Swollman } 96319878Swollman if (obj_type == DMU_OT_ZNODE) { 96419878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 96519878Swollman NULL, &pflags, 8); 96619878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, 96719878Swollman &acl_ids->z_fuid, 8); 96819878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, 9692742Swollman &acl_ids->z_fgid, 8); 97019878Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, 97175267Swollman sizeof (uint64_t) * 4); 97275267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, 97375267Swollman &acl_phys, sizeof (zfs_acl_phys_t)); 97475267Swollman } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { 97575267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, 97675267Swollman &acl_ids->z_aclp->z_acl_count, 8); 97775267Swollman locate.cb_aclp = acl_ids->z_aclp; 97875267Swollman SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), 97943014Swollman zfs_acl_data_locator, &locate, 98043014Swollman acl_ids->z_aclp->z_acl_bytes); 98143014Swollman mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, 98243014Swollman acl_ids->z_fuid, acl_ids->z_fgid); 98343014Swollman } 98443014Swollman 98543014Swollman VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); 98619878Swollman 98743014Swollman if (!(flag & IS_ROOT_NODE)) { 98843014Swollman *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); 98943014Swollman ASSERT(*zpp != NULL); 99019878Swollman } else { 99143014Swollman /* 9922742Swollman * If we are creating the root node, the "parent" we 99343014Swollman * passed in is the znode for the root. 99443014Swollman */ 99543014Swollman *zpp = dzp; 99643014Swollman 99714343Swollman (*zpp)->z_sa_hdl = sa_hdl; 99814343Swollman } 99914343Swollman 100014343Swollman (*zpp)->z_pflags = pflags; 10012742Swollman (*zpp)->z_mode = mode; 100243014Swollman 100375267Swollman if (vap->va_mask & AT_XVATTR) 100475267Swollman zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); 100543014Swollman 100675267Swollman if (obj_type == DMU_OT_ZNODE || 100775267Swollman acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { 100819878Swollman VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); 100943014Swollman } 101019878Swollman if (!(flag & IS_ROOT_NODE)) { 101119878Swollman vnode_t *vp; 10122742Swollman 101319878Swollman vp = ZTOV(*zpp); 10148049Swollman vp->v_vflag |= VV_FORCEINSMQ; 101543014Swollman err = insmntque(vp, zfsvfs->z_vfs); 101643014Swollman vp->v_vflag &= ~VV_FORCEINSMQ; 101743014Swollman KASSERT(err == 0, ("insmntque() failed: error %d", err)); 1018114173Swollman } 101943014Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 1020114173Swollman} 1021114173Swollman 1022114173Swollman/* 1023114173Swollman * Update in-core attributes. It is assumed the caller will be doing an 1024114173Swollman * sa_bulk_update to push the changes out. 1025114173Swollman */ 1026114173Swollmanvoid 1027114173Swollmanzfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) 1028114173Swollman{ 1029114173Swollman xoptattr_t *xoap; 1030114173Swollman 1031114173Swollman xoap = xva_getxoptattr(xvap); 10322742Swollman ASSERT(xoap); 103319878Swollman 1034114173Swollman if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 1035114173Swollman uint64_t times[2]; 10362742Swollman ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); 103719878Swollman (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), 103843014Swollman ×, sizeof (times), tx); 103943014Swollman XVA_SET_RTN(xvap, XAT_CREATETIME); 104019878Swollman } 104143014Swollman if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 104219878Swollman ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, 104319878Swollman zp->z_pflags, tx); 104419878Swollman XVA_SET_RTN(xvap, XAT_READONLY); 10452742Swollman } 10462742Swollman if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 104719878Swollman ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, 104819878Swollman zp->z_pflags, tx); 104919878Swollman XVA_SET_RTN(xvap, XAT_HIDDEN); 10502742Swollman } 10512742Swollman if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 10522742Swollman ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, 10532742Swollman zp->z_pflags, tx); 10542742Swollman XVA_SET_RTN(xvap, XAT_SYSTEM); 105519878Swollman } 105619878Swollman if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 10572742Swollman ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, 10582742Swollman zp->z_pflags, tx); 10592742Swollman XVA_SET_RTN(xvap, XAT_ARCHIVE); 10602742Swollman } 106119878Swollman if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 10622742Swollman ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, 10632742Swollman zp->z_pflags, tx); 106419878Swollman XVA_SET_RTN(xvap, XAT_IMMUTABLE); 10652742Swollman } 10662742Swollman if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 106719878Swollman ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, 10682742Swollman zp->z_pflags, tx); 10692742Swollman XVA_SET_RTN(xvap, XAT_NOUNLINK); 107019878Swollman } 10712742Swollman if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 107219878Swollman ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, 10732742Swollman zp->z_pflags, tx); 107419878Swollman XVA_SET_RTN(xvap, XAT_APPENDONLY); 10752742Swollman } 107619878Swollman if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 10772742Swollman ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, 10782742Swollman zp->z_pflags, tx); 107919878Swollman XVA_SET_RTN(xvap, XAT_NODUMP); 10802742Swollman } 108119878Swollman if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 10822742Swollman ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, 10832742Swollman zp->z_pflags, tx); 10842742Swollman XVA_SET_RTN(xvap, XAT_OPAQUE); 108519878Swollman } 108619878Swollman if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 108719878Swollman ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, 108819878Swollman xoap->xoa_av_quarantined, zp->z_pflags, tx); 108919878Swollman XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 109021217Swollman } 109119878Swollman if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 10922742Swollman ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, 10932742Swollman zp->z_pflags, tx); 10942742Swollman XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 109519878Swollman } 109617200Swollman if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 109719878Swollman zfs_sa_set_scanstamp(zp, xvap, tx); 10982742Swollman XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 109919878Swollman } 11002742Swollman if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 110119878Swollman ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, 11022742Swollman zp->z_pflags, tx); 110319878Swollman XVA_SET_RTN(xvap, XAT_REPARSE); 11042742Swollman } 110519878Swollman if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 110619878Swollman ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, 11072742Swollman zp->z_pflags, tx); 110819878Swollman XVA_SET_RTN(xvap, XAT_OFFLINE); 11092742Swollman } 111019878Swollman if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 11112742Swollman ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, 111219878Swollman zp->z_pflags, tx); 11132742Swollman XVA_SET_RTN(xvap, XAT_SPARSE); 111419878Swollman } 11152742Swollman} 11162742Swollman 111719878Swollmanint 111819878Swollmanzfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 111919878Swollman{ 112019878Swollman dmu_object_info_t doi; 112119878Swollman dmu_buf_t *db; 11222742Swollman znode_t *zp; 11232742Swollman vnode_t *vp; 11242742Swollman sa_handle_t *hdl; 112519878Swollman struct thread *td; 11262742Swollman int locked; 11272742Swollman int err; 112819878Swollman 11292742Swollman td = curthread; 11302742Swollman getnewvnode_reserve(1); 11312742Swollmanagain: 11322742Swollman *zpp = NULL; 11332742Swollman ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 11342742Swollman 11352742Swollman err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 11362742Swollman if (err) { 11372742Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 11382742Swollman getnewvnode_drop_reserve(); 11392742Swollman return (err); 11402742Swollman } 11412742Swollman 114219878Swollman dmu_object_info_from_db(db, &doi); 11432742Swollman if (doi.doi_bonus_type != DMU_OT_SA && 11442742Swollman (doi.doi_bonus_type != DMU_OT_ZNODE || 11452742Swollman (doi.doi_bonus_type == DMU_OT_ZNODE && 11462742Swollman doi.doi_bonus_size < sizeof (znode_phys_t)))) { 11472742Swollman sa_buf_rele(db, NULL); 11482742Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 11492742Swollman#ifdef __FreeBSD__ 115019878Swollman getnewvnode_drop_reserve(); 11512742Swollman#endif 115258787Sru return (SET_ERROR(EINVAL)); 11532742Swollman } 11542742Swollman 115558787Sru hdl = dmu_buf_get_user(db); 11562742Swollman if (hdl != NULL) { 11572742Swollman zp = sa_get_userdata(hdl); 115819878Swollman 115919878Swollman 116019878Swollman /* 116119878Swollman * Since "SA" does immediate eviction we 116219878Swollman * should never find a sa handle that doesn't 116319878Swollman * know about the znode. 116419878Swollman */ 116519878Swollman 116619878Swollman ASSERT3P(zp, !=, NULL); 116719878Swollman 116819878Swollman mutex_enter(&zp->z_lock); 11692742Swollman ASSERT3U(zp->z_id, ==, obj_num); 117019878Swollman if (zp->z_unlinked) { 117119878Swollman err = SET_ERROR(ENOENT); 11722742Swollman } else { 117319878Swollman vp = ZTOV(zp); 11742742Swollman *zpp = zp; 117519878Swollman err = 0; 117619878Swollman } 117719878Swollman sa_buf_rele(db, NULL); 11782742Swollman 11792742Swollman /* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */ 118019878Swollman if (err == 0) 118119878Swollman VN_HOLD(vp); 11822742Swollman 11832742Swollman mutex_exit(&zp->z_lock); 11842742Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 118519878Swollman 118675267Swollman if (err == 0) { 118775267Swollman locked = VOP_ISLOCKED(vp); 118886222Swollman VI_LOCK(vp); 118975267Swollman if ((vp->v_iflag & VI_DOOMED) != 0 && 119075267Swollman locked != LK_EXCLUSIVE) { 119175267Swollman /* 119275267Swollman * The vnode is doomed and this thread doesn't 119317200Swollman * hold the exclusive lock on it, so the vnode 119417200Swollman * must be being reclaimed by another thread. 119575267Swollman * Otherwise the doomed vnode is being reclaimed 119643014Swollman * by this thread and zfs_zget is called from 119743014Swollman * ZIL internals. 119843014Swollman */ 119943014Swollman VI_UNLOCK(vp); 120017200Swollman VN_RELE(vp); 120117200Swollman goto again; 120217200Swollman } 120317200Swollman VI_UNLOCK(vp); 120417200Swollman } 120517200Swollman getnewvnode_drop_reserve(); 120617200Swollman return (err); 120717200Swollman } 120817200Swollman 120917200Swollman /* 121017200Swollman * Not found create new znode/vnode 121117200Swollman * but only if file exists. 121219878Swollman * 121317200Swollman * There is a small window where zfs_vget() could 121417200Swollman * find this object while a file create is still in 121517200Swollman * progress. This is checked for in zfs_znode_alloc() 121617200Swollman * 121717200Swollman * if zfs_znode_alloc() fails it will drop the hold on the 12182742Swollman * bonus buffer. 121919878Swollman */ 122017200Swollman zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, 122119878Swollman doi.doi_bonus_type, NULL); 12222742Swollman if (zp == NULL) { 122319878Swollman err = SET_ERROR(ENOENT); 12242742Swollman } else { 122519878Swollman *zpp = zp; 122619878Swollman } 122717200Swollman if (err == 0) { 122819878Swollman vnode_t *vp = ZTOV(zp); 122917200Swollman 123019878Swollman err = insmntque(vp, zfsvfs->z_vfs); 123117200Swollman if (err == 0) { 123219878Swollman vp->v_hash = obj_num; 12332742Swollman VOP_UNLOCK(vp, 0); 123419878Swollman } else { 12352742Swollman zp->z_vnode = NULL; 123619878Swollman zfs_znode_dmu_fini(zp); 12372742Swollman zfs_znode_free(zp); 123819878Swollman *zpp = NULL; 12392742Swollman } 124019878Swollman } 124119878Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 12422742Swollman getnewvnode_drop_reserve(); 124319878Swollman return (err); 12442742Swollman} 12452742Swollman 124619878Swollmanint 12472742Swollmanzfs_rezget(znode_t *zp) 124819878Swollman{ 124919878Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 12502742Swollman dmu_object_info_t doi; 125119878Swollman dmu_buf_t *db; 125219878Swollman vnode_t *vp; 12532742Swollman uint64_t obj_num = zp->z_id; 12542742Swollman uint64_t mode, size; 12552742Swollman sa_bulk_attr_t bulk[8]; 12562742Swollman int err; 125719878Swollman int count = 0; 125819878Swollman uint64_t gen; 125919878Swollman 126019878Swollman ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 126119878Swollman 12622742Swollman mutex_enter(&zp->z_acl_lock); 12639908Swollman if (zp->z_acl_cached) { 12649908Swollman zfs_acl_free(zp->z_acl_cached); 12659908Swollman zp->z_acl_cached = NULL; 12662742Swollman } 126743014Swollman 126843014Swollman mutex_exit(&zp->z_acl_lock); 126943014Swollman ASSERT(zp->z_sa_hdl == NULL); 127043014Swollman err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 127143014Swollman if (err) { 127243014Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 127343014Swollman return (err); 127443014Swollman } 127543014Swollman 127643014Swollman dmu_object_info_from_db(db, &doi); 127743014Swollman if (doi.doi_bonus_type != DMU_OT_SA && 127843014Swollman (doi.doi_bonus_type != DMU_OT_ZNODE || 127943014Swollman (doi.doi_bonus_type == DMU_OT_ZNODE && 128043014Swollman doi.doi_bonus_size < sizeof (znode_phys_t)))) { 128143014Swollman sa_buf_rele(db, NULL); 128243014Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 128343014Swollman return (SET_ERROR(EINVAL)); 128443014Swollman } 128543014Swollman 128643014Swollman zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); 128743014Swollman size = zp->z_size; 128843014Swollman 128943014Swollman /* reload cached values */ 129043014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, 129143014Swollman &gen, sizeof (gen)); 129243014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 129343014Swollman &zp->z_size, sizeof (zp->z_size)); 129443014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 129543014Swollman &zp->z_links, sizeof (zp->z_links)); 129643014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 129743014Swollman &zp->z_pflags, sizeof (zp->z_pflags)); 129843014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 129943014Swollman &zp->z_atime, sizeof (zp->z_atime)); 130043014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 130143014Swollman &zp->z_uid, sizeof (zp->z_uid)); 130243014Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, 130358787Sru &zp->z_gid, sizeof (zp->z_gid)); 130458787Sru SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 130558787Sru &mode, sizeof (mode)); 130658787Sru 130758787Sru if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { 130858787Sru zfs_znode_dmu_fini(zp); 130975267Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 131075267Swollman return (SET_ERROR(EIO)); 131175267Swollman } 131275267Swollman 131375267Swollman zp->z_mode = mode; 131475267Swollman 131575267Swollman if (gen != zp->z_gen) { 131675267Swollman zfs_znode_dmu_fini(zp); 131775267Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 131875267Swollman return (SET_ERROR(EIO)); 131975267Swollman } 132075267Swollman 132175267Swollman /* 132275267Swollman * XXXPJD: Not sure how is that possible, but under heavy 132320094Swollman * zfs recv -F load it happens that z_gen is the same, but 132443014Swollman * vnode type is different than znode type. This would mean 132543014Swollman * that for example regular file was replaced with directory 13262742Swollman * which has the same object number. 13272742Swollman */ 132819878Swollman vp = ZTOV(zp); 132919878Swollman if (vp != NULL && 133019878Swollman vp->v_type != IFTOVT((mode_t)zp->z_mode)) { 133119878Swollman zfs_znode_dmu_fini(zp); 133219878Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 13332742Swollman return (EIO); 133420094Swollman } 133543014Swollman 133643014Swollman zp->z_unlinked = (zp->z_links == 0); 133743014Swollman zp->z_blksz = doi.doi_data_block_size; 133843014Swollman if (vp != NULL) { 133958787Sru vn_pages_remove(vp, 0, 0); 1340121098Swollman if (zp->z_size != size) 134175267Swollman vnode_pager_setsize(vp, zp->z_size); 13422742Swollman } 13432742Swollman 13442742Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 13452742Swollman 134619878Swollman return (0); 134719878Swollman} 13482742Swollman 13492742Swollmanvoid 135058787Sruzfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 135158787Sru{ 135258787Sru zfsvfs_t *zfsvfs = zp->z_zfsvfs; 135358787Sru objset_t *os = zfsvfs->z_os; 135458787Sru uint64_t obj = zp->z_id; 135558787Sru uint64_t acl_obj = zfs_external_acl(zp); 135658787Sru 135758787Sru ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 135858787Sru if (acl_obj) { 135958787Sru VERIFY(!zp->z_is_sa); 136058787Sru VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 136158787Sru } 136258787Sru VERIFY(0 == dmu_object_free(os, obj, tx)); 136358787Sru zfs_znode_dmu_fini(zp); 136458787Sru ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 136558787Sru zfs_znode_free(zp); 136675267Swollman} 136758787Sru 136864499Swollmanvoid 136964499Swollmanzfs_zinactive(znode_t *zp) 137064499Swollman{ 137164499Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1372114173Swollman uint64_t z_id = zp->z_id; 1373114173Swollman 1374114173Swollman ASSERT(zp->z_sa_hdl); 1375114173Swollman 1376114173Swollman /* 1377114173Swollman * Don't allow a zfs_zget() while were trying to release this znode 1378114173Swollman */ 1379114173Swollman ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 1380114173Swollman 13812742Swollman mutex_enter(&zp->z_lock); 13822742Swollman 138319878Swollman /* 138419878Swollman * If this was the last reference to a file with no links, 138519878Swollman * remove the file from the file system. 13862742Swollman */ 138719878Swollman if (zp->z_unlinked) { 138820094Swollman mutex_exit(&zp->z_lock); 138919878Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 139020094Swollman zfs_rmnode(zp); 139119878Swollman return; 139243014Swollman } 139343014Swollman 139458787Sru mutex_exit(&zp->z_lock); 1395114173Swollman zfs_znode_dmu_fini(zp); 1396114173Swollman ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 13972742Swollman zfs_znode_free(zp); 13982742Swollman} 13992742Swollman 14002742Swollmanvoid 140119878Swollmanzfs_znode_free(znode_t *zp) 14022742Swollman{ 140319878Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 14042742Swollman 140519878Swollman ASSERT(zp->z_sa_hdl == NULL); 14062742Swollman zp->z_vnode = NULL; 140719878Swollman mutex_enter(&zfsvfs->z_znodes_lock); 14082742Swollman POINTER_INVALIDATE(&zp->z_zfsvfs); 140919878Swollman list_remove(&zfsvfs->z_all_znodes, zp); 14102742Swollman mutex_exit(&zfsvfs->z_znodes_lock); 141119878Swollman 14122742Swollman if (zp->z_acl_cached) { 141319878Swollman zfs_acl_free(zp->z_acl_cached); 14142742Swollman zp->z_acl_cached = NULL; 141519878Swollman } 14162742Swollman 141719878Swollman kmem_cache_free(znode_cache, zp); 14182742Swollman 141919878Swollman VFS_RELE(zfsvfs->z_vfs); 142019878Swollman} 142119878Swollman 142219878Swollmanvoid 142319878Swollmanzfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], 14242742Swollman uint64_t ctime[2], boolean_t have_tx) 14252742Swollman{ 142619878Swollman timestruc_t now; 142719878Swollman 142819878Swollman gethrestime(&now); 142919878Swollman 143019878Swollman if (have_tx) { /* will sa_bulk_update happen really soon? */ 143119878Swollman zp->z_atime_dirty = 0; 14322742Swollman zp->z_seq++; 14332742Swollman } else { 1434114173Swollman zp->z_atime_dirty = 1; 14352742Swollman } 14362742Swollman 14372742Swollman if (flag & AT_ATIME) { 143819878Swollman ZFS_TIME_ENCODE(&now, zp->z_atime); 14392742Swollman } 144019878Swollman 14412742Swollman if (flag & AT_MTIME) { 144219878Swollman ZFS_TIME_ENCODE(&now, mtime); 14432742Swollman if (zp->z_zfsvfs->z_use_fuids) { 144419878Swollman zp->z_pflags |= (ZFS_ARCHIVE | 14452742Swollman ZFS_AV_MODIFIED); 14462742Swollman } 144719878Swollman } 144819878Swollman 144919878Swollman if (flag & AT_CTIME) { 145019878Swollman ZFS_TIME_ENCODE(&now, ctime); 145119878Swollman if (zp->z_zfsvfs->z_use_fuids) 14522742Swollman zp->z_pflags |= ZFS_ARCHIVE; 14532742Swollman } 145475267Swollman} 145575267Swollman 145675267Swollman/* 145775267Swollman * Grow the block size for a file. 145875267Swollman * 145975267Swollman * IN: zp - znode of file to free data in. 146075267Swollman * size - requested block size 146175267Swollman * tx - open transaction. 146275267Swollman * 146375267Swollman * NOTE: this function assumes that the znode is write locked. 146475267Swollman */ 146575267Swollmanvoid 14662742Swollmanzfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 146758787Sru{ 146858787Sru int error; 146958787Sru u_longlong_t dummy; 147058787Sru 147158787Sru if (size <= zp->z_blksz) 147258787Sru return; 147358787Sru /* 147458787Sru * If the file size is already greater than the current blocksize, 147558787Sru * we will not grow. If there is more than one block in a file, 147658787Sru * the blocksize cannot change. 147758787Sru */ 147858787Sru if (zp->z_blksz && zp->z_size > zp->z_blksz) 147958787Sru return; 14802742Swollman 14812742Swollman error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 148214343Swollman size, 0, tx); 14832742Swollman 14842742Swollman if (error == ENOTSUP) 148514343Swollman return; 148619878Swollman ASSERT0(error); 148719878Swollman 148819878Swollman /* What blocksize did we actually get? */ 14892742Swollman dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); 14902742Swollman} 149186222Swollman 149214343Swollman#ifdef sun 149314343Swollman/* 149486222Swollman * This is a dummy interface used when pvn_vplist_dirty() should *not* 149586222Swollman * be calling back into the fs for a putpage(). E.g.: when truncating 149686222Swollman * a file, the pages being "thrown away* don't need to be written out. 149786222Swollman */ 149886222Swollman/* ARGSUSED */ 149986222Swollmanstatic int 150086222Swollmanzfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 150186222Swollman int flags, cred_t *cr) 150286222Swollman{ 150386222Swollman ASSERT(0); 150486222Swollman return (0); 150586222Swollman} 150686222Swollman#endif /* sun */ 150786222Swollman 150886222Swollman/* 150986222Swollman * Increase the file length 151086222Swollman * 151186222Swollman * IN: zp - znode of file to free data in. 151286222Swollman * end - new end-of-file 151386222Swollman * 151486222Swollman * RETURN: 0 on success, error code on failure 151586222Swollman */ 151686222Swollmanstatic int 151786222Swollmanzfs_extend(znode_t *zp, uint64_t end) 151886222Swollman{ 151986222Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 152086222Swollman dmu_tx_t *tx; 152186222Swollman rl_t *rl; 152286222Swollman uint64_t newblksz; 152386222Swollman int error; 152486222Swollman 152586222Swollman /* 152686222Swollman * We will change zp_size, lock the whole file. 152786222Swollman */ 152875267Swollman rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 152986222Swollman 15302742Swollman /* 153175267Swollman * Nothing to do if file already at desired length. 153275267Swollman */ 15332742Swollman if (end <= zp->z_size) { 15342742Swollman zfs_range_unlock(rl); 15352742Swollman return (0); 153686222Swollman } 153775267Swollman tx = dmu_tx_create(zfsvfs->z_os); 153819878Swollman dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 153975267Swollman zfs_sa_upgrade_txholds(tx, zp); 154075267Swollman if (end > zp->z_blksz && 154175267Swollman (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 154286222Swollman /* 154386222Swollman * We are growing the file past the current block size. 15442742Swollman */ 15452742Swollman if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 15462742Swollman ASSERT(!ISP2(zp->z_blksz)); 15472742Swollman newblksz = MIN(end, SPA_MAXBLOCKSIZE); 154819878Swollman } else { 154919878Swollman newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 155019878Swollman } 155119878Swollman dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 155275267Swollman } else { 155386222Swollman newblksz = 0; 155486222Swollman } 155586222Swollman 15562742Swollman error = dmu_tx_assign(tx, TXG_WAIT); 155786222Swollman if (error) { 155886222Swollman dmu_tx_abort(tx); 155986222Swollman zfs_range_unlock(rl); 156019878Swollman return (error); 156119878Swollman } 156219878Swollman 15632742Swollman if (newblksz) 15642742Swollman zfs_grow_blocksize(zp, newblksz, tx); 15652742Swollman 15662742Swollman zp->z_size = end; 156719878Swollman 15682742Swollman VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), 156943543Swollman &zp->z_size, sizeof (zp->z_size), tx)); 157043543Swollman 157119878Swollman vnode_pager_setsize(ZTOV(zp), end); 15722742Swollman 157319878Swollman zfs_range_unlock(rl); 15742742Swollman 157519878Swollman dmu_tx_commit(tx); 15762742Swollman 157786222Swollman return (0); 157819878Swollman} 157919878Swollman 158019878Swollman/* 158119878Swollman * Free space in a file. 158243014Swollman * 158343014Swollman * IN: zp - znode of file to free data in. 158486222Swollman * off - start of section to free. 158586222Swollman * len - length of section to free. 158686222Swollman * 158786222Swollman * RETURN: 0 on success, error code on failure 158886222Swollman */ 158986222Swollmanstatic int 159086222Swollmanzfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 159186222Swollman{ 159286222Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 159386222Swollman rl_t *rl; 159486222Swollman int error; 159586222Swollman 159686222Swollman /* 159786222Swollman * Lock the range being freed. 159886222Swollman */ 159986222Swollman rl = zfs_range_lock(zp, off, len, RL_WRITER); 160086222Swollman 160186222Swollman /* 160286222Swollman * Nothing to do if file already at desired length. 160386222Swollman */ 160486222Swollman if (off >= zp->z_size) { 160586222Swollman zfs_range_unlock(rl); 160686222Swollman return (0); 160786222Swollman } 160886222Swollman 160986222Swollman if (off + len > zp->z_size) 161086222Swollman len = zp->z_size - off; 161186222Swollman 161286222Swollman error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 161386222Swollman 161486222Swollman if (error == 0) { 161586222Swollman /* 161686222Swollman * In FreeBSD we cannot free block in the middle of a file, 161786222Swollman * but only at the end of a file, so this code path should 161886222Swollman * never happen. 161986222Swollman */ 162086222Swollman vnode_pager_setsize(ZTOV(zp), off); 162186222Swollman } 162286222Swollman 162386222Swollman zfs_range_unlock(rl); 162486222Swollman 162586222Swollman return (error); 162686222Swollman} 162786222Swollman 162886222Swollman/* 162986222Swollman * Truncate a file 163086222Swollman * 163119878Swollman * IN: zp - znode of file to free data in. 163286222Swollman * end - new end-of-file. 16332742Swollman * 16342742Swollman * RETURN: 0 on success, error code on failure 16352742Swollman */ 16362742Swollmanstatic int 163719878Swollmanzfs_trunc(znode_t *zp, uint64_t end) 16382742Swollman{ 16392742Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 16402742Swollman vnode_t *vp = ZTOV(zp); 164119878Swollman dmu_tx_t *tx; 16422742Swollman rl_t *rl; 164319878Swollman int error; 16442742Swollman sa_bulk_attr_t bulk[2]; 164519878Swollman int count = 0; 16462742Swollman 164719878Swollman /* 16482742Swollman * We will change zp_size, lock the whole file. 164919878Swollman */ 16502742Swollman rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 165119878Swollman 165219878Swollman /* 16532742Swollman * Nothing to do if file already at desired length. 165419878Swollman */ 165519878Swollman if (end >= zp->z_size) { 16562742Swollman zfs_range_unlock(rl); 16572742Swollman return (0); 16582742Swollman } 165919878Swollman 166019878Swollman error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); 166119878Swollman if (error) { 166219878Swollman zfs_range_unlock(rl); 166319878Swollman return (error); 166419878Swollman } 166558787Sru tx = dmu_tx_create(zfsvfs->z_os); 166620094Swollman dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 166758787Sru zfs_sa_upgrade_txholds(tx, zp); 166858787Sru dmu_tx_mark_netfree(tx); 166943014Swollman error = dmu_tx_assign(tx, TXG_WAIT); 167043014Swollman if (error) { 167120094Swollman dmu_tx_abort(tx); 167258787Sru zfs_range_unlock(rl); 167358787Sru return (error); 167458787Sru } 167558787Sru 167658787Sru zp->z_size = end; 167758787Sru SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 167858787Sru NULL, &zp->z_size, sizeof (zp->z_size)); 167958787Sru 168058787Sru if (end == 0) { 168158787Sru zp->z_pflags &= ~ZFS_SPARSE; 168258787Sru SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 16832742Swollman NULL, &zp->z_pflags, 8); 16842742Swollman } 168520094Swollman VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); 168620094Swollman 168720094Swollman dmu_tx_commit(tx); 168820094Swollman 168920094Swollman /* 169020094Swollman * Clear any mapped pages in the truncated region. This has to 169120094Swollman * happen outside of the transaction to avoid the possibility of 169220094Swollman * a deadlock with someone trying to push a page that we are 169320094Swollman * about to invalidate. 169420094Swollman */ 169520094Swollman vnode_pager_setsize(vp, end); 169620094Swollman 169720094Swollman zfs_range_unlock(rl); 169820094Swollman 169920094Swollman return (0); 170020094Swollman} 170120094Swollman 17022742Swollman/* 170343543Swollman * Free space in a file 170443543Swollman * 170543543Swollman * IN: zp - znode of file to free data in. 170619878Swollman * off - start of range 17072742Swollman * len - end of range (0 => EOF) 17082742Swollman * flag - current file open mode flags. 170919878Swollman * log - TRUE if this action should be logged 17102742Swollman * 171119878Swollman * RETURN: 0 on success, error code on failure 171219878Swollman */ 171319878Swollmanint 171419878Swollmanzfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 171519878Swollman{ 17162742Swollman vnode_t *vp = ZTOV(zp); 171719878Swollman dmu_tx_t *tx; 17182742Swollman zfsvfs_t *zfsvfs = zp->z_zfsvfs; 171919878Swollman zilog_t *zilog = zfsvfs->z_log; 172019878Swollman uint64_t mode; 172119878Swollman uint64_t mtime[2], ctime[2]; 172219878Swollman sa_bulk_attr_t bulk[3]; 17232742Swollman int count = 0; 17242742Swollman int error; 172519878Swollman 17262742Swollman if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, 172719878Swollman sizeof (mode))) != 0) 17282742Swollman return (error); 17292742Swollman 17302742Swollman if (off > zp->z_size) { 173119878Swollman error = zfs_extend(zp, off+len); 173219878Swollman if (error == 0 && log) 17332742Swollman goto log; 173419878Swollman else 173519878Swollman return (error); 173619878Swollman } 17372742Swollman 17382742Swollman /* 173919878Swollman * Check for any locks in the region to be freed. 17402742Swollman */ 17412742Swollman 174219878Swollman if (MANDLOCK(vp, (mode_t)mode)) { 174319878Swollman uint64_t length = (len ? len : zp->z_size - off); 174443014Swollman if (error = chklock(vp, FWRITE, off, length, flag, NULL)) 174519878Swollman return (error); 17462742Swollman } 174743014Swollman 174819878Swollman if (len == 0) { 174943014Swollman error = zfs_trunc(zp, off); 175019878Swollman } else { 17512742Swollman if ((error = zfs_free_range(zp, off, len)) == 0 && 175219878Swollman off + len > zp->z_size) 17532742Swollman error = zfs_extend(zp, off+len); 17542742Swollman } 17552742Swollman if (error || !log) 175619878Swollman return (error); 17572742Swollmanlog: 175819878Swollman tx = dmu_tx_create(zfsvfs->z_os); 17592742Swollman dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 176019878Swollman zfs_sa_upgrade_txholds(tx, zp); 17612742Swollman error = dmu_tx_assign(tx, TXG_WAIT); 17622742Swollman if (error) { 176319878Swollman dmu_tx_abort(tx); 176419878Swollman return (error); 176519878Swollman } 17662742Swollman 1767121098Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); 1768121098Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); 17692742Swollman SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1770121098Swollman NULL, &zp->z_pflags, 8); 177119878Swollman zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 177219878Swollman error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 177319878Swollman ASSERT(error == 0); 177420094Swollman 177519878Swollman zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 177619878Swollman 17772742Swollman dmu_tx_commit(tx); 177886222Swollman return (0); 177919878Swollman} 178019878Swollman 178120094Swollmanvoid 178220094Swollmanzfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 178320094Swollman{ 17842742Swollman zfsvfs_t zfsvfs; 178586222Swollman uint64_t moid, obj, sa_obj, version; 178619878Swollman uint64_t sense = ZFS_CASE_SENSITIVE; 178719878Swollman uint64_t norm = 0; 178819878Swollman nvpair_t *elem; 17892742Swollman int error; 17902742Swollman int i; 179158787Sru znode_t *rootzp = NULL; 179258787Sru vattr_t vattr; 179358787Sru znode_t *zp; 179458787Sru zfs_acl_ids_t acl_ids; 179558787Sru 179658787Sru /* 179758787Sru * First attempt to create master node. 179858787Sru */ 17992742Swollman /* 180019878Swollman * In an empty objset, there are no blocks to read and thus 18012742Swollman * there can be no i/o errors (which we assert below). 180219878Swollman */ 180319878Swollman moid = MASTER_NODE_OBJ; 18042742Swollman error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 180519878Swollman DMU_OT_NONE, 0, tx); 18062742Swollman ASSERT(error == 0); 180720094Swollman 180820094Swollman /* 18092742Swollman * Set starting attributes. 18102742Swollman */ 181119878Swollman version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); 181219878Swollman elem = NULL; 181320094Swollman while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 181420094Swollman /* For the moment we expect all zpl props to be uint64_ts */ 181558787Sru uint64_t val; 181658787Sru char *name; 18172742Swollman 18182742Swollman ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 181921217Swollman VERIFY(nvpair_value_uint64(elem, &val) == 0); 182058787Sru name = nvpair_name(elem); 182158787Sru if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 182220094Swollman if (val < version) 182358787Sru version = val; 182458787Sru } else { 18252742Swollman error = zap_update(os, moid, name, 8, 1, &val, tx); 182686222Swollman } 182786222Swollman ASSERT(error == 0); 182886222Swollman if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 182986222Swollman norm = val; 183086222Swollman else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 183186222Swollman sense = val; 183220094Swollman } 183320094Swollman ASSERT(version != 0); 183420094Swollman error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 183520094Swollman 183620094Swollman /* 183720094Swollman * Create zap object used for SA attribute registration 183820094Swollman */ 183920094Swollman 184020094Swollman if (version >= ZPL_VERSION_SA) { 184120094Swollman sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 184220094Swollman DMU_OT_NONE, 0, tx); 184375267Swollman error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 184475267Swollman ASSERT(error == 0); 184575267Swollman } else { 184675267Swollman sa_obj = 0; 184775267Swollman } 184875267Swollman /* 184975267Swollman * Create a delete queue. 185086222Swollman */ 185186222Swollman obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 185286222Swollman 185386222Swollman error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 185486222Swollman ASSERT(error == 0); 185586222Swollman 185686222Swollman /* 185786222Swollman * Create root znode. Create minimal znode/vnode/zfsvfs 185886222Swollman * to allow zfs_mknode to work. 18592742Swollman */ 186086222Swollman VATTR_NULL(&vattr); 186186222Swollman vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 186286222Swollman vattr.va_type = VDIR; 186320094Swollman vattr.va_mode = S_IFDIR|0755; 186458787Sru vattr.va_uid = crgetuid(cr); 186520094Swollman vattr.va_gid = crgetgid(cr); 186620094Swollman 186786222Swollman bzero(&zfsvfs, sizeof (zfsvfs_t)); 186886222Swollman 186986222Swollman rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 187086222Swollman ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); 187186222Swollman rootzp->z_moved = 0; 187286222Swollman rootzp->z_unlinked = 0; 187386222Swollman rootzp->z_atime_dirty = 0; 187486222Swollman rootzp->z_is_sa = USE_SA(version, os); 187586222Swollman 187686222Swollman zfsvfs.z_os = os; 187786222Swollman zfsvfs.z_parent = &zfsvfs; 187886222Swollman zfsvfs.z_version = version; 187986222Swollman zfsvfs.z_use_fuids = USE_FUIDS(version, os); 188086222Swollman zfsvfs.z_use_sa = USE_SA(version, os); 188186222Swollman zfsvfs.z_norm = norm; 188286222Swollman 188386222Swollman error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 188486222Swollman &zfsvfs.z_attr_table); 188586222Swollman 188686222Swollman ASSERT(error == 0); 188786222Swollman 18882742Swollman /* 188958787Sru * Fold case on file systems that are always or sometimes case 189058787Sru * insensitive. 189120094Swollman */ 18922742Swollman if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) 189320094Swollman zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; 189458787Sru 189520094Swollman mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 189686222Swollman list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), 189786222Swollman offsetof(znode_t, z_link_node)); 189886222Swollman 189958787Sru for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 190058787Sru mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 190158787Sru 190258787Sru rootzp->z_zfsvfs = &zfsvfs; 190358787Sru VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, 190458787Sru cr, NULL, &acl_ids)); 190558787Sru zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); 190686222Swollman ASSERT3P(zp, ==, rootzp); 190786222Swollman error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); 190886222Swollman ASSERT(error == 0); 190986222Swollman zfs_acl_ids_free(&acl_ids); 191086222Swollman POINTER_INVALIDATE(&rootzp->z_zfsvfs); 191186222Swollman 191286222Swollman sa_handle_destroy(rootzp->z_sa_hdl); 191358787Sru kmem_cache_free(znode_cache, rootzp); 191458787Sru 191543766Sbde /* 191658787Sru * Create shares directory 191743766Sbde */ 191886222Swollman 191986222Swollman error = zfs_create_share_dir(&zfsvfs, tx); 192086222Swollman 192158787Sru ASSERT(error == 0); 192258787Sru 192343766Sbde for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 192458787Sru mutex_destroy(&zfsvfs.z_hold_mtx[i]); 192543766Sbde} 192686222Swollman 192786222Swollman#endif /* _KERNEL */ 192858787Sru 192958787Srustatic int 193043766Sbdezfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) 193158787Sru{ 193258787Sru uint64_t sa_obj = 0; 193343766Sbde int error; 193486222Swollman 193586222Swollman error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); 193686222Swollman if (error != 0 && error != ENOENT) 193786222Swollman return (error); 193886222Swollman 193958787Sru error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); 194058787Sru return (error); 194143766Sbde} 194258787Sru 194343766Sbdestatic int 194486222Swollmanzfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, 194586222Swollman dmu_buf_t **db, void *tag) 194686222Swollman{ 194786222Swollman dmu_object_info_t doi; 19482742Swollman int error; 194958787Sru 195058787Sru if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) 195143766Sbde return (error); 195258787Sru 195343766Sbde dmu_object_info_from_db(*db, &doi); 195486222Swollman if ((doi.doi_bonus_type != DMU_OT_SA && 195586222Swollman doi.doi_bonus_type != DMU_OT_ZNODE) || 195686222Swollman doi.doi_bonus_type == DMU_OT_ZNODE && 195786222Swollman doi.doi_bonus_size < sizeof (znode_phys_t)) { 195886222Swollman sa_buf_rele(*db, tag); 195986222Swollman return (SET_ERROR(ENOTSUP)); 196086222Swollman } 196186222Swollman 196286222Swollman error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); 196386222Swollman if (error != 0) { 196486222Swollman sa_buf_rele(*db, tag); 196558787Sru return (error); 196658787Sru } 196743766Sbde 196858787Sru return (0); 196943766Sbde} 197086222Swollman 197186222Swollmanvoid 197286222Swollmanzfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) 197386222Swollman{ 197486222Swollman sa_handle_destroy(hdl); 197586222Swollman sa_buf_rele(db, tag); 197686222Swollman} 197758787Sru 197858787Sru/* 197943766Sbde * Given an object number, return its parent object number and whether 198058787Sru * or not the object is an extended attribute directory. 198143766Sbde */ 198286222Swollmanstatic int 198386222Swollmanzfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, 198486222Swollman uint64_t *pobjp, int *is_xattrdir) 198586222Swollman{ 198686222Swollman uint64_t parent; 198786222Swollman uint64_t pflags; 198886222Swollman uint64_t mode; 198986222Swollman uint64_t parent_mode; 199086222Swollman sa_bulk_attr_t bulk[3]; 199186222Swollman sa_handle_t *sa_hdl; 199286222Swollman dmu_buf_t *sa_db; 199386222Swollman int count = 0; 199486222Swollman int error; 199586222Swollman 199686222Swollman SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, 199786222Swollman &parent, sizeof (parent)); 19982742Swollman SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, 199958787Sru &pflags, sizeof (pflags)); 200043766Sbde SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 200158787Sru &mode, sizeof (mode)); 200243766Sbde 200386222Swollman if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) 200486222Swollman return (error); 200586222Swollman 200686222Swollman /* 200786222Swollman * When a link is removed its parent pointer is not changed and will 200858787Sru * be invalid. There are two cases where a link is removed but the 200958787Sru * file stays around, when it goes to the delete queue and when there 201043766Sbde * are additional links. 201158787Sru */ 201243766Sbde error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); 201386222Swollman if (error != 0) 201486222Swollman return (error); 20152742Swollman 201658787Sru error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); 201758787Sru zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 201858787Sru if (error != 0) 201958787Sru return (error); 202058787Sru 20212742Swollman *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); 2022114173Swollman 2023114173Swollman /* 2024114173Swollman * Extended attributes can be applied to files, directories, etc. 2025114173Swollman * Otherwise the parent must be a directory. 2026114173Swollman */ 2027114173Swollman if (!*is_xattrdir && !S_ISDIR(parent_mode)) 2028114173Swollman return (SET_ERROR(EINVAL)); 2029114173Swollman 2030114173Swollman *pobjp = parent; 2031114173Swollman 2032114173Swollman return (0); 2033114173Swollman} 2034114173Swollman 2035114173Swollman/* 2036114173Swollman * Given an object number, return some zpl level statistics 2037114173Swollman */ 203814343Swollmanstatic int 203914343Swollmanzfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, 20409908Swollman zfs_stat_t *sb) 20412742Swollman{ 2042114173Swollman sa_bulk_attr_t bulk[4]; 20432742Swollman int count = 0; 20442742Swollman 20452742Swollman SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 20462742Swollman &sb->zs_mode, sizeof (sb->zs_mode)); 204719878Swollman SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, 20482742Swollman &sb->zs_gen, sizeof (sb->zs_gen)); 204919878Swollman SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, 205019878Swollman &sb->zs_links, sizeof (sb->zs_links)); 20512742Swollman SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, 205219878Swollman &sb->zs_ctime, sizeof (sb->zs_ctime)); 20532742Swollman 20542742Swollman return (sa_bulk_lookup(hdl, bulk, count)); 205519878Swollman} 20562742Swollman 20572742Swollmanstatic int 205819878Swollmanzfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, 205919878Swollman sa_attr_type_t *sa_table, char *buf, int len) 206019878Swollman{ 20612742Swollman sa_handle_t *sa_hdl; 206219878Swollman sa_handle_t *prevhdl = NULL; 20632742Swollman dmu_buf_t *prevdb = NULL; 206419878Swollman dmu_buf_t *sa_db = NULL; 206519878Swollman char *path = buf + len - 1; 206619878Swollman int error; 20672742Swollman 206843014Swollman *path = '\0'; 206919878Swollman sa_hdl = hdl; 207043014Swollman 207119878Swollman for (;;) { 207219878Swollman uint64_t pobj; 207319878Swollman char component[MAXNAMELEN + 2]; 207414343Swollman size_t complen; 207519878Swollman int is_xattrdir; 20762742Swollman 207719878Swollman if (prevdb) 20782742Swollman zfs_release_sa_handle(prevhdl, prevdb, FTAG); 207919878Swollman 20802742Swollman if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, 208119878Swollman &is_xattrdir)) != 0) 20822742Swollman break; 208319878Swollman 208419878Swollman if (pobj == obj) { 208519878Swollman if (path[0] != '/') 208619878Swollman *--path = '/'; 208719878Swollman break; 208819878Swollman } 208919878Swollman 209019878Swollman component[0] = '/'; 209119878Swollman if (is_xattrdir) { 209219878Swollman (void) sprintf(component + 1, "<xattrdir>"); 20932742Swollman } else { 20942742Swollman error = zap_value_search(osp, pobj, obj, 209519878Swollman ZFS_DIRENT_OBJ(-1ULL), component + 1); 209619878Swollman if (error != 0) 209719878Swollman break; 209819878Swollman } 209919878Swollman 210019878Swollman complen = strlen(component); 210119878Swollman path -= complen; 210219878Swollman ASSERT(path >= buf); 210319878Swollman bcopy(component, path, complen); 210419878Swollman obj = pobj; 210519878Swollman 21062742Swollman if (sa_hdl != hdl) { 210719878Swollman prevhdl = sa_hdl; 21082742Swollman prevdb = sa_db; 210919878Swollman } 211019878Swollman error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); 211120094Swollman if (error != 0) { 211220094Swollman sa_hdl = prevhdl; 21132742Swollman sa_db = prevdb; 211419878Swollman break; 211586222Swollman } 211686222Swollman } 211786222Swollman 211886222Swollman if (sa_hdl != NULL && sa_hdl != hdl) { 211986222Swollman ASSERT(sa_db != NULL); 212086222Swollman zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 212186222Swollman } 212286222Swollman 212386222Swollman if (error == 0) 212486222Swollman (void) memmove(buf, path, buf + len - path); 212586222Swollman 212686222Swollman return (error); 212786222Swollman} 212886222Swollman 212986222Swollmanint 213086222Swollmanzfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) 213186222Swollman{ 213286222Swollman sa_attr_type_t *sa_table; 213386222Swollman sa_handle_t *hdl; 213486222Swollman dmu_buf_t *db; 213586222Swollman int error; 213686222Swollman 213786222Swollman error = zfs_sa_setup(osp, &sa_table); 213886222Swollman if (error != 0) 213986222Swollman return (error); 214086222Swollman 214186222Swollman error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); 214286222Swollman if (error != 0) 214386222Swollman return (error); 214486222Swollman 214586222Swollman error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 214686222Swollman 214786222Swollman zfs_release_sa_handle(hdl, db, FTAG); 214886222Swollman return (error); 214986222Swollman} 215086222Swollman 215186222Swollmanint 215286222Swollmanzfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, 215386222Swollman char *buf, int len) 215486222Swollman{ 215586222Swollman char *path = buf + len - 1; 215686222Swollman sa_attr_type_t *sa_table; 215786222Swollman sa_handle_t *hdl; 215886222Swollman dmu_buf_t *db; 215986222Swollman int error; 21602742Swollman 216186222Swollman *path = '\0'; 216286222Swollman 216386222Swollman error = zfs_sa_setup(osp, &sa_table); 216486222Swollman if (error != 0) 216519878Swollman return (error); 216619878Swollman 21672742Swollman error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); 21682742Swollman if (error != 0) 216958787Sru return (error); 21702742Swollman 21712742Swollman error = zfs_obj_to_stats_impl(hdl, sa_table, sb); 21722742Swollman if (error != 0) { 21732742Swollman zfs_release_sa_handle(hdl, db, FTAG); 21742742Swollman return (error); 217519878Swollman } 21769908Swollman 217758787Sru error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 217819878Swollman 21799908Swollman zfs_release_sa_handle(hdl, db, FTAG); 21802742Swollman return (error); 21812742Swollman} 218219878Swollman