1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23272134Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 24254585Sdelphij * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
25168404Spjd */ 26168404Spjd 27169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 28219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 29169195Spjd 30168404Spjd#include <sys/types.h> 31168404Spjd#include <sys/param.h> 32168404Spjd#include <sys/time.h> 33168404Spjd#include <sys/systm.h> 34168404Spjd#include <sys/sysmacros.h> 35168404Spjd#include <sys/resource.h> 36168404Spjd#include <sys/vfs.h> 37248084Sattilio#include <sys/vm.h> 38168404Spjd#include <sys/vnode.h> 39168404Spjd#include <sys/file.h> 40168404Spjd#include <sys/stat.h> 41168404Spjd#include <sys/kmem.h> 42168404Spjd#include <sys/taskq.h> 43168404Spjd#include <sys/uio.h> 44168404Spjd#include <sys/atomic.h> 45168404Spjd#include <sys/namei.h> 46168404Spjd#include <sys/mman.h> 47168404Spjd#include <sys/cmn_err.h> 48168404Spjd#include <sys/errno.h> 49168404Spjd#include <sys/unistd.h> 50168404Spjd#include <sys/zfs_dir.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/fs/zfs.h> 53168404Spjd#include <sys/dmu.h> 54219089Spjd#include <sys/dmu_objset.h> 55168404Spjd#include <sys/spa.h> 56168404Spjd#include <sys/txg.h> 57168404Spjd#include <sys/dbuf.h> 58168404Spjd#include <sys/zap.h> 59219089Spjd#include <sys/sa.h> 60168404Spjd#include <sys/dirent.h> 61168962Spjd#include <sys/policy.h> 62168962Spjd#include <sys/sunddi.h> 63168404Spjd#include <sys/filio.h> 64209962Smm#include <sys/sid.h> 65168404Spjd#include <sys/zfs_ctldir.h> 66185029Spjd#include <sys/zfs_fuid.h> 67219089Spjd#include <sys/zfs_sa.h> 68168404Spjd#include <sys/dnlc.h> 69168404Spjd#include <sys/zfs_rlock.h> 70185029Spjd#include <sys/extdirent.h> 71185029Spjd#include <sys/kidmap.h> 72168404Spjd#include <sys/bio.h> 73168404Spjd#include <sys/buf.h> 74168404Spjd#include <sys/sched.h> 75192800Strasz#include <sys/acl.h> 76239077Smarius#include <vm/vm_param.h> 77215401Savg#include <vm/vm_pageout.h> 78168404Spjd 79168404Spjd/* 80168404Spjd * Programming rules. 
81168404Spjd * 82168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 83168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 84168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 85185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 86185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 87168404Spjd * The ordering of events is important to avoid deadlocks and references 88168404Spjd * to freed memory. The example below illustrates the following Big Rules: 89168404Spjd * 90251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 91168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 92251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94251631Sdelphij * can return EIO from the calling function. 95168404Spjd * 96168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 97168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98168404Spjd * First, if it's the last reference, the vnode/znode 99168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 100168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 101168404Spjd * pushing cached pages (which acquires range locks) and syncing out 102168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 103168404Spjd * which could deadlock the system if you were already holding one. 104191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105168404Spjd * 106168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107168404Spjd * as they can span dmu_tx_assign() calls. 
108168404Spjd * 109260776Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 110260776Savg * dmu_tx_assign(). This is critical because we don't want to block 111260776Savg * while holding locks. 112168404Spjd * 113260776Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 114260776Savg * reduces lock contention and CPU usage when we must wait (note that if 115260776Savg * throughput is constrained by the storage, nearly every transaction 116260776Savg * must wait). 117260776Savg * 118260776Savg * Note, in particular, that if a lock is sometimes acquired before 119260776Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 120260776Savg * to use a non-blocking assign can deadlock the system. The scenario: 121260776Savg * 122168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 123168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 124168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 125168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 126168404Spjd * 127168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 128260763Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 129260763Savg * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 130260763Savg * to indicate that this operation has already called dmu_tx_wait(). 131260763Savg * This will ensure that we don't retry forever, waiting a short bit 132260763Savg * each time. 133168404Spjd * 134168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 135168404Spjd * before dropping locks. This ensures that the ordering of events 136168404Spjd * in the intent log matches the order in which they actually occurred. 
137251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 138209962Smm * number to indicate the zil transaction has replayed. 139168404Spjd * 140168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 141168404Spjd * regardless of whether there were any errors. 142168404Spjd * 143219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 144168404Spjd * to ensure that synchronous semantics are provided when necessary. 145168404Spjd * 146168404Spjd * In general, this is how things should be ordered in each vnode op: 147168404Spjd * 148168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 149168404Spjd * top: 150168404Spjd * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 151168404Spjd * rw_enter(...); // grab any other locks you need 152168404Spjd * tx = dmu_tx_create(...); // get DMU tx 153168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 154260763Savg * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 155168404Spjd * if (error) { 156168404Spjd * rw_exit(...); // drop locks 157168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 158168404Spjd * VN_RELE(...); // release held vnodes 159209962Smm * if (error == ERESTART) { 160260763Savg * waited = B_TRUE; 161168404Spjd * dmu_tx_wait(tx); 162168404Spjd * dmu_tx_abort(tx); 163168404Spjd * goto top; 164168404Spjd * } 165168404Spjd * dmu_tx_abort(tx); // abort DMU tx 166168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 167168404Spjd * return (error); // really out of space 168168404Spjd * } 169168404Spjd * error = do_real_work(); // do whatever this VOP does 170168404Spjd * if (error == 0) 171168404Spjd * zfs_log_*(...); // on success, make ZIL entry 172168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 173168404Spjd * rw_exit(...); // drop locks 174168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 175168404Spjd * VN_RELE(...); // release held vnodes 176219089Spjd * 
zil_commit(zilog, foid); // synchronous when necessary 177168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 178168404Spjd * return (error); // done, report error 179168404Spjd */ 180185029Spjd 181168404Spjd/* ARGSUSED */ 182168404Spjdstatic int 183185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 184168404Spjd{ 185168962Spjd znode_t *zp = VTOZ(*vpp); 186209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 187168404Spjd 188209962Smm ZFS_ENTER(zfsvfs); 189209962Smm ZFS_VERIFY_ZP(zp); 190209962Smm 191219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 192185029Spjd ((flag & FAPPEND) == 0)) { 193209962Smm ZFS_EXIT(zfsvfs); 194249195Smm return (SET_ERROR(EPERM)); 195185029Spjd } 196185029Spjd 197185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 198185029Spjd ZTOV(zp)->v_type == VREG && 199219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 200209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 201209962Smm ZFS_EXIT(zfsvfs); 202249195Smm return (SET_ERROR(EACCES)); 203209962Smm } 204209962Smm } 205185029Spjd 206168404Spjd /* Keep a count of the synchronous opens in the znode */ 207168962Spjd if (flag & (FSYNC | FDSYNC)) 208168404Spjd atomic_inc_32(&zp->z_sync_cnt); 209185029Spjd 210209962Smm ZFS_EXIT(zfsvfs); 211168404Spjd return (0); 212168404Spjd} 213168404Spjd 214168404Spjd/* ARGSUSED */ 215168404Spjdstatic int 216185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 217185029Spjd caller_context_t *ct) 218168404Spjd{ 219168962Spjd znode_t *zp = VTOZ(vp); 220209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 221168404Spjd 222210470Smm /* 223210470Smm * Clean up any locks held by this process on the vp. 
224210470Smm */ 225210470Smm cleanlocks(vp, ddi_get_pid(), 0); 226210470Smm cleanshares(vp, ddi_get_pid()); 227210470Smm 228209962Smm ZFS_ENTER(zfsvfs); 229209962Smm ZFS_VERIFY_ZP(zp); 230209962Smm 231168404Spjd /* Decrement the synchronous opens in the znode */ 232185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 233168404Spjd atomic_dec_32(&zp->z_sync_cnt); 234168404Spjd 235185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 236185029Spjd ZTOV(zp)->v_type == VREG && 237219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 238185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 239185029Spjd 240209962Smm ZFS_EXIT(zfsvfs); 241168404Spjd return (0); 242168404Spjd} 243168404Spjd 244168404Spjd/* 245168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 246168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 247168404Spjd */ 248168404Spjdstatic int 249168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 250168404Spjd{ 251168404Spjd znode_t *zp = VTOZ(vp); 252168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 253168404Spjd uint64_t file_sz; 254168404Spjd int error; 255168404Spjd boolean_t hole; 256168404Spjd 257219089Spjd file_sz = zp->z_size; 258168404Spjd if (noff >= file_sz) { 259249195Smm return (SET_ERROR(ENXIO)); 260168404Spjd } 261168404Spjd 262168962Spjd if (cmd == _FIO_SEEK_HOLE) 263168404Spjd hole = B_TRUE; 264168404Spjd else 265168404Spjd hole = B_FALSE; 266168404Spjd 267168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 268168404Spjd 269272134Sdelphij if (error == ESRCH) 270249195Smm return (SET_ERROR(ENXIO)); 271272134Sdelphij 272272134Sdelphij /* 273272134Sdelphij * We could find a hole that begins after the logical end-of-file, 274272134Sdelphij * because dmu_offset_next() only works on whole blocks. 
If the 275272134Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 276272134Sdelphij * at the end of the file begins at the logical EOF, rather than 277272134Sdelphij * at the end of the last block. 278272134Sdelphij */ 279272134Sdelphij if (noff > file_sz) { 280272134Sdelphij ASSERT(hole); 281272134Sdelphij noff = file_sz; 282168404Spjd } 283168404Spjd 284168404Spjd if (noff < *off) 285168404Spjd return (error); 286168404Spjd *off = noff; 287168404Spjd return (error); 288168404Spjd} 289168404Spjd 290168404Spjd/* ARGSUSED */ 291168404Spjdstatic int 292168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 293185029Spjd int *rvalp, caller_context_t *ct) 294168404Spjd{ 295168962Spjd offset_t off; 296168962Spjd int error; 297168962Spjd zfsvfs_t *zfsvfs; 298185029Spjd znode_t *zp; 299168404Spjd 300168404Spjd switch (com) { 301185029Spjd case _FIOFFS: 302168962Spjd return (0); 303168404Spjd 304168962Spjd /* 305168962Spjd * The following two ioctls are used by bfu. Faking out, 306168962Spjd * necessary to avoid bfu errors. 
307168962Spjd */ 308185029Spjd case _FIOGDIO: 309185029Spjd case _FIOSDIO: 310168962Spjd return (0); 311168962Spjd 312185029Spjd case _FIO_SEEK_DATA: 313185029Spjd case _FIO_SEEK_HOLE: 314233918Savg#ifdef sun 315168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 316249195Smm return (SET_ERROR(EFAULT)); 317233918Savg#else 318233918Savg off = *(offset_t *)data; 319233918Savg#endif 320185029Spjd zp = VTOZ(vp); 321185029Spjd zfsvfs = zp->z_zfsvfs; 322168404Spjd ZFS_ENTER(zfsvfs); 323185029Spjd ZFS_VERIFY_ZP(zp); 324168404Spjd 325168404Spjd /* offset parameter is in/out */ 326168404Spjd error = zfs_holey(vp, com, &off); 327168404Spjd ZFS_EXIT(zfsvfs); 328168404Spjd if (error) 329168404Spjd return (error); 330233918Savg#ifdef sun 331168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 332249195Smm return (SET_ERROR(EFAULT)); 333233918Savg#else 334233918Savg *(offset_t *)data = off; 335233918Savg#endif 336168404Spjd return (0); 337168404Spjd } 338249195Smm return (SET_ERROR(ENOTTY)); 339168404Spjd} 340168404Spjd 341209962Smmstatic vm_page_t 342253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 343209962Smm{ 344209962Smm vm_object_t obj; 345209962Smm vm_page_t pp; 346258563Savg int64_t end; 347209962Smm 348258563Savg /* 349258563Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 350258563Savg * aligned boundaries, if the range is not aligned. As a result a 351258563Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 352258563Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 353258563Savg * the whole page would be considred clean despite have some dirty data. 354258563Savg * For this reason we should shrink the range to DEV_BSIZE aligned 355258563Savg * boundaries before calling vm_page_clear_dirty. 
356258563Savg */ 357258563Savg end = rounddown2(off + nbytes, DEV_BSIZE); 358258563Savg off = roundup2(off, DEV_BSIZE); 359258563Savg nbytes = end - off; 360258563Savg 361209962Smm obj = vp->v_object; 362248084Sattilio zfs_vmobject_assert_wlocked(obj); 363209962Smm 364209962Smm for (;;) { 365209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 366246293Savg pp->valid) { 367254138Sattilio if (vm_page_xbusied(pp)) { 368212652Savg /* 369212652Savg * Reference the page before unlocking and 370212652Savg * sleeping so that the page daemon is less 371212652Savg * likely to reclaim it. 372212652Savg */ 373225418Skib vm_page_reference(pp); 374254138Sattilio vm_page_lock(pp); 375254138Sattilio zfs_vmobject_wunlock(obj); 376254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 377254138Sattilio zfs_vmobject_wlock(obj); 378209962Smm continue; 379212652Savg } 380254138Sattilio vm_page_sbusy(pp); 381252337Sgavin } else if (pp == NULL) { 382246293Savg pp = vm_page_alloc(obj, OFF_TO_IDX(start), 383246293Savg VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 384254138Sattilio VM_ALLOC_SBUSY); 385252337Sgavin } else { 386252337Sgavin ASSERT(pp != NULL && !pp->valid); 387252337Sgavin pp = NULL; 388209962Smm } 389246293Savg 390246293Savg if (pp != NULL) { 391246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 392253953Sattilio vm_object_pip_add(obj, 1); 393246293Savg pmap_remove_write(pp); 394258563Savg if (nbytes != 0) 395258563Savg vm_page_clear_dirty(pp, off, nbytes); 396246293Savg } 397209962Smm break; 398209962Smm } 399209962Smm return (pp); 400209962Smm} 401209962Smm 402209962Smmstatic void 403253953Sattiliopage_unbusy(vm_page_t pp) 404209962Smm{ 405209962Smm 406254138Sattilio vm_page_sunbusy(pp); 407253953Sattilio vm_object_pip_subtract(pp->object, 1); 408209962Smm} 409209962Smm 410253953Sattiliostatic vm_page_t 411253953Sattiliopage_hold(vnode_t *vp, int64_t start) 412253953Sattilio{ 413253953Sattilio vm_object_t obj; 414253953Sattilio vm_page_t pp; 415253953Sattilio 
416253953Sattilio obj = vp->v_object; 417253953Sattilio zfs_vmobject_assert_wlocked(obj); 418253953Sattilio 419253953Sattilio for (;;) { 420253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 421253953Sattilio pp->valid) { 422254138Sattilio if (vm_page_xbusied(pp)) { 423253953Sattilio /* 424253953Sattilio * Reference the page before unlocking and 425253953Sattilio * sleeping so that the page daemon is less 426253953Sattilio * likely to reclaim it. 427253953Sattilio */ 428253953Sattilio vm_page_reference(pp); 429254138Sattilio vm_page_lock(pp); 430254138Sattilio zfs_vmobject_wunlock(obj); 431254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 432254138Sattilio zfs_vmobject_wlock(obj); 433253953Sattilio continue; 434253953Sattilio } 435253953Sattilio 436253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 437253953Sattilio vm_page_lock(pp); 438253953Sattilio vm_page_hold(pp); 439253953Sattilio vm_page_unlock(pp); 440253953Sattilio 441253953Sattilio } else 442253953Sattilio pp = NULL; 443253953Sattilio break; 444253953Sattilio } 445253953Sattilio return (pp); 446253953Sattilio} 447253953Sattilio 448253953Sattiliostatic void 449253953Sattiliopage_unhold(vm_page_t pp) 450253953Sattilio{ 451253953Sattilio 452253953Sattilio vm_page_lock(pp); 453253953Sattilio vm_page_unhold(pp); 454253953Sattilio vm_page_unlock(pp); 455253953Sattilio} 456253953Sattilio 457168404Spjd/* 458168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 459168404Spjd * between the DMU cache and the memory mapped pages. What this means: 460168404Spjd * 461168404Spjd * On Write: If we find a memory mapped page, we write to *both* 462168404Spjd * the page and the dmu buffer. 
463168404Spjd */ 464209962Smmstatic void 465209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 466209962Smm int segflg, dmu_tx_t *tx) 467168404Spjd{ 468168404Spjd vm_object_t obj; 469168404Spjd struct sf_buf *sf; 470246293Savg caddr_t va; 471212655Savg int off; 472168404Spjd 473260786Savg ASSERT(segflg != UIO_NOCOPY); 474168404Spjd ASSERT(vp->v_mount != NULL); 475168404Spjd obj = vp->v_object; 476168404Spjd ASSERT(obj != NULL); 477168404Spjd 478168404Spjd off = start & PAGEOFFSET; 479248084Sattilio zfs_vmobject_wlock(obj); 480168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 481209962Smm vm_page_t pp; 482246293Savg int nbytes = imin(PAGESIZE - off, len); 483168404Spjd 484260786Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 485248084Sattilio zfs_vmobject_wunlock(obj); 486168404Spjd 487246293Savg va = zfs_map_page(pp, &sf); 488246293Savg (void) dmu_read(os, oid, start+off, nbytes, 489246293Savg va+off, DMU_READ_PREFETCH);; 490209962Smm zfs_unmap_page(sf); 491246293Savg 492248084Sattilio zfs_vmobject_wlock(obj); 493253953Sattilio page_unbusy(pp); 494168404Spjd } 495209962Smm len -= nbytes; 496168404Spjd off = 0; 497168404Spjd } 498260786Savg vm_object_pip_wakeupn(obj, 0); 499248084Sattilio zfs_vmobject_wunlock(obj); 500168404Spjd} 501168404Spjd 502168404Spjd/* 503219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 504219089Spjd * ZFS to populate a range of page cache pages with data. 505219089Spjd * 506219089Spjd * NOTE: this function could be optimized to pre-allocate 507254138Sattilio * all pages in advance, drain exclusive busy on all of them, 508219089Spjd * map them into contiguous KVA region and populate them 509219089Spjd * in one single dmu_read() call. 
510219089Spjd */ 511219089Spjdstatic int 512219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 513219089Spjd{ 514219089Spjd znode_t *zp = VTOZ(vp); 515219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 516219089Spjd struct sf_buf *sf; 517219089Spjd vm_object_t obj; 518219089Spjd vm_page_t pp; 519219089Spjd int64_t start; 520219089Spjd caddr_t va; 521219089Spjd int len = nbytes; 522219089Spjd int off; 523219089Spjd int error = 0; 524219089Spjd 525219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 526219089Spjd ASSERT(vp->v_mount != NULL); 527219089Spjd obj = vp->v_object; 528219089Spjd ASSERT(obj != NULL); 529219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 530219089Spjd 531248084Sattilio zfs_vmobject_wlock(obj); 532219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 533219089Spjd int bytes = MIN(PAGESIZE, len); 534219089Spjd 535254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 536254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 537219089Spjd if (pp->valid == 0) { 538248084Sattilio zfs_vmobject_wunlock(obj); 539219089Spjd va = zfs_map_page(pp, &sf); 540219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 541219089Spjd DMU_READ_PREFETCH); 542219089Spjd if (bytes != PAGESIZE && error == 0) 543219089Spjd bzero(va + bytes, PAGESIZE - bytes); 544219089Spjd zfs_unmap_page(sf); 545248084Sattilio zfs_vmobject_wlock(obj); 546254138Sattilio vm_page_sunbusy(pp); 547219089Spjd vm_page_lock(pp); 548219089Spjd if (error) { 549253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 550254138Sattilio !vm_page_busied(pp)) 551253073Savg vm_page_free(pp); 552219089Spjd } else { 553219089Spjd pp->valid = VM_PAGE_BITS_ALL; 554219089Spjd vm_page_activate(pp); 555219089Spjd } 556219089Spjd vm_page_unlock(pp); 557260773Savg } else { 558260773Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 559254138Sattilio vm_page_sunbusy(pp); 560260773Savg } 561219089Spjd if (error) 562219089Spjd break; 563219089Spjd uio->uio_resid -= bytes; 
564219089Spjd uio->uio_offset += bytes; 565219089Spjd len -= bytes; 566219089Spjd } 567248084Sattilio zfs_vmobject_wunlock(obj); 568219089Spjd return (error); 569219089Spjd} 570219089Spjd 571219089Spjd/* 572168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 573168404Spjd * between the DMU cache and the memory mapped pages. What this means: 574168404Spjd * 575168404Spjd * On Read: We "read" preferentially from memory mapped pages, 576168404Spjd * else we default from the dmu buffer. 577168404Spjd * 578168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 579251631Sdelphij * the file is memory mapped. 580168404Spjd */ 581168404Spjdstatic int 582168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 583168404Spjd{ 584168404Spjd znode_t *zp = VTOZ(vp); 585168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 586168404Spjd vm_object_t obj; 587212655Savg int64_t start; 588168926Spjd caddr_t va; 589168404Spjd int len = nbytes; 590212655Savg int off; 591168404Spjd int error = 0; 592168404Spjd 593168404Spjd ASSERT(vp->v_mount != NULL); 594168404Spjd obj = vp->v_object; 595168404Spjd ASSERT(obj != NULL); 596168404Spjd 597168404Spjd start = uio->uio_loffset; 598168404Spjd off = start & PAGEOFFSET; 599248084Sattilio zfs_vmobject_wlock(obj); 600168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 601219089Spjd vm_page_t pp; 602219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 603168404Spjd 604253953Sattilio if (pp = page_hold(vp, start)) { 605219089Spjd struct sf_buf *sf; 606219089Spjd caddr_t va; 607212652Savg 608248084Sattilio zfs_vmobject_wunlock(obj); 609219089Spjd va = zfs_map_page(pp, &sf); 610219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 611219089Spjd zfs_unmap_page(sf); 612248084Sattilio zfs_vmobject_wlock(obj); 613253953Sattilio page_unhold(pp); 614219089Spjd } else { 615248084Sattilio zfs_vmobject_wunlock(obj); 616219089Spjd error = dmu_read_uio(os, zp->z_id, uio, bytes); 
617248084Sattilio zfs_vmobject_wlock(obj); 618168404Spjd } 619168404Spjd len -= bytes; 620168404Spjd off = 0; 621168404Spjd if (error) 622168404Spjd break; 623168404Spjd } 624248084Sattilio zfs_vmobject_wunlock(obj); 625168404Spjd return (error); 626168404Spjd} 627168404Spjd 628168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 629168404Spjd 630168404Spjd/* 631168404Spjd * Read bytes from specified file into supplied buffer. 632168404Spjd * 633168404Spjd * IN: vp - vnode of file to be read from. 634168404Spjd * uio - structure supplying read location, range info, 635168404Spjd * and return buffer. 636168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 637168404Spjd * cr - credentials of caller. 638185029Spjd * ct - caller context 639168404Spjd * 640168404Spjd * OUT: uio - updated offset and range, buffer filled. 641168404Spjd * 642251631Sdelphij * RETURN: 0 on success, error code on failure. 643168404Spjd * 644168404Spjd * Side Effects: 645168404Spjd * vp - atime updated if byte count > 0 646168404Spjd */ 647168404Spjd/* ARGSUSED */ 648168404Spjdstatic int 649168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 650168404Spjd{ 651168404Spjd znode_t *zp = VTOZ(vp); 652168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 653185029Spjd objset_t *os; 654168404Spjd ssize_t n, nbytes; 655247187Smm int error = 0; 656168404Spjd rl_t *rl; 657219089Spjd xuio_t *xuio = NULL; 658168404Spjd 659168404Spjd ZFS_ENTER(zfsvfs); 660185029Spjd ZFS_VERIFY_ZP(zp); 661185029Spjd os = zfsvfs->z_os; 662168404Spjd 663219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 664185029Spjd ZFS_EXIT(zfsvfs); 665249195Smm return (SET_ERROR(EACCES)); 666185029Spjd } 667185029Spjd 668168404Spjd /* 669168404Spjd * Validate file offset 670168404Spjd */ 671168404Spjd if (uio->uio_loffset < (offset_t)0) { 672168404Spjd ZFS_EXIT(zfsvfs); 673249195Smm return (SET_ERROR(EINVAL)); 674168404Spjd } 675168404Spjd 676168404Spjd /* 677168404Spjd * 
Fasttrack empty reads 678168404Spjd */ 679168404Spjd if (uio->uio_resid == 0) { 680168404Spjd ZFS_EXIT(zfsvfs); 681168404Spjd return (0); 682168404Spjd } 683168404Spjd 684168404Spjd /* 685168962Spjd * Check for mandatory locks 686168962Spjd */ 687219089Spjd if (MANDMODE(zp->z_mode)) { 688168962Spjd if (error = chklock(vp, FREAD, 689168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 690168962Spjd ZFS_EXIT(zfsvfs); 691168962Spjd return (error); 692168962Spjd } 693168962Spjd } 694168962Spjd 695168962Spjd /* 696168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 697168404Spjd */ 698224605Smm if (zfsvfs->z_log && 699224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 700219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 701168404Spjd 702168404Spjd /* 703168404Spjd * Lock the range against changes. 704168404Spjd */ 705168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 706168404Spjd 707168404Spjd /* 708168404Spjd * If we are reading past end-of-file we can skip 709168404Spjd * to the end; but we might still need to set atime. 
710168404Spjd */ 711219089Spjd if (uio->uio_loffset >= zp->z_size) { 712168404Spjd error = 0; 713168404Spjd goto out; 714168404Spjd } 715168404Spjd 716219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 717219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 718168404Spjd 719219089Spjd#ifdef sun 720219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 721219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 722219089Spjd int nblk; 723219089Spjd int blksz = zp->z_blksz; 724219089Spjd uint64_t offset = uio->uio_loffset; 725219089Spjd 726219089Spjd xuio = (xuio_t *)uio; 727219089Spjd if ((ISP2(blksz))) { 728219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 729219089Spjd blksz)) / blksz; 730219089Spjd } else { 731219089Spjd ASSERT(offset + n <= blksz); 732219089Spjd nblk = 1; 733219089Spjd } 734219089Spjd (void) dmu_xuio_init(xuio, nblk); 735219089Spjd 736219089Spjd if (vn_has_cached_data(vp)) { 737219089Spjd /* 738219089Spjd * For simplicity, we always allocate a full buffer 739219089Spjd * even if we only expect to read a portion of a block. 
740219089Spjd */ 741219089Spjd while (--nblk >= 0) { 742219089Spjd (void) dmu_xuio_add(xuio, 743219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 744219089Spjd blksz), 0, blksz); 745219089Spjd } 746219089Spjd } 747219089Spjd } 748219089Spjd#endif /* sun */ 749219089Spjd 750168404Spjd while (n > 0) { 751168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 752168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 753168404Spjd 754219089Spjd#ifdef __FreeBSD__ 755219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 756219089Spjd error = mappedread_sf(vp, nbytes, uio); 757219089Spjd else 758219089Spjd#endif /* __FreeBSD__ */ 759168404Spjd if (vn_has_cached_data(vp)) 760168404Spjd error = mappedread(vp, nbytes, uio); 761168404Spjd else 762168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 763185029Spjd if (error) { 764185029Spjd /* convert checksum errors into IO errors */ 765185029Spjd if (error == ECKSUM) 766249195Smm error = SET_ERROR(EIO); 767168404Spjd break; 768185029Spjd } 769168962Spjd 770168404Spjd n -= nbytes; 771168404Spjd } 772168404Spjdout: 773168404Spjd zfs_range_unlock(rl); 774168404Spjd 775168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 776168404Spjd ZFS_EXIT(zfsvfs); 777168404Spjd return (error); 778168404Spjd} 779168404Spjd 780168404Spjd/* 781168404Spjd * Write the bytes to a file. 782168404Spjd * 783168404Spjd * IN: vp - vnode of file to be written to. 784168404Spjd * uio - structure supplying write location, range info, 785168404Spjd * and data buffer. 786251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 787251631Sdelphij * set if in append mode. 788168404Spjd * cr - credentials of caller. 789185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 790168404Spjd * 791168404Spjd * OUT: uio - updated offset and range. 792168404Spjd * 793251631Sdelphij * RETURN: 0 on success, error code on failure. 
794168404Spjd * 795168404Spjd * Timestamps: 796168404Spjd * vp - ctime|mtime updated if byte count > 0 797168404Spjd */ 798219089Spjd 799168404Spjd/* ARGSUSED */ 800168404Spjdstatic int 801168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 802168404Spjd{ 803168404Spjd znode_t *zp = VTOZ(vp); 804168962Spjd rlim64_t limit = MAXOFFSET_T; 805168404Spjd ssize_t start_resid = uio->uio_resid; 806168404Spjd ssize_t tx_bytes; 807168404Spjd uint64_t end_size; 808168404Spjd dmu_tx_t *tx; 809168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 810185029Spjd zilog_t *zilog; 811168404Spjd offset_t woff; 812168404Spjd ssize_t n, nbytes; 813168404Spjd rl_t *rl; 814168404Spjd int max_blksz = zfsvfs->z_max_blksz; 815247187Smm int error = 0; 816209962Smm arc_buf_t *abuf; 817247187Smm iovec_t *aiov = NULL; 818219089Spjd xuio_t *xuio = NULL; 819219089Spjd int i_iov = 0; 820219089Spjd int iovcnt = uio->uio_iovcnt; 821219089Spjd iovec_t *iovp = uio->uio_iov; 822219089Spjd int write_eof; 823219089Spjd int count = 0; 824219089Spjd sa_bulk_attr_t bulk[4]; 825219089Spjd uint64_t mtime[2], ctime[2]; 826168404Spjd 827168404Spjd /* 828168404Spjd * Fasttrack empty write 829168404Spjd */ 830168404Spjd n = start_resid; 831168404Spjd if (n == 0) 832168404Spjd return (0); 833168404Spjd 834168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 835168962Spjd limit = MAXOFFSET_T; 836168962Spjd 837168404Spjd ZFS_ENTER(zfsvfs); 838185029Spjd ZFS_VERIFY_ZP(zp); 839168404Spjd 840219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 841219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 842219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 843219089Spjd &zp->z_size, 8); 844219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 845219089Spjd &zp->z_pflags, 8); 846219089Spjd 847168404Spjd /* 848185029Spjd * If immutable or not appending then return EPERM 849185029Spjd */ 
850219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 851219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 852219089Spjd (uio->uio_loffset < zp->z_size))) { 853185029Spjd ZFS_EXIT(zfsvfs); 854249195Smm return (SET_ERROR(EPERM)); 855185029Spjd } 856185029Spjd 857185029Spjd zilog = zfsvfs->z_log; 858185029Spjd 859185029Spjd /* 860219089Spjd * Validate file offset 861219089Spjd */ 862219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 863219089Spjd if (woff < 0) { 864219089Spjd ZFS_EXIT(zfsvfs); 865249195Smm return (SET_ERROR(EINVAL)); 866219089Spjd } 867219089Spjd 868219089Spjd /* 869219089Spjd * Check for mandatory locks before calling zfs_range_lock() 870219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 871219089Spjd */ 872219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 873219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 874219089Spjd ZFS_EXIT(zfsvfs); 875219089Spjd return (error); 876219089Spjd } 877219089Spjd 878219089Spjd#ifdef sun 879219089Spjd /* 880168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 881168404Spjd * don't hold up txg. 882219089Spjd * Skip this if uio contains loaned arc_buf. 883168404Spjd */ 884219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 885219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 886219089Spjd xuio = (xuio_t *)uio; 887219089Spjd else 888219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 889219089Spjd#endif /* sun */ 890168404Spjd 891168404Spjd /* 892168404Spjd * If in append mode, set the io offset pointer to eof. 893168404Spjd */ 894213673Spjd if (ioflag & FAPPEND) { 895168404Spjd /* 896219089Spjd * Obtain an appending range lock to guarantee file append 897219089Spjd * semantics. We reset the write offset once we have the lock. 
898168404Spjd */ 899168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 900219089Spjd woff = rl->r_off; 901168404Spjd if (rl->r_len == UINT64_MAX) { 902219089Spjd /* 903219089Spjd * We overlocked the file because this write will cause 904219089Spjd * the file block size to increase. 905219089Spjd * Note that zp_size cannot change with this lock held. 906219089Spjd */ 907219089Spjd woff = zp->z_size; 908168404Spjd } 909219089Spjd uio->uio_loffset = woff; 910168404Spjd } else { 911168404Spjd /* 912219089Spjd * Note that if the file block size will change as a result of 913219089Spjd * this write, then this range lock will lock the entire file 914219089Spjd * so that we can re-write the block safely. 915168404Spjd */ 916168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 917168404Spjd } 918168404Spjd 919235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 920235781Strasz zfs_range_unlock(rl); 921235781Strasz ZFS_EXIT(zfsvfs); 922235781Strasz return (EFBIG); 923235781Strasz } 924235781Strasz 925168962Spjd if (woff >= limit) { 926168962Spjd zfs_range_unlock(rl); 927168962Spjd ZFS_EXIT(zfsvfs); 928249195Smm return (SET_ERROR(EFBIG)); 929168962Spjd } 930168962Spjd 931168962Spjd if ((woff + n) > limit || woff > (limit - n)) 932168962Spjd n = limit - woff; 933168962Spjd 934219089Spjd /* Will this write extend the file length? */ 935219089Spjd write_eof = (woff + n > zp->z_size); 936168404Spjd 937219089Spjd end_size = MAX(zp->z_size, woff + n); 938219089Spjd 939168404Spjd /* 940168404Spjd * Write the file in reasonable size chunks. Each chunk is written 941168404Spjd * in a separate transaction; this keeps the intent log records small 942168404Spjd * and allows us to do more fine-grained space accounting. 
943168404Spjd */ 944168404Spjd while (n > 0) { 945209962Smm abuf = NULL; 946209962Smm woff = uio->uio_loffset; 947219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 948219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 949209962Smm if (abuf != NULL) 950209962Smm dmu_return_arcbuf(abuf); 951249195Smm error = SET_ERROR(EDQUOT); 952209962Smm break; 953209962Smm } 954209962Smm 955219089Spjd if (xuio && abuf == NULL) { 956219089Spjd ASSERT(i_iov < iovcnt); 957219089Spjd aiov = &iovp[i_iov]; 958219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 959219089Spjd dmu_xuio_clear(xuio, i_iov); 960219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 961219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 962219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 963219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 964219089Spjd aiov->iov_len == arc_buf_size(abuf))); 965219089Spjd i_iov++; 966219089Spjd } else if (abuf == NULL && n >= max_blksz && 967219089Spjd woff >= zp->z_size && 968209962Smm P2PHASE(woff, max_blksz) == 0 && 969209962Smm zp->z_blksz == max_blksz) { 970219089Spjd /* 971219089Spjd * This write covers a full block. "Borrow" a buffer 972219089Spjd * from the dmu so that we can fill it before we enter 973219089Spjd * a transaction. This avoids the possibility of 974219089Spjd * holding up the transaction if the data copy hangs 975219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 976219089Spjd */ 977209962Smm size_t cbytes; 978209962Smm 979219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 980219089Spjd max_blksz); 981209962Smm ASSERT(abuf != NULL); 982209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 983209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 984209962Smm UIO_WRITE, uio, &cbytes)) { 985209962Smm dmu_return_arcbuf(abuf); 986209962Smm break; 987209962Smm } 988209962Smm ASSERT(cbytes == max_blksz); 989209962Smm } 990209962Smm 991209962Smm /* 992168404Spjd * Start a transaction. 
993168404Spjd */ 994168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 995219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 996168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 997219089Spjd zfs_sa_upgrade_txholds(tx, zp); 998260776Savg error = dmu_tx_assign(tx, TXG_WAIT); 999168404Spjd if (error) { 1000168404Spjd dmu_tx_abort(tx); 1001209962Smm if (abuf != NULL) 1002209962Smm dmu_return_arcbuf(abuf); 1003168404Spjd break; 1004168404Spjd } 1005168404Spjd 1006168404Spjd /* 1007168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1008168404Spjd * and then reduce the lock range. This will only happen 1009168404Spjd * on the first iteration since zfs_range_reduce() will 1010168404Spjd * shrink down r_len to the appropriate size. 1011168404Spjd */ 1012168404Spjd if (rl->r_len == UINT64_MAX) { 1013168404Spjd uint64_t new_blksz; 1014168404Spjd 1015168404Spjd if (zp->z_blksz > max_blksz) { 1016168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1017168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 1018168404Spjd } else { 1019168404Spjd new_blksz = MIN(end_size, max_blksz); 1020168404Spjd } 1021168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1022168404Spjd zfs_range_reduce(rl, woff, n); 1023168404Spjd } 1024168404Spjd 1025168404Spjd /* 1026168404Spjd * XXX - should we really limit each write to z_max_blksz? 1027168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
1028168404Spjd */ 1029168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1030168404Spjd 1031219089Spjd if (woff + nbytes > zp->z_size) 1032168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1033168404Spjd 1034209962Smm if (abuf == NULL) { 1035209962Smm tx_bytes = uio->uio_resid; 1036219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1037219089Spjd uio, nbytes, tx); 1038209962Smm tx_bytes -= uio->uio_resid; 1039168404Spjd } else { 1040209962Smm tx_bytes = nbytes; 1041219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1042219089Spjd /* 1043219089Spjd * If this is not a full block write, but we are 1044219089Spjd * extending the file past EOF and this data starts 1045219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1046219089Spjd * write via dmu_write(). 1047219089Spjd */ 1048219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1049219089Spjd aiov->iov_base != abuf->b_data)) { 1050219089Spjd ASSERT(xuio); 1051219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1052219089Spjd aiov->iov_len, aiov->iov_base, tx); 1053219089Spjd dmu_return_arcbuf(abuf); 1054219089Spjd xuio_stat_wbuf_copied(); 1055219089Spjd } else { 1056219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1057219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1058219089Spjd woff, abuf, tx); 1059219089Spjd } 1060209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1061209962Smm uioskip(uio, tx_bytes); 1062168404Spjd } 1063212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1064209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1065209962Smm zp->z_id, uio->uio_segflg, tx); 1066209962Smm } 1067209962Smm 1068209962Smm /* 1069168404Spjd * If we made no progress, we're done. If we made even 1070168404Spjd * partial progress, update the znode and ZIL accordingly. 
1071168404Spjd */ 1072168404Spjd if (tx_bytes == 0) { 1073219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1074219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1075168404Spjd dmu_tx_commit(tx); 1076168404Spjd ASSERT(error != 0); 1077168404Spjd break; 1078168404Spjd } 1079168404Spjd 1080168404Spjd /* 1081168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1082168404Spjd * privileged and at least one of the excute bits is set. 1083168404Spjd * 1084168404Spjd * It would be nice to to this after all writes have 1085168404Spjd * been done, but that would still expose the ISUID/ISGID 1086168404Spjd * to another app after the partial write is committed. 1087185029Spjd * 1088185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1089185029Spjd * user 0 is not an ephemeral uid. 1090168404Spjd */ 1091168404Spjd mutex_enter(&zp->z_acl_lock); 1092219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1093168404Spjd (S_IXUSR >> 6))) != 0 && 1094219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1095185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1096219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1097219089Spjd uint64_t newmode; 1098219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1099219089Spjd newmode = zp->z_mode; 1100219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1101219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1102168404Spjd } 1103168404Spjd mutex_exit(&zp->z_acl_lock); 1104168404Spjd 1105219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1106219089Spjd B_TRUE); 1107168404Spjd 1108168404Spjd /* 1109168404Spjd * Update the file size (zp_size) if it has changed; 1110168404Spjd * account for possible concurrent updates. 
1111168404Spjd */ 1112219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1113219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1114168404Spjd uio->uio_loffset); 1115219089Spjd ASSERT(error == 0); 1116219089Spjd } 1117219089Spjd /* 1118219089Spjd * If we are replaying and eof is non zero then force 1119219089Spjd * the file size to the specified eof. Note, there's no 1120219089Spjd * concurrency during replay. 1121219089Spjd */ 1122219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1123219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1124219089Spjd 1125219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1126219089Spjd 1127168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1128168404Spjd dmu_tx_commit(tx); 1129168404Spjd 1130168404Spjd if (error != 0) 1131168404Spjd break; 1132168404Spjd ASSERT(tx_bytes == nbytes); 1133168404Spjd n -= nbytes; 1134219089Spjd 1135219089Spjd#ifdef sun 1136219089Spjd if (!xuio && n > 0) 1137219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1138219089Spjd#endif /* sun */ 1139168404Spjd } 1140168404Spjd 1141168404Spjd zfs_range_unlock(rl); 1142168404Spjd 1143168404Spjd /* 1144168404Spjd * If we're in replay mode, or we made no progress, return error. 1145168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
1146168404Spjd */ 1147209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1148168404Spjd ZFS_EXIT(zfsvfs); 1149168404Spjd return (error); 1150168404Spjd } 1151168404Spjd 1152219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1153219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1154219089Spjd zil_commit(zilog, zp->z_id); 1155168404Spjd 1156168404Spjd ZFS_EXIT(zfsvfs); 1157168404Spjd return (0); 1158168404Spjd} 1159168404Spjd 1160168404Spjdvoid 1161219089Spjdzfs_get_done(zgd_t *zgd, int error) 1162168404Spjd{ 1163219089Spjd znode_t *zp = zgd->zgd_private; 1164219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1165168404Spjd 1166219089Spjd if (zgd->zgd_db) 1167219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1168219089Spjd 1169219089Spjd zfs_range_unlock(zgd->zgd_rl); 1170219089Spjd 1171191900Skmacy /* 1172191900Skmacy * Release the vnode asynchronously as we currently have the 1173191900Skmacy * txg stopped from syncing. 1174191900Skmacy */ 1175219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1176219089Spjd 1177219089Spjd if (error == 0 && zgd->zgd_bp) 1178219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1179219089Spjd 1180168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1181168404Spjd} 1182168404Spjd 1183214378Smm#ifdef DEBUG 1184214378Smmstatic int zil_fault_io = 0; 1185214378Smm#endif 1186214378Smm 1187168404Spjd/* 1188168404Spjd * Get data to generate a TX_WRITE intent log record. 
1189168404Spjd */ 1190168404Spjdint 1191168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1192168404Spjd{ 1193168404Spjd zfsvfs_t *zfsvfs = arg; 1194168404Spjd objset_t *os = zfsvfs->z_os; 1195168404Spjd znode_t *zp; 1196219089Spjd uint64_t object = lr->lr_foid; 1197219089Spjd uint64_t offset = lr->lr_offset; 1198219089Spjd uint64_t size = lr->lr_length; 1199219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1200168404Spjd dmu_buf_t *db; 1201168404Spjd zgd_t *zgd; 1202168404Spjd int error = 0; 1203168404Spjd 1204219089Spjd ASSERT(zio != NULL); 1205219089Spjd ASSERT(size != 0); 1206168404Spjd 1207168404Spjd /* 1208168404Spjd * Nothing to do if the file has been removed 1209168404Spjd */ 1210219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1211249195Smm return (SET_ERROR(ENOENT)); 1212168404Spjd if (zp->z_unlinked) { 1213191900Skmacy /* 1214191900Skmacy * Release the vnode asynchronously as we currently have the 1215191900Skmacy * txg stopped from syncing. 1216191900Skmacy */ 1217196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1218196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1219249195Smm return (SET_ERROR(ENOENT)); 1220168404Spjd } 1221168404Spjd 1222219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1223219089Spjd zgd->zgd_zilog = zfsvfs->z_log; 1224219089Spjd zgd->zgd_private = zp; 1225219089Spjd 1226168404Spjd /* 1227168404Spjd * Write records come in two flavors: immediate and indirect. 1228168404Spjd * For small writes it's cheaper to store the data with the 1229168404Spjd * log record (immediate); for large writes it's cheaper to 1230168404Spjd * sync the data and get a pointer to it (indirect) so that 1231168404Spjd * we don't have to write the data twice. 
1232168404Spjd */ 1233168404Spjd if (buf != NULL) { /* immediate write */ 1234219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1235168404Spjd /* test for truncation needs to be done while range locked */ 1236219089Spjd if (offset >= zp->z_size) { 1237249195Smm error = SET_ERROR(ENOENT); 1238219089Spjd } else { 1239219089Spjd error = dmu_read(os, object, offset, size, buf, 1240219089Spjd DMU_READ_NO_PREFETCH); 1241168404Spjd } 1242219089Spjd ASSERT(error == 0 || error == ENOENT); 1243168404Spjd } else { /* indirect write */ 1244168404Spjd /* 1245168404Spjd * Have to lock the whole block to ensure when it's 1246168404Spjd * written out and it's checksum is being calculated 1247168404Spjd * that no one can change the data. We need to re-check 1248168404Spjd * blocksize after we get the lock in case it's changed! 1249168404Spjd */ 1250168404Spjd for (;;) { 1251219089Spjd uint64_t blkoff; 1252219089Spjd size = zp->z_blksz; 1253219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1254219089Spjd offset -= blkoff; 1255219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1256219089Spjd RL_READER); 1257219089Spjd if (zp->z_blksz == size) 1258168404Spjd break; 1259219089Spjd offset += blkoff; 1260219089Spjd zfs_range_unlock(zgd->zgd_rl); 1261168404Spjd } 1262168404Spjd /* test for truncation needs to be done while range locked */ 1263219089Spjd if (lr->lr_offset >= zp->z_size) 1264249195Smm error = SET_ERROR(ENOENT); 1265214378Smm#ifdef DEBUG 1266214378Smm if (zil_fault_io) { 1267249195Smm error = SET_ERROR(EIO); 1268214378Smm zil_fault_io = 0; 1269214378Smm } 1270214378Smm#endif 1271219089Spjd if (error == 0) 1272219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1273219089Spjd DMU_READ_NO_PREFETCH); 1274214378Smm 1275209962Smm if (error == 0) { 1276243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1277243524Smm if (obp) { 1278243524Smm ASSERT(BP_IS_HOLE(bp)); 1279243524Smm *bp = *obp; 1280243524Smm } 1281243524Smm 
1282219089Spjd zgd->zgd_db = db; 1283219089Spjd zgd->zgd_bp = bp; 1284219089Spjd 1285219089Spjd ASSERT(db->db_offset == offset); 1286219089Spjd ASSERT(db->db_size == size); 1287219089Spjd 1288219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1289219089Spjd zfs_get_done, zgd); 1290219089Spjd ASSERT(error || lr->lr_length <= zp->z_blksz); 1291219089Spjd 1292209962Smm /* 1293219089Spjd * On success, we need to wait for the write I/O 1294219089Spjd * initiated by dmu_sync() to complete before we can 1295219089Spjd * release this dbuf. We will finish everything up 1296219089Spjd * in the zfs_get_done() callback. 1297209962Smm */ 1298219089Spjd if (error == 0) 1299219089Spjd return (0); 1300209962Smm 1301219089Spjd if (error == EALREADY) { 1302219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1303219089Spjd error = 0; 1304219089Spjd } 1305209962Smm } 1306168404Spjd } 1307219089Spjd 1308219089Spjd zfs_get_done(zgd, error); 1309219089Spjd 1310168404Spjd return (error); 1311168404Spjd} 1312168404Spjd 1313168404Spjd/*ARGSUSED*/ 1314168404Spjdstatic int 1315185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1316185029Spjd caller_context_t *ct) 1317168404Spjd{ 1318168404Spjd znode_t *zp = VTOZ(vp); 1319168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1320168404Spjd int error; 1321168404Spjd 1322168404Spjd ZFS_ENTER(zfsvfs); 1323185029Spjd ZFS_VERIFY_ZP(zp); 1324185029Spjd 1325185029Spjd if (flag & V_ACE_MASK) 1326185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1327185029Spjd else 1328185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1329185029Spjd 1330168404Spjd ZFS_EXIT(zfsvfs); 1331168404Spjd return (error); 1332168404Spjd} 1333168404Spjd 1334168404Spjd/* 1335211932Smm * If vnode is for a device return a specfs vnode instead. 
1336211932Smm */ 1337211932Smmstatic int 1338211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1339211932Smm{ 1340211932Smm int error = 0; 1341211932Smm 1342211932Smm if (IS_DEVVP(*vpp)) { 1343211932Smm struct vnode *svp; 1344211932Smm 1345211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1346211932Smm VN_RELE(*vpp); 1347211932Smm if (svp == NULL) 1348249195Smm error = SET_ERROR(ENOSYS); 1349211932Smm *vpp = svp; 1350211932Smm } 1351211932Smm return (error); 1352211932Smm} 1353211932Smm 1354211932Smm 1355211932Smm/* 1356168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1357168404Spjd * If it exists, return a held vnode reference for it. 1358168404Spjd * 1359168404Spjd * IN: dvp - vnode of directory to search. 1360168404Spjd * nm - name of entry to lookup. 1361168404Spjd * pnp - full pathname to lookup [UNUSED]. 1362168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1363168404Spjd * rdir - root directory vnode [UNUSED]. 1364168404Spjd * cr - credentials of caller. 1365185029Spjd * ct - caller context 1366185029Spjd * direntflags - directory lookup flags 1367185029Spjd * realpnp - returned pathname. 1368168404Spjd * 1369168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1370168404Spjd * 1371251631Sdelphij * RETURN: 0 on success, error code on failure. 
1372168404Spjd * 1373168404Spjd * Timestamps: 1374168404Spjd * NA 1375168404Spjd */ 1376168404Spjd/* ARGSUSED */ 1377168962Spjdstatic int 1378168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1379185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1380168404Spjd{ 1381168962Spjd znode_t *zdp = VTOZ(dvp); 1382168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1383211932Smm int error = 0; 1384185029Spjd int *direntflags = NULL; 1385185029Spjd void *realpnp = NULL; 1386168404Spjd 1387211932Smm /* fast path */ 1388211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1389211932Smm 1390211932Smm if (dvp->v_type != VDIR) { 1391249195Smm return (SET_ERROR(ENOTDIR)); 1392219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1393249195Smm return (SET_ERROR(EIO)); 1394211932Smm } 1395211932Smm 1396211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1397211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1398211932Smm if (!error) { 1399211932Smm *vpp = dvp; 1400211932Smm VN_HOLD(*vpp); 1401211932Smm return (0); 1402211932Smm } 1403211932Smm return (error); 1404211932Smm } else { 1405211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1406211932Smm 1407211932Smm if (tvp) { 1408211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1409211932Smm if (error) { 1410211932Smm VN_RELE(tvp); 1411211932Smm return (error); 1412211932Smm } 1413211932Smm if (tvp == DNLC_NO_VNODE) { 1414211932Smm VN_RELE(tvp); 1415249195Smm return (SET_ERROR(ENOENT)); 1416211932Smm } else { 1417211932Smm *vpp = tvp; 1418211932Smm return (specvp_check(vpp, cr)); 1419211932Smm } 1420211932Smm } 1421211932Smm } 1422211932Smm } 1423211932Smm 1424211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1425211932Smm 1426168404Spjd ZFS_ENTER(zfsvfs); 1427185029Spjd ZFS_VERIFY_ZP(zdp); 1428168404Spjd 1429168404Spjd *vpp = NULL; 1430168404Spjd 1431185029Spjd if (flags & LOOKUP_XATTR) { 1432168404Spjd#ifdef TODO 1433168404Spjd /* 1434168404Spjd * 
If the xattr property is off, refuse the lookup request. 1435168404Spjd */ 1436168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1437168404Spjd ZFS_EXIT(zfsvfs); 1438249195Smm return (SET_ERROR(EINVAL)); 1439168404Spjd } 1440185029Spjd#endif 1441168404Spjd 1442168404Spjd /* 1443168404Spjd * We don't allow recursive attributes.. 1444168404Spjd * Maybe someday we will. 1445168404Spjd */ 1446219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1447168404Spjd ZFS_EXIT(zfsvfs); 1448249195Smm return (SET_ERROR(EINVAL)); 1449168404Spjd } 1450168404Spjd 1451168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1452168404Spjd ZFS_EXIT(zfsvfs); 1453168404Spjd return (error); 1454168404Spjd } 1455168404Spjd 1456168404Spjd /* 1457168404Spjd * Do we have permission to get into attribute directory? 1458168404Spjd */ 1459168404Spjd 1460185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1461185029Spjd B_FALSE, cr)) { 1462168404Spjd VN_RELE(*vpp); 1463185029Spjd *vpp = NULL; 1464168404Spjd } 1465168404Spjd 1466168404Spjd ZFS_EXIT(zfsvfs); 1467168404Spjd return (error); 1468168404Spjd } 1469168404Spjd 1470168404Spjd if (dvp->v_type != VDIR) { 1471168404Spjd ZFS_EXIT(zfsvfs); 1472249195Smm return (SET_ERROR(ENOTDIR)); 1473168404Spjd } 1474168404Spjd 1475168404Spjd /* 1476168404Spjd * Check accessibility of directory. 
1477168404Spjd */ 1478168404Spjd 1479185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1480168404Spjd ZFS_EXIT(zfsvfs); 1481168404Spjd return (error); 1482168404Spjd } 1483168404Spjd 1484185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1485185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1486185029Spjd ZFS_EXIT(zfsvfs); 1487249195Smm return (SET_ERROR(EILSEQ)); 1488185029Spjd } 1489168404Spjd 1490185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1491211932Smm if (error == 0) 1492211932Smm error = specvp_check(vpp, cr); 1493168962Spjd 1494168404Spjd /* Translate errors and add SAVENAME when needed. */ 1495168404Spjd if (cnp->cn_flags & ISLASTCN) { 1496168404Spjd switch (nameiop) { 1497168404Spjd case CREATE: 1498168404Spjd case RENAME: 1499168404Spjd if (error == ENOENT) { 1500168404Spjd error = EJUSTRETURN; 1501168404Spjd cnp->cn_flags |= SAVENAME; 1502168404Spjd break; 1503168404Spjd } 1504168404Spjd /* FALLTHROUGH */ 1505168404Spjd case DELETE: 1506168404Spjd if (error == 0) 1507168404Spjd cnp->cn_flags |= SAVENAME; 1508168404Spjd break; 1509168404Spjd } 1510168404Spjd } 1511168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1512169198Spjd int ltype = 0; 1513169198Spjd 1514169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1515176559Sattilio ltype = VOP_ISLOCKED(dvp); 1516175294Sattilio VOP_UNLOCK(dvp, 0); 1517169198Spjd } 1518206667Spjd ZFS_EXIT(zfsvfs); 1519254711Savg error = vn_lock(*vpp, cnp->cn_lkflags); 1520168962Spjd if (cnp->cn_flags & ISDOTDOT) 1521175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1522169172Spjd if (error != 0) { 1523169172Spjd VN_RELE(*vpp); 1524169172Spjd *vpp = NULL; 1525169172Spjd return (error); 1526169172Spjd } 1527206667Spjd } else { 1528206667Spjd ZFS_EXIT(zfsvfs); 1529168404Spjd } 1530168404Spjd 1531168404Spjd#ifdef FREEBSD_NAMECACHE 1532168404Spjd /* 1533168404Spjd * Insert name into cache (as non-existent) if appropriate. 
1534168404Spjd */ 1535168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1536168404Spjd cache_enter(dvp, *vpp, cnp); 1537169170Spjd /* 1538169170Spjd * Insert name into cache if appropriate. 1539169170Spjd */ 1540168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1541168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1542168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1543168404Spjd cache_enter(dvp, *vpp, cnp); 1544168404Spjd } 1545168404Spjd } 1546168404Spjd#endif 1547168404Spjd 1548168404Spjd return (error); 1549168404Spjd} 1550168404Spjd 1551168404Spjd/* 1552168404Spjd * Attempt to create a new entry in a directory. If the entry 1553168404Spjd * already exists, truncate the file if permissible, else return 1554168404Spjd * an error. Return the vp of the created or trunc'd file. 1555168404Spjd * 1556168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1557168404Spjd * name - name of new file entry. 1558168404Spjd * vap - attributes of new file. 1559168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1560168404Spjd * mode - mode to open file with. 1561168404Spjd * cr - credentials of caller. 1562168404Spjd * flag - large file flag [UNUSED]. 1563185029Spjd * ct - caller context 1564269002Sdelphij * vsecp - ACL to be set 1565168404Spjd * 1566168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1567168404Spjd * 1568251631Sdelphij * RETURN: 0 on success, error code on failure. 
1569168404Spjd * 1570168404Spjd * Timestamps: 1571168404Spjd * dvp - ctime|mtime updated if new entry created 1572168404Spjd * vp - ctime|mtime always, atime if new 1573168404Spjd */ 1574185029Spjd 1575168404Spjd/* ARGSUSED */ 1576168404Spjdstatic int 1577168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1578185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1579168404Spjd{ 1580168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1581168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1582185029Spjd zilog_t *zilog; 1583185029Spjd objset_t *os; 1584168404Spjd zfs_dirlock_t *dl; 1585168404Spjd dmu_tx_t *tx; 1586168404Spjd int error; 1587209962Smm ksid_t *ksid; 1588209962Smm uid_t uid; 1589209962Smm gid_t gid = crgetgid(cr); 1590219089Spjd zfs_acl_ids_t acl_ids; 1591209962Smm boolean_t fuid_dirtied; 1592219089Spjd boolean_t have_acl = B_FALSE; 1593260763Savg boolean_t waited = B_FALSE; 1594185029Spjd void *vsecp = NULL; 1595185029Spjd int flag = 0; 1596168404Spjd 1597185029Spjd /* 1598185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1599185029Spjd * make sure file system is at proper version 1600185029Spjd */ 1601185029Spjd 1602209962Smm ksid = crgetsid(cr, KSID_OWNER); 1603209962Smm if (ksid) 1604209962Smm uid = ksid_getid(ksid); 1605209962Smm else 1606209962Smm uid = crgetuid(cr); 1607219089Spjd 1608185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1609185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1610219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1611249195Smm return (SET_ERROR(EINVAL)); 1612185029Spjd 1613168404Spjd ZFS_ENTER(zfsvfs); 1614185029Spjd ZFS_VERIFY_ZP(dzp); 1615185029Spjd os = zfsvfs->z_os; 1616185029Spjd zilog = zfsvfs->z_log; 1617168404Spjd 1618185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1619185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1620185029Spjd ZFS_EXIT(zfsvfs); 1621249195Smm return (SET_ERROR(EILSEQ)); 1622185029Spjd } 1623185029Spjd 1624185029Spjd if (vap->va_mask & AT_XVATTR) { 
1625197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1626185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1627185029Spjd ZFS_EXIT(zfsvfs); 1628185029Spjd return (error); 1629185029Spjd } 1630185029Spjd } 1631262112Savg 1632262112Savg getnewvnode_reserve(1); 1633262112Savg 1634168404Spjdtop: 1635168404Spjd *vpp = NULL; 1636168404Spjd 1637182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1638182905Strasz vap->va_mode &= ~S_ISVTX; 1639168404Spjd 1640168404Spjd if (*name == '\0') { 1641168404Spjd /* 1642168404Spjd * Null component name refers to the directory itself. 1643168404Spjd */ 1644168404Spjd VN_HOLD(dvp); 1645168404Spjd zp = dzp; 1646168404Spjd dl = NULL; 1647168404Spjd error = 0; 1648168404Spjd } else { 1649168404Spjd /* possible VN_HOLD(zp) */ 1650185029Spjd int zflg = 0; 1651185029Spjd 1652185029Spjd if (flag & FIGNORECASE) 1653185029Spjd zflg |= ZCILOOK; 1654185029Spjd 1655185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1656185029Spjd NULL, NULL); 1657185029Spjd if (error) { 1658219089Spjd if (have_acl) 1659219089Spjd zfs_acl_ids_free(&acl_ids); 1660168404Spjd if (strcmp(name, "..") == 0) 1661249195Smm error = SET_ERROR(EISDIR); 1662262112Savg getnewvnode_drop_reserve(); 1663168404Spjd ZFS_EXIT(zfsvfs); 1664168404Spjd return (error); 1665168404Spjd } 1666168404Spjd } 1667219089Spjd 1668185029Spjd if (zp == NULL) { 1669185029Spjd uint64_t txtype; 1670168404Spjd 1671168404Spjd /* 1672168404Spjd * Create a new file object and update the directory 1673168404Spjd * to reference it. 1674168404Spjd */ 1675185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1676219089Spjd if (have_acl) 1677219089Spjd zfs_acl_ids_free(&acl_ids); 1678168404Spjd goto out; 1679168404Spjd } 1680168404Spjd 1681168404Spjd /* 1682168404Spjd * We only support the creation of regular files in 1683168404Spjd * extended attribute directories. 
1684168404Spjd */ 1685219089Spjd 1686219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1687168404Spjd (vap->va_type != VREG)) { 1688219089Spjd if (have_acl) 1689219089Spjd zfs_acl_ids_free(&acl_ids); 1690249195Smm error = SET_ERROR(EINVAL); 1691168404Spjd goto out; 1692168404Spjd } 1693168404Spjd 1694219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1695219089Spjd cr, vsecp, &acl_ids)) != 0) 1696219089Spjd goto out; 1697219089Spjd have_acl = B_TRUE; 1698209962Smm 1699209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1700211932Smm zfs_acl_ids_free(&acl_ids); 1701249195Smm error = SET_ERROR(EDQUOT); 1702209962Smm goto out; 1703209962Smm } 1704209962Smm 1705168404Spjd tx = dmu_tx_create(os); 1706219089Spjd 1707219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1708219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1709219089Spjd 1710209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1711209962Smm if (fuid_dirtied) 1712209962Smm zfs_fuid_txhold(zfsvfs, tx); 1713168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1714219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1715219089Spjd if (!zfsvfs->z_use_sa && 1716219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1717168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1718219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1719185029Spjd } 1720260763Savg error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 1721168404Spjd if (error) { 1722168404Spjd zfs_dirent_unlock(dl); 1723209962Smm if (error == ERESTART) { 1724260763Savg waited = B_TRUE; 1725168404Spjd dmu_tx_wait(tx); 1726168404Spjd dmu_tx_abort(tx); 1727168404Spjd goto top; 1728168404Spjd } 1729219089Spjd zfs_acl_ids_free(&acl_ids); 1730168404Spjd dmu_tx_abort(tx); 1731262112Savg getnewvnode_drop_reserve(); 1732168404Spjd ZFS_EXIT(zfsvfs); 1733168404Spjd return (error); 1734168404Spjd } 1735219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1736209962Smm 1737209962Smm if (fuid_dirtied) 1738209962Smm zfs_fuid_sync(zfsvfs, tx); 1739209962Smm 1740168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1741185029Spjd txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1742185029Spjd if (flag & FIGNORECASE) 1743185029Spjd txtype |= TX_CI; 1744185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1745209962Smm vsecp, acl_ids.z_fuidp, vap); 1746209962Smm zfs_acl_ids_free(&acl_ids); 1747168404Spjd dmu_tx_commit(tx); 1748168404Spjd } else { 1749185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1750185029Spjd 1751219089Spjd if (have_acl) 1752219089Spjd zfs_acl_ids_free(&acl_ids); 1753219089Spjd have_acl = B_FALSE; 1754219089Spjd 1755168404Spjd /* 1756168404Spjd * A directory entry already exists for this name. 1757168404Spjd */ 1758168404Spjd /* 1759168962Spjd * Can't truncate an existing file if in exclusive mode. 1760168962Spjd */ 1761168962Spjd if (excl == EXCL) { 1762249195Smm error = SET_ERROR(EEXIST); 1763168962Spjd goto out; 1764168962Spjd } 1765168962Spjd /* 1766168404Spjd * Can't open a directory for writing. 1767168404Spjd */ 1768168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1769249195Smm error = SET_ERROR(EISDIR); 1770168404Spjd goto out; 1771168404Spjd } 1772168404Spjd /* 1773168404Spjd * Verify requested access to file. 
1774168404Spjd */ 1775185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1776168404Spjd goto out; 1777168404Spjd } 1778168404Spjd 1779168404Spjd mutex_enter(&dzp->z_lock); 1780168404Spjd dzp->z_seq++; 1781168404Spjd mutex_exit(&dzp->z_lock); 1782168404Spjd 1783168404Spjd /* 1784168404Spjd * Truncate regular files if requested. 1785168404Spjd */ 1786168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1787168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1788185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1789185029Spjd zfs_dirent_unlock(dl); 1790185029Spjd dl = NULL; 1791168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1792185029Spjd if (error == 0) { 1793185029Spjd vnevent_create(ZTOV(zp), ct); 1794168404Spjd } 1795168404Spjd } 1796168404Spjd } 1797168404Spjdout: 1798262112Savg getnewvnode_drop_reserve(); 1799168404Spjd if (dl) 1800168404Spjd zfs_dirent_unlock(dl); 1801168404Spjd 1802168404Spjd if (error) { 1803168404Spjd if (zp) 1804168404Spjd VN_RELE(ZTOV(zp)); 1805168962Spjd } else { 1806168962Spjd *vpp = ZTOV(zp); 1807211932Smm error = specvp_check(vpp, cr); 1808168404Spjd } 1809168404Spjd 1810219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1811219089Spjd zil_commit(zilog, 0); 1812219089Spjd 1813168404Spjd ZFS_EXIT(zfsvfs); 1814168404Spjd return (error); 1815168404Spjd} 1816168404Spjd 1817168404Spjd/* 1818168404Spjd * Remove an entry from a directory. 1819168404Spjd * 1820168404Spjd * IN: dvp - vnode of directory to remove entry from. 1821168404Spjd * name - name of entry to remove. 1822168404Spjd * cr - credentials of caller. 1823185029Spjd * ct - caller context 1824185029Spjd * flags - case flags 1825168404Spjd * 1826251631Sdelphij * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */

/*
 * Zero object id.  zfs_remove() writes it as the SA_ZPL_XATTR value when it
 * detaches the xattr directory from a znode whose z_is_sa is not set.
 */
uint64_t null_xattr = 0;

/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	znode_t		*xzp;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	acl_obj, xattr_obj;
	uint64_t	xattr_obj_unlinked = 0;
	uint64_t	obj = 0;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	boolean_t	may_delete_now, delete_now = FALSE;
	boolean_t	unlinked, toobig = FALSE;
	uint64_t	txtype;
	pathname_t	*realnmp = NULL;
	pathname_t	realnm;
	int		error;
	int		zflg = ZEXISTS;
	boolean_t	waited = B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/*
	 * For a case-insensitive request, pass a pathname buffer to
	 * zfs_dirent_lock(); its pn_buf is what gets handed to dnlc_remove()
	 * below instead of the caller-supplied name.
	 */
	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

	/*
	 * Retry point: we come back here when dmu_tx_assign() returns
	 * ERESTART.  Per-attempt state is reset before locking again.
	 */
top:
	xattr_obj = 0;
	xzp = NULL;
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	/*
	 * Sample, under the vnode interlock, whether ours is the only
	 * reference and there is no cached data.  The same conditions are
	 * checked again after the link has been destroyed.
	 */
	VI_LOCK(vp);
	may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp);
	VI_UNLOCK(vp);

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	/* Saved for zfs_log_remove(), which runs after zp may be deleted. */
	obj = zp->z_id;
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	if (may_delete_now) {
		toobig =
		    zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		/*
		 * NOTE(review): xzp is dereferenced right after this with no
		 * explicit error check.  If ASSERT0() compiles away in
		 * non-debug builds, a zfs_zget() failure would go unnoticed
		 * here -- confirm.
		 */
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	/* acl_obj is remembered so it can be compared again further down. */
	mutex_enter(&zp->z_lock);
	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	mutex_exit(&zp->z_lock);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of
	 * space, unless object removal will be delayed indefinitely
	 * (due to active holds on the vnode due to the file being open).
	 */
	if (may_delete_now)
		dmu_tx_mark_netfree(tx);

	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		/*
		 * Undo this attempt: drop the dirent lock and the references
		 * on vp and xzp.  On ERESTART, call dmu_tx_wait() and start
		 * over from top, this time assigning with TXG_WAITED.
		 */
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (xzp)
			VN_RELE(ZTOV(xzp));
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		/* The tx is already assigned here; commit it and bail out. */
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * Hold z_lock so that we can make sure that the ACL obj
		 * hasn't changed.  Could have been deleted due to
		 * zfs_sa_upgrade().
		 */
		mutex_enter(&zp->z_lock);
		VI_LOCK(vp);
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
		/*
		 * Delete immediately only if everything sampled before the
		 * tx was assigned (reference count, cached data, xattr
		 * object, external ACL object) is still the same.
		 */
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
		    acl_obj;
		VI_UNLOCK(vp);
	}

	if (delete_now) {
		/* FreeBSD builds treat reaching this branch as fatal. */
#ifdef __FreeBSD__
		panic("zfs_remove: delete_now branch taken");
#endif
		if (xattr_obj_unlinked) {
			/*
			 * Unlink the xattr directory as well: zero its link
			 * count, add it to the unlinked set, and clear the
			 * xattr reference on zp.
			 */
			ASSERT3U(xzp->z_links, ==, 2);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_links = 0;
			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
			    &xzp->z_links, sizeof (xzp->z_links), tx);
			ASSERT3U(error, ==, 0);
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);

			if (zp->z_is_sa)
				error = sa_remove(zp->z_sa_hdl,
				    SA_ZPL_XATTR(zfsvfs), tx);
			else
				error = sa_update(zp->z_sa_hdl,
				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
				    sizeof (uint64_t), tx);
			ASSERT0(error);
		}
		/*
		 * Drop our reference by hand; the out: path skips VN_RELE()
		 * when delete_now is set.
		 */
		VI_LOCK(vp);
		vp->v_count--;
		ASSERT0(vp->v_count);
		VI_UNLOCK(vp);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		/* z_lock was taken in the "if (unlinked)" block above. */
		mutex_exit(&zp->z_lock);
		zfs_unlinked_add(zp, tx);
#ifdef __FreeBSD__
		vp->v_vflag |= VV_NOSYNC;
#endif
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);

	dmu_tx_commit(tx);
out:
	/* Common exit, reached with the dirent lock still held. */
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);

	if (!delete_now)
		VN_RELE(vp);
	if (xzp)
		VN_RELE(ZTOV(xzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 *	IN:	dvp	- vnode of directory to add subdir to.
 *		dirname	- name of new directory.
 *		vap	- attributes of new directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created directory.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
static int
zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
    caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	int		zf = ZNEW;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	boolean_t	waited = B_FALSE;

	ASSERT(vap->va_type == VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	/* Owner id: taken from the credential's SID if any, else the uid. */
	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	/* This return precedes ZFS_ENTER, so there is no ZFS_EXIT here. */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Subdirectories may not be created in an xattr directory. */
	if (dzp->z_pflags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * "error" only serves as u8_validate()'s out-parameter here; EILSEQ
	 * is returned whatever value it stored.
	 */
	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * acl_ids is built once, before the retry label, and reused across
	 * retries.  Every return from here on frees it.
	 */
	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    vsecp, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Every return below is paired with getnewvnode_drop_reserve(). */
	getnewvnode_reserve(1);

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
top:
	*vpp = NULL;

	if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
	    NULL, NULL)) {
		zfs_acl_ids_free(&acl_ids);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		/*
		 * On ERESTART only the dirent lock and the tx are dropped;
		 * acl_ids and the vnode reservation are kept for the retry,
		 * which assigns with TXG_WAITED.
		 */
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Now put new name in parent dir.
	 */
	/* The return value of zfs_link_create() is deliberately ignored. */
	(void) zfs_link_create(dl, zp, tx, ZNEW);

	*vpp = ZTOV(zp);

	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	zfs_dirent_unlock(dl);

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 *	IN:	dvp	- vnode of directory to remove from.
 *		name	- name of directory to be removed.
 *		cwd	- vnode of current working directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	int		zflg = ZEXISTS;
	boolean_t	waited = B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
	/* Retry point: re-entered when dmu_tx_assign() returns ERESTART. */
top:
	zp = NULL;

	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/* Only directories may be removed through this entry point. */
	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	/* Refuse to remove the caller's current working directory. */
	if (vp == cwd) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	vnevent_rmdir(vp, dvp, name, ct);

	/*
	 * Grab a lock on the directory to make sure that no one is
	 * trying to add (or lookup) entries while we are removing it.
	 */
	rw_enter(&zp->z_name_lock, RW_WRITER);

	/*
	 * Grab a lock on the parent pointer to make sure we play well
	 * with the treewalk and directory rename code.
	 */
	rw_enter(&zp->z_parent_lock, RW_WRITER);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		/*
		 * Release the two rwlocks in reverse order of acquisition,
		 * then the dirent lock and the vnode reference.  On ERESTART,
		 * call dmu_tx_wait() and retry from top with TXG_WAITED.
		 */
		rw_exit(&zp->z_parent_lock);
		rw_exit(&zp->z_name_lock);
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

#ifdef FREEBSD_NAMECACHE
	cache_purge(dvp);
#endif

	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);

	/*
	 * The removal is logged only if the link was destroyed; the tx is
	 * committed in either case.
	 */
	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
	}

	dmu_tx_commit(tx);

	rw_exit(&zp->z_parent_lock);
	rw_exit(&zp->z_name_lock);
#ifdef FREEBSD_NAMECACHE
	cache_purge(vp);
#endif
out:
	/* Common exit, reached with the dirent lock and vp reference held. */
	zfs_dirent_unlock(dl);

	VN_RELE(vp);

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure).
 *
 *	IN:	vp	- vnode of directory to read.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		cr	- credentials of caller.
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *		eofp	- set to true if end-of-file detected.
 *		ncookies - if not NULL, set to the number of cookies stored.
 *		cookies	- if ncookies is not NULL, set to an allocated array
 *			  holding, for each entry, the offset that follows it.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
2415168404Spjd */ 2416168404Spjd/* ARGSUSED */ 2417168404Spjdstatic int 2418168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2419168404Spjd{ 2420168404Spjd znode_t *zp = VTOZ(vp); 2421168404Spjd iovec_t *iovp; 2422185029Spjd edirent_t *eodp; 2423168404Spjd dirent64_t *odp; 2424168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2425168404Spjd objset_t *os; 2426168404Spjd caddr_t outbuf; 2427168404Spjd size_t bufsize; 2428168404Spjd zap_cursor_t zc; 2429168404Spjd zap_attribute_t zap; 2430168404Spjd uint_t bytes_wanted; 2431168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2432219089Spjd uint64_t parent; 2433168404Spjd int local_eof; 2434168404Spjd int outcount; 2435168404Spjd int error; 2436168404Spjd uint8_t prefetch; 2437185029Spjd boolean_t check_sysattrs; 2438168404Spjd uint8_t type; 2439168962Spjd int ncooks; 2440168962Spjd u_long *cooks = NULL; 2441185029Spjd int flags = 0; 2442168404Spjd 2443168404Spjd ZFS_ENTER(zfsvfs); 2444185029Spjd ZFS_VERIFY_ZP(zp); 2445168404Spjd 2446219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2447219089Spjd &parent, sizeof (parent))) != 0) { 2448219089Spjd ZFS_EXIT(zfsvfs); 2449219089Spjd return (error); 2450219089Spjd } 2451219089Spjd 2452168404Spjd /* 2453168404Spjd * If we are not given an eof variable, 2454168404Spjd * use a local one. 2455168404Spjd */ 2456168404Spjd if (eofp == NULL) 2457168404Spjd eofp = &local_eof; 2458168404Spjd 2459168404Spjd /* 2460168404Spjd * Check for valid iov_len. 
2461168404Spjd */ 2462168404Spjd if (uio->uio_iov->iov_len <= 0) { 2463168404Spjd ZFS_EXIT(zfsvfs); 2464249195Smm return (SET_ERROR(EINVAL)); 2465168404Spjd } 2466168404Spjd 2467168404Spjd /* 2468168404Spjd * Quit if directory has been removed (posix) 2469168404Spjd */ 2470168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2471168404Spjd ZFS_EXIT(zfsvfs); 2472168404Spjd return (0); 2473168404Spjd } 2474168404Spjd 2475168404Spjd error = 0; 2476168404Spjd os = zfsvfs->z_os; 2477168404Spjd offset = uio->uio_loffset; 2478168404Spjd prefetch = zp->z_zn_prefetch; 2479168404Spjd 2480168404Spjd /* 2481168404Spjd * Initialize the iterator cursor. 2482168404Spjd */ 2483168404Spjd if (offset <= 3) { 2484168404Spjd /* 2485168404Spjd * Start iteration from the beginning of the directory. 2486168404Spjd */ 2487168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2488168404Spjd } else { 2489168404Spjd /* 2490168404Spjd * The offset is a serialized cursor. 2491168404Spjd */ 2492168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2493168404Spjd } 2494168404Spjd 2495168404Spjd /* 2496168404Spjd * Get space to change directory entries into fs independent format. 2497168404Spjd */ 2498168404Spjd iovp = uio->uio_iov; 2499168404Spjd bytes_wanted = iovp->iov_len; 2500168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2501168404Spjd bufsize = bytes_wanted; 2502168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2503168404Spjd odp = (struct dirent64 *)outbuf; 2504168404Spjd } else { 2505168404Spjd bufsize = bytes_wanted; 2506247187Smm outbuf = NULL; 2507168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2508168404Spjd } 2509185029Spjd eodp = (struct edirent *)odp; 2510168404Spjd 2511169170Spjd if (ncookies != NULL) { 2512168404Spjd /* 2513168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2514168404Spjd */ 2515168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2516219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2517219404Spjd *cookies = cooks; 2518168962Spjd *ncookies = ncooks; 2519168404Spjd } 2520185029Spjd /* 2521185029Spjd * If this VFS supports the system attribute view interface; and 2522185029Spjd * we're looking at an extended attribute directory; and we care 2523185029Spjd * about normalization conflicts on this vfs; then we must check 2524185029Spjd * for normalization conflicts with the sysattr name space. 2525185029Spjd */ 2526185029Spjd#ifdef TODO 2527185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2528185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2529185029Spjd (flags & V_RDDIR_ENTFLAGS); 2530185029Spjd#else 2531185029Spjd check_sysattrs = 0; 2532185029Spjd#endif 2533168404Spjd 2534168404Spjd /* 2535168404Spjd * Transform to file-system independent format 2536168404Spjd */ 2537168404Spjd outcount = 0; 2538168404Spjd while (outcount < bytes_wanted) { 2539168404Spjd ino64_t objnum; 2540168404Spjd ushort_t reclen; 2541219089Spjd off64_t *next = NULL; 2542168404Spjd 2543168404Spjd /* 2544168404Spjd * Special case `.', `..', and `.zfs'. 
2545168404Spjd */ 2546168404Spjd if (offset == 0) { 2547168404Spjd (void) strcpy(zap.za_name, "."); 2548185029Spjd zap.za_normalization_conflict = 0; 2549168404Spjd objnum = zp->z_id; 2550169108Spjd type = DT_DIR; 2551168404Spjd } else if (offset == 1) { 2552168404Spjd (void) strcpy(zap.za_name, ".."); 2553185029Spjd zap.za_normalization_conflict = 0; 2554219089Spjd objnum = parent; 2555169108Spjd type = DT_DIR; 2556168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2557168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2558185029Spjd zap.za_normalization_conflict = 0; 2559168404Spjd objnum = ZFSCTL_INO_ROOT; 2560169108Spjd type = DT_DIR; 2561168404Spjd } else { 2562168404Spjd /* 2563168404Spjd * Grab next entry. 2564168404Spjd */ 2565168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2566168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2567168404Spjd break; 2568168404Spjd else 2569168404Spjd goto update; 2570168404Spjd } 2571168404Spjd 2572168404Spjd if (zap.za_integer_length != 8 || 2573168404Spjd zap.za_num_integers != 1) { 2574168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2575168404Spjd "entry, obj = %lld, offset = %lld\n", 2576168404Spjd (u_longlong_t)zp->z_id, 2577168404Spjd (u_longlong_t)offset); 2578249195Smm error = SET_ERROR(ENXIO); 2579168404Spjd goto update; 2580168404Spjd } 2581168404Spjd 2582168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2583168404Spjd /* 2584168404Spjd * MacOS X can extract the object type here such as: 2585168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2586168404Spjd */ 2587168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2588185029Spjd 2589185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2590185029Spjd#ifdef TODO 2591185029Spjd zap.za_normalization_conflict = 2592185029Spjd xattr_sysattr_casechk(zap.za_name); 2593185029Spjd#else 2594185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2595185029Spjd#endif 2596185029Spjd } 2597168404Spjd } 
2598168404Spjd 2599211932Smm if (flags & V_RDDIR_ACCFILTER) { 2600211932Smm /* 2601211932Smm * If we have no access at all, don't include 2602211932Smm * this entry in the returned information 2603211932Smm */ 2604211932Smm znode_t *ezp; 2605211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2606211932Smm goto skip_entry; 2607211932Smm if (!zfs_has_access(ezp, cr)) { 2608211932Smm VN_RELE(ZTOV(ezp)); 2609211932Smm goto skip_entry; 2610211932Smm } 2611211932Smm VN_RELE(ZTOV(ezp)); 2612211932Smm } 2613211932Smm 2614185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2615185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2616185029Spjd else 2617185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2618185029Spjd 2619168404Spjd /* 2620168404Spjd * Will this entry fit in the buffer? 2621168404Spjd */ 2622168404Spjd if (outcount + reclen > bufsize) { 2623168404Spjd /* 2624168404Spjd * Did we manage to fit anything in the buffer? 2625168404Spjd */ 2626168404Spjd if (!outcount) { 2627249195Smm error = SET_ERROR(EINVAL); 2628168404Spjd goto update; 2629168404Spjd } 2630168404Spjd break; 2631168404Spjd } 2632185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2633185029Spjd /* 2634185029Spjd * Add extended flag entry: 2635185029Spjd */ 2636185029Spjd eodp->ed_ino = objnum; 2637185029Spjd eodp->ed_reclen = reclen; 2638185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2639185029Spjd next = &(eodp->ed_off); 2640185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2641185029Spjd ED_CASE_CONFLICT : 0; 2642185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2643185029Spjd EDIRENT_NAMELEN(reclen)); 2644185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2645185029Spjd } else { 2646185029Spjd /* 2647185029Spjd * Add normal entry: 2648185029Spjd */ 2649185029Spjd odp->d_ino = objnum; 2650185029Spjd odp->d_reclen = reclen; 2651185029Spjd odp->d_namlen = strlen(zap.za_name); 2652185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2653185029Spjd odp->d_type = type; 2654185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2655185029Spjd } 2656168404Spjd outcount += reclen; 2657168404Spjd 2658168404Spjd ASSERT(outcount <= bufsize); 2659168404Spjd 2660168404Spjd /* Prefetch znode */ 2661168404Spjd if (prefetch) 2662168404Spjd dmu_prefetch(os, objnum, 0, 0); 2663168404Spjd 2664211932Smm skip_entry: 2665168404Spjd /* 2666168404Spjd * Move to the next entry, fill in the previous offset. 2667168404Spjd */ 2668168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2669168404Spjd zap_cursor_advance(&zc); 2670168404Spjd offset = zap_cursor_serialize(&zc); 2671168404Spjd } else { 2672168404Spjd offset += 1; 2673168404Spjd } 2674219404Spjd 2675219404Spjd if (cooks != NULL) { 2676219404Spjd *cooks++ = offset; 2677219404Spjd ncooks--; 2678219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2679219404Spjd } 2680168404Spjd } 2681168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2682168404Spjd 2683168404Spjd /* Subtract unused cookies */ 2684168962Spjd if (ncookies != NULL) 2685168962Spjd *ncookies -= ncooks; 2686168404Spjd 2687168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2688168404Spjd iovp->iov_base += outcount; 2689168404Spjd iovp->iov_len -= outcount; 2690168404Spjd uio->uio_resid -= outcount; 2691168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2692168404Spjd /* 2693168404Spjd * Reset the pointer. 
2694168404Spjd */ 2695168404Spjd offset = uio->uio_loffset; 2696168404Spjd } 2697168404Spjd 2698168404Spjdupdate: 2699168404Spjd zap_cursor_fini(&zc); 2700168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2701168404Spjd kmem_free(outbuf, bufsize); 2702168404Spjd 2703168404Spjd if (error == ENOENT) 2704168404Spjd error = 0; 2705168404Spjd 2706168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2707168404Spjd 2708168404Spjd uio->uio_loffset = offset; 2709168404Spjd ZFS_EXIT(zfsvfs); 2710169107Spjd if (error != 0 && cookies != NULL) { 2711168962Spjd free(*cookies, M_TEMP); 2712168962Spjd *cookies = NULL; 2713168962Spjd *ncookies = 0; 2714168404Spjd } 2715168404Spjd return (error); 2716168404Spjd} 2717168404Spjd 2718185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2719185029Spjd 2720168404Spjdstatic int 2721185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2722168404Spjd{ 2723168962Spjd znode_t *zp = VTOZ(vp); 2724168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2725168404Spjd 2726185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2727185029Spjd 2728219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2729219089Spjd ZFS_ENTER(zfsvfs); 2730219089Spjd ZFS_VERIFY_ZP(zp); 2731219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2732219089Spjd ZFS_EXIT(zfsvfs); 2733219089Spjd } 2734168404Spjd return (0); 2735168404Spjd} 2736168404Spjd 2737185029Spjd 2738168404Spjd/* 2739168404Spjd * Get the requested file attributes and place them in the provided 2740168404Spjd * vattr structure. 2741168404Spjd * 2742168404Spjd * IN: vp - vnode of file. 2743168404Spjd * vap - va_mask identifies requested attributes. 2744185029Spjd * If AT_XVATTR set, then optional attrs are requested 2745185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2746168404Spjd * cr - credentials of caller. 2747185029Spjd * ct - caller context 2748168404Spjd * 2749168404Spjd * OUT: vap - attribute values. 
2750168404Spjd * 2751251631Sdelphij * RETURN: 0 (always succeeds). 2752168404Spjd */ 2753168404Spjd/* ARGSUSED */ 2754168404Spjdstatic int 2755185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2756185029Spjd caller_context_t *ct) 2757168404Spjd{ 2758168962Spjd znode_t *zp = VTOZ(vp); 2759168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2760185029Spjd int error = 0; 2761168962Spjd uint32_t blksize; 2762168962Spjd u_longlong_t nblocks; 2763185029Spjd uint64_t links; 2764224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2765185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2766185029Spjd xoptattr_t *xoap = NULL; 2767185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2768224251Sdelphij sa_bulk_attr_t bulk[4]; 2769219089Spjd int count = 0; 2770168404Spjd 2771168404Spjd ZFS_ENTER(zfsvfs); 2772185029Spjd ZFS_VERIFY_ZP(zp); 2773168404Spjd 2774219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2775219089Spjd 2776219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2777219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2778243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2779224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2780224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2781224251Sdelphij &rdev, 8); 2782219089Spjd 2783219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2784219089Spjd ZFS_EXIT(zfsvfs); 2785219089Spjd return (error); 2786219089Spjd } 2787219089Spjd 2788168404Spjd /* 2789185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2790185029Spjd * Also, if we are the owner don't bother, since owner should 2791185029Spjd * always be allowed to read basic attributes of file. 
2792185029Spjd */ 2793219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2794219089Spjd (vap->va_uid != crgetuid(cr))) { 2795185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2796185029Spjd skipaclchk, cr)) { 2797185029Spjd ZFS_EXIT(zfsvfs); 2798185029Spjd return (error); 2799185029Spjd } 2800185029Spjd } 2801185029Spjd 2802185029Spjd /* 2803168404Spjd * Return all attributes. It's cheaper to provide the answer 2804168404Spjd * than to determine whether we were asked the question. 2805168404Spjd */ 2806168404Spjd 2807209097Smm mutex_enter(&zp->z_lock); 2808219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2809219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2810224252Sdelphij#ifdef sun 2811224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2812224252Sdelphij#else 2813224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2814224252Sdelphij#endif 2815168404Spjd vap->va_nodeid = zp->z_id; 2816185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2817219089Spjd links = zp->z_links + 1; 2818185029Spjd else 2819219089Spjd links = zp->z_links; 2820229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2821219089Spjd vap->va_size = zp->z_size; 2822224252Sdelphij#ifdef sun 2823224252Sdelphij vap->va_rdev = vp->v_rdev; 2824224252Sdelphij#else 2825224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2826224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2827224252Sdelphij#endif 2828168404Spjd vap->va_seq = zp->z_seq; 2829168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2830273122Saraujo vap->va_filerev = zp->z_seq; 2831168404Spjd 2832185029Spjd /* 2833185029Spjd * Add in any requested optional attributes and the create time. 2834185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2835185029Spjd */ 2836185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2837185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2838185029Spjd xoap->xoa_archive = 2839219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2840185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2841185029Spjd } 2842185029Spjd 2843185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2844185029Spjd xoap->xoa_readonly = 2845219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2846185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2847185029Spjd } 2848185029Spjd 2849185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2850185029Spjd xoap->xoa_system = 2851219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2852185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2853185029Spjd } 2854185029Spjd 2855185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2856185029Spjd xoap->xoa_hidden = 2857219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2858185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2859185029Spjd } 2860185029Spjd 2861185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2862185029Spjd xoap->xoa_nounlink = 2863219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2864185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2865185029Spjd } 2866185029Spjd 2867185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2868185029Spjd xoap->xoa_immutable = 2869219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2870185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2871185029Spjd } 2872185029Spjd 2873185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2874185029Spjd xoap->xoa_appendonly = 2875219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2876185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2877185029Spjd } 2878185029Spjd 2879185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2880185029Spjd xoap->xoa_nodump = 2881219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2882185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2883185029Spjd } 2884185029Spjd 2885185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2886185029Spjd xoap->xoa_opaque = 2887219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2888185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2889185029Spjd } 2890185029Spjd 2891185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2892185029Spjd xoap->xoa_av_quarantined = 2893219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2894185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2895185029Spjd } 2896185029Spjd 2897185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2898185029Spjd xoap->xoa_av_modified = 2899219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2900185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2901185029Spjd } 2902185029Spjd 2903185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2904219089Spjd vp->v_type == VREG) { 2905219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2906185029Spjd } 2907185029Spjd 2908185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2909219089Spjd uint64_t times[2]; 2910219089Spjd 2911219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2912219089Spjd times, sizeof (times)); 2913219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2914185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2915185029Spjd } 2916219089Spjd 2917219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2918219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2919219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2920219089Spjd } 2921219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2922219089Spjd xoap->xoa_generation = zp->z_gen; 2923219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2924219089Spjd } 2925219089Spjd 2926219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2927219089Spjd xoap->xoa_offline = 2928219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2929219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2930219089Spjd } 2931219089Spjd 2932219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2933219089Spjd xoap->xoa_sparse = 2934219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2935219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2936219089Spjd } 2937185029Spjd } 2938185029Spjd 2939219089Spjd 
ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2940219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2941219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2942219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2943168404Spjd 2944168404Spjd mutex_exit(&zp->z_lock); 2945168404Spjd 2946219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2947168404Spjd vap->va_blksize = blksize; 2948168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2949168404Spjd 2950168404Spjd if (zp->z_blksz == 0) { 2951168404Spjd /* 2952168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2953168404Spjd */ 2954168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2955168404Spjd } 2956168404Spjd 2957168404Spjd ZFS_EXIT(zfsvfs); 2958168404Spjd return (0); 2959168404Spjd} 2960168404Spjd 2961168404Spjd/* 2962168404Spjd * Set the file attributes to the values contained in the 2963168404Spjd * vattr structure. 2964168404Spjd * 2965168404Spjd * IN: vp - vnode of file to be modified. 2966168404Spjd * vap - new attribute values. 2967185029Spjd * If AT_XVATTR set, then optional attrs are being set 2968168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2969185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2970168404Spjd * cr - credentials of caller. 2971185029Spjd * ct - caller context 2972168404Spjd * 2973251631Sdelphij * RETURN: 0 on success, error code on failure. 2974168404Spjd * 2975168404Spjd * Timestamps: 2976168404Spjd * vp - ctime updated, mtime updated if size changed. 
2977168404Spjd */ 2978168404Spjd/* ARGSUSED */ 2979168404Spjdstatic int 2980168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2981251631Sdelphij caller_context_t *ct) 2982168404Spjd{ 2983185029Spjd znode_t *zp = VTOZ(vp); 2984168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2985185029Spjd zilog_t *zilog; 2986168404Spjd dmu_tx_t *tx; 2987168404Spjd vattr_t oldva; 2988209962Smm xvattr_t tmpxvattr; 2989168962Spjd uint_t mask = vap->va_mask; 2990247187Smm uint_t saved_mask = 0; 2991197831Spjd uint64_t saved_mode; 2992168404Spjd int trim_mask = 0; 2993168404Spjd uint64_t new_mode; 2994209962Smm uint64_t new_uid, new_gid; 2995219089Spjd uint64_t xattr_obj; 2996219089Spjd uint64_t mtime[2], ctime[2]; 2997168404Spjd znode_t *attrzp; 2998168404Spjd int need_policy = FALSE; 2999219089Spjd int err, err2; 3000185029Spjd zfs_fuid_info_t *fuidp = NULL; 3001185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3002185029Spjd xoptattr_t *xoap; 3003219089Spjd zfs_acl_t *aclp; 3004185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 3005219089Spjd boolean_t fuid_dirtied = B_FALSE; 3006219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 3007219089Spjd int count = 0, xattr_count = 0; 3008168404Spjd 3009168404Spjd if (mask == 0) 3010168404Spjd return (0); 3011168404Spjd 3012168962Spjd if (mask & AT_NOSET) 3013249195Smm return (SET_ERROR(EINVAL)); 3014168962Spjd 3015185029Spjd ZFS_ENTER(zfsvfs); 3016185029Spjd ZFS_VERIFY_ZP(zp); 3017185029Spjd 3018185029Spjd zilog = zfsvfs->z_log; 3019185029Spjd 3020185029Spjd /* 3021185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 3022185029Spjd * that file system is at proper version level 3023185029Spjd */ 3024185029Spjd 3025185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 3026185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3027185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3028185029Spjd (mask & AT_XVATTR))) { 3029185029Spjd ZFS_EXIT(zfsvfs); 3030249195Smm return (SET_ERROR(EINVAL)); 3031185029Spjd } 3032185029Spjd 3033185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 3034185029Spjd ZFS_EXIT(zfsvfs); 3035249195Smm return (SET_ERROR(EISDIR)); 3036185029Spjd } 3037168404Spjd 3038185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3039185029Spjd ZFS_EXIT(zfsvfs); 3040249195Smm return (SET_ERROR(EINVAL)); 3041185029Spjd } 3042168404Spjd 3043185029Spjd /* 3044185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 3045185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
3046185029Spjd */ 3047185029Spjd xoap = xva_getxoptattr(xvap); 3048168404Spjd 3049209962Smm xva_init(&tmpxvattr); 3050209962Smm 3051185029Spjd /* 3052185029Spjd * Immutable files can only alter immutable bit and atime 3053185029Spjd */ 3054219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 3055185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3056185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3057185029Spjd ZFS_EXIT(zfsvfs); 3058249195Smm return (SET_ERROR(EPERM)); 3059185029Spjd } 3060185029Spjd 3061219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3062185029Spjd ZFS_EXIT(zfsvfs); 3063249195Smm return (SET_ERROR(EPERM)); 3064185029Spjd } 3065185029Spjd 3066185029Spjd /* 3067185029Spjd * Verify timestamps doesn't overflow 32 bits. 3068185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 3069185029Spjd * handle times greater than 2039. This check should be removed 3070185029Spjd * once large timestamps are fully supported. 3071185029Spjd */ 3072185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 3073185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3074185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3075185029Spjd ZFS_EXIT(zfsvfs); 3076249195Smm return (SET_ERROR(EOVERFLOW)); 3077185029Spjd } 3078185029Spjd } 3079185029Spjd 3080168404Spjdtop: 3081168404Spjd attrzp = NULL; 3082219089Spjd aclp = NULL; 3083168404Spjd 3084211932Smm /* Can this be moved to before the top label? 
*/ 3085168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3086168404Spjd ZFS_EXIT(zfsvfs); 3087249195Smm return (SET_ERROR(EROFS)); 3088168404Spjd } 3089168404Spjd 3090168404Spjd /* 3091168404Spjd * First validate permissions 3092168404Spjd */ 3093168404Spjd 3094168404Spjd if (mask & AT_SIZE) { 3095168404Spjd /* 3096168404Spjd * XXX - Note, we are not providing any open 3097168404Spjd * mode flags here (like FNDELAY), so we may 3098168404Spjd * block if there are locks present... this 3099168404Spjd * should be addressed in openat(). 3100168404Spjd */ 3101185029Spjd /* XXX - would it be OK to generate a log record here? */ 3102185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3103168404Spjd if (err) { 3104168404Spjd ZFS_EXIT(zfsvfs); 3105168404Spjd return (err); 3106168404Spjd } 3107168404Spjd } 3108168404Spjd 3109185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3110185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3111185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3112185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3113219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3114219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3115185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3116219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3117185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3118185029Spjd skipaclchk, cr); 3119219089Spjd } 3120168404Spjd 3121168404Spjd if (mask & (AT_UID|AT_GID)) { 3122168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3123168404Spjd int take_owner; 3124168404Spjd int take_group; 3125168404Spjd 3126168404Spjd /* 3127168404Spjd * NOTE: even if a new mode is being set, 3128168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3129168404Spjd */ 3130168404Spjd 3131168404Spjd if (!(mask & AT_MODE)) 3132219089Spjd vap->va_mode = zp->z_mode; 3133168404Spjd 3134168404Spjd /* 3135168404Spjd * Take ownership or chgrp to group we are a member of 3136168404Spjd */ 3137168404Spjd 3138168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3139185029Spjd take_group = (mask & AT_GID) && 3140185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3141168404Spjd 3142168404Spjd /* 3143168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3144168404Spjd * take_group must both be set in order to allow taking 3145168404Spjd * ownership. 3146168404Spjd * 3147168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3148168404Spjd * 3149168404Spjd */ 3150168404Spjd 3151168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3152168404Spjd ((idmask == AT_UID) && take_owner) || 3153168404Spjd ((idmask == AT_GID) && take_group)) { 3154185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3155185029Spjd skipaclchk, cr) == 0) { 3156168404Spjd /* 3157168404Spjd * Remove setuid/setgid for non-privileged users 3158168404Spjd */ 3159185029Spjd secpolicy_setid_clear(vap, vp, cr); 3160168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3161168404Spjd } else { 3162168404Spjd need_policy = TRUE; 3163168404Spjd } 3164168404Spjd } else { 3165168404Spjd need_policy = TRUE; 3166168404Spjd } 3167168404Spjd } 3168168404Spjd 3169168404Spjd mutex_enter(&zp->z_lock); 3170219089Spjd oldva.va_mode = zp->z_mode; 3171185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3172185029Spjd if (mask & AT_XVATTR) { 3173209962Smm /* 3174209962Smm * Update xvattr mask to include only those attributes 3175209962Smm * that are actually changing. 3176209962Smm * 3177209962Smm * the bits will be restored prior to actually setting 3178209962Smm * the attributes so the caller thinks they were set. 
3179209962Smm */ 3180209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3181209962Smm if (xoap->xoa_appendonly != 3182219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3183209962Smm need_policy = TRUE; 3184209962Smm } else { 3185209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3186209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3187209962Smm } 3188209962Smm } 3189209962Smm 3190209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3191209962Smm if (xoap->xoa_nounlink != 3192219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3193209962Smm need_policy = TRUE; 3194209962Smm } else { 3195209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3196209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3197209962Smm } 3198209962Smm } 3199209962Smm 3200209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3201209962Smm if (xoap->xoa_immutable != 3202219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3203209962Smm need_policy = TRUE; 3204209962Smm } else { 3205209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3206209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3207209962Smm } 3208209962Smm } 3209209962Smm 3210209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3211209962Smm if (xoap->xoa_nodump != 3212219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3213209962Smm need_policy = TRUE; 3214209962Smm } else { 3215209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3216209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3217209962Smm } 3218209962Smm } 3219209962Smm 3220209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3221209962Smm if (xoap->xoa_av_modified != 3222219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3223209962Smm need_policy = TRUE; 3224209962Smm } else { 3225209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3226209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3227209962Smm } 3228209962Smm } 3229209962Smm 3230209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3231209962Smm if ((vp->v_type != VREG && 3232209962Smm xoap->xoa_av_quarantined) || 3233209962Smm xoap->xoa_av_quarantined != 
3234219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3235209962Smm need_policy = TRUE; 3236209962Smm } else { 3237209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3238209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3239209962Smm } 3240209962Smm } 3241209962Smm 3242219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3243219089Spjd mutex_exit(&zp->z_lock); 3244219089Spjd ZFS_EXIT(zfsvfs); 3245249195Smm return (SET_ERROR(EPERM)); 3246219089Spjd } 3247219089Spjd 3248209962Smm if (need_policy == FALSE && 3249209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3250209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3251185029Spjd need_policy = TRUE; 3252185029Spjd } 3253185029Spjd } 3254185029Spjd 3255168404Spjd mutex_exit(&zp->z_lock); 3256168404Spjd 3257168404Spjd if (mask & AT_MODE) { 3258185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3259168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3260168962Spjd &oldva, cr); 3261168962Spjd if (err) { 3262168962Spjd ZFS_EXIT(zfsvfs); 3263168962Spjd return (err); 3264168962Spjd } 3265168404Spjd trim_mask |= AT_MODE; 3266168404Spjd } else { 3267168404Spjd need_policy = TRUE; 3268168404Spjd } 3269168404Spjd } 3270168404Spjd 3271168404Spjd if (need_policy) { 3272168404Spjd /* 3273168404Spjd * If trim_mask is set then take ownership 3274168404Spjd * has been granted or write_acl is present and user 3275168404Spjd * has the ability to modify mode. In that case remove 3276168404Spjd * UID|GID and or MODE from mask so that 3277168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3278168404Spjd */ 3279168404Spjd 3280168404Spjd if (trim_mask) { 3281168404Spjd saved_mask = vap->va_mask; 3282168404Spjd vap->va_mask &= ~trim_mask; 3283197831Spjd if (trim_mask & AT_MODE) { 3284197831Spjd /* 3285197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3286197831Spjd * will overwrite it with ova.va_mode. 
3287197831Spjd */ 3288197831Spjd saved_mode = vap->va_mode; 3289197831Spjd } 3290168404Spjd } 3291168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3292185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3293168404Spjd if (err) { 3294168404Spjd ZFS_EXIT(zfsvfs); 3295168404Spjd return (err); 3296168404Spjd } 3297168404Spjd 3298197831Spjd if (trim_mask) { 3299168404Spjd vap->va_mask |= saved_mask; 3300197831Spjd if (trim_mask & AT_MODE) { 3301197831Spjd /* 3302197831Spjd * Recover the mode after 3303197831Spjd * secpolicy_vnode_setattr(). 3304197831Spjd */ 3305197831Spjd vap->va_mode = saved_mode; 3306197831Spjd } 3307197831Spjd } 3308168404Spjd } 3309168404Spjd 3310168404Spjd /* 3311168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3312168404Spjd * changed va_mask 3313168404Spjd */ 3314168404Spjd mask = vap->va_mask; 3315168404Spjd 3316219089Spjd if ((mask & (AT_UID | AT_GID))) { 3317219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3318219089Spjd &xattr_obj, sizeof (xattr_obj)); 3319168404Spjd 3320219089Spjd if (err == 0 && xattr_obj) { 3321219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3322209962Smm if (err) 3323219089Spjd goto out2; 3324168404Spjd } 3325209962Smm if (mask & AT_UID) { 3326209962Smm new_uid = zfs_fuid_create(zfsvfs, 3327209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3328219089Spjd if (new_uid != zp->z_uid && 3329219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3330219089Spjd if (attrzp) 3331219089Spjd VN_RELE(ZTOV(attrzp)); 3332249195Smm err = SET_ERROR(EDQUOT); 3333219089Spjd goto out2; 3334209962Smm } 3335209962Smm } 3336209962Smm 3337209962Smm if (mask & AT_GID) { 3338209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3339209962Smm cr, ZFS_GROUP, &fuidp); 3340219089Spjd if (new_gid != zp->z_gid && 3341219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3342219089Spjd if (attrzp) 3343219089Spjd VN_RELE(ZTOV(attrzp)); 3344249195Smm 
err = SET_ERROR(EDQUOT); 3345219089Spjd goto out2; 3346209962Smm } 3347209962Smm } 3348219089Spjd } 3349219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3350219089Spjd 3351219089Spjd if (mask & AT_MODE) { 3352219089Spjd uint64_t pmode = zp->z_mode; 3353219089Spjd uint64_t acl_obj; 3354219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3355219089Spjd 3356243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3357243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3358249195Smm err = SET_ERROR(EPERM); 3359243560Smm goto out; 3360243560Smm } 3361243560Smm 3362224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3363224174Smm goto out; 3364219089Spjd 3365219089Spjd mutex_enter(&zp->z_lock); 3366219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3367219089Spjd /* 3368219089Spjd * Are we upgrading ACL from old V0 format 3369219089Spjd * to V1 format? 3370219089Spjd */ 3371219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3372219089Spjd zfs_znode_acl_version(zp) == 3373219089Spjd ZFS_ACL_VERSION_INITIAL) { 3374219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3375219089Spjd DMU_OBJECT_END); 3376219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3377219089Spjd 0, aclp->z_acl_bytes); 3378209962Smm } else { 3379219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3380219089Spjd aclp->z_acl_bytes); 3381209962Smm } 3382219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3383219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3384219089Spjd 0, aclp->z_acl_bytes); 3385209962Smm } 3386219089Spjd mutex_exit(&zp->z_lock); 3387219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3388219089Spjd } else { 3389219089Spjd if ((mask & AT_XVATTR) && 3390219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3391219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3392219089Spjd else 3393219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3394168404Spjd } 3395168404Spjd 3396219089Spjd if (attrzp) { 3397219089Spjd dmu_tx_hold_sa(tx, 
attrzp->z_sa_hdl, B_FALSE); 3398219089Spjd } 3399219089Spjd 3400219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3401219089Spjd if (fuid_dirtied) 3402219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3403219089Spjd 3404219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3405219089Spjd 3406260776Savg err = dmu_tx_assign(tx, TXG_WAIT); 3407260776Savg if (err) 3408209962Smm goto out; 3409168404Spjd 3410219089Spjd count = 0; 3411168404Spjd /* 3412168404Spjd * Set each attribute requested. 3413168404Spjd * We group settings according to the locks they need to acquire. 3414168404Spjd * 3415168404Spjd * Note: you cannot set ctime directly, although it will be 3416168404Spjd * updated as a side-effect of calling this function. 3417168404Spjd */ 3418168404Spjd 3419219089Spjd 3420219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3421219089Spjd mutex_enter(&zp->z_acl_lock); 3422168404Spjd mutex_enter(&zp->z_lock); 3423168404Spjd 3424219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3425219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3426219089Spjd 3427219089Spjd if (attrzp) { 3428219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3429219089Spjd mutex_enter(&attrzp->z_acl_lock); 3430219089Spjd mutex_enter(&attrzp->z_lock); 3431219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3432219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3433219089Spjd sizeof (attrzp->z_pflags)); 3434219089Spjd } 3435219089Spjd 3436219089Spjd if (mask & (AT_UID|AT_GID)) { 3437219089Spjd 3438219089Spjd if (mask & AT_UID) { 3439219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3440219089Spjd &new_uid, sizeof (new_uid)); 3441219089Spjd zp->z_uid = new_uid; 3442219089Spjd if (attrzp) { 3443219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3444219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3445219089Spjd sizeof (new_uid)); 3446219089Spjd attrzp->z_uid = new_uid; 3447219089Spjd } 3448219089Spjd } 3449219089Spjd 3450219089Spjd if (mask & AT_GID) { 3451219089Spjd SA_ADD_BULK_ATTR(bulk, 
count, SA_ZPL_GID(zfsvfs), 3452219089Spjd NULL, &new_gid, sizeof (new_gid)); 3453219089Spjd zp->z_gid = new_gid; 3454219089Spjd if (attrzp) { 3455219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3456219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3457219089Spjd sizeof (new_gid)); 3458219089Spjd attrzp->z_gid = new_gid; 3459219089Spjd } 3460219089Spjd } 3461219089Spjd if (!(mask & AT_MODE)) { 3462219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3463219089Spjd NULL, &new_mode, sizeof (new_mode)); 3464219089Spjd new_mode = zp->z_mode; 3465219089Spjd } 3466219089Spjd err = zfs_acl_chown_setattr(zp); 3467219089Spjd ASSERT(err == 0); 3468219089Spjd if (attrzp) { 3469219089Spjd err = zfs_acl_chown_setattr(attrzp); 3470219089Spjd ASSERT(err == 0); 3471219089Spjd } 3472219089Spjd } 3473219089Spjd 3474168404Spjd if (mask & AT_MODE) { 3475219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3476219089Spjd &new_mode, sizeof (new_mode)); 3477219089Spjd zp->z_mode = new_mode; 3478219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3479209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3480240415Smm ASSERT0(err); 3481219089Spjd if (zp->z_acl_cached) 3482219089Spjd zfs_acl_free(zp->z_acl_cached); 3483211932Smm zp->z_acl_cached = aclp; 3484211932Smm aclp = NULL; 3485168404Spjd } 3486168404Spjd 3487168404Spjd 3488219089Spjd if (mask & AT_ATIME) { 3489219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3490219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3491219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3492168404Spjd } 3493168404Spjd 3494219089Spjd if (mask & AT_MTIME) { 3495219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3496219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3497219089Spjd mtime, sizeof (mtime)); 3498168404Spjd } 3499168404Spjd 3500185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3501219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3502219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3503219089Spjd NULL, mtime, sizeof (mtime)); 3504219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3505219089Spjd &ctime, sizeof (ctime)); 3506219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3507219089Spjd B_TRUE); 3508219089Spjd } else if (mask != 0) { 3509219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3510219089Spjd &ctime, sizeof (ctime)); 3511219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3512219089Spjd B_TRUE); 3513219089Spjd if (attrzp) { 3514219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3515219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3516219089Spjd &ctime, sizeof (ctime)); 3517219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3518219089Spjd mtime, ctime, B_TRUE); 3519219089Spjd } 3520219089Spjd } 3521185029Spjd /* 3522185029Spjd * Do this after setting timestamps to prevent timestamp 3523185029Spjd * update from toggling bit 3524185029Spjd */ 3525168404Spjd 3526185029Spjd if (xoap && (mask & AT_XVATTR)) { 3527209962Smm 3528209962Smm /* 3529209962Smm * restore trimmed off masks 3530209962Smm * so that return masks can be set for caller. 
3531209962Smm */ 3532209962Smm 3533209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3534209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3535209962Smm } 3536209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3537209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3538209962Smm } 3539209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3540209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3541209962Smm } 3542209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3543209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3544209962Smm } 3545209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3546209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3547209962Smm } 3548209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3549209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3550209962Smm } 3551209962Smm 3552219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3553185029Spjd ASSERT(vp->v_type == VREG); 3554185029Spjd 3555219089Spjd zfs_xvattr_set(zp, xvap, tx); 3556185029Spjd } 3557185029Spjd 3558209962Smm if (fuid_dirtied) 3559209962Smm zfs_fuid_sync(zfsvfs, tx); 3560209962Smm 3561168404Spjd if (mask != 0) 3562185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3563168404Spjd 3564168404Spjd mutex_exit(&zp->z_lock); 3565219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3566219089Spjd mutex_exit(&zp->z_acl_lock); 3567168404Spjd 3568219089Spjd if (attrzp) { 3569219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3570219089Spjd mutex_exit(&attrzp->z_acl_lock); 3571219089Spjd mutex_exit(&attrzp->z_lock); 3572219089Spjd } 3573209962Smmout: 3574219089Spjd if (err == 0 && attrzp) { 3575219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3576219089Spjd xattr_count, tx); 3577219089Spjd ASSERT(err2 == 0); 3578219089Spjd } 3579219089Spjd 3580168404Spjd if (attrzp) 3581168404Spjd VN_RELE(ZTOV(attrzp)); 3582251631Sdelphij 3583211932Smm if (aclp) 3584209962Smm zfs_acl_free(aclp); 3585168404Spjd 3586209962Smm if (fuidp) { 3587209962Smm zfs_fuid_info_free(fuidp); 
3588209962Smm fuidp = NULL; 3589209962Smm } 3590209962Smm 3591219089Spjd if (err) { 3592209962Smm dmu_tx_abort(tx); 3593219089Spjd if (err == ERESTART) 3594219089Spjd goto top; 3595219089Spjd } else { 3596219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3597209962Smm dmu_tx_commit(tx); 3598219089Spjd } 3599209962Smm 3600219089Spjdout2: 3601219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3602219089Spjd zil_commit(zilog, 0); 3603209962Smm 3604168404Spjd ZFS_EXIT(zfsvfs); 3605168404Spjd return (err); 3606168404Spjd} 3607168404Spjd 3608168404Spjdtypedef struct zfs_zlock { 3609168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3610168404Spjd znode_t *zl_znode; /* znode we held */ 3611168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3612168404Spjd} zfs_zlock_t; 3613168404Spjd 3614168404Spjd/* 3615168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 3616168404Spjd */ 3617168404Spjdstatic void 3618168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3619168404Spjd{ 3620168404Spjd zfs_zlock_t *zl; 3621168404Spjd 3622168404Spjd while ((zl = *zlpp) != NULL) { 3623168404Spjd if (zl->zl_znode != NULL) 3624168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3625168404Spjd rw_exit(zl->zl_rwlock); 3626168404Spjd *zlpp = zl->zl_next; 3627168404Spjd kmem_free(zl, sizeof (*zl)); 3628168404Spjd } 3629168404Spjd} 3630168404Spjd 3631168404Spjd/* 3632168404Spjd * Search back through the directory tree, using the ".." entries. 3633168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3634168404Spjd * Fail any attempt to move a directory into one of its own descendants. 
3635168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 3636168404Spjd */ 3637168404Spjdstatic int 3638168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3639168404Spjd{ 3640168404Spjd zfs_zlock_t *zl; 3641168404Spjd znode_t *zp = tdzp; 3642168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 3643219089Spjd uint64_t oidp = zp->z_id; 3644168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 3645168404Spjd krw_t rw = RW_WRITER; 3646168404Spjd 3647168404Spjd /* 3648168404Spjd * First pass write-locks szp and compares to zp->z_id. 3649168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 3650168404Spjd */ 3651168404Spjd do { 3652168404Spjd if (!rw_tryenter(rwlp, rw)) { 3653168404Spjd /* 3654168404Spjd * Another thread is renaming in this path. 3655168404Spjd * Note that if we are a WRITER, we don't have any 3656168404Spjd * parent_locks held yet. 3657168404Spjd */ 3658168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 3659168404Spjd /* 3660168404Spjd * Drop our locks and restart 3661168404Spjd */ 3662168404Spjd zfs_rename_unlock(&zl); 3663168404Spjd *zlpp = NULL; 3664168404Spjd zp = tdzp; 3665219089Spjd oidp = zp->z_id; 3666168404Spjd rwlp = &szp->z_parent_lock; 3667168404Spjd rw = RW_WRITER; 3668168404Spjd continue; 3669168404Spjd } else { 3670168404Spjd /* 3671168404Spjd * Wait for other thread to drop its locks 3672168404Spjd */ 3673168404Spjd rw_enter(rwlp, rw); 3674168404Spjd } 3675168404Spjd } 3676168404Spjd 3677168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3678168404Spjd zl->zl_rwlock = rwlp; 3679168404Spjd zl->zl_znode = NULL; 3680168404Spjd zl->zl_next = *zlpp; 3681168404Spjd *zlpp = zl; 3682168404Spjd 3683219089Spjd if (oidp == szp->z_id) /* We're a descendant of szp */ 3684249195Smm return (SET_ERROR(EINVAL)); 3685168404Spjd 3686219089Spjd if (oidp == rootid) /* We've hit the top */ 3687168404Spjd return (0); 3688168404Spjd 3689168404Spjd if (rw == RW_READER) { /* i.e. 
not the first pass */ 3690219089Spjd int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3691168404Spjd if (error) 3692168404Spjd return (error); 3693168404Spjd zl->zl_znode = zp; 3694168404Spjd } 3695219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 3696219089Spjd &oidp, sizeof (oidp)); 3697168404Spjd rwlp = &zp->z_parent_lock; 3698168404Spjd rw = RW_READER; 3699168404Spjd 3700168404Spjd } while (zp->z_id != sdzp->z_id); 3701168404Spjd 3702168404Spjd return (0); 3703168404Spjd} 3704168404Spjd 3705168404Spjd/* 3706168404Spjd * Move an entry from the provided source directory to the target 3707168404Spjd * directory. Change the entry name as indicated. 3708168404Spjd * 3709168404Spjd * IN: sdvp - Source directory containing the "old entry". 3710168404Spjd * snm - Old entry name. 3711168404Spjd * tdvp - Target directory to contain the "new entry". 3712168404Spjd * tnm - New entry name. 3713168404Spjd * cr - credentials of caller. 3714185029Spjd * ct - caller context 3715185029Spjd * flags - case flags 3716168404Spjd * 3717251631Sdelphij * RETURN: 0 on success, error code on failure. 
3718168404Spjd * 3719168404Spjd * Timestamps: 3720168404Spjd * sdvp,tdvp - ctime|mtime updated 3721168404Spjd */ 3722185029Spjd/*ARGSUSED*/ 3723168404Spjdstatic int 3724185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3725185029Spjd caller_context_t *ct, int flags) 3726168404Spjd{ 3727168404Spjd znode_t *tdzp, *szp, *tzp; 3728168404Spjd znode_t *sdzp = VTOZ(sdvp); 3729168404Spjd zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3730185029Spjd zilog_t *zilog; 3731168962Spjd vnode_t *realvp; 3732168404Spjd zfs_dirlock_t *sdl, *tdl; 3733168404Spjd dmu_tx_t *tx; 3734168404Spjd zfs_zlock_t *zl; 3735185029Spjd int cmp, serr, terr; 3736185029Spjd int error = 0; 3737185029Spjd int zflg = 0; 3738260763Savg boolean_t waited = B_FALSE; 3739168404Spjd 3740168404Spjd ZFS_ENTER(zfsvfs); 3741185029Spjd ZFS_VERIFY_ZP(sdzp); 3742185029Spjd zilog = zfsvfs->z_log; 3743168404Spjd 3744168962Spjd /* 3745168962Spjd * Make sure we have the real vp for the target directory. 3746168962Spjd */ 3747185029Spjd if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3748168962Spjd tdvp = realvp; 3749168962Spjd 3750254585Sdelphij tdzp = VTOZ(tdvp); 3751254585Sdelphij ZFS_VERIFY_ZP(tdzp); 3752254585Sdelphij 3753254585Sdelphij /* 3754254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 3755254585Sdelphij * ctldir appear to have the same v_vfsp. 
3756254585Sdelphij */ 3757254585Sdelphij if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3758168404Spjd ZFS_EXIT(zfsvfs); 3759249195Smm return (SET_ERROR(EXDEV)); 3760168404Spjd } 3761168404Spjd 3762185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3763185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3764185029Spjd ZFS_EXIT(zfsvfs); 3765249195Smm return (SET_ERROR(EILSEQ)); 3766185029Spjd } 3767185029Spjd 3768185029Spjd if (flags & FIGNORECASE) 3769185029Spjd zflg |= ZCILOOK; 3770185029Spjd 3771168404Spjdtop: 3772168404Spjd szp = NULL; 3773168404Spjd tzp = NULL; 3774168404Spjd zl = NULL; 3775168404Spjd 3776168404Spjd /* 3777168404Spjd * This is to prevent the creation of links into attribute space 3778168404Spjd * by renaming a linked file into/outof an attribute directory. 3779168404Spjd * See the comment in zfs_link() for why this is considered bad. 3780168404Spjd */ 3781219089Spjd if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3782168962Spjd ZFS_EXIT(zfsvfs); 3783249195Smm return (SET_ERROR(EINVAL)); 3784168404Spjd } 3785168404Spjd 3786168404Spjd /* 3787168404Spjd * Lock source and target directory entries. To prevent deadlock, 3788168404Spjd * a lock ordering must be defined. We lock the directory with 3789168404Spjd * the smallest object id first, or if it's a tie, the one with 3790168404Spjd * the lexically first name. 3791168404Spjd */ 3792168404Spjd if (sdzp->z_id < tdzp->z_id) { 3793168962Spjd cmp = -1; 3794168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3795168962Spjd cmp = 1; 3796168962Spjd } else { 3797185029Spjd /* 3798185029Spjd * First compare the two name arguments without 3799185029Spjd * considering any case folding. 
3800185029Spjd */ 3801185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3802185029Spjd 3803185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3804185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3805168962Spjd if (cmp == 0) { 3806168962Spjd /* 3807168962Spjd * POSIX: "If the old argument and the new argument 3808168962Spjd * both refer to links to the same existing file, 3809168962Spjd * the rename() function shall return successfully 3810168962Spjd * and perform no other action." 3811168962Spjd */ 3812168962Spjd ZFS_EXIT(zfsvfs); 3813168962Spjd return (0); 3814168962Spjd } 3815185029Spjd /* 3816185029Spjd * If the file system is case-folding, then we may 3817185029Spjd * have some more checking to do. A case-folding file 3818185029Spjd * system is either supporting mixed case sensitivity 3819185029Spjd * access or is completely case-insensitive. Note 3820185029Spjd * that the file system is always case preserving. 3821185029Spjd * 3822185029Spjd * In mixed sensitivity mode case sensitive behavior 3823185029Spjd * is the default. FIGNORECASE must be used to 3824185029Spjd * explicitly request case insensitive behavior. 3825185029Spjd * 3826185029Spjd * If the source and target names provided differ only 3827185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3828185029Spjd * we will treat this as a special case in the 3829185029Spjd * case-insensitive mode: as long as the source name 3830185029Spjd * is an exact match, we will allow this to proceed as 3831185029Spjd * a name-change request. 
3832185029Spjd */ 3833185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3834185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3835185029Spjd flags & FIGNORECASE)) && 3836185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3837185029Spjd &error) == 0) { 3838185029Spjd /* 3839185029Spjd * case preserving rename request, require exact 3840185029Spjd * name matches 3841185029Spjd */ 3842185029Spjd zflg |= ZCIEXACT; 3843185029Spjd zflg &= ~ZCILOOK; 3844185029Spjd } 3845168962Spjd } 3846185029Spjd 3847208131Smm /* 3848208131Smm * If the source and destination directories are the same, we should 3849208131Smm * grab the z_name_lock of that directory only once. 3850208131Smm */ 3851208131Smm if (sdzp == tdzp) { 3852208131Smm zflg |= ZHAVELOCK; 3853208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3854208131Smm } 3855208131Smm 3856168962Spjd if (cmp < 0) { 3857185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3858185029Spjd ZEXISTS | zflg, NULL, NULL); 3859185029Spjd terr = zfs_dirent_lock(&tdl, 3860185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3861168962Spjd } else { 3862185029Spjd terr = zfs_dirent_lock(&tdl, 3863185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3864185029Spjd serr = zfs_dirent_lock(&sdl, 3865185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3866185029Spjd NULL, NULL); 3867168404Spjd } 3868168404Spjd 3869168962Spjd if (serr) { 3870168404Spjd /* 3871168404Spjd * Source entry invalid or not there. 3872168404Spjd */ 3873168962Spjd if (!terr) { 3874168404Spjd zfs_dirent_unlock(tdl); 3875168962Spjd if (tzp) 3876168962Spjd VN_RELE(ZTOV(tzp)); 3877168962Spjd } 3878208131Smm 3879208131Smm if (sdzp == tdzp) 3880208131Smm rw_exit(&sdzp->z_name_lock); 3881208131Smm 3882219089Spjd /* 3883219089Spjd * FreeBSD: In OpenSolaris they only check if rename source is 3884219089Spjd * ".." here, because "." is handled in their lookup. This is 3885219089Spjd * not the case for FreeBSD, so we check for "." explicitly. 
3886219089Spjd */ 3887168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3888249195Smm serr = SET_ERROR(EINVAL); 3889168962Spjd ZFS_EXIT(zfsvfs); 3890168962Spjd return (serr); 3891168404Spjd } 3892168404Spjd if (terr) { 3893168404Spjd zfs_dirent_unlock(sdl); 3894168962Spjd VN_RELE(ZTOV(szp)); 3895208131Smm 3896208131Smm if (sdzp == tdzp) 3897208131Smm rw_exit(&sdzp->z_name_lock); 3898208131Smm 3899168404Spjd if (strcmp(tnm, "..") == 0) 3900249195Smm terr = SET_ERROR(EINVAL); 3901168962Spjd ZFS_EXIT(zfsvfs); 3902168962Spjd return (terr); 3903168404Spjd } 3904168404Spjd 3905168404Spjd /* 3906168404Spjd * Must have write access at the source to remove the old entry 3907168404Spjd * and write access at the target to create the new entry. 3908168404Spjd * Note that if target and source are the same, this can be 3909168404Spjd * done in a single check. 3910168404Spjd */ 3911168404Spjd 3912168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3913168404Spjd goto out; 3914168404Spjd 3915168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3916168404Spjd /* 3917168404Spjd * Check to make sure rename is valid. 3918168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3919168404Spjd */ 3920168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3921168404Spjd goto out; 3922168404Spjd } 3923168404Spjd 3924168404Spjd /* 3925168404Spjd * Does target exist? 3926168404Spjd */ 3927168404Spjd if (tzp) { 3928168404Spjd /* 3929168404Spjd * Source and target must be the same type. 
3930168404Spjd */ 3931168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3932168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3933249195Smm error = SET_ERROR(ENOTDIR); 3934168404Spjd goto out; 3935168404Spjd } 3936168404Spjd } else { 3937168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3938249195Smm error = SET_ERROR(EISDIR); 3939168404Spjd goto out; 3940168404Spjd } 3941168404Spjd } 3942168404Spjd /* 3943168404Spjd * POSIX dictates that when the source and target 3944168404Spjd * entries refer to the same file object, rename 3945168404Spjd * must do nothing and exit without error. 3946168404Spjd */ 3947168404Spjd if (szp->z_id == tzp->z_id) { 3948168404Spjd error = 0; 3949168404Spjd goto out; 3950168404Spjd } 3951168404Spjd } 3952168404Spjd 3953185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3954168962Spjd if (tzp) 3955185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3956168962Spjd 3957185029Spjd /* 3958185029Spjd * notify the target directory if it is not the same 3959185029Spjd * as source directory. 3960185029Spjd */ 3961185029Spjd if (tdvp != sdvp) { 3962185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3963185029Spjd } 3964185029Spjd 3965168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3966219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3967219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3968168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3969168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3970219089Spjd if (sdzp != tdzp) { 3971219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3972219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3973219089Spjd } 3974219089Spjd if (tzp) { 3975219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3976219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3977219089Spjd } 3978219089Spjd 3979219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3980168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3981260763Savg error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 3982168404Spjd if (error) { 3983168404Spjd if (zl != NULL) 3984168404Spjd zfs_rename_unlock(&zl); 3985168404Spjd zfs_dirent_unlock(sdl); 3986168404Spjd zfs_dirent_unlock(tdl); 3987208131Smm 3988208131Smm if (sdzp == tdzp) 3989208131Smm rw_exit(&sdzp->z_name_lock); 3990208131Smm 3991168962Spjd VN_RELE(ZTOV(szp)); 3992168962Spjd if (tzp) 3993168962Spjd VN_RELE(ZTOV(tzp)); 3994209962Smm if (error == ERESTART) { 3995260763Savg waited = B_TRUE; 3996168404Spjd dmu_tx_wait(tx); 3997168404Spjd dmu_tx_abort(tx); 3998168404Spjd goto top; 3999168404Spjd } 4000168404Spjd dmu_tx_abort(tx); 4001168962Spjd ZFS_EXIT(zfsvfs); 4002168962Spjd return (error); 4003168404Spjd } 4004168404Spjd 4005168404Spjd if (tzp) /* Attempt to remove the existing target */ 4006185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 4007168404Spjd 4008168404Spjd if (error == 0) { 4009168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 4010168404Spjd if (error == 0) { 4011219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 4012185029Spjd 4013219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4014219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4015240415Smm ASSERT0(error); 4016219089Spjd 4017168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 4018219089Spjd if (error == 0) { 4019219089Spjd zfs_log_rename(zilog, tx, TX_RENAME | 4020219089Spjd (flags & FIGNORECASE ? TX_CI : 0), sdzp, 4021219089Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 4022185029Spjd 4023219089Spjd /* 4024219089Spjd * Update path information for the target vnode 4025219089Spjd */ 4026219089Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, 4027219089Spjd strlen(tnm)); 4028219089Spjd } else { 4029219089Spjd /* 4030219089Spjd * At this point, we have successfully created 4031219089Spjd * the target name, but have failed to remove 4032219089Spjd * the source name. 
Since the create was done 4033219089Spjd * with the ZRENAMING flag, there are 4034219089Spjd * complications; for one, the link count is 4035219089Spjd * wrong. The easiest way to deal with this 4036219089Spjd * is to remove the newly created target, and 4037219089Spjd * return the original error. This must 4038219089Spjd * succeed; fortunately, it is very unlikely to 4039219089Spjd * fail, since we just created it. 4040219089Spjd */ 4041219089Spjd VERIFY3U(zfs_link_destroy(tdl, szp, tx, 4042219089Spjd ZRENAMING, NULL), ==, 0); 4043219089Spjd } 4044168404Spjd } 4045168404Spjd#ifdef FREEBSD_NAMECACHE 4046168404Spjd if (error == 0) { 4047168404Spjd cache_purge(sdvp); 4048168404Spjd cache_purge(tdvp); 4049240829Spjd cache_purge(ZTOV(szp)); 4050240829Spjd if (tzp) 4051240829Spjd cache_purge(ZTOV(tzp)); 4052168404Spjd } 4053168404Spjd#endif 4054168404Spjd } 4055168404Spjd 4056168404Spjd dmu_tx_commit(tx); 4057168404Spjdout: 4058168404Spjd if (zl != NULL) 4059168404Spjd zfs_rename_unlock(&zl); 4060168404Spjd 4061168404Spjd zfs_dirent_unlock(sdl); 4062168404Spjd zfs_dirent_unlock(tdl); 4063168404Spjd 4064208131Smm if (sdzp == tdzp) 4065208131Smm rw_exit(&sdzp->z_name_lock); 4066208131Smm 4067219089Spjd 4068168962Spjd VN_RELE(ZTOV(szp)); 4069168404Spjd if (tzp) 4070168962Spjd VN_RELE(ZTOV(tzp)); 4071168404Spjd 4072219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4073219089Spjd zil_commit(zilog, 0); 4074219089Spjd 4075168404Spjd ZFS_EXIT(zfsvfs); 4076168404Spjd 4077168404Spjd return (error); 4078168404Spjd} 4079168404Spjd 4080168404Spjd/* 4081168404Spjd * Insert the indicated symbolic reference entry into the directory. 4082168404Spjd * 4083168404Spjd * IN: dvp - Directory to contain new symbolic link. 4084168404Spjd * link - Name for new symlink entry. 4085168404Spjd * vap - Attributes of new entry. 4086168404Spjd * cr - credentials of caller. 
4087185029Spjd * ct - caller context 4088185029Spjd * flags - case flags 4089168404Spjd * 4090251631Sdelphij * RETURN: 0 on success, error code on failure. 4091168404Spjd * 4092168404Spjd * Timestamps: 4093168404Spjd * dvp - ctime|mtime updated 4094168404Spjd */ 4095185029Spjd/*ARGSUSED*/ 4096168404Spjdstatic int 4097185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4098185029Spjd cred_t *cr, kthread_t *td) 4099168404Spjd{ 4100168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4101168404Spjd zfs_dirlock_t *dl; 4102168404Spjd dmu_tx_t *tx; 4103168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4104185029Spjd zilog_t *zilog; 4105219089Spjd uint64_t len = strlen(link); 4106168404Spjd int error; 4107185029Spjd int zflg = ZNEW; 4108209962Smm zfs_acl_ids_t acl_ids; 4109209962Smm boolean_t fuid_dirtied; 4110219089Spjd uint64_t txtype = TX_SYMLINK; 4111260763Savg boolean_t waited = B_FALSE; 4112185029Spjd int flags = 0; 4113168404Spjd 4114168962Spjd ASSERT(vap->va_type == VLNK); 4115168404Spjd 4116168404Spjd ZFS_ENTER(zfsvfs); 4117185029Spjd ZFS_VERIFY_ZP(dzp); 4118185029Spjd zilog = zfsvfs->z_log; 4119185029Spjd 4120185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4121185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4122185029Spjd ZFS_EXIT(zfsvfs); 4123249195Smm return (SET_ERROR(EILSEQ)); 4124185029Spjd } 4125185029Spjd if (flags & FIGNORECASE) 4126185029Spjd zflg |= ZCILOOK; 4127168404Spjd 4128168404Spjd if (len > MAXPATHLEN) { 4129168404Spjd ZFS_EXIT(zfsvfs); 4130249195Smm return (SET_ERROR(ENAMETOOLONG)); 4131168404Spjd } 4132168404Spjd 4133219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4134219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4135219089Spjd ZFS_EXIT(zfsvfs); 4136219089Spjd return (error); 4137219089Spjd } 4138262112Savg 4139262112Savg getnewvnode_reserve(1); 4140262112Savg 4141219089Spjdtop: 4142168404Spjd /* 4143168404Spjd * Attempt to lock directory; fail if entry already exists. 
4144168404Spjd */ 4145185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4146185029Spjd if (error) { 4147219089Spjd zfs_acl_ids_free(&acl_ids); 4148262112Savg getnewvnode_drop_reserve(); 4149168404Spjd ZFS_EXIT(zfsvfs); 4150168404Spjd return (error); 4151168404Spjd } 4152168404Spjd 4153219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4154219089Spjd zfs_acl_ids_free(&acl_ids); 4155219089Spjd zfs_dirent_unlock(dl); 4156262112Savg getnewvnode_drop_reserve(); 4157219089Spjd ZFS_EXIT(zfsvfs); 4158219089Spjd return (error); 4159219089Spjd } 4160219089Spjd 4161209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4162209962Smm zfs_acl_ids_free(&acl_ids); 4163209962Smm zfs_dirent_unlock(dl); 4164262112Savg getnewvnode_drop_reserve(); 4165209962Smm ZFS_EXIT(zfsvfs); 4166249195Smm return (SET_ERROR(EDQUOT)); 4167209962Smm } 4168168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4169209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4170168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4171168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4172219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4173219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4174219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4175219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4176219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4177219089Spjd acl_ids.z_aclp->z_acl_bytes); 4178219089Spjd } 4179209962Smm if (fuid_dirtied) 4180209962Smm zfs_fuid_txhold(zfsvfs, tx); 4181260763Savg error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 4182168404Spjd if (error) { 4183168404Spjd zfs_dirent_unlock(dl); 4184209962Smm if (error == ERESTART) { 4185260763Savg waited = B_TRUE; 4186168404Spjd dmu_tx_wait(tx); 4187168404Spjd dmu_tx_abort(tx); 4188168404Spjd goto top; 4189168404Spjd } 4190219089Spjd zfs_acl_ids_free(&acl_ids); 4191168404Spjd dmu_tx_abort(tx); 4192262112Savg getnewvnode_drop_reserve(); 4193168404Spjd ZFS_EXIT(zfsvfs); 4194168404Spjd return (error); 4195168404Spjd } 4196168404Spjd 4197168404Spjd /* 4198168404Spjd * Create a new object for the symlink. 4199219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4200168404Spjd */ 4201219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4202168404Spjd 4203219089Spjd if (fuid_dirtied) 4204219089Spjd zfs_fuid_sync(zfsvfs, tx); 4205209962Smm 4206219089Spjd mutex_enter(&zp->z_lock); 4207219089Spjd if (zp->z_is_sa) 4208219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4209219089Spjd link, len, tx); 4210219089Spjd else 4211219089Spjd zfs_sa_symlink(zp, link, len, tx); 4212219089Spjd mutex_exit(&zp->z_lock); 4213168404Spjd 4214219089Spjd zp->z_size = len; 4215219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4216219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4217168404Spjd /* 4218168404Spjd * Insert the new object into the directory. 
4219168404Spjd */ 4220168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4221168404Spjd 4222219089Spjd if (flags & FIGNORECASE) 4223219089Spjd txtype |= TX_CI; 4224219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4225219089Spjd *vpp = ZTOV(zp); 4226219089Spjd 4227209962Smm zfs_acl_ids_free(&acl_ids); 4228209962Smm 4229168404Spjd dmu_tx_commit(tx); 4230168404Spjd 4231262112Savg getnewvnode_drop_reserve(); 4232262112Savg 4233168404Spjd zfs_dirent_unlock(dl); 4234168404Spjd 4235219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4236219089Spjd zil_commit(zilog, 0); 4237219089Spjd 4238168404Spjd ZFS_EXIT(zfsvfs); 4239168404Spjd return (error); 4240168404Spjd} 4241168404Spjd 4242168404Spjd/* 4243168404Spjd * Return, in the buffer contained in the provided uio structure, 4244168404Spjd * the symbolic path referred to by vp. 4245168404Spjd * 4246168404Spjd * IN: vp - vnode of symbolic link. 4247251631Sdelphij * uio - structure to contain the link path. 4248168404Spjd * cr - credentials of caller. 4249185029Spjd * ct - caller context 4250168404Spjd * 4251251631Sdelphij * OUT: uio - structure containing the link path. 4252168404Spjd * 4253251631Sdelphij * RETURN: 0 on success, error code on failure. 
4254168404Spjd * 4255168404Spjd * Timestamps: 4256168404Spjd * vp - atime updated 4257168404Spjd */ 4258168404Spjd/* ARGSUSED */ 4259168404Spjdstatic int 4260185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4261168404Spjd{ 4262168404Spjd znode_t *zp = VTOZ(vp); 4263168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4264168404Spjd int error; 4265168404Spjd 4266168404Spjd ZFS_ENTER(zfsvfs); 4267185029Spjd ZFS_VERIFY_ZP(zp); 4268168404Spjd 4269219089Spjd mutex_enter(&zp->z_lock); 4270219089Spjd if (zp->z_is_sa) 4271219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4272219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4273219089Spjd else 4274219089Spjd error = zfs_sa_readlink(zp, uio); 4275219089Spjd mutex_exit(&zp->z_lock); 4276168404Spjd 4277168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4278219089Spjd 4279168404Spjd ZFS_EXIT(zfsvfs); 4280168404Spjd return (error); 4281168404Spjd} 4282168404Spjd 4283168404Spjd/* 4284168404Spjd * Insert a new entry into directory tdvp referencing svp. 4285168404Spjd * 4286168404Spjd * IN: tdvp - Directory to contain new entry. 4287168404Spjd * svp - vnode of new entry. 4288168404Spjd * name - name of new entry. 4289168404Spjd * cr - credentials of caller. 4290185029Spjd * ct - caller context 4291168404Spjd * 4292251631Sdelphij * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
/* ARGSUSED */
static int
zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(tdvp);
	znode_t		*tzp, *szp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	vnode_t		*realvp;
	int		error;
	int		zf = ZNEW;	/* directory-lock flags; ZCILOOK may be added */
	uint64_t	parent;
	uid_t		owner;
	boolean_t	waited = B_FALSE; /* set once we have slept in dmu_tx_wait() */

	ASSERT(tdvp->v_type == VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	/* The intent log pointer is fetched only after ZFS_ENTER(). */
	zilog = zfsvfs->z_log;

	/* If the source has an underlying "real" vnode, operate on that one. */
	if (VOP_REALVP(svp, &realvp, ct) == 0)
		svp = realvp;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (svp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	szp = VTOZ(svp);
	ZFS_VERIFY_ZP(szp);

	/*
	 * We check z_zfsvfs rather than v_vfsp here, because snapshots and the
	 * ctldir appear to have the same v_vfsp.
	 */
	if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* On UTF-8-only file systems the new name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}


	/*
	 * A caller that is not the owner of the source file must pass the
	 * secpolicy_basic_link() policy check.
	 */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* The caller needs ACE_ADD_FILE access on the target directory. */
	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, dzp);
	/* First attempt does not block; retries after a wait use TXG_WAITED. */
	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/*
			 * Wait with the directory lock already dropped, then
			 * abort this tx and start over from the lock step.
			 */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(dl, szp, tx, 0);

	/* Only a successful link is recorded in the intent log. */
	if (error == 0) {
		uint64_t txtype = TX_LINK;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
	}

	/* The tx is committed whether or not zfs_link_create() succeeded. */
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	if (error == 0) {
		vnevent_link(svp, ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

#ifdef sun
/*
 * zfs_null_putapage() is used when the file system has been force
 * unmounted. It just drops the pages.
4446219089Spjd */ 4447219089Spjd/* ARGSUSED */ 4448219089Spjdstatic int 4449219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4450219089Spjd size_t *lenp, int flags, cred_t *cr) 4451219089Spjd{ 4452219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4453219089Spjd return (0); 4454219089Spjd} 4455219089Spjd 4456219089Spjd/* 4457219089Spjd * Push a page out to disk, klustering if possible. 4458219089Spjd * 4459219089Spjd * IN: vp - file to push page to. 4460219089Spjd * pp - page to push. 4461219089Spjd * flags - additional flags. 4462219089Spjd * cr - credentials of caller. 4463219089Spjd * 4464219089Spjd * OUT: offp - start of range pushed. 4465219089Spjd * lenp - len of range pushed. 4466219089Spjd * 4467251631Sdelphij * RETURN: 0 on success, error code on failure. 4468219089Spjd * 4469219089Spjd * NOTE: callers must have locked the page to be pushed. On 4470219089Spjd * exit, the page (and all other pages in the kluster) must be 4471219089Spjd * unlocked. 4472219089Spjd */ 4473219089Spjd/* ARGSUSED */ 4474219089Spjdstatic int 4475219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4476219089Spjd size_t *lenp, int flags, cred_t *cr) 4477219089Spjd{ 4478219089Spjd znode_t *zp = VTOZ(vp); 4479219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4480219089Spjd dmu_tx_t *tx; 4481219089Spjd u_offset_t off, koff; 4482219089Spjd size_t len, klen; 4483219089Spjd int err; 4484219089Spjd 4485219089Spjd off = pp->p_offset; 4486219089Spjd len = PAGESIZE; 4487219089Spjd /* 4488219089Spjd * If our blocksize is bigger than the page size, try to kluster 4489219089Spjd * multiple pages so that we write a full block (thus avoiding 4490219089Spjd * a read-modify-write). 4491219089Spjd */ 4492219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4493219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4494219089Spjd koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 4495219089Spjd ASSERT(koff <= zp->z_size); 4496219089Spjd if (koff + klen > zp->z_size) 4497219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4498219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4499219089Spjd } 4500219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4501219089Spjd 4502219089Spjd /* 4503219089Spjd * Can't push pages past end-of-file. 4504219089Spjd */ 4505219089Spjd if (off >= zp->z_size) { 4506219089Spjd /* ignore all pages */ 4507219089Spjd err = 0; 4508219089Spjd goto out; 4509219089Spjd } else if (off + len > zp->z_size) { 4510219089Spjd int npages = btopr(zp->z_size - off); 4511219089Spjd page_t *trunc; 4512219089Spjd 4513219089Spjd page_list_break(&pp, &trunc, npages); 4514219089Spjd /* ignore pages past end of file */ 4515219089Spjd if (trunc) 4516219089Spjd pvn_write_done(trunc, flags); 4517219089Spjd len = zp->z_size - off; 4518219089Spjd } 4519219089Spjd 4520219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4521219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4522249195Smm err = SET_ERROR(EDQUOT); 4523219089Spjd goto out; 4524219089Spjd } 4525219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4526219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4527219089Spjd 4528219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4529219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4530260776Savg err = dmu_tx_assign(tx, TXG_WAIT); 4531219089Spjd if (err != 0) { 4532219089Spjd dmu_tx_abort(tx); 4533219089Spjd goto out; 4534219089Spjd } 4535219089Spjd 4536219089Spjd if (zp->z_blksz <= PAGESIZE) { 4537219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4538219089Spjd ASSERT3U(len, <=, PAGESIZE); 4539219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4540219089Spjd zfs_unmap_page(pp, va); 4541219089Spjd } else { 4542219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4543219089Spjd } 4544219089Spjd 4545219089Spjd if (err == 0) { 
4546219089Spjd uint64_t mtime[2], ctime[2]; 4547219089Spjd sa_bulk_attr_t bulk[3]; 4548219089Spjd int count = 0; 4549219089Spjd 4550219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4551219089Spjd &mtime, 16); 4552219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4553219089Spjd &ctime, 16); 4554219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4555219089Spjd &zp->z_pflags, 8); 4556219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4557219089Spjd B_TRUE); 4558219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4559219089Spjd } 4560219089Spjd dmu_tx_commit(tx); 4561219089Spjd 4562219089Spjdout: 4563219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4564219089Spjd if (offp) 4565219089Spjd *offp = off; 4566219089Spjd if (lenp) 4567219089Spjd *lenp = len; 4568219089Spjd 4569219089Spjd return (err); 4570219089Spjd} 4571219089Spjd 4572219089Spjd/* 4573219089Spjd * Copy the portion of the file indicated from pages into the file. 4574219089Spjd * The pages are stored in a page list attached to the files vnode. 4575219089Spjd * 4576219089Spjd * IN: vp - vnode of file to push page data to. 4577219089Spjd * off - position in file to put data. 4578219089Spjd * len - amount of data to write. 4579219089Spjd * flags - flags to control the operation. 4580219089Spjd * cr - credentials of caller. 4581219089Spjd * ct - caller context. 4582219089Spjd * 4583251631Sdelphij * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	vp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		*pp;
	size_t		io_len;
	u_offset_t	io_off;
	uint_t		blksz;
	rl_t		*rl;
	int		error = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Align this request to the file block size in case we kluster.
	 * XXX - this can result in pretty aggressive locking, which can
	 * impact simultaneous read/write access.  One option might be
	 * to break up long requests (len == 0) into block-by-block
	 * operations to get narrower locking.
	 */
	blksz = zp->z_blksz;
	if (ISP2(blksz))
		io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
	else
		io_off = 0;
	if (len > 0 && ISP2(blksz))
		io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
	else
		io_len = 0;

	/*
	 * io_len == 0 means either the caller passed len == 0 or the block
	 * size is not a power of two; both cases push every dirty page from
	 * io_off onward under a lock on the rest of the file.
	 */
	if (io_len == 0) {
		/*
		 * Search the entire vp list for pages >= io_off.
		 */
		rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
		error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
		goto out;
	}
	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);

	if (off > zp->z_size) {
		/* past end of file */
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Never walk past the page-rounded end of file. */
	len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);

	/*
	 * io_len is rewritten on every pass: zfs_putapage() reports the
	 * length it actually pushed, otherwise we step one page.
	 */
	for (off = io_off; io_off < off + len; io_off += io_len) {
		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
			pp = page_lookup(vp, io_off,
			    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
		} else {
			pp = page_lookup_nowait(vp, io_off,
			    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
		}

		if (pp != NULL && pvn_getdirty(pp, flags)) {
			int err;

			/*
			 * Found a dirty page to push
			 */
			err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
			/* Remember the failure but keep pushing later pages. */
			if (err)
				error = err;
		} else {
			io_len = PAGESIZE;
		}
	}
out:
	zfs_range_unlock(rl);
	/* Synchronous requests, or sync=always, commit the intent log. */
	if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zfsvfs->z_log, zp->z_id);
	ZFS_EXIT(zfsvfs);
	return (error);
}
#endif	/* sun */

/*
 * Called when a vnode becomes inactive.  Recycles the vnode when the
 * znode has no SA handle or has been unlinked; otherwise writes back a
 * dirty access time, if any.  The whole operation runs with
 * z_teardown_inactive_lock held as reader.
 */
/*ARGSUSED*/
void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		vrecycle(vp);
		return;
	}

	mutex_enter(&zp->z_lock);
	if (zp->z_unlinked) {
		/*
		 * Fast path to recycle a vnode of a removed file.
		 */
		mutex_exit(&zp->z_lock);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		vrecycle(vp);
		return;
	}
	mutex_exit(&zp->z_lock);

	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			/* Best effort: the atime update is simply dropped. */
			dmu_tx_abort(tx);
		} else {
			mutex_enter(&zp->z_lock);
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
			zp->z_atime_dirty = 0;
			mutex_exit(&zp->z_lock);
			dmu_tx_commit(tx);
		}
	}
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
}

#ifdef sun
/*
 * Bounds-check the seek operation.
 *
 *	IN:	vp	- vnode seeking within
 *		ooff	- old file offset
 *		noffp	- pointer to new file offset
 *		ct	- caller context
 *
 *	RETURN:	0 on success, EINVAL if new offset invalid.
4733219089Spjd */ 4734219089Spjd/* ARGSUSED */ 4735219089Spjdstatic int 4736219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4737219089Spjd caller_context_t *ct) 4738219089Spjd{ 4739219089Spjd if (vp->v_type == VDIR) 4740219089Spjd return (0); 4741219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4742219089Spjd} 4743219089Spjd 4744219089Spjd/* 4745219089Spjd * Pre-filter the generic locking function to trap attempts to place 4746219089Spjd * a mandatory lock on a memory mapped file. 4747219089Spjd */ 4748219089Spjdstatic int 4749219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4750219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4751219089Spjd{ 4752219089Spjd znode_t *zp = VTOZ(vp); 4753219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4754219089Spjd 4755219089Spjd ZFS_ENTER(zfsvfs); 4756219089Spjd ZFS_VERIFY_ZP(zp); 4757219089Spjd 4758219089Spjd /* 4759219089Spjd * We are following the UFS semantics with respect to mapcnt 4760219089Spjd * here: If we see that the file is mapped already, then we will 4761219089Spjd * return an error, but we don't worry about races between this 4762219089Spjd * function and zfs_map(). 4763219089Spjd */ 4764219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4765219089Spjd ZFS_EXIT(zfsvfs); 4766249195Smm return (SET_ERROR(EAGAIN)); 4767219089Spjd } 4768219089Spjd ZFS_EXIT(zfsvfs); 4769219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4770219089Spjd} 4771219089Spjd 4772219089Spjd/* 4773219089Spjd * If we can't find a page in the cache, we will create a new page 4774219089Spjd * and fill it with file data. For efficiency, we may try to fill 4775219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4776219089Spjd * list. Note that the pages to be filled are held with an exclusive 4777219089Spjd * lock to prevent access by other threads while they are being filled. 
 */
static int
zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
    caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw)
{
	znode_t *zp = VTOZ(vp);
	page_t *pp, *cur_pp;
	objset_t *os = zp->z_zfsvfs->z_os;
	u_offset_t io_off, total;
	size_t io_len;
	int err;

	if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
		/*
		 * We only have a single page, don't bother klustering
		 */
		io_off = off;
		io_len = PAGESIZE;
		pp = page_create_va(vp, io_off, io_len,
		    PG_EXCL | PG_WAIT, seg, addr);
	} else {
		/*
		 * Try to find enough pages to fill the page list
		 */
		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
		    &io_len, off, plsz, 0);
	}
	if (pp == NULL) {
		/*
		 * The page already exists, nothing to do here.
		 */
		*pl = NULL;
		return (0);
	}

	/*
	 * Fill the pages in the kluster.
	 */
	cur_pp = pp;
	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
		caddr_t va;

		ASSERT3U(io_off, ==, cur_pp->p_offset);
		/* Map the page, read one page of file data into it, unmap. */
		va = zfs_map_page(cur_pp, S_WRITE);
		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
		    DMU_READ_PREFETCH);
		zfs_unmap_page(cur_pp, va);
		if (err) {
			/* On error, toss the entire kluster */
			pvn_read_done(pp, B_ERROR);
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}
		cur_pp = cur_pp->p_next;
	}

	/*
	 * Fill in the page list array from the kluster starting
	 * from the desired offset `off'.
	 * NOTE: the page list will always be null terminated.
	 */
	pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	ASSERT(pl == NULL || (*pl)->p_offset == off);

	return (0);
}

/*
 * Return pointers to the pages for the file region [off, off + len]
 * in the pl array.  If plsz is greater than len, this function may
 * also return page pointers from after the specified region
 * (i.e. the region [off, off + plsz]).  These additional pages are
 * only returned if they are already in the cache, or were created as
 * part of a klustered read.
 *
 *	IN:	vp	- vnode of file to get data from.
 *		off	- position in file to get data from.
 *		len	- amount of data to retrieve.
 *		plsz	- length of provided page list.
 *		seg	- segment to obtain pages for.
 *		addr	- virtual address of fault.
 *		rw	- mode of created pages.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	OUT:	protp	- protection mode of created pages.
 *		pl	- list of pages created.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 */
/* ARGSUSED */
static int
zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		**pl0 = pl;	/* start of list, for error unwind */
	int		err = 0;

	/* we do our own caching, faultahead is unnecessary */
	if (pl == NULL)
		return (0);
	else if (len > plsz)
		len = plsz;
	else
		len = P2ROUNDUP(len, PAGESIZE);
	ASSERT(plsz >= len);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (protp)
		*protp = PROT_ALL;

	/*
	 * Loop through the requested range [off, off + len) looking
	 * for pages.  If we don't find a page, we will need to create
	 * a new page and fill it with data from the file.
	 */
	while (len > 0) {
		if (*pl = page_lookup(vp, off, SE_SHARED))
			*(pl+1) = NULL;
		else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
			goto out;
		/*
		 * Step over every page just placed in the list, advancing
		 * the offset/address and shrinking the remaining budgets.
		 */
		while (*pl) {
			ASSERT3U((*pl)->p_offset, ==, off);
			off += PAGESIZE;
			addr += PAGESIZE;
			if (len > 0) {
				ASSERT3U(len, >=, PAGESIZE);
				len -= PAGESIZE;
			}
			ASSERT3U(plsz, >=, PAGESIZE);
			plsz -= PAGESIZE;
			pl++;
		}
	}

	/*
	 * Fill out the page array with any pages already in the cache.
	 */
	while (plsz > 0 &&
	    (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
		off += PAGESIZE;
		plsz -= PAGESIZE;
	}
out:
	if (err) {
		/*
		 * Release any pages we have previously locked.
		 */
		while (pl > pl0)
			page_unlock(*--pl);
	} else {
		ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	}

	/* Terminate the list at the current position on both paths. */
	*pl = NULL;

	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * Request a memory map for a section of a file.
 * This code interacts
 * with common code and the VM system as follows:
 *
 * - common code calls mmap(), which ends up in smmap_common()
 * - this calls VOP_MAP(), which takes you into (say) zfs
 * - zfs_map() calls as_map(), passing segvn_create() as the callback
 * - segvn_create() creates the new segment and calls VOP_ADDMAP()
 * - zfs_addmap() updates z_mapcnt
 */
/*ARGSUSED*/
static int
zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	segvn_crargs_t	vn_a;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Writable mappings are refused on immutable/readonly/appendonly files. */
	if ((prot & PROT_WRITE) && (zp->z_pflags &
	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Quarantined files may not be mapped for read or execute. */
	if ((prot & (PROT_READ | PROT_EXEC)) &&
	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EACCES));
	}

	if (vp->v_flag & VNOMAP) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOSYS));
	}

	/* Reject negative offsets and ranges that would exceed MAXOFFSET_T. */
	if (off < 0 || len > MAXOFFSET_T - off) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENXIO));
	}

	if (vp->v_type != VREG) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENODEV));
	}

	/*
	 * If file is locked, disallow mapping.
	 */
	if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EAGAIN));
	}

	as_rangelock(as);
	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
	if (error != 0) {
		as_rangeunlock(as);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Arguments handed to segvn_create() through as_map(). */
	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.cred = cr;
	vn_a.amp = NULL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);

	as_rangeunlock(as);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Account for a new mapping by adding its size in pages (len rounded up)
 * to the znode's z_mapcnt.  Always returns 0.
 */
/* ARGSUSED */
static int
zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	uint64_t pages = btopr(len);

	atomic_add_64(&VTOZ(vp)->z_mapcnt, pages);
	return (0);
}

/*
 * The reason we push dirty pages as part of zfs_delmap() is so that we get a
 * more accurate mtime for the associated file.  Since we don't have a way of
 * detecting when the data was actually modified, we have to resort to
 * heuristics.  If an explicit msync() is done, then we mark the mtime when the
 * last page is pushed.
The problem occurs when the msync() call is omitted, 5051219089Spjd * which by far the most common case: 5052219089Spjd * 5053269002Sdelphij * open() 5054269002Sdelphij * mmap() 5055269002Sdelphij * <modify memory> 5056269002Sdelphij * munmap() 5057269002Sdelphij * close() 5058269002Sdelphij * <time lapse> 5059269002Sdelphij * putpage() via fsflush 5060219089Spjd * 5061219089Spjd * If we wait until fsflush to come along, we can have a modification time that 5062219089Spjd * is some arbitrary point in the future. In order to prevent this in the 5063219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5064219089Spjd * torn down. 5065219089Spjd */ 5066219089Spjd/* ARGSUSED */ 5067219089Spjdstatic int 5068219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5069219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5070219089Spjd caller_context_t *ct) 5071219089Spjd{ 5072219089Spjd uint64_t pages = btopr(len); 5073219089Spjd 5074219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5075219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5076219089Spjd 5077219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5078219089Spjd vn_has_cached_data(vp)) 5079219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5080219089Spjd 5081219089Spjd return (0); 5082219089Spjd} 5083219089Spjd 5084219089Spjd/* 5085219089Spjd * Free or allocate space in a file. Currently, this function only 5086219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 5087219089Spjd * misnamed, as its functionality includes the ability to allocate as 5088219089Spjd * well as free space. 5089219089Spjd * 5090219089Spjd * IN: vp - vnode of file to free data in. 5091219089Spjd * cmd - action to take (only F_FREESP supported). 5092219089Spjd * bfp - section of file to free/alloc. 5093219089Spjd * flag - current file open mode flags. 5094219089Spjd * offset - current file offset. 
 *		cr	- credentials of caller [UNUSED].
 *		ct	- caller context.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - ctime|mtime updated
 */
/* ARGSUSED */
static int
zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint64_t	off, len;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (cmd != F_FREESP) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	if (error = convoff(vp, bfp, 0, offset)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* A negative length is rejected after the offset conversion. */
	if (bfp->l_len < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	off = bfp->l_start;
	len = bfp->l_len; /* 0 means from off to end of file */

	error = zfs_freesp(zp, off, len, flag, TRUE);

	ZFS_EXIT(zfsvfs);
	return (error);
}
#endif	/* sun */

/* Both ZFS file-ID layouts must fit inside the generic struct fid. */
CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));

/*
 * Build a file identifier for vp in *fidp.
 *
 * The object number and the low 32 bits of the generation number are
 * stored byte by byte, least significant byte first.  When
 * zfsvfs->z_parent differs from zfsvfs, the long form is used and the
 * objset id is stored as well, with the objset generation set to zero.
 *
 *	RETURN:	0 on success, error code on failure.
 */
/*ARGSUSED*/
static int
zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint32_t	gen;
	uint64_t	gen64;
	uint64_t	object = zp->z_id;
	zfid_short_t	*zfid;
	int		size, i, error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
	    &gen64, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Only the low 32 bits of the generation are encoded. */
	gen = (uint32_t)gen64;

	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;

#ifdef illumos
	/* Caller's buffer is too small: report the size needed. */
	if (fidp->fid_len < size) {
		fidp->fid_len = size;
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOSPC));
	}
#else
	fidp->fid_len = size;
#endif

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = size;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* Must have a non-zero generation number to distinguish from .zfs */
	if (gen == 0)
		gen = 1;
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));

	if (size == LONG_FID_LEN) {
		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
		zfid_long_t	*zlfid;

		zlfid = (zfid_long_t *)fidp;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));

		/* XXX - this should be the generation number for the objset */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			zlfid->zf_setgen[i] = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}
5209168404Spjd 5210168404Spjdstatic int 5211185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5212185029Spjd caller_context_t *ct) 5213168404Spjd{ 5214168404Spjd znode_t *zp, *xzp; 5215168404Spjd zfsvfs_t *zfsvfs; 5216168404Spjd zfs_dirlock_t *dl; 5217168404Spjd int error; 5218168404Spjd 5219168404Spjd switch (cmd) { 5220168404Spjd case _PC_LINK_MAX: 5221168404Spjd *valp = INT_MAX; 5222168404Spjd return (0); 5223168404Spjd 5224168404Spjd case _PC_FILESIZEBITS: 5225168404Spjd *valp = 64; 5226168404Spjd return (0); 5227219089Spjd#ifdef sun 5228168404Spjd case _PC_XATTR_EXISTS: 5229168404Spjd zp = VTOZ(vp); 5230168404Spjd zfsvfs = zp->z_zfsvfs; 5231168404Spjd ZFS_ENTER(zfsvfs); 5232185029Spjd ZFS_VERIFY_ZP(zp); 5233168404Spjd *valp = 0; 5234168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5235185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5236168404Spjd if (error == 0) { 5237168404Spjd zfs_dirent_unlock(dl); 5238168404Spjd if (!zfs_dirempty(xzp)) 5239168404Spjd *valp = 1; 5240168404Spjd VN_RELE(ZTOV(xzp)); 5241168404Spjd } else if (error == ENOENT) { 5242168404Spjd /* 5243168404Spjd * If there aren't extended attributes, it's the 5244168404Spjd * same as having zero of them. 
5245168404Spjd */ 5246168404Spjd error = 0; 5247168404Spjd } 5248168404Spjd ZFS_EXIT(zfsvfs); 5249168404Spjd return (error); 5250168404Spjd 5251219089Spjd case _PC_SATTR_ENABLED: 5252219089Spjd case _PC_SATTR_EXISTS: 5253219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5254219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5255219089Spjd return (0); 5256219089Spjd 5257219089Spjd case _PC_ACCESS_FILTERING: 5258219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5259219089Spjd vp->v_type == VDIR; 5260219089Spjd return (0); 5261219089Spjd 5262219089Spjd case _PC_ACL_ENABLED: 5263219089Spjd *valp = _ACL_ACE_ENABLED; 5264219089Spjd return (0); 5265219089Spjd#endif /* sun */ 5266219089Spjd case _PC_MIN_HOLE_SIZE: 5267219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5268219089Spjd return (0); 5269219089Spjd#ifdef sun 5270219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5271219089Spjd /* nanosecond timestamp resolution */ 5272219089Spjd *valp = 1L; 5273219089Spjd return (0); 5274219089Spjd#endif /* sun */ 5275168404Spjd case _PC_ACL_EXTENDED: 5276196949Strasz *valp = 0; 5277168404Spjd return (0); 5278168404Spjd 5279196949Strasz case _PC_ACL_NFS4: 5280196949Strasz *valp = 1; 5281196949Strasz return (0); 5282196949Strasz 5283196949Strasz case _PC_ACL_PATH_MAX: 5284196949Strasz *valp = ACL_MAX_ENTRIES; 5285196949Strasz return (0); 5286196949Strasz 5287168404Spjd default: 5288168962Spjd return (EOPNOTSUPP); 5289168404Spjd } 5290168404Spjd} 5291168404Spjd 5292168404Spjd/*ARGSUSED*/ 5293168404Spjdstatic int 5294185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5295185029Spjd caller_context_t *ct) 5296168404Spjd{ 5297168404Spjd znode_t *zp = VTOZ(vp); 5298168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5299168404Spjd int error; 5300185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5301168404Spjd 5302168404Spjd ZFS_ENTER(zfsvfs); 5303185029Spjd ZFS_VERIFY_ZP(zp); 5304185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5305168404Spjd ZFS_EXIT(zfsvfs); 5306168404Spjd 5307168404Spjd return (error); 5308168404Spjd} 5309168404Spjd 5310168404Spjd/*ARGSUSED*/ 5311228685Spjdint 5312185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5313185029Spjd caller_context_t *ct) 5314168404Spjd{ 5315168404Spjd znode_t *zp = VTOZ(vp); 5316168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5317168404Spjd int error; 5318185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5319219089Spjd zilog_t *zilog = zfsvfs->z_log; 5320168404Spjd 5321168404Spjd ZFS_ENTER(zfsvfs); 5322185029Spjd ZFS_VERIFY_ZP(zp); 5323219089Spjd 5324185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5325219089Spjd 5326219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5327219089Spjd zil_commit(zilog, 0); 5328219089Spjd 5329168404Spjd ZFS_EXIT(zfsvfs); 5330168404Spjd return (error); 5331168404Spjd} 5332168404Spjd 5333219089Spjd#ifdef sun 5334219089Spjd/* 5335251631Sdelphij * The smallest read we may consider to loan out an arcbuf. 5336251631Sdelphij * This must be a power of 2. 5337219089Spjd */ 5338219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5339251631Sdelphij/* 5340251631Sdelphij * If set to less than the file block size, allow loaning out of an 5341251631Sdelphij * arcbuf for a partial block read. This must be a power of 2. 
5342251631Sdelphij */ 5343219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5344219089Spjd 5345219089Spjd/*ARGSUSED*/ 5346168962Spjdstatic int 5347219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5348219089Spjd caller_context_t *ct) 5349219089Spjd{ 5350219089Spjd znode_t *zp = VTOZ(vp); 5351219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5352219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5353219089Spjd uio_t *uio = &xuio->xu_uio; 5354219089Spjd ssize_t size = uio->uio_resid; 5355219089Spjd offset_t offset = uio->uio_loffset; 5356219089Spjd int blksz; 5357219089Spjd int fullblk, i; 5358219089Spjd arc_buf_t *abuf; 5359219089Spjd ssize_t maxsize; 5360219089Spjd int preamble, postamble; 5361219089Spjd 5362219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5363249195Smm return (SET_ERROR(EINVAL)); 5364219089Spjd 5365219089Spjd ZFS_ENTER(zfsvfs); 5366219089Spjd ZFS_VERIFY_ZP(zp); 5367219089Spjd switch (ioflag) { 5368219089Spjd case UIO_WRITE: 5369219089Spjd /* 5370219089Spjd * Loan out an arc_buf for write if write size is bigger than 5371219089Spjd * max_blksz, and the file's block size is also max_blksz. 5372219089Spjd */ 5373219089Spjd blksz = max_blksz; 5374219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5375219089Spjd ZFS_EXIT(zfsvfs); 5376249195Smm return (SET_ERROR(EINVAL)); 5377219089Spjd } 5378219089Spjd /* 5379219089Spjd * Caller requests buffers for write before knowing where the 5380219089Spjd * write offset might be (e.g. NFS TCP write). 
5381219089Spjd */ 5382219089Spjd if (offset == -1) { 5383219089Spjd preamble = 0; 5384219089Spjd } else { 5385219089Spjd preamble = P2PHASE(offset, blksz); 5386219089Spjd if (preamble) { 5387219089Spjd preamble = blksz - preamble; 5388219089Spjd size -= preamble; 5389219089Spjd } 5390219089Spjd } 5391219089Spjd 5392219089Spjd postamble = P2PHASE(size, blksz); 5393219089Spjd size -= postamble; 5394219089Spjd 5395219089Spjd fullblk = size / blksz; 5396219089Spjd (void) dmu_xuio_init(xuio, 5397219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5398219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5399219089Spjd int, postamble, int, 5400219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5401219089Spjd 5402219089Spjd /* 5403219089Spjd * Have to fix iov base/len for partial buffers. They 5404219089Spjd * currently represent full arc_buf's. 5405219089Spjd */ 5406219089Spjd if (preamble) { 5407219089Spjd /* data begins in the middle of the arc_buf */ 5408219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5409219089Spjd blksz); 5410219089Spjd ASSERT(abuf); 5411219089Spjd (void) dmu_xuio_add(xuio, abuf, 5412219089Spjd blksz - preamble, preamble); 5413219089Spjd } 5414219089Spjd 5415219089Spjd for (i = 0; i < fullblk; i++) { 5416219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5417219089Spjd blksz); 5418219089Spjd ASSERT(abuf); 5419219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5420219089Spjd } 5421219089Spjd 5422219089Spjd if (postamble) { 5423219089Spjd /* data ends in the middle of the arc_buf */ 5424219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5425219089Spjd blksz); 5426219089Spjd ASSERT(abuf); 5427219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5428219089Spjd } 5429219089Spjd break; 5430219089Spjd case UIO_READ: 5431219089Spjd /* 5432219089Spjd * Loan out an arc_buf for read if the read size is larger than 5433219089Spjd * the current file block size. 
Block alignment is not 5434219089Spjd * considered. Partial arc_buf will be loaned out for read. 5435219089Spjd */ 5436219089Spjd blksz = zp->z_blksz; 5437219089Spjd if (blksz < zcr_blksz_min) 5438219089Spjd blksz = zcr_blksz_min; 5439219089Spjd if (blksz > zcr_blksz_max) 5440219089Spjd blksz = zcr_blksz_max; 5441219089Spjd /* avoid potential complexity of dealing with it */ 5442219089Spjd if (blksz > max_blksz) { 5443219089Spjd ZFS_EXIT(zfsvfs); 5444249195Smm return (SET_ERROR(EINVAL)); 5445219089Spjd } 5446219089Spjd 5447219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5448219089Spjd if (size > maxsize) 5449219089Spjd size = maxsize; 5450219089Spjd 5451219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5452219089Spjd ZFS_EXIT(zfsvfs); 5453249195Smm return (SET_ERROR(EINVAL)); 5454219089Spjd } 5455219089Spjd break; 5456219089Spjd default: 5457219089Spjd ZFS_EXIT(zfsvfs); 5458249195Smm return (SET_ERROR(EINVAL)); 5459219089Spjd } 5460219089Spjd 5461219089Spjd uio->uio_extflg = UIO_XUIO; 5462219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5463219089Spjd ZFS_EXIT(zfsvfs); 5464219089Spjd return (0); 5465219089Spjd} 5466219089Spjd 5467219089Spjd/*ARGSUSED*/ 5468219089Spjdstatic int 5469219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5470219089Spjd{ 5471219089Spjd int i; 5472219089Spjd arc_buf_t *abuf; 5473219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5474219089Spjd 5475219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5476219089Spjd 5477219089Spjd i = dmu_xuio_cnt(xuio); 5478219089Spjd while (i-- > 0) { 5479219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5480219089Spjd /* 5481219089Spjd * if abuf == NULL, it must be a write buffer 5482219089Spjd * that has been returned in zfs_write(). 
5483219089Spjd */ 5484219089Spjd if (abuf) 5485219089Spjd dmu_return_arcbuf(abuf); 5486219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5487219089Spjd } 5488219089Spjd 5489219089Spjd dmu_xuio_fini(xuio); 5490219089Spjd return (0); 5491219089Spjd} 5492219089Spjd 5493219089Spjd/* 5494219089Spjd * Predeclare these here so that the compiler assumes that 5495219089Spjd * this is an "old style" function declaration that does 5496219089Spjd * not include arguments => we won't get type mismatch errors 5497219089Spjd * in the initializations that follow. 5498219089Spjd */ 5499219089Spjdstatic int zfs_inval(); 5500219089Spjdstatic int zfs_isdir(); 5501219089Spjd 5502219089Spjdstatic int 5503219089Spjdzfs_inval() 5504219089Spjd{ 5505249195Smm return (SET_ERROR(EINVAL)); 5506219089Spjd} 5507219089Spjd 5508219089Spjdstatic int 5509219089Spjdzfs_isdir() 5510219089Spjd{ 5511249195Smm return (SET_ERROR(EISDIR)); 5512219089Spjd} 5513219089Spjd/* 5514219089Spjd * Directory vnode operations template 5515219089Spjd */ 5516219089Spjdvnodeops_t *zfs_dvnodeops; 5517219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5518219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5519219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5520219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5521219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5522219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5523219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5524219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5525219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5526219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5527219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5528219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5529219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5530219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5531219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5532219089Spjd VOPNAME_RMDIR, { .vop_rmdir = 
zfs_rmdir }, 5533219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5534219089Spjd VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5535219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5536219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5537219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5538219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5539219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5540219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5541219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5542269002Sdelphij VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5543219089Spjd NULL, NULL 5544219089Spjd}; 5545219089Spjd 5546219089Spjd/* 5547219089Spjd * Regular file vnode operations template 5548219089Spjd */ 5549219089Spjdvnodeops_t *zfs_fvnodeops; 5550219089Spjdconst fs_operation_def_t zfs_fvnodeops_template[] = { 5551219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5552219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5553219089Spjd VOPNAME_READ, { .vop_read = zfs_read }, 5554219089Spjd VOPNAME_WRITE, { .vop_write = zfs_write }, 5555219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5556219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5557219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5558219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5559219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5560219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5561219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5562219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5563219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5564219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5565219089Spjd VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5566219089Spjd VOPNAME_SPACE, { .vop_space = zfs_space }, 5567219089Spjd VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5568219089Spjd VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 
5569219089Spjd VOPNAME_MAP, { .vop_map = zfs_map }, 5570219089Spjd VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5571219089Spjd VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5572219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5573219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5574219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5575219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5576269002Sdelphij VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5577269002Sdelphij VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5578219089Spjd NULL, NULL 5579219089Spjd}; 5580219089Spjd 5581219089Spjd/* 5582219089Spjd * Symbolic link vnode operations template 5583219089Spjd */ 5584219089Spjdvnodeops_t *zfs_symvnodeops; 5585219089Spjdconst fs_operation_def_t zfs_symvnodeops_template[] = { 5586219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5587219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5588219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5589219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5590219089Spjd VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5591219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5592219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5593219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5594219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5595219089Spjd NULL, NULL 5596219089Spjd}; 5597219089Spjd 5598219089Spjd/* 5599219089Spjd * special share hidden files vnode operations template 5600219089Spjd */ 5601219089Spjdvnodeops_t *zfs_sharevnodeops; 5602219089Spjdconst fs_operation_def_t zfs_sharevnodeops_template[] = { 5603219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5604219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5605219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5606219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5607219089Spjd VOPNAME_PATHCONF, { 
.vop_pathconf = zfs_pathconf }, 5608219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5609219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5610219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5611219089Spjd NULL, NULL 5612219089Spjd}; 5613219089Spjd 5614219089Spjd/* 5615219089Spjd * Extended attribute directory vnode operations template 5616251631Sdelphij * 5617251631Sdelphij * This template is identical to the directory vnodes 5618251631Sdelphij * operation template except for restricted operations: 5619251631Sdelphij * VOP_MKDIR() 5620251631Sdelphij * VOP_SYMLINK() 5621251631Sdelphij * 5622219089Spjd * Note that there are other restrictions embedded in: 5623219089Spjd * zfs_create() - restrict type to VREG 5624219089Spjd * zfs_link() - no links into/out of attribute space 5625219089Spjd * zfs_rename() - no moves into/out of attribute space 5626219089Spjd */ 5627219089Spjdvnodeops_t *zfs_xdvnodeops; 5628219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5629219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5630219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5631219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5632219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5633219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5634219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5635219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5636219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5637219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5638219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5639219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5640219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5641219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5642219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5643219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5644219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 
5645219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5646219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5647219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5648219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5649219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5650219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5651219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5652219089Spjd NULL, NULL 5653219089Spjd}; 5654219089Spjd 5655219089Spjd/* 5656219089Spjd * Error vnode operations template 5657219089Spjd */ 5658219089Spjdvnodeops_t *zfs_evnodeops; 5659219089Spjdconst fs_operation_def_t zfs_evnodeops_template[] = { 5660219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5661219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5662219089Spjd NULL, NULL 5663219089Spjd}; 5664219089Spjd#endif /* sun */ 5665219089Spjd 5666219089Spjdstatic int 5667213673Spjdioflags(int ioflags) 5668213673Spjd{ 5669213673Spjd int flags = 0; 5670213673Spjd 5671213673Spjd if (ioflags & IO_APPEND) 5672213673Spjd flags |= FAPPEND; 5673213673Spjd if (ioflags & IO_NDELAY) 5674213673Spjd flags |= FNONBLOCK; 5675213673Spjd if (ioflags & IO_SYNC) 5676213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 5677213673Spjd 5678213673Spjd return (flags); 5679213673Spjd} 5680213673Spjd 5681213673Spjdstatic int 5682213937Savgzfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5683213937Savg{ 5684213937Savg znode_t *zp = VTOZ(vp); 5685213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5686213937Savg objset_t *os = zp->z_zfsvfs->z_os; 5687243517Savg vm_page_t mfirst, mlast, mreq; 5688213937Savg vm_object_t object; 5689213937Savg caddr_t va; 5690213937Savg struct sf_buf *sf; 5691243517Savg off_t startoff, endoff; 5692213937Savg int i, error; 5693243517Savg vm_pindex_t reqstart, reqend; 5694243517Savg int pcount, lsize, reqsize, size; 5695213937Savg 5696213937Savg ZFS_ENTER(zfsvfs); 5697213937Savg 
ZFS_VERIFY_ZP(zp); 5698213937Savg 5699243517Savg pcount = OFF_TO_IDX(round_page(count)); 5700213937Savg mreq = m[reqpage]; 5701213937Savg object = mreq->object; 5702213937Savg error = 0; 5703213937Savg 5704213937Savg KASSERT(vp->v_object == object, ("mismatching object")); 5705213937Savg 5706243517Savg if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5707243517Savg startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5708243517Savg reqstart = OFF_TO_IDX(round_page(startoff)); 5709243517Savg if (reqstart < m[0]->pindex) 5710243517Savg reqstart = 0; 5711243517Savg else 5712243517Savg reqstart = reqstart - m[0]->pindex; 5713243517Savg endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5714243517Savg zp->z_blksz); 5715243517Savg reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5716243517Savg if (reqend > m[pcount - 1]->pindex) 5717243517Savg reqend = m[pcount - 1]->pindex; 5718243517Savg reqsize = reqend - m[reqstart]->pindex + 1; 5719243517Savg KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5720243517Savg ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5721243517Savg } else { 5722243517Savg reqstart = reqpage; 5723243517Savg reqsize = 1; 5724243517Savg } 5725243517Savg mfirst = m[reqstart]; 5726243517Savg mlast = m[reqstart + reqsize - 1]; 5727243517Savg 5728248084Sattilio zfs_vmobject_wlock(object); 5729213937Savg 5730243517Savg for (i = 0; i < reqstart; i++) { 5731243517Savg vm_page_lock(m[i]); 5732243517Savg vm_page_free(m[i]); 5733243517Savg vm_page_unlock(m[i]); 5734213937Savg } 5735243517Savg for (i = reqstart + reqsize; i < pcount; i++) { 5736243517Savg vm_page_lock(m[i]); 5737243517Savg vm_page_free(m[i]); 5738243517Savg vm_page_unlock(m[i]); 5739243517Savg } 5740213937Savg 5741243517Savg if (mreq->valid && reqsize == 1) { 5742213937Savg if (mreq->valid != VM_PAGE_BITS_ALL) 5743213937Savg vm_page_zero_invalid(mreq, TRUE); 5744248084Sattilio zfs_vmobject_wunlock(object); 5745213937Savg ZFS_EXIT(zfsvfs); 5746248084Sattilio return 
(zfs_vm_pagerret_ok); 5747213937Savg } 5748213937Savg 5749213937Savg PCPU_INC(cnt.v_vnodein); 5750243517Savg PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5751213937Savg 5752213937Savg if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5753243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5754243517Savg if (i != reqpage) { 5755243517Savg vm_page_lock(m[i]); 5756243517Savg vm_page_free(m[i]); 5757243517Savg vm_page_unlock(m[i]); 5758243517Savg } 5759243517Savg } 5760248084Sattilio zfs_vmobject_wunlock(object); 5761213937Savg ZFS_EXIT(zfsvfs); 5762248084Sattilio return (zfs_vm_pagerret_bad); 5763213937Savg } 5764213937Savg 5765243517Savg lsize = PAGE_SIZE; 5766243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5767243517Savg lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5768213937Savg 5769248084Sattilio zfs_vmobject_wunlock(object); 5770243517Savg 5771243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5772243517Savg size = PAGE_SIZE; 5773243517Savg if (i == (reqstart + reqsize - 1)) 5774243517Savg size = lsize; 5775243517Savg va = zfs_map_page(m[i], &sf); 5776243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5777243517Savg size, va, DMU_READ_PREFETCH); 5778243517Savg if (size != PAGE_SIZE) 5779243517Savg bzero(va + size, PAGE_SIZE - size); 5780243517Savg zfs_unmap_page(sf); 5781243517Savg if (error != 0) 5782243517Savg break; 5783243517Savg } 5784243517Savg 5785248084Sattilio zfs_vmobject_wlock(object); 5786213937Savg 5787243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5788243763Savg if (!error) 5789243763Savg m[i]->valid = VM_PAGE_BITS_ALL; 5790243517Savg KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5791243763Savg if (i != reqpage) 5792243763Savg vm_page_readahead_finish(m[i]); 5793243517Savg } 5794243517Savg 5795248084Sattilio zfs_vmobject_wunlock(object); 5796213937Savg 5797213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5798213937Savg 
ZFS_EXIT(zfsvfs); 5799248084Sattilio return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5800213937Savg} 5801213937Savg 5802213937Savgstatic int 5803213937Savgzfs_freebsd_getpages(ap) 5804213937Savg struct vop_getpages_args /* { 5805213937Savg struct vnode *a_vp; 5806213937Savg vm_page_t *a_m; 5807213937Savg int a_count; 5808213937Savg int a_reqpage; 5809213937Savg vm_ooffset_t a_offset; 5810213937Savg } */ *ap; 5811213937Savg{ 5812213937Savg 5813213937Savg return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5814213937Savg} 5815213937Savg 5816213937Savgstatic int 5817260786Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 5818260786Savg int *rtvals) 5819260786Savg{ 5820260786Savg znode_t *zp = VTOZ(vp); 5821260786Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5822260786Savg rl_t *rl; 5823260786Savg dmu_tx_t *tx; 5824260786Savg struct sf_buf *sf; 5825260786Savg vm_object_t object; 5826260786Savg vm_page_t m; 5827260786Savg caddr_t va; 5828260786Savg size_t tocopy; 5829260786Savg size_t lo_len; 5830260786Savg vm_ooffset_t lo_off; 5831260786Savg vm_ooffset_t off; 5832260786Savg uint_t blksz; 5833260786Savg int ncount; 5834260786Savg int pcount; 5835260786Savg int err; 5836260786Savg int i; 5837260786Savg 5838260786Savg ZFS_ENTER(zfsvfs); 5839260786Savg ZFS_VERIFY_ZP(zp); 5840260786Savg 5841260786Savg object = vp->v_object; 5842260786Savg pcount = btoc(len); 5843260786Savg ncount = pcount; 5844260786Savg 5845260786Savg KASSERT(ma[0]->object == object, ("mismatching object")); 5846260786Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 5847260786Savg 5848260786Savg for (i = 0; i < pcount; i++) 5849260786Savg rtvals[i] = zfs_vm_pagerret_error; 5850260786Savg 5851260786Savg off = IDX_TO_OFF(ma[0]->pindex); 5852260786Savg blksz = zp->z_blksz; 5853260786Savg lo_off = rounddown(off, blksz); 5854260786Savg lo_len = roundup(len + (off - lo_off), blksz); 5855260786Savg rl = zfs_range_lock(zp, lo_off, lo_len, 
RL_WRITER); 5856260786Savg 5857260786Savg zfs_vmobject_wlock(object); 5858260786Savg if (len + off > object->un_pager.vnp.vnp_size) { 5859260786Savg if (object->un_pager.vnp.vnp_size > off) { 5860260786Savg int pgoff; 5861260786Savg 5862260786Savg len = object->un_pager.vnp.vnp_size - off; 5863260786Savg ncount = btoc(len); 5864260786Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 5865260786Savg /* 5866260786Savg * If the object is locked and the following 5867260786Savg * conditions hold, then the page's dirty 5868260786Savg * field cannot be concurrently changed by a 5869260786Savg * pmap operation. 5870260786Savg */ 5871260786Savg m = ma[ncount - 1]; 5872260786Savg vm_page_assert_sbusied(m); 5873260786Savg KASSERT(!pmap_page_is_write_mapped(m), 5874260786Savg ("zfs_putpages: page %p is not read-only", m)); 5875260786Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 5876260786Savg pgoff); 5877260786Savg } 5878260786Savg } else { 5879260786Savg len = 0; 5880260786Savg ncount = 0; 5881260786Savg } 5882260786Savg if (ncount < pcount) { 5883260786Savg for (i = ncount; i < pcount; i++) { 5884260786Savg rtvals[i] = zfs_vm_pagerret_bad; 5885260786Savg } 5886260786Savg } 5887260786Savg } 5888260786Savg zfs_vmobject_wunlock(object); 5889260786Savg 5890260786Savg if (ncount == 0) 5891260786Savg goto out; 5892260786Savg 5893260786Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 5894260786Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 5895260786Savg goto out; 5896260786Savg } 5897260786Savg 5898260786Savgtop: 5899260786Savg tx = dmu_tx_create(zfsvfs->z_os); 5900260786Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 5901260786Savg 5902260786Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5903260786Savg zfs_sa_upgrade_txholds(tx, zp); 5904260786Savg err = dmu_tx_assign(tx, TXG_NOWAIT); 5905260786Savg if (err != 0) { 5906260786Savg if (err == ERESTART) { 5907260786Savg dmu_tx_wait(tx); 5908260786Savg dmu_tx_abort(tx); 5909260786Savg goto top; 5910260786Savg } 5911260786Savg 
dmu_tx_abort(tx); 5912260786Savg goto out; 5913260786Savg } 5914260786Savg 5915260786Savg if (zp->z_blksz < PAGE_SIZE) { 5916260786Savg i = 0; 5917260786Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 5918260786Savg tocopy = len > PAGE_SIZE ? PAGE_SIZE : len; 5919260786Savg va = zfs_map_page(ma[i], &sf); 5920260786Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 5921260786Savg zfs_unmap_page(sf); 5922260786Savg } 5923260786Savg } else { 5924260786Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 5925260786Savg } 5926260786Savg 5927260786Savg if (err == 0) { 5928260786Savg uint64_t mtime[2], ctime[2]; 5929260786Savg sa_bulk_attr_t bulk[3]; 5930260786Savg int count = 0; 5931260786Savg 5932260786Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 5933260786Savg &mtime, 16); 5934260786Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 5935260786Savg &ctime, 16); 5936260786Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 5937260786Savg &zp->z_pflags, 8); 5938260786Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 5939260786Savg B_TRUE); 5940260786Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 5941260786Savg 5942260786Savg zfs_vmobject_wlock(object); 5943260786Savg for (i = 0; i < ncount; i++) { 5944260786Savg rtvals[i] = zfs_vm_pagerret_ok; 5945260786Savg vm_page_undirty(ma[i]); 5946260786Savg } 5947260786Savg zfs_vmobject_wunlock(object); 5948260786Savg PCPU_INC(cnt.v_vnodeout); 5949260786Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 5950260786Savg } 5951260786Savg dmu_tx_commit(tx); 5952260786Savg 5953260786Savgout: 5954260786Savg zfs_range_unlock(rl); 5955260786Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 5956260786Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5957260786Savg zil_commit(zfsvfs->z_log, zp->z_id); 5958260786Savg ZFS_EXIT(zfsvfs); 5959260786Savg return (rtvals[0]); 5960260786Savg} 5961260786Savg 5962260786Savgint 
5963260786Savgzfs_freebsd_putpages(ap) 5964260786Savg struct vop_putpages_args /* { 5965260786Savg struct vnode *a_vp; 5966260786Savg vm_page_t *a_m; 5967260786Savg int a_count; 5968260786Savg int a_sync; 5969260786Savg int *a_rtvals; 5970260786Savg vm_ooffset_t a_offset; 5971260786Savg } */ *ap; 5972260786Savg{ 5973260786Savg 5974260786Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 5975260786Savg ap->a_rtvals)); 5976260786Savg} 5977260786Savg 5978260786Savgstatic int 5979243518Savgzfs_freebsd_bmap(ap) 5980243518Savg struct vop_bmap_args /* { 5981243518Savg struct vnode *a_vp; 5982243518Savg daddr_t a_bn; 5983243518Savg struct bufobj **a_bop; 5984243518Savg daddr_t *a_bnp; 5985243518Savg int *a_runp; 5986243518Savg int *a_runb; 5987243518Savg } */ *ap; 5988243518Savg{ 5989243518Savg 5990243518Savg if (ap->a_bop != NULL) 5991243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 5992243518Savg if (ap->a_bnp != NULL) 5993243518Savg *ap->a_bnp = ap->a_bn; 5994243518Savg if (ap->a_runp != NULL) 5995243518Savg *ap->a_runp = 0; 5996243518Savg if (ap->a_runb != NULL) 5997243518Savg *ap->a_runb = 0; 5998243518Savg 5999243518Savg return (0); 6000243518Savg} 6001243518Savg 6002243518Savgstatic int 6003168962Spjdzfs_freebsd_open(ap) 6004168962Spjd struct vop_open_args /* { 6005168962Spjd struct vnode *a_vp; 6006168962Spjd int a_mode; 6007168962Spjd struct ucred *a_cred; 6008168962Spjd struct thread *a_td; 6009168962Spjd } */ *ap; 6010168962Spjd{ 6011168962Spjd vnode_t *vp = ap->a_vp; 6012168962Spjd znode_t *zp = VTOZ(vp); 6013168962Spjd int error; 6014168962Spjd 6015185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 6016168962Spjd if (error == 0) 6017219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 6018168962Spjd return (error); 6019168962Spjd} 6020168962Spjd 6021168962Spjdstatic int 6022168962Spjdzfs_freebsd_close(ap) 6023168962Spjd struct vop_close_args /* { 6024168962Spjd struct vnode *a_vp; 6025168962Spjd int a_fflag; 6026168962Spjd 
struct ucred *a_cred; 6027168962Spjd struct thread *a_td; 6028168962Spjd } */ *ap; 6029168962Spjd{ 6030168962Spjd 6031242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 6032168962Spjd} 6033168962Spjd 6034168962Spjdstatic int 6035168962Spjdzfs_freebsd_ioctl(ap) 6036168962Spjd struct vop_ioctl_args /* { 6037168962Spjd struct vnode *a_vp; 6038168962Spjd u_long a_command; 6039168962Spjd caddr_t a_data; 6040168962Spjd int a_fflag; 6041168962Spjd struct ucred *cred; 6042168962Spjd struct thread *td; 6043168962Spjd } */ *ap; 6044168962Spjd{ 6045168962Spjd 6046168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 6047185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 6048168962Spjd} 6049168962Spjd 6050168962Spjdstatic int 6051168962Spjdzfs_freebsd_read(ap) 6052168962Spjd struct vop_read_args /* { 6053168962Spjd struct vnode *a_vp; 6054168962Spjd struct uio *a_uio; 6055168962Spjd int a_ioflag; 6056168962Spjd struct ucred *a_cred; 6057168962Spjd } */ *ap; 6058168962Spjd{ 6059168962Spjd 6060213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 6061213673Spjd ap->a_cred, NULL)); 6062168962Spjd} 6063168962Spjd 6064168962Spjdstatic int 6065168962Spjdzfs_freebsd_write(ap) 6066168962Spjd struct vop_write_args /* { 6067168962Spjd struct vnode *a_vp; 6068168962Spjd struct uio *a_uio; 6069168962Spjd int a_ioflag; 6070168962Spjd struct ucred *a_cred; 6071168962Spjd } */ *ap; 6072168962Spjd{ 6073168962Spjd 6074213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 6075213673Spjd ap->a_cred, NULL)); 6076168962Spjd} 6077168962Spjd 6078168962Spjdstatic int 6079168962Spjdzfs_freebsd_access(ap) 6080168962Spjd struct vop_access_args /* { 6081168962Spjd struct vnode *a_vp; 6082192689Strasz accmode_t a_accmode; 6083168962Spjd struct ucred *a_cred; 6084168962Spjd struct thread *a_td; 6085168962Spjd } */ *ap; 6086168962Spjd{ 6087212002Sjh vnode_t *vp = ap->a_vp; 6088212002Sjh znode_t *zp = VTOZ(vp); 
6089198703Spjd accmode_t accmode; 6090198703Spjd int error = 0; 6091168962Spjd 6092185172Spjd /* 6093198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 6094185172Spjd */ 6095198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 6096198703Spjd if (accmode != 0) 6097198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 6098185172Spjd 6099198703Spjd /* 6100198703Spjd * VADMIN has to be handled by vaccess(). 6101198703Spjd */ 6102198703Spjd if (error == 0) { 6103198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 6104198703Spjd if (accmode != 0) { 6105219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 6106219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 6107198703Spjd } 6108185172Spjd } 6109185172Spjd 6110212002Sjh /* 6111212002Sjh * For VEXEC, ensure that at least one execute bit is set for 6112212002Sjh * non-directories. 6113212002Sjh */ 6114212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 6115219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 6116212002Sjh error = EACCES; 6117219089Spjd } 6118212002Sjh 6119198703Spjd return (error); 6120168962Spjd} 6121168962Spjd 6122168962Spjdstatic int 6123168962Spjdzfs_freebsd_lookup(ap) 6124168962Spjd struct vop_lookup_args /* { 6125168962Spjd struct vnode *a_dvp; 6126168962Spjd struct vnode **a_vpp; 6127168962Spjd struct componentname *a_cnp; 6128168962Spjd } */ *ap; 6129168962Spjd{ 6130168962Spjd struct componentname *cnp = ap->a_cnp; 6131168962Spjd char nm[NAME_MAX + 1]; 6132168962Spjd 6133168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 6134168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 6135168962Spjd 6136168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 6137185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 6138168962Spjd} 6139168962Spjd 6140168962Spjdstatic int 6141168962Spjdzfs_freebsd_create(ap) 6142168962Spjd struct vop_create_args /* { 
6143168962Spjd struct vnode *a_dvp; 6144168962Spjd struct vnode **a_vpp; 6145168962Spjd struct componentname *a_cnp; 6146168962Spjd struct vattr *a_vap; 6147168962Spjd } */ *ap; 6148168962Spjd{ 6149168962Spjd struct componentname *cnp = ap->a_cnp; 6150168962Spjd vattr_t *vap = ap->a_vap; 6151168962Spjd int mode; 6152168962Spjd 6153168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6154168962Spjd 6155168962Spjd vattr_init_mask(vap); 6156168962Spjd mode = vap->va_mode & ALLPERMS; 6157168962Spjd 6158168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 6159185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 6160168962Spjd} 6161168962Spjd 6162168962Spjdstatic int 6163168962Spjdzfs_freebsd_remove(ap) 6164168962Spjd struct vop_remove_args /* { 6165168962Spjd struct vnode *a_dvp; 6166168962Spjd struct vnode *a_vp; 6167168962Spjd struct componentname *a_cnp; 6168168962Spjd } */ *ap; 6169168962Spjd{ 6170168962Spjd 6171168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6172168962Spjd 6173168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 6174185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 6175168962Spjd} 6176168962Spjd 6177168962Spjdstatic int 6178168962Spjdzfs_freebsd_mkdir(ap) 6179168962Spjd struct vop_mkdir_args /* { 6180168962Spjd struct vnode *a_dvp; 6181168962Spjd struct vnode **a_vpp; 6182168962Spjd struct componentname *a_cnp; 6183168962Spjd struct vattr *a_vap; 6184168962Spjd } */ *ap; 6185168962Spjd{ 6186168962Spjd vattr_t *vap = ap->a_vap; 6187168962Spjd 6188168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6189168962Spjd 6190168962Spjd vattr_init_mask(vap); 6191168962Spjd 6192168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 6193185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 6194168962Spjd} 6195168962Spjd 6196168962Spjdstatic int 6197168962Spjdzfs_freebsd_rmdir(ap) 6198168962Spjd struct vop_rmdir_args /* { 6199168962Spjd struct vnode *a_dvp; 6200168962Spjd struct vnode *a_vp; 6201168962Spjd struct 
componentname *a_cnp; 6202168962Spjd } */ *ap; 6203168962Spjd{ 6204168962Spjd struct componentname *cnp = ap->a_cnp; 6205168962Spjd 6206168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6207168962Spjd 6208185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6209168962Spjd} 6210168962Spjd 6211168962Spjdstatic int 6212168962Spjdzfs_freebsd_readdir(ap) 6213168962Spjd struct vop_readdir_args /* { 6214168962Spjd struct vnode *a_vp; 6215168962Spjd struct uio *a_uio; 6216168962Spjd struct ucred *a_cred; 6217168962Spjd int *a_eofflag; 6218168962Spjd int *a_ncookies; 6219168962Spjd u_long **a_cookies; 6220168962Spjd } */ *ap; 6221168962Spjd{ 6222168962Spjd 6223168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6224168962Spjd ap->a_ncookies, ap->a_cookies)); 6225168962Spjd} 6226168962Spjd 6227168962Spjdstatic int 6228168962Spjdzfs_freebsd_fsync(ap) 6229168962Spjd struct vop_fsync_args /* { 6230168962Spjd struct vnode *a_vp; 6231168962Spjd int a_waitfor; 6232168962Spjd struct thread *a_td; 6233168962Spjd } */ *ap; 6234168962Spjd{ 6235168962Spjd 6236168962Spjd vop_stdfsync(ap); 6237185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6238168962Spjd} 6239168962Spjd 6240168962Spjdstatic int 6241168962Spjdzfs_freebsd_getattr(ap) 6242168962Spjd struct vop_getattr_args /* { 6243168962Spjd struct vnode *a_vp; 6244168962Spjd struct vattr *a_vap; 6245168962Spjd struct ucred *a_cred; 6246168962Spjd } */ *ap; 6247168962Spjd{ 6248185029Spjd vattr_t *vap = ap->a_vap; 6249185029Spjd xvattr_t xvap; 6250185029Spjd u_long fflags = 0; 6251185029Spjd int error; 6252168962Spjd 6253185029Spjd xva_init(&xvap); 6254185029Spjd xvap.xva_vattr = *vap; 6255185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 6256185029Spjd 6257185029Spjd /* Convert chflags into ZFS-type flags. */ 6258185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6259185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6260185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6261185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6262185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 6263254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 6264254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 6265254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 6266254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 6267254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 6268254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 6269254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 6270254627Sken 6271185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6272185029Spjd if (error != 0) 6273185029Spjd return (error); 6274185029Spjd 6275185029Spjd /* Convert ZFS xattr into chflags. */ 6276185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6277185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6278185029Spjd fflags |= (fflag); \ 6279185029Spjd} while (0) 6280185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6281185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6282185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6283185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6284185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6285185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6286254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 6287254627Sken xvap.xva_xoptattrs.xoa_archive); 6288185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6289185029Spjd xvap.xva_xoptattrs.xoa_nodump); 6290254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 6291254627Sken xvap.xva_xoptattrs.xoa_readonly); 6292254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 6293254627Sken xvap.xva_xoptattrs.xoa_system); 6294254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 6295254627Sken xvap.xva_xoptattrs.xoa_hidden); 6296254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 6297254627Sken xvap.xva_xoptattrs.xoa_reparse); 6298254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 6299254627Sken xvap.xva_xoptattrs.xoa_offline); 6300254627Sken FLAG_CHECK(UF_SPARSE, 
XAT_SPARSE, 6301254627Sken xvap.xva_xoptattrs.xoa_sparse); 6302254627Sken 6303185029Spjd#undef FLAG_CHECK 6304185029Spjd *vap = xvap.xva_vattr; 6305185029Spjd vap->va_flags = fflags; 6306185029Spjd return (0); 6307168962Spjd} 6308168962Spjd 6309168962Spjdstatic int 6310168962Spjdzfs_freebsd_setattr(ap) 6311168962Spjd struct vop_setattr_args /* { 6312168962Spjd struct vnode *a_vp; 6313168962Spjd struct vattr *a_vap; 6314168962Spjd struct ucred *a_cred; 6315168962Spjd } */ *ap; 6316168962Spjd{ 6317185172Spjd vnode_t *vp = ap->a_vp; 6318168962Spjd vattr_t *vap = ap->a_vap; 6319185172Spjd cred_t *cred = ap->a_cred; 6320185029Spjd xvattr_t xvap; 6321185029Spjd u_long fflags; 6322185029Spjd uint64_t zflags; 6323168962Spjd 6324168962Spjd vattr_init_mask(vap); 6325170044Spjd vap->va_mask &= ~AT_NOSET; 6326168962Spjd 6327185029Spjd xva_init(&xvap); 6328185029Spjd xvap.xva_vattr = *vap; 6329185029Spjd 6330219089Spjd zflags = VTOZ(vp)->z_pflags; 6331185172Spjd 6332185029Spjd if (vap->va_flags != VNOVAL) { 6333197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 6334185172Spjd int error; 6335185172Spjd 6336197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 6337197683Sdelphij return (EOPNOTSUPP); 6338197683Sdelphij 6339185029Spjd fflags = vap->va_flags; 6340254627Sken /* 6341254627Sken * XXX KDM 6342254627Sken * We need to figure out whether it makes sense to allow 6343254627Sken * UF_REPARSE through, since we don't really have other 6344254627Sken * facilities to handle reparse points and zfs_setattr() 6345254627Sken * doesn't currently allow setting that attribute anyway. 6346254627Sken */ 6347254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 6348254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 6349254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 6350185029Spjd return (EOPNOTSUPP); 6351185172Spjd /* 6352185172Spjd * Unprivileged processes are not permitted to unset system 6353185172Spjd * flags, or modify flags if any system flags are set. 
6354185172Spjd * Privileged non-jail processes may not modify system flags 6355185172Spjd * if securelevel > 0 and any existing system flags are set. 6356185172Spjd * Privileged jail processes behave like privileged non-jail 6357185172Spjd * processes if the security.jail.chflags_allowed sysctl is 6358185172Spjd * is non-zero; otherwise, they behave like unprivileged 6359185172Spjd * processes. 6360185172Spjd */ 6361197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6362197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6363185172Spjd if (zflags & 6364185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6365185172Spjd error = securelevel_gt(cred, 0); 6366197861Spjd if (error != 0) 6367185172Spjd return (error); 6368185172Spjd } 6369185172Spjd } else { 6370197861Spjd /* 6371197861Spjd * Callers may only modify the file flags on objects they 6372197861Spjd * have VADMIN rights for. 6373197861Spjd */ 6374197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6375197861Spjd return (error); 6376185172Spjd if (zflags & 6377185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6378185172Spjd return (EPERM); 6379185172Spjd } 6380185172Spjd if (fflags & 6381185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6382185172Spjd return (EPERM); 6383185172Spjd } 6384185172Spjd } 6385185029Spjd 6386185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6387185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6388185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6389185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 6390185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 6391185029Spjd } \ 6392185029Spjd} while (0) 6393185029Spjd /* Convert chflags into ZFS-type flags. */ 6394185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6395185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6396185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6397185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6398185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6399185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6400185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6401254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 6402254627Sken xvap.xva_xoptattrs.xoa_archive); 6403185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6404185172Spjd xvap.xva_xoptattrs.xoa_nodump); 6405254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 6406254627Sken xvap.xva_xoptattrs.xoa_readonly); 6407254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 6408254627Sken xvap.xva_xoptattrs.xoa_system); 6409254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 6410254627Sken xvap.xva_xoptattrs.xoa_hidden); 6411254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 6412254627Sken xvap.xva_xoptattrs.xoa_hidden); 6413254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 6414254627Sken xvap.xva_xoptattrs.xoa_offline); 6415254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 6416254627Sken xvap.xva_xoptattrs.xoa_sparse); 6417185029Spjd#undef FLAG_CHANGE 6418185029Spjd } 6419185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6420168962Spjd} 6421168962Spjd 6422168962Spjdstatic int 6423168962Spjdzfs_freebsd_rename(ap) 6424168962Spjd struct vop_rename_args /* { 6425168962Spjd struct vnode *a_fdvp; 6426168962Spjd struct vnode *a_fvp; 6427168962Spjd struct componentname *a_fcnp; 6428168962Spjd struct vnode *a_tdvp; 6429168962Spjd struct vnode *a_tvp; 6430168962Spjd struct componentname *a_tcnp; 6431168962Spjd } */ *ap; 6432168962Spjd{ 6433168962Spjd vnode_t *fdvp = ap->a_fdvp; 6434168962Spjd vnode_t *fvp = ap->a_fvp; 6435168962Spjd vnode_t *tdvp = ap->a_tdvp; 6436168962Spjd vnode_t *tvp = ap->a_tvp; 6437168962Spjd int error; 6438168962Spjd 
6439192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6440192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6441168962Spjd 6442255748Sdavide /* 6443255748Sdavide * Check for cross-device rename. 6444255748Sdavide */ 6445255748Sdavide if ((fdvp->v_mount != tdvp->v_mount) || 6446255748Sdavide (tvp && (fdvp->v_mount != tvp->v_mount))) 6447255748Sdavide error = EXDEV; 6448255748Sdavide else 6449254982Sdelphij error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6450254982Sdelphij ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6451168962Spjd if (tdvp == tvp) 6452168962Spjd VN_RELE(tdvp); 6453168962Spjd else 6454168962Spjd VN_URELE(tdvp); 6455168962Spjd if (tvp) 6456168962Spjd VN_URELE(tvp); 6457168962Spjd VN_RELE(fdvp); 6458168962Spjd VN_RELE(fvp); 6459168962Spjd 6460168962Spjd return (error); 6461168962Spjd} 6462168962Spjd 6463168962Spjdstatic int 6464168962Spjdzfs_freebsd_symlink(ap) 6465168962Spjd struct vop_symlink_args /* { 6466168962Spjd struct vnode *a_dvp; 6467168962Spjd struct vnode **a_vpp; 6468168962Spjd struct componentname *a_cnp; 6469168962Spjd struct vattr *a_vap; 6470168962Spjd char *a_target; 6471168962Spjd } */ *ap; 6472168962Spjd{ 6473168962Spjd struct componentname *cnp = ap->a_cnp; 6474168962Spjd vattr_t *vap = ap->a_vap; 6475168962Spjd 6476168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6477168962Spjd 6478168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 6479168962Spjd vattr_init_mask(vap); 6480168962Spjd 6481168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6482168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6483168962Spjd} 6484168962Spjd 6485168962Spjdstatic int 6486168962Spjdzfs_freebsd_readlink(ap) 6487168962Spjd struct vop_readlink_args /* { 6488168962Spjd struct vnode *a_vp; 6489168962Spjd struct uio *a_uio; 6490168962Spjd struct ucred *a_cred; 6491168962Spjd } */ *ap; 6492168962Spjd{ 6493168962Spjd 6494185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6495168962Spjd} 6496168962Spjd 6497168962Spjdstatic int 6498168962Spjdzfs_freebsd_link(ap) 6499168962Spjd struct vop_link_args /* { 6500168962Spjd struct vnode *a_tdvp; 6501168962Spjd struct vnode *a_vp; 6502168962Spjd struct componentname *a_cnp; 6503168962Spjd } */ *ap; 6504168962Spjd{ 6505168962Spjd struct componentname *cnp = ap->a_cnp; 6506254982Sdelphij vnode_t *vp = ap->a_vp; 6507254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 6508168962Spjd 6509254982Sdelphij if (tdvp->v_mount != vp->v_mount) 6510254982Sdelphij return (EXDEV); 6511254982Sdelphij 6512168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6513168962Spjd 6514254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6515168962Spjd} 6516168962Spjd 6517168962Spjdstatic int 6518168962Spjdzfs_freebsd_inactive(ap) 6519169170Spjd struct vop_inactive_args /* { 6520169170Spjd struct vnode *a_vp; 6521169170Spjd struct thread *a_td; 6522169170Spjd } */ *ap; 6523168962Spjd{ 6524168962Spjd vnode_t *vp = ap->a_vp; 6525168962Spjd 6526185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6527168962Spjd return (0); 6528168962Spjd} 6529168962Spjd 6530168962Spjdstatic int 6531168962Spjdzfs_freebsd_reclaim(ap) 6532168962Spjd struct vop_reclaim_args /* { 6533168962Spjd struct vnode *a_vp; 6534168962Spjd struct thread *a_td; 6535168962Spjd } */ *ap; 6536168962Spjd{ 6537169170Spjd vnode_t *vp = ap->a_vp; 6538168962Spjd znode_t *zp = 
VTOZ(vp); 6539197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6540168962Spjd 6541169025Spjd ASSERT(zp != NULL); 6542169025Spjd 6543243520Savg /* Destroy the vm object and flush associated pages. */ 6544243520Savg vnode_destroy_vobject(vp); 6545243520Savg 6546168962Spjd /* 6547243520Savg * z_teardown_inactive_lock protects from a race with 6548243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 6549243520Savg * force unmount. 6550168962Spjd */ 6551243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6552243520Savg if (zp->z_sa_hdl == NULL) 6553196301Spjd zfs_znode_free(zp); 6554243520Savg else 6555243520Savg zfs_zinactive(zp); 6556243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 6557185029Spjd 6558168962Spjd vp->v_data = NULL; 6559168962Spjd return (0); 6560168962Spjd} 6561168962Spjd 6562168962Spjdstatic int 6563168962Spjdzfs_freebsd_fid(ap) 6564168962Spjd struct vop_fid_args /* { 6565168962Spjd struct vnode *a_vp; 6566168962Spjd struct fid *a_fid; 6567168962Spjd } */ *ap; 6568168962Spjd{ 6569168962Spjd 6570185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6571168962Spjd} 6572168962Spjd 6573168962Spjdstatic int 6574168962Spjdzfs_freebsd_pathconf(ap) 6575168962Spjd struct vop_pathconf_args /* { 6576168962Spjd struct vnode *a_vp; 6577168962Spjd int a_name; 6578168962Spjd register_t *a_retval; 6579168962Spjd } */ *ap; 6580168962Spjd{ 6581168962Spjd ulong_t val; 6582168962Spjd int error; 6583168962Spjd 6584185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6585168962Spjd if (error == 0) 6586168962Spjd *ap->a_retval = val; 6587168962Spjd else if (error == EOPNOTSUPP) 6588168962Spjd error = vop_stdpathconf(ap); 6589168962Spjd return (error); 6590168962Spjd} 6591168962Spjd 6592196949Straszstatic int 6593196949Straszzfs_freebsd_fifo_pathconf(ap) 6594196949Strasz struct vop_pathconf_args /* { 6595196949Strasz struct vnode *a_vp; 6596196949Strasz int a_name; 6597196949Strasz register_t *a_retval; 
6598196949Strasz } */ *ap; 6599196949Strasz{ 6600196949Strasz 6601196949Strasz switch (ap->a_name) { 6602196949Strasz case _PC_ACL_EXTENDED: 6603196949Strasz case _PC_ACL_NFS4: 6604196949Strasz case _PC_ACL_PATH_MAX: 6605196949Strasz case _PC_MAC_PRESENT: 6606196949Strasz return (zfs_freebsd_pathconf(ap)); 6607196949Strasz default: 6608196949Strasz return (fifo_specops.vop_pathconf(ap)); 6609196949Strasz } 6610196949Strasz} 6611196949Strasz 6612185029Spjd/* 6613185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6614185029Spjd * extended attribute name: 6615185029Spjd * 6616185029Spjd * NAMESPACE PREFIX 6617185029Spjd * system freebsd:system: 6618185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6619185029Spjd * created on Solaris) 6620185029Spjd */ 6621185029Spjdstatic int 6622185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6623185029Spjd size_t size) 6624185029Spjd{ 6625185029Spjd const char *namespace, *prefix, *suffix; 6626185029Spjd 6627185029Spjd /* We don't allow '/' character in attribute name. */ 6628185029Spjd if (strchr(name, '/') != NULL) 6629185029Spjd return (EINVAL); 6630185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 6631185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6632185029Spjd return (EINVAL); 6633185029Spjd 6634185029Spjd bzero(attrname, size); 6635185029Spjd 6636185029Spjd switch (attrnamespace) { 6637185029Spjd case EXTATTR_NAMESPACE_USER: 6638185029Spjd#if 0 6639185029Spjd prefix = "freebsd:"; 6640185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6641185029Spjd suffix = ":"; 6642185029Spjd#else 6643185029Spjd /* 6644185029Spjd * This is the default namespace by which we can access all 6645185029Spjd * attributes created on Solaris. 
6646185029Spjd */ 6647185029Spjd prefix = namespace = suffix = ""; 6648185029Spjd#endif 6649185029Spjd break; 6650185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6651185029Spjd prefix = "freebsd:"; 6652185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6653185029Spjd suffix = ":"; 6654185029Spjd break; 6655185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6656185029Spjd default: 6657185029Spjd return (EINVAL); 6658185029Spjd } 6659185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6660185029Spjd name) >= size) { 6661185029Spjd return (ENAMETOOLONG); 6662185029Spjd } 6663185029Spjd return (0); 6664185029Spjd} 6665185029Spjd 6666185029Spjd/* 6667185029Spjd * Vnode operating to retrieve a named extended attribute. 6668185029Spjd */ 6669185029Spjdstatic int 6670185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6671185029Spjd/* 6672185029Spjdvop_getextattr { 6673185029Spjd IN struct vnode *a_vp; 6674185029Spjd IN int a_attrnamespace; 6675185029Spjd IN const char *a_name; 6676185029Spjd INOUT struct uio *a_uio; 6677185029Spjd OUT size_t *a_size; 6678185029Spjd IN struct ucred *a_cred; 6679185029Spjd IN struct thread *a_td; 6680185029Spjd}; 6681185029Spjd*/ 6682185029Spjd{ 6683185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6684185029Spjd struct thread *td = ap->a_td; 6685185029Spjd struct nameidata nd; 6686185029Spjd char attrname[255]; 6687185029Spjd struct vattr va; 6688185029Spjd vnode_t *xvp = NULL, *vp; 6689185029Spjd int error, flags; 6690185029Spjd 6691195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6692195785Strasz ap->a_cred, ap->a_td, VREAD); 6693195785Strasz if (error != 0) 6694195785Strasz return (error); 6695195785Strasz 6696185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6697185029Spjd sizeof(attrname)); 6698185029Spjd if (error != 0) 6699185029Spjd return (error); 6700185029Spjd 6701185029Spjd ZFS_ENTER(zfsvfs); 6702185029Spjd 6703185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6704185029Spjd LOOKUP_XATTR); 6705185029Spjd if (error != 0) { 6706185029Spjd ZFS_EXIT(zfsvfs); 6707185029Spjd return (error); 6708185029Spjd } 6709185029Spjd 6710185029Spjd flags = FREAD; 6711241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6712185029Spjd xvp, td); 6713194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6714185029Spjd vp = nd.ni_vp; 6715185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6716185029Spjd if (error != 0) { 6717196303Spjd ZFS_EXIT(zfsvfs); 6718195785Strasz if (error == ENOENT) 6719195785Strasz error = ENOATTR; 6720185029Spjd return (error); 6721185029Spjd } 6722185029Spjd 6723185029Spjd if (ap->a_size != NULL) { 6724185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 6725185029Spjd if (error == 0) 6726185029Spjd *ap->a_size = (size_t)va.va_size; 6727185029Spjd } else if (ap->a_uio != NULL) 6728224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6729185029Spjd 6730185029Spjd VOP_UNLOCK(vp, 0); 6731185029Spjd vn_close(vp, flags, ap->a_cred, td); 6732185029Spjd ZFS_EXIT(zfsvfs); 6733185029Spjd 6734185029Spjd return (error); 6735185029Spjd} 6736185029Spjd 6737185029Spjd/* 6738185029Spjd * Vnode operation to remove a named attribute. 
6739185029Spjd */ 6740185029Spjdint 6741185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6742185029Spjd/* 6743185029Spjdvop_deleteextattr { 6744185029Spjd IN struct vnode *a_vp; 6745185029Spjd IN int a_attrnamespace; 6746185029Spjd IN const char *a_name; 6747185029Spjd IN struct ucred *a_cred; 6748185029Spjd IN struct thread *a_td; 6749185029Spjd}; 6750185029Spjd*/ 6751185029Spjd{ 6752185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6753185029Spjd struct thread *td = ap->a_td; 6754185029Spjd struct nameidata nd; 6755185029Spjd char attrname[255]; 6756185029Spjd struct vattr va; 6757185029Spjd vnode_t *xvp = NULL, *vp; 6758185029Spjd int error, flags; 6759185029Spjd 6760195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6761195785Strasz ap->a_cred, ap->a_td, VWRITE); 6762195785Strasz if (error != 0) 6763195785Strasz return (error); 6764195785Strasz 6765185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6766185029Spjd sizeof(attrname)); 6767185029Spjd if (error != 0) 6768185029Spjd return (error); 6769185029Spjd 6770185029Spjd ZFS_ENTER(zfsvfs); 6771185029Spjd 6772185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6773185029Spjd LOOKUP_XATTR); 6774185029Spjd if (error != 0) { 6775185029Spjd ZFS_EXIT(zfsvfs); 6776185029Spjd return (error); 6777185029Spjd } 6778185029Spjd 6779241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6780185029Spjd UIO_SYSSPACE, attrname, xvp, td); 6781185029Spjd error = namei(&nd); 6782185029Spjd vp = nd.ni_vp; 6783185029Spjd if (error != 0) { 6784196303Spjd ZFS_EXIT(zfsvfs); 6785262096Savg NDFREE(&nd, NDF_ONLY_PNBUF); 6786195785Strasz if (error == ENOENT) 6787195785Strasz error = ENOATTR; 6788185029Spjd return (error); 6789185029Spjd } 6790262096Savg 6791185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6792262096Savg NDFREE(&nd, NDF_ONLY_PNBUF); 6793185029Spjd 6794185029Spjd vput(nd.ni_dvp); 6795185029Spjd if (vp == 
nd.ni_dvp) 6796185029Spjd vrele(vp); 6797185029Spjd else 6798185029Spjd vput(vp); 6799185029Spjd ZFS_EXIT(zfsvfs); 6800185029Spjd 6801185029Spjd return (error); 6802185029Spjd} 6803185029Spjd 6804185029Spjd/* 6805185029Spjd * Vnode operation to set a named attribute. 6806185029Spjd */ 6807185029Spjdstatic int 6808185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6809185029Spjd/* 6810185029Spjdvop_setextattr { 6811185029Spjd IN struct vnode *a_vp; 6812185029Spjd IN int a_attrnamespace; 6813185029Spjd IN const char *a_name; 6814185029Spjd INOUT struct uio *a_uio; 6815185029Spjd IN struct ucred *a_cred; 6816185029Spjd IN struct thread *a_td; 6817185029Spjd}; 6818185029Spjd*/ 6819185029Spjd{ 6820185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6821185029Spjd struct thread *td = ap->a_td; 6822185029Spjd struct nameidata nd; 6823185029Spjd char attrname[255]; 6824185029Spjd struct vattr va; 6825185029Spjd vnode_t *xvp = NULL, *vp; 6826185029Spjd int error, flags; 6827185029Spjd 6828195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6829195785Strasz ap->a_cred, ap->a_td, VWRITE); 6830195785Strasz if (error != 0) 6831195785Strasz return (error); 6832195785Strasz 6833185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6834185029Spjd sizeof(attrname)); 6835185029Spjd if (error != 0) 6836185029Spjd return (error); 6837185029Spjd 6838185029Spjd ZFS_ENTER(zfsvfs); 6839185029Spjd 6840185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6841195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6842185029Spjd if (error != 0) { 6843185029Spjd ZFS_EXIT(zfsvfs); 6844185029Spjd return (error); 6845185029Spjd } 6846185029Spjd 6847185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6848241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6849185029Spjd xvp, td); 6850194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6851185029Spjd vp = nd.ni_vp; 6852185029Spjd NDFREE(&nd, 
NDF_ONLY_PNBUF); 6853185029Spjd if (error != 0) { 6854185029Spjd ZFS_EXIT(zfsvfs); 6855185029Spjd return (error); 6856185029Spjd } 6857185029Spjd 6858185029Spjd VATTR_NULL(&va); 6859185029Spjd va.va_size = 0; 6860185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6861185029Spjd if (error == 0) 6862269061Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6863185029Spjd 6864185029Spjd VOP_UNLOCK(vp, 0); 6865185029Spjd vn_close(vp, flags, ap->a_cred, td); 6866185029Spjd ZFS_EXIT(zfsvfs); 6867185029Spjd 6868185029Spjd return (error); 6869185029Spjd} 6870185029Spjd 6871185029Spjd/* 6872185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 6873185029Spjd */ 6874185029Spjdstatic int 6875185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6876185029Spjd/* 6877185029Spjdvop_listextattr { 6878185029Spjd IN struct vnode *a_vp; 6879185029Spjd IN int a_attrnamespace; 6880185029Spjd INOUT struct uio *a_uio; 6881185029Spjd OUT size_t *a_size; 6882185029Spjd IN struct ucred *a_cred; 6883185029Spjd IN struct thread *a_td; 6884185029Spjd}; 6885185029Spjd*/ 6886185029Spjd{ 6887185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6888185029Spjd struct thread *td = ap->a_td; 6889185029Spjd struct nameidata nd; 6890185029Spjd char attrprefix[16]; 6891185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6892185029Spjd struct dirent *dp; 6893185029Spjd struct iovec aiov; 6894185029Spjd struct uio auio, *uio = ap->a_uio; 6895185029Spjd size_t *sizep = ap->a_size; 6896185029Spjd size_t plen; 6897185029Spjd vnode_t *xvp = NULL, *vp; 6898185029Spjd int done, error, eof, pos; 6899185029Spjd 6900195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6901195785Strasz ap->a_cred, ap->a_td, VREAD); 6902196303Spjd if (error != 0) 6903195785Strasz return (error); 6904195785Strasz 6905185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6906185029Spjd sizeof(attrprefix)); 6907185029Spjd if (error != 0) 6908185029Spjd return (error); 
6909185029Spjd plen = strlen(attrprefix); 6910185029Spjd 6911185029Spjd ZFS_ENTER(zfsvfs); 6912185029Spjd 6913195822Strasz if (sizep != NULL) 6914195822Strasz *sizep = 0; 6915195822Strasz 6916185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6917185029Spjd LOOKUP_XATTR); 6918185029Spjd if (error != 0) { 6919196303Spjd ZFS_EXIT(zfsvfs); 6920195785Strasz /* 6921195785Strasz * ENOATTR means that the EA directory does not yet exist, 6922195785Strasz * i.e. there are no extended attributes there. 6923195785Strasz */ 6924195785Strasz if (error == ENOATTR) 6925195785Strasz error = 0; 6926185029Spjd return (error); 6927185029Spjd } 6928185029Spjd 6929241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6930188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6931185029Spjd error = namei(&nd); 6932185029Spjd vp = nd.ni_vp; 6933185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6934185029Spjd if (error != 0) { 6935185029Spjd ZFS_EXIT(zfsvfs); 6936185029Spjd return (error); 6937185029Spjd } 6938185029Spjd 6939185029Spjd auio.uio_iov = &aiov; 6940185029Spjd auio.uio_iovcnt = 1; 6941185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6942185029Spjd auio.uio_td = td; 6943185029Spjd auio.uio_rw = UIO_READ; 6944185029Spjd auio.uio_offset = 0; 6945185029Spjd 6946185029Spjd do { 6947185029Spjd u_char nlen; 6948185029Spjd 6949185029Spjd aiov.iov_base = (void *)dirbuf; 6950185029Spjd aiov.iov_len = sizeof(dirbuf); 6951185029Spjd auio.uio_resid = sizeof(dirbuf); 6952185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6953185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6954185029Spjd if (error != 0) 6955185029Spjd break; 6956185029Spjd for (pos = 0; pos < done;) { 6957185029Spjd dp = (struct dirent *)(dirbuf + pos); 6958185029Spjd pos += dp->d_reclen; 6959185029Spjd /* 6960185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6961185029Spjd * is what we get when attribute was created on Solaris. 
6962185029Spjd */ 6963185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6964185029Spjd continue; 6965185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6966185029Spjd continue; 6967185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6968185029Spjd continue; 6969185029Spjd nlen = dp->d_namlen - plen; 6970185029Spjd if (sizep != NULL) 6971185029Spjd *sizep += 1 + nlen; 6972185029Spjd else if (uio != NULL) { 6973185029Spjd /* 6974185029Spjd * Format of extattr name entry is one byte for 6975185029Spjd * length and the rest for name. 6976185029Spjd */ 6977185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 6978185029Spjd if (error == 0) { 6979185029Spjd error = uiomove(dp->d_name + plen, nlen, 6980185029Spjd uio->uio_rw, uio); 6981185029Spjd } 6982185029Spjd if (error != 0) 6983185029Spjd break; 6984185029Spjd } 6985185029Spjd } 6986185029Spjd } while (!eof && error == 0); 6987185029Spjd 6988185029Spjd vput(vp); 6989185029Spjd ZFS_EXIT(zfsvfs); 6990185029Spjd 6991185029Spjd return (error); 6992185029Spjd} 6993185029Spjd 6994192800Straszint 6995192800Straszzfs_freebsd_getacl(ap) 6996192800Strasz struct vop_getacl_args /* { 6997192800Strasz struct vnode *vp; 6998192800Strasz acl_type_t type; 6999192800Strasz struct acl *aclp; 7000192800Strasz struct ucred *cred; 7001192800Strasz struct thread *td; 7002192800Strasz } */ *ap; 7003192800Strasz{ 7004192800Strasz int error; 7005192800Strasz vsecattr_t vsecattr; 7006192800Strasz 7007192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 7008197435Strasz return (EINVAL); 7009192800Strasz 7010192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 7011192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 7012192800Strasz return (error); 7013192800Strasz 7014192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 7015196303Spjd if (vsecattr.vsa_aclentp != NULL) 7016196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 7017192800Strasz 7018196303Spjd return (error); 7019192800Strasz} 7020192800Strasz 7021192800Straszint 7022192800Straszzfs_freebsd_setacl(ap) 7023192800Strasz struct vop_setacl_args /* { 7024192800Strasz struct vnode *vp; 7025192800Strasz acl_type_t type; 7026192800Strasz struct acl *aclp; 7027192800Strasz struct ucred *cred; 7028192800Strasz struct thread *td; 7029192800Strasz } */ *ap; 7030192800Strasz{ 7031192800Strasz int error; 7032192800Strasz vsecattr_t vsecattr; 7033192800Strasz int aclbsize; /* size of acl list in bytes */ 7034192800Strasz aclent_t *aaclp; 7035192800Strasz 7036192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 7037197435Strasz return (EINVAL); 7038192800Strasz 7039192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 7040192800Strasz return (EINVAL); 7041192800Strasz 7042192800Strasz /* 7043196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 7044192800Strasz * splitting every entry into two and appending "canonical six" 7045192800Strasz * entries at the end. Don't allow for setting an ACL that would 7046192800Strasz * cause chmod(2) to run out of ACL entries. 
7047192800Strasz */ 7048192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 7049192800Strasz return (ENOSPC); 7050192800Strasz 7051208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 7052208030Strasz if (error != 0) 7053208030Strasz return (error); 7054208030Strasz 7055192800Strasz vsecattr.vsa_mask = VSA_ACE; 7056192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 7057192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 7058192800Strasz aaclp = vsecattr.vsa_aclentp; 7059192800Strasz vsecattr.vsa_aclentsz = aclbsize; 7060192800Strasz 7061192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 7062192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 7063192800Strasz kmem_free(aaclp, aclbsize); 7064192800Strasz 7065192800Strasz return (error); 7066192800Strasz} 7067192800Strasz 7068192800Straszint 7069192800Straszzfs_freebsd_aclcheck(ap) 7070192800Strasz struct vop_aclcheck_args /* { 7071192800Strasz struct vnode *vp; 7072192800Strasz acl_type_t type; 7073192800Strasz struct acl *aclp; 7074192800Strasz struct ucred *cred; 7075192800Strasz struct thread *td; 7076192800Strasz } */ *ap; 7077192800Strasz{ 7078192800Strasz 7079192800Strasz return (EOPNOTSUPP); 7080192800Strasz} 7081192800Strasz 7082168404Spjdstruct vop_vector zfs_vnodeops; 7083168404Spjdstruct vop_vector zfs_fifoops; 7084209962Smmstruct vop_vector zfs_shareops; 7085168404Spjd 7086168404Spjdstruct vop_vector zfs_vnodeops = { 7087185029Spjd .vop_default = &default_vnodeops, 7088185029Spjd .vop_inactive = zfs_freebsd_inactive, 7089185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 7090185029Spjd .vop_access = zfs_freebsd_access, 7091168404Spjd#ifdef FREEBSD_NAMECACHE 7092185029Spjd .vop_lookup = vfs_cache_lookup, 7093185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 7094168404Spjd#else 7095185029Spjd .vop_lookup = zfs_freebsd_lookup, 7096168404Spjd#endif 7097185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 7098185029Spjd .vop_setattr = zfs_freebsd_setattr, 7099185029Spjd .vop_create = zfs_freebsd_create, 7100185029Spjd .vop_mknod = zfs_freebsd_create, 7101185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 7102185029Spjd .vop_readdir = zfs_freebsd_readdir, 7103185029Spjd .vop_fsync = zfs_freebsd_fsync, 7104185029Spjd .vop_open = zfs_freebsd_open, 7105185029Spjd .vop_close = zfs_freebsd_close, 7106185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 7107185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 7108185029Spjd .vop_link = zfs_freebsd_link, 7109185029Spjd .vop_symlink = zfs_freebsd_symlink, 7110185029Spjd .vop_readlink = zfs_freebsd_readlink, 7111185029Spjd .vop_read = zfs_freebsd_read, 7112185029Spjd .vop_write = zfs_freebsd_write, 7113185029Spjd .vop_remove = zfs_freebsd_remove, 7114185029Spjd .vop_rename = zfs_freebsd_rename, 7115185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 7116243518Savg .vop_bmap = zfs_freebsd_bmap, 7117185029Spjd .vop_fid = zfs_freebsd_fid, 7118185029Spjd .vop_getextattr = zfs_getextattr, 7119185029Spjd .vop_deleteextattr = zfs_deleteextattr, 7120185029Spjd .vop_setextattr = zfs_setextattr, 7121185029Spjd .vop_listextattr = zfs_listextattr, 7122192800Strasz .vop_getacl = zfs_freebsd_getacl, 7123192800Strasz .vop_setacl = zfs_freebsd_setacl, 7124192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 7125213937Savg .vop_getpages = zfs_freebsd_getpages, 7126260786Savg .vop_putpages = zfs_freebsd_putpages, 7127168404Spjd}; 7128168404Spjd 7129169170Spjdstruct vop_vector zfs_fifoops = { 7130185029Spjd .vop_default = &fifo_specops, 7131200162Skib .vop_fsync = zfs_freebsd_fsync, 7132185029Spjd .vop_access = zfs_freebsd_access, 7133185029Spjd .vop_getattr = zfs_freebsd_getattr, 7134185029Spjd .vop_inactive = zfs_freebsd_inactive, 7135185029Spjd .vop_read = VOP_PANIC, 7136185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 7137185029Spjd .vop_setattr = zfs_freebsd_setattr, 7138185029Spjd .vop_write = VOP_PANIC, 7139196949Strasz .vop_pathconf = 
zfs_freebsd_fifo_pathconf, 7140185029Spjd .vop_fid = zfs_freebsd_fid, 7141192800Strasz .vop_getacl = zfs_freebsd_getacl, 7142192800Strasz .vop_setacl = zfs_freebsd_setacl, 7143192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 7144168404Spjd}; 7145209962Smm 7146209962Smm/* 7147209962Smm * special share hidden files vnode operations template 7148209962Smm */ 7149209962Smmstruct vop_vector zfs_shareops = { 7150209962Smm .vop_default = &default_vnodeops, 7151209962Smm .vop_access = zfs_freebsd_access, 7152209962Smm .vop_inactive = zfs_freebsd_inactive, 7153209962Smm .vop_reclaim = zfs_freebsd_reclaim, 7154209962Smm .vop_fid = zfs_freebsd_fid, 7155209962Smm .vop_pathconf = zfs_freebsd_pathconf, 7156209962Smm}; 7157