/*-
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 511541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 521541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 531541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 541541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 551541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 561541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 571541Srgrimes * SUCH DAMAGE. 581541Srgrimes * 59128006Sbde * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 60128006Sbde * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ... 6122521Sdyson * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 621541Srgrimes */ 631541Srgrimes 64116192Sobrien#include <sys/cdefs.h> 65116192Sobrien__FBSDID("$FreeBSD$"); 66116192Sobrien 671541Srgrimes#include <sys/param.h> 68102957Sbde#include <sys/bio.h> 691541Srgrimes#include <sys/systm.h> 70102957Sbde#include <sys/buf.h> 71102957Sbde#include <sys/conf.h> 72102991Sphk#include <sys/extattr.h> 73102957Sbde#include <sys/kernel.h> 74114216Skan#include <sys/limits.h> 75102957Sbde#include <sys/malloc.h> 76102957Sbde#include <sys/mount.h> 77164033Srwatson#include <sys/priv.h> 78248084Sattilio#include <sys/rwlock.h> 791541Srgrimes#include <sys/stat.h> 80102957Sbde#include <sys/vmmeter.h> 811541Srgrimes#include <sys/vnode.h> 821541Srgrimes 831541Srgrimes#include <vm/vm.h> 84239065Skib#include <vm/vm_param.h> 85102957Sbde#include <vm/vm_extern.h> 86102957Sbde#include <vm/vm_object.h> 877695Sdg#include <vm/vm_page.h> 88101720Sphk#include <vm/vm_pager.h> 89101720Sphk#include <vm/vnode_pager.h> 901541Srgrimes 9159241Srwatson#include <ufs/ufs/extattr.h> 921541Srgrimes#include <ufs/ufs/quota.h> 931541Srgrimes#include <ufs/ufs/inode.h> 94102957Sbde#include <ufs/ufs/ufs_extern.h> 9522521Sdyson#include 
<ufs/ufs/ufsmount.h> 961541Srgrimes 971541Srgrimes#include <ufs/ffs/fs.h> 981541Srgrimes#include <ufs/ffs/ffs_extern.h> 99112694Stegge#include "opt_directio.h" 100141521Sphk#include "opt_ffs.h" 1011541Srgrimes 102112694Stegge#ifdef DIRECTIO 103112694Steggeextern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); 104112694Stegge#endif 105138270Sphkstatic vop_fsync_t ffs_fsync; 106169671Skibstatic vop_lock1_t ffs_lock; 107138270Sphkstatic vop_getpages_t ffs_getpages; 108138270Sphkstatic vop_read_t ffs_read; 109138270Sphkstatic vop_write_t ffs_write; 110102090Sphkstatic int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag); 111105136Smckusickstatic int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, 112105136Smckusick struct ucred *cred); 113138270Sphkstatic vop_strategy_t ffsext_strategy; 114138270Sphkstatic vop_closeextattr_t ffs_closeextattr; 115138270Sphkstatic vop_deleteextattr_t ffs_deleteextattr; 116138270Sphkstatic vop_getextattr_t ffs_getextattr; 117138270Sphkstatic vop_listextattr_t ffs_listextattr; 118138270Sphkstatic vop_openextattr_t ffs_openextattr; 119138270Sphkstatic vop_setextattr_t ffs_setextattr; 120166774Spjdstatic vop_vptofh_t ffs_vptofh; 12112911Sphk 122101720Sphk 1231541Srgrimes/* Global vfs data structures for ufs. */ 124141542Sphkstruct vop_vector ffs_vnodeops1 = { 125138290Sphk .vop_default = &ufs_vnodeops, 126138290Sphk .vop_fsync = ffs_fsync, 127138290Sphk .vop_getpages = ffs_getpages, 128169671Skib .vop_lock1 = ffs_lock, 129138290Sphk .vop_read = ffs_read, 130138290Sphk .vop_reallocblks = ffs_reallocblks, 131138290Sphk .vop_write = ffs_write, 132166774Spjd .vop_vptofh = ffs_vptofh, 133141542Sphk}; 134141542Sphk 135141542Sphkstruct vop_vector ffs_fifoops1 = { 136141542Sphk .vop_default = &ufs_fifoops, 137141542Sphk .vop_fsync = ffs_fsync, 138141542Sphk .vop_reallocblks = ffs_reallocblks, /* XXX: really ??? 
*/ 139166774Spjd .vop_vptofh = ffs_vptofh, 140141542Sphk}; 141141542Sphk 142141542Sphk/* Global vfs data structures for ufs. */ 143141542Sphkstruct vop_vector ffs_vnodeops2 = { 144141542Sphk .vop_default = &ufs_vnodeops, 145141542Sphk .vop_fsync = ffs_fsync, 146141542Sphk .vop_getpages = ffs_getpages, 147169671Skib .vop_lock1 = ffs_lock, 148141542Sphk .vop_read = ffs_read, 149141542Sphk .vop_reallocblks = ffs_reallocblks, 150141542Sphk .vop_write = ffs_write, 151138290Sphk .vop_closeextattr = ffs_closeextattr, 152138290Sphk .vop_deleteextattr = ffs_deleteextattr, 153138869Sphk .vop_getextattr = ffs_getextattr, 154138290Sphk .vop_listextattr = ffs_listextattr, 155138290Sphk .vop_openextattr = ffs_openextattr, 156138869Sphk .vop_setextattr = ffs_setextattr, 157166774Spjd .vop_vptofh = ffs_vptofh, 1581541Srgrimes}; 1591541Srgrimes 160141542Sphkstruct vop_vector ffs_fifoops2 = { 161138290Sphk .vop_default = &ufs_fifoops, 162138290Sphk .vop_fsync = ffs_fsync, 163169671Skib .vop_lock1 = ffs_lock, 164138290Sphk .vop_reallocblks = ffs_reallocblks, 165138290Sphk .vop_strategy = ffsext_strategy, 166138290Sphk .vop_closeextattr = ffs_closeextattr, 167138290Sphk .vop_deleteextattr = ffs_deleteextattr, 168138869Sphk .vop_getextattr = ffs_getextattr, 169138290Sphk .vop_listextattr = ffs_listextattr, 170138290Sphk .vop_openextattr = ffs_openextattr, 171138869Sphk .vop_setextattr = ffs_setextattr, 172166774Spjd .vop_vptofh = ffs_vptofh, 1731541Srgrimes}; 1741541Srgrimes 1751541Srgrimes/* 1761541Srgrimes * Synch an open file. 
1771541Srgrimes */ 1781541Srgrimes/* ARGSUSED */ 179105136Smckusickstatic int 180141522Sphkffs_fsync(struct vop_fsync_args *ap) 1811541Srgrimes{ 182195187Skib struct vnode *vp; 183195187Skib struct bufobj *bo; 184141522Sphk int error; 185141522Sphk 186195187Skib vp = ap->a_vp; 187195187Skib bo = &vp->v_bufobj; 188195187Skibretry: 189233438Smckusick error = ffs_syncvnode(vp, ap->a_waitfor, 0); 190141533Sphk if (error) 191141533Sphk return (error); 192224503Smckusick if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) { 193195187Skib error = softdep_fsync(vp); 194195187Skib if (error) 195195187Skib return (error); 196195187Skib 197195187Skib /* 198195187Skib * The softdep_fsync() function may drop vp lock, 199195187Skib * allowing for dirty buffers to reappear on the 200195187Skib * bo_dirty list. Recheck and resync as needed. 201195187Skib */ 202195187Skib BO_LOCK(bo); 203284201Skib if ((vp->v_type == VREG || vp->v_type == VDIR) && 204284201Skib (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) { 205195187Skib BO_UNLOCK(bo); 206195187Skib goto retry; 207195187Skib } 208195187Skib BO_UNLOCK(bo); 209195187Skib } 210195187Skib return (0); 211141522Sphk} 212141522Sphk 213141522Sphkint 214233438Smckusickffs_syncvnode(struct vnode *vp, int waitfor, int flags) 215141522Sphk{ 216222958Sjeff struct inode *ip; 217177493Sjeff struct bufobj *bo; 21834266Sjulian struct buf *bp; 2191541Srgrimes struct buf *nbp; 22098542Smckusick ufs_lbn_t lbn; 221233438Smckusick int error, wait, passes; 2221541Srgrimes 223222958Sjeff ip = VTOI(vp); 224222958Sjeff ip->i_flag &= ~IN_NEEDSYNC; 225177493Sjeff bo = &vp->v_bufobj; 22632286Sdyson 2277695Sdg /* 228222958Sjeff * When doing MNT_WAIT we must first flush all dependencies 229222958Sjeff * on the inode. 
230222958Sjeff */ 231222958Sjeff if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT && 232222958Sjeff (error = softdep_sync_metadata(vp)) != 0) 233222958Sjeff return (error); 234222958Sjeff 235222958Sjeff /* 2361541Srgrimes * Flush all dirty buffers associated with a vnode. 2371541Srgrimes */ 238222958Sjeff error = 0; 239222958Sjeff passes = 0; 240222958Sjeff wait = 0; /* Always do an async pass first. */ 241222958Sjeff lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); 242177493Sjeff BO_LOCK(bo); 2431541Srgrimesloop: 244177493Sjeff TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) 245110584Sjeff bp->b_vflags &= ~BV_SCANNED; 246177493Sjeff TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 247175053Sobrien /* 24855697Smckusick * Reasons to skip this buffer: it has already been considered 249222958Sjeff * on this pass, the buffer has dependencies that will cause 25055697Smckusick * it to be redirtied and it has not already been deferred, 25155697Smckusick * or it is already being written. 25234266Sjulian */ 253110584Sjeff if ((bp->b_vflags & BV_SCANNED) != 0) 2541541Srgrimes continue; 255110584Sjeff bp->b_vflags |= BV_SCANNED; 256222958Sjeff /* Flush indirects in order. 
*/ 257222958Sjeff if (waitfor == MNT_WAIT && bp->b_lblkno <= -NDADDR && 258222958Sjeff lbn_level(bp->b_lblkno) >= passes) 25955697Smckusick continue; 260222958Sjeff if (bp->b_lblkno > lbn) 261222958Sjeff panic("ffs_syncvnode: syncing truncated data."); 262264491Sscottl if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) { 263264491Sscottl BO_UNLOCK(bo); 264264491Sscottl } else if (wait != 0) { 265264491Sscottl if (BUF_LOCK(bp, 266264491Sscottl LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 267264491Sscottl BO_LOCKPTR(bo)) != 0) { 268264491Sscottl bp->b_vflags &= ~BV_SCANNED; 269264491Sscottl goto next; 270264491Sscottl } 271264491Sscottl } else 272110584Sjeff continue; 2731541Srgrimes if ((bp->b_flags & B_DELWRI) == 0) 2741541Srgrimes panic("ffs_fsync: not dirty"); 27534266Sjulian /* 276222958Sjeff * Check for dependencies and potentially complete them. 27734266Sjulian */ 278222958Sjeff if (!LIST_EMPTY(&bp->b_dep) && 279222958Sjeff (error = softdep_sync_buf(vp, bp, 280222958Sjeff wait ? MNT_WAIT : MNT_NOWAIT)) != 0) { 281222958Sjeff /* I/O error. */ 282222958Sjeff if (error != EBUSY) { 283222958Sjeff BUF_UNLOCK(bp); 284222958Sjeff return (error); 28532286Sdyson } 286222958Sjeff /* If we deferred once, don't defer again. */ 287222958Sjeff if ((bp->b_flags & B_DEFERRED) == 0) { 288222958Sjeff bp->b_flags |= B_DEFERRED; 289222958Sjeff BUF_UNLOCK(bp); 290222958Sjeff goto next; 291222958Sjeff } 292222958Sjeff } 293222958Sjeff if (wait) { 29432286Sdyson bremfree(bp); 295222958Sjeff if ((error = bwrite(bp)) != 0) 296222958Sjeff return (error); 297222958Sjeff } else if ((bp->b_flags & B_CLUSTEROK)) { 298222958Sjeff (void) vfs_bio_awrite(bp); 299222958Sjeff } else { 300222958Sjeff bremfree(bp); 301222958Sjeff (void) bawrite(bp); 302222958Sjeff } 303222958Sjeffnext: 30444391Smckusick /* 305175053Sobrien * Since we may have slept during the I/O, we need 30644391Smckusick * to start from a known point. 
30744391Smckusick */ 308177493Sjeff BO_LOCK(bo); 309177493Sjeff nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd); 3101541Srgrimes } 311222958Sjeff if (waitfor != MNT_WAIT) { 312222958Sjeff BO_UNLOCK(bo); 313233438Smckusick if ((flags & NO_INO_UPDT) != 0) 314232351Smckusick return (0); 315232351Smckusick else 316232834Skib return (ffs_update(vp, 0)); 317222958Sjeff } 318222958Sjeff /* Drain IO to see if we're done. */ 319222958Sjeff bufobj_wwait(bo, 0, 0); 32034266Sjulian /* 321222958Sjeff * Block devices associated with filesystems may have new I/O 322222958Sjeff * requests posted for them even if the vnode is locked, so no 323222958Sjeff * amount of trying will get them clean. We make several passes 324222958Sjeff * as a best effort. 325222958Sjeff * 326222958Sjeff * Regular files may need multiple passes to flush all dependency 327222958Sjeff * work as it is possible that we must write once per indirect 328222958Sjeff * level, once for the leaf, and once for the inode and each of 329222958Sjeff * these will be done with one sync and one async pass. 33034266Sjulian */ 331222958Sjeff if (bo->bo_dirty.bv_cnt > 0) { 332222958Sjeff /* Write the inode after sync passes to flush deps. */ 333233438Smckusick if (wait && DOINGSOFTDEP(vp) && (flags & NO_INO_UPDT) == 0) { 334222958Sjeff BO_UNLOCK(bo); 335232948Skib ffs_update(vp, 1); 336222958Sjeff BO_LOCK(bo); 337222958Sjeff } 338222958Sjeff /* switch between sync/async. 
*/ 339222958Sjeff wait = !wait; 340222958Sjeff if (wait == 1 || ++passes < NIADDR + 2) 341222958Sjeff goto loop; 342173464Sobrien#ifdef INVARIANTS 343222958Sjeff if (!vn_isdisk(vp, NULL)) 344222958Sjeff vprint("ffs_fsync: dirty", vp); 34534266Sjulian#endif 3461541Srgrimes } 347177493Sjeff BO_UNLOCK(bo); 348232351Smckusick error = 0; 349233438Smckusick if ((flags & NO_INO_UPDT) == 0) 350232948Skib error = ffs_update(vp, 1); 351222958Sjeff if (DOINGSUJ(vp)) 352222958Sjeff softdep_journal_fsync(VTOI(vp)); 353222958Sjeff return (error); 3541541Srgrimes} 355101720Sphk 356141521Sphkstatic int 357141521Sphkffs_lock(ap) 358169671Skib struct vop_lock1_args /* { 359141521Sphk struct vnode *a_vp; 360141521Sphk int a_flags; 361141521Sphk struct thread *a_td; 362164248Skmacy char *file; 363164248Skmacy int line; 364141521Sphk } */ *ap; 365141521Sphk{ 366158321Stegge#ifndef NO_FFS_SNAPSHOT 367158259Stegge struct vnode *vp; 368158259Stegge int flags; 369158259Stegge struct lock *lkp; 370158259Stegge int result; 371175053Sobrien 372158259Stegge switch (ap->a_flags & LK_TYPE_MASK) { 373158259Stegge case LK_SHARED: 374158259Stegge case LK_UPGRADE: 375158259Stegge case LK_EXCLUSIVE: 376158259Stegge vp = ap->a_vp; 377158259Stegge flags = ap->a_flags; 378158259Stegge for (;;) { 379184074Skib#ifdef DEBUG_VFS_LOCKS 380184074Skib KASSERT(vp->v_holdcnt != 0, 381184074Skib ("ffs_lock %p: zero hold count", vp)); 382184074Skib#endif 383158259Stegge lkp = vp->v_vnlock; 384176320Sattilio result = _lockmgr_args(lkp, flags, VI_MTX(vp), 385176320Sattilio LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, 386176320Sattilio ap->a_file, ap->a_line); 387158259Stegge if (lkp == vp->v_vnlock || result != 0) 388158259Stegge break; 389158259Stegge /* 390158259Stegge * Apparent success, except that the vnode 391158259Stegge * mutated between snapshot file vnode and 392158259Stegge * regular file vnode while this process 393158259Stegge * slept. 
The lock currently held is not the 394158259Stegge * right lock. Release it, and try to get the 395158259Stegge * new lock. 396158259Stegge */ 397177779Sjeff (void) _lockmgr_args(lkp, LK_RELEASE, NULL, 398176320Sattilio LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, 399175294Sattilio ap->a_file, ap->a_line); 400177779Sjeff if ((flags & (LK_INTERLOCK | LK_NOWAIT)) == 401177779Sjeff (LK_INTERLOCK | LK_NOWAIT)) 402177779Sjeff return (EBUSY); 403158259Stegge if ((flags & LK_TYPE_MASK) == LK_UPGRADE) 404158259Stegge flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE; 405158259Stegge flags &= ~LK_INTERLOCK; 406158259Stegge } 407158259Stegge break; 408158259Stegge default: 409169671Skib result = VOP_LOCK1_APV(&ufs_vnodeops, ap); 410158259Stegge } 411158259Stegge return (result); 412158321Stegge#else 413169671Skib return (VOP_LOCK1_APV(&ufs_vnodeops, ap)); 414158321Stegge#endif 415141521Sphk} 416141521Sphk 417101720Sphk/* 418101720Sphk * Vnode op for reading. 419101720Sphk */ 420104094Sphkstatic int 421101720Sphkffs_read(ap) 422101720Sphk struct vop_read_args /* { 423101720Sphk struct vnode *a_vp; 424101720Sphk struct uio *a_uio; 425101720Sphk int a_ioflag; 426101720Sphk struct ucred *a_cred; 427101720Sphk } */ *ap; 428101720Sphk{ 429101720Sphk struct vnode *vp; 430101720Sphk struct inode *ip; 431101720Sphk struct uio *uio; 432101720Sphk struct fs *fs; 433101720Sphk struct buf *bp; 434101720Sphk ufs_lbn_t lbn, nextlbn; 435101720Sphk off_t bytesinfile; 436101720Sphk long size, xfersize, blkoffset; 437231949Skib ssize_t orig_resid; 438231949Skib int error; 439101720Sphk int seqcount; 440101720Sphk int ioflag; 441101720Sphk 442101789Sphk vp = ap->a_vp; 443101789Sphk uio = ap->a_uio; 444101789Sphk ioflag = ap->a_ioflag; 445101720Sphk if (ap->a_ioflag & IO_EXT) 446102090Sphk#ifdef notyet 447102090Sphk return (ffs_extread(vp, uio, ioflag)); 448102090Sphk#else 449102090Sphk panic("ffs_read+IO_EXT"); 450102090Sphk#endif 451112694Stegge#ifdef DIRECTIO 452112694Stegge if ((ioflag & 
IO_DIRECT) != 0) { 453112694Stegge int workdone; 454101720Sphk 455112694Stegge error = ffs_rawread(vp, uio, &workdone); 456112694Stegge if (error != 0 || workdone != 0) 457112694Stegge return error; 458112694Stegge } 459112694Stegge#endif 460112694Stegge 461124728Skan seqcount = ap->a_ioflag >> IO_SEQSHIFT; 462101720Sphk ip = VTOI(vp); 463101720Sphk 464173464Sobrien#ifdef INVARIANTS 465101720Sphk if (uio->uio_rw != UIO_READ) 466101720Sphk panic("ffs_read: mode"); 467101720Sphk 468101720Sphk if (vp->v_type == VLNK) { 469101720Sphk if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 470101720Sphk panic("ffs_read: short symlink"); 471101720Sphk } else if (vp->v_type != VREG && vp->v_type != VDIR) 472101720Sphk panic("ffs_read: type %d", vp->v_type); 473101720Sphk#endif 474101720Sphk orig_resid = uio->uio_resid; 475232732Spho KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0")); 476124855Sache if (orig_resid == 0) 477101720Sphk return (0); 478232732Spho KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0")); 479124855Sache fs = ip->i_fs; 480124855Sache if (uio->uio_offset < ip->i_size && 481124855Sache uio->uio_offset >= fs->fs_maxfilesize) 482124855Sache return (EOVERFLOW); 483101720Sphk 484101720Sphk for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 485101720Sphk if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 486101720Sphk break; 487101720Sphk lbn = lblkno(fs, uio->uio_offset); 488101720Sphk nextlbn = lbn + 1; 489101720Sphk 490101720Sphk /* 491101720Sphk * size of buffer. The buffer representing the 492101720Sphk * end of the file is rounded up to the size of 493175053Sobrien * the block type ( fragment or full block, 494101720Sphk * depending ). 
495101720Sphk */ 496101720Sphk size = blksize(fs, ip, lbn); 497101720Sphk blkoffset = blkoff(fs, uio->uio_offset); 498175053Sobrien 499101720Sphk /* 500101720Sphk * The amount we want to transfer in this iteration is 501101720Sphk * one FS block less the amount of the data before 502101720Sphk * our startpoint (duh!) 503101720Sphk */ 504101720Sphk xfersize = fs->fs_bsize - blkoffset; 505101720Sphk 506101720Sphk /* 507101720Sphk * But if we actually want less than the block, 508101720Sphk * or the file doesn't have a whole block more of data, 509101720Sphk * then use the lesser number. 510101720Sphk */ 511101720Sphk if (uio->uio_resid < xfersize) 512101720Sphk xfersize = uio->uio_resid; 513101720Sphk if (bytesinfile < xfersize) 514101720Sphk xfersize = bytesinfile; 515101720Sphk 516101720Sphk if (lblktosize(fs, nextlbn) >= ip->i_size) { 517101720Sphk /* 518101720Sphk * Don't do readahead if this is the end of the file. 519101720Sphk */ 520248521Skib error = bread_gb(vp, lbn, size, NOCRED, 521248521Skib GB_UNMAPPED, &bp); 522101720Sphk } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 523175053Sobrien /* 524101720Sphk * Otherwise if we are allowed to cluster, 525101720Sphk * grab as much as we can. 526101720Sphk * 527101720Sphk * XXX This may not be a win if we are not 528101720Sphk * doing sequential access. 529101720Sphk */ 530101720Sphk error = cluster_read(vp, ip->i_size, lbn, 531248282Skib size, NOCRED, blkoffset + uio->uio_resid, 532248521Skib seqcount, GB_UNMAPPED, &bp); 533101720Sphk } else if (seqcount > 1) { 534101720Sphk /* 535101720Sphk * If we are NOT allowed to cluster, then 536101720Sphk * if we appear to be acting sequentially, 537101720Sphk * fire off a request for a readahead 538101720Sphk * as well as a read. Note that the 4th and 5th 539101720Sphk * arguments point to arrays of the size specified in 540101720Sphk * the 6th argument. 
541101720Sphk */ 542259223Spfg u_int nextsize = blksize(fs, ip, nextlbn); 543248521Skib error = breadn_flags(vp, lbn, size, &nextlbn, 544248521Skib &nextsize, 1, NOCRED, GB_UNMAPPED, &bp); 545101720Sphk } else { 546101720Sphk /* 547175053Sobrien * Failing all of the above, just read what the 548101720Sphk * user asked for. Interestingly, the same as 549101720Sphk * the first option above. 550101720Sphk */ 551248521Skib error = bread_gb(vp, lbn, size, NOCRED, 552248521Skib GB_UNMAPPED, &bp); 553101720Sphk } 554101720Sphk if (error) { 555101720Sphk brelse(bp); 556101720Sphk bp = NULL; 557101720Sphk break; 558101720Sphk } 559101720Sphk 560101720Sphk /* 561101720Sphk * If IO_DIRECT then set B_DIRECT for the buffer. This 562101720Sphk * will cause us to attempt to release the buffer later on 563101720Sphk * and will cause the buffer cache to attempt to free the 564101720Sphk * underlying pages. 565101720Sphk */ 566101720Sphk if (ioflag & IO_DIRECT) 567101720Sphk bp->b_flags |= B_DIRECT; 568101720Sphk 569101720Sphk /* 570101720Sphk * We should only get non-zero b_resid when an I/O error 571101720Sphk * has occurred, which should cause us to break above. 572101720Sphk * However, if the short read did not cause an error, 573101720Sphk * then we want to ensure that we do not uiomove bad 574101720Sphk * or uninitialized data. 
575101720Sphk */ 576101720Sphk size -= bp->b_resid; 577101720Sphk if (size < xfersize) { 578101720Sphk if (size == 0) 579101720Sphk break; 580101720Sphk xfersize = size; 581101720Sphk } 582101720Sphk 583248521Skib if ((bp->b_flags & B_UNMAPPED) == 0) { 584248521Skib error = vn_io_fault_uiomove((char *)bp->b_data + 585248521Skib blkoffset, (int)xfersize, uio); 586248521Skib } else { 587248521Skib error = vn_io_fault_pgmove(bp->b_pages, blkoffset, 588248521Skib (int)xfersize, uio); 589248521Skib } 590101720Sphk if (error) 591101720Sphk break; 592101720Sphk 593101720Sphk if ((ioflag & (IO_VMIO|IO_DIRECT)) && 594168353Sdelphij (LIST_EMPTY(&bp->b_dep))) { 595101720Sphk /* 596101720Sphk * If there are no dependencies, and it's VMIO, 597101720Sphk * then we don't need the buf, mark it available 598221281Skib * for freeing. For non-direct VMIO reads, the VM 599221261Skib * has the data. 600101720Sphk */ 601101720Sphk bp->b_flags |= B_RELBUF; 602101720Sphk brelse(bp); 603101720Sphk } else { 604101720Sphk /* 605101720Sphk * Otherwise let whoever 606101720Sphk * made the request take care of 607101720Sphk * freeing it. We just queue 608101720Sphk * it onto another list. 609101720Sphk */ 610101720Sphk bqrelse(bp); 611101720Sphk } 612101720Sphk } 613101720Sphk 614175053Sobrien /* 615101720Sphk * This can only happen in the case of an error 616101720Sphk * because the loop above resets bp to NULL on each iteration 617101720Sphk * and on normal completion has not set a new value into it. 
618101720Sphk * so it must have come from a 'break' statement 619101720Sphk */ 620101720Sphk if (bp != NULL) { 621101720Sphk if ((ioflag & (IO_VMIO|IO_DIRECT)) && 622168353Sdelphij (LIST_EMPTY(&bp->b_dep))) { 623101720Sphk bp->b_flags |= B_RELBUF; 624101720Sphk brelse(bp); 625101720Sphk } else { 626101720Sphk bqrelse(bp); 627101720Sphk } 628101720Sphk } 629101720Sphk 630101720Sphk if ((error == 0 || uio->uio_resid != orig_resid) && 631273255Skib (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 && 632177474Skib (ip->i_flag & IN_ACCESS) == 0) { 633163194Skib VI_LOCK(vp); 634101720Sphk ip->i_flag |= IN_ACCESS; 635163194Skib VI_UNLOCK(vp); 636163194Skib } 637101720Sphk return (error); 638101720Sphk} 639101720Sphk 640101720Sphk/* 641101720Sphk * Vnode op for writing. 642101720Sphk */ 643104094Sphkstatic int 644101720Sphkffs_write(ap) 645101720Sphk struct vop_write_args /* { 646101720Sphk struct vnode *a_vp; 647101720Sphk struct uio *a_uio; 648101720Sphk int a_ioflag; 649101720Sphk struct ucred *a_cred; 650101720Sphk } */ *ap; 651101720Sphk{ 652101720Sphk struct vnode *vp; 653101720Sphk struct uio *uio; 654101720Sphk struct inode *ip; 655101720Sphk struct fs *fs; 656101720Sphk struct buf *bp; 657101720Sphk ufs_lbn_t lbn; 658101720Sphk off_t osize; 659231949Skib ssize_t resid; 660101720Sphk int seqcount; 661231949Skib int blkoffset, error, flags, ioflag, size, xfersize; 662101720Sphk 663101789Sphk vp = ap->a_vp; 664101789Sphk uio = ap->a_uio; 665101789Sphk ioflag = ap->a_ioflag; 666101720Sphk if (ap->a_ioflag & IO_EXT) 667102090Sphk#ifdef notyet 668101789Sphk return (ffs_extwrite(vp, uio, ioflag, ap->a_cred)); 669102090Sphk#else 670124856Sache panic("ffs_write+IO_EXT"); 671102090Sphk#endif 672101720Sphk 673124728Skan seqcount = ap->a_ioflag >> IO_SEQSHIFT; 674101720Sphk ip = VTOI(vp); 675101720Sphk 676173464Sobrien#ifdef INVARIANTS 677101720Sphk if (uio->uio_rw != UIO_WRITE) 678124856Sache panic("ffs_write: mode"); 679101720Sphk#endif 680101720Sphk 681101720Sphk 
switch (vp->v_type) { 682101720Sphk case VREG: 683101720Sphk if (ioflag & IO_APPEND) 684101720Sphk uio->uio_offset = ip->i_size; 685125710Sbde if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 686101720Sphk return (EPERM); 687101720Sphk /* FALLTHROUGH */ 688101720Sphk case VLNK: 689101720Sphk break; 690101720Sphk case VDIR: 691124856Sache panic("ffs_write: dir write"); 692101720Sphk break; 693101720Sphk default: 694124856Sache panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type, 695101720Sphk (int)uio->uio_offset, 696101720Sphk (int)uio->uio_resid 697101720Sphk ); 698101720Sphk } 699101720Sphk 700125079Sache KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); 701125079Sache KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); 702101720Sphk fs = ip->i_fs; 703125710Sbde if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) 704101720Sphk return (EFBIG); 705101720Sphk /* 706101720Sphk * Maybe this should be above the vnode op call, but so long as 707101720Sphk * file servers have no limits, I don't think it matters. 
708101720Sphk */ 709207662Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 710207662Strasz return (EFBIG); 711101720Sphk 712101720Sphk resid = uio->uio_resid; 713101720Sphk osize = ip->i_size; 714105422Sdillon if (seqcount > BA_SEQMAX) 715105422Sdillon flags = BA_SEQMAX << BA_SEQSHIFT; 716105422Sdillon else 717105422Sdillon flags = seqcount << BA_SEQSHIFT; 718101720Sphk if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 719105422Sdillon flags |= IO_SYNC; 720248521Skib flags |= BA_UNMAPPED; 721101720Sphk 722101720Sphk for (error = 0; uio->uio_resid > 0;) { 723101720Sphk lbn = lblkno(fs, uio->uio_offset); 724101720Sphk blkoffset = blkoff(fs, uio->uio_offset); 725101720Sphk xfersize = fs->fs_bsize - blkoffset; 726101720Sphk if (uio->uio_resid < xfersize) 727101720Sphk xfersize = uio->uio_resid; 728101720Sphk if (uio->uio_offset + xfersize > ip->i_size) 729101720Sphk vnode_pager_setsize(vp, uio->uio_offset + xfersize); 730101720Sphk 731262779Spfg /* 732101720Sphk * We must perform a read-before-write if the transfer size 733101720Sphk * does not cover the entire buffer. 734262779Spfg */ 735101720Sphk if (fs->fs_bsize > xfersize) 736101720Sphk flags |= BA_CLRBUF; 737101720Sphk else 738101720Sphk flags &= ~BA_CLRBUF; 739101720Sphk/* XXX is uio->uio_offset the right thing here? 
*/ 740101720Sphk error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 741101720Sphk ap->a_cred, flags, &bp); 742187468Skib if (error != 0) { 743187468Skib vnode_pager_setsize(vp, ip->i_size); 744101720Sphk break; 745187468Skib } 746101720Sphk if (ioflag & IO_DIRECT) 747101720Sphk bp->b_flags |= B_DIRECT; 748129545Skensmith if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) 749129545Skensmith bp->b_flags |= B_NOCACHE; 750101720Sphk 751101720Sphk if (uio->uio_offset + xfersize > ip->i_size) { 752101720Sphk ip->i_size = uio->uio_offset + xfersize; 753132775Skan DIP_SET(ip, i_size, ip->i_size); 754101720Sphk } 755101720Sphk 756101720Sphk size = blksize(fs, ip, lbn) - bp->b_resid; 757101720Sphk if (size < xfersize) 758101720Sphk xfersize = size; 759101720Sphk 760248521Skib if ((bp->b_flags & B_UNMAPPED) == 0) { 761248521Skib error = vn_io_fault_uiomove((char *)bp->b_data + 762248521Skib blkoffset, (int)xfersize, uio); 763248521Skib } else { 764248521Skib error = vn_io_fault_pgmove(bp->b_pages, blkoffset, 765248521Skib (int)xfersize, uio); 766248521Skib } 767231313Smckusick /* 768231313Smckusick * If the buffer is not already filled and we encounter an 769231313Smckusick * error while trying to fill it, we have to clear out any 770231313Smckusick * garbage data from the pages instantiated for the buffer. 771231313Smckusick * If we do not, a failed uiomove() during a write can leave 772231313Smckusick * the prior contents of the pages exposed to a userland mmap. 773231313Smckusick * 774231313Smckusick * Note that we need only clear buffers with a transfer size 775231313Smckusick * equal to the block size because buffers with a shorter 776231313Smckusick * transfer size were cleared above by the call to UFS_BALLOC() 777231313Smckusick * with the BA_CLRBUF flag set. 
778231313Smckusick * 779231313Smckusick * If the source region for uiomove identically mmaps the 780231313Smckusick * buffer, uiomove() performed the NOP copy, and the buffer 781231313Smckusick * content remains valid because the page fault handler 782231313Smckusick * validated the pages. 783231313Smckusick */ 784231313Smckusick if (error != 0 && (bp->b_flags & B_CACHE) == 0 && 785231313Smckusick fs->fs_bsize == xfersize) 786231313Smckusick vfs_bio_clrbuf(bp); 787101720Sphk if ((ioflag & (IO_VMIO|IO_DIRECT)) && 788168353Sdelphij (LIST_EMPTY(&bp->b_dep))) { 789101720Sphk bp->b_flags |= B_RELBUF; 790101720Sphk } 791101720Sphk 792101720Sphk /* 793101720Sphk * If IO_SYNC each buffer is written synchronously. Otherwise 794175053Sobrien * if we have a severe page deficiency write the buffer 795101720Sphk * asynchronously. Otherwise try to cluster, and if that 796101720Sphk * doesn't do it then either do an async write (if O_DIRECT), 797101720Sphk * or a delayed write (if not). 798101720Sphk */ 799101720Sphk if (ioflag & IO_SYNC) { 800101720Sphk (void)bwrite(bp); 801101720Sphk } else if (vm_page_count_severe() || 802101720Sphk buf_dirty_count_severe() || 803101720Sphk (ioflag & IO_ASYNC)) { 804101720Sphk bp->b_flags |= B_CLUSTEROK; 805101720Sphk bawrite(bp); 806101720Sphk } else if (xfersize + blkoffset == fs->fs_bsize) { 807101720Sphk if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 808101720Sphk bp->b_flags |= B_CLUSTEROK; 809248282Skib cluster_write(vp, bp, ip->i_size, seqcount, 810248521Skib GB_UNMAPPED); 811101720Sphk } else { 812101720Sphk bawrite(bp); 813101720Sphk } 814101720Sphk } else if (ioflag & IO_DIRECT) { 815101720Sphk bp->b_flags |= B_CLUSTEROK; 816101720Sphk bawrite(bp); 817101720Sphk } else { 818101720Sphk bp->b_flags |= B_CLUSTEROK; 819101720Sphk bdwrite(bp); 820101720Sphk } 821101720Sphk if (error || xfersize == 0) 822101720Sphk break; 823101720Sphk ip->i_flag |= IN_CHANGE | IN_UPDATE; 824101720Sphk } 825101720Sphk /* 826101720Sphk * If we 
successfully wrote any data, and we are not the superuser 827101720Sphk * we clear the setuid and setgid bits as a precaution against 828101720Sphk * tampering. 829101720Sphk */ 830167151Spjd if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 831167151Spjd ap->a_cred) { 832170587Srwatson if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) { 833167151Spjd ip->i_mode &= ~(ISUID | ISGID); 834167151Spjd DIP_SET(ip, i_mode, ip->i_mode); 835167151Spjd } 836101720Sphk } 837101720Sphk if (error) { 838101720Sphk if (ioflag & IO_UNIT) { 839141526Sphk (void)ffs_truncate(vp, osize, 840234605Strasz IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred); 841101720Sphk uio->uio_offset -= resid - uio->uio_resid; 842101720Sphk uio->uio_resid = resid; 843101720Sphk } 844101720Sphk } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) 845141526Sphk error = ffs_update(vp, 1); 846101720Sphk return (error); 847101720Sphk} 848101720Sphk 849101720Sphk/* 850101720Sphk * get page routine 851101720Sphk */ 852104094Sphkstatic int 853101720Sphkffs_getpages(ap) 854101720Sphk struct vop_getpages_args *ap; 855101720Sphk{ 856135459Sphk int i; 857101720Sphk vm_page_t mreq; 858101720Sphk int pcount; 859101720Sphk 860101720Sphk pcount = round_page(ap->a_count) / PAGE_SIZE; 861101720Sphk mreq = ap->a_m[ap->a_reqpage]; 862101720Sphk 863101720Sphk /* 864101720Sphk * if ANY DEV_BSIZE blocks are valid on a large filesystem block, 865101720Sphk * then the entire page is valid. Since the page may be mapped, 866101720Sphk * user programs might reference data beyond the actual end of file 867101720Sphk * occuring within the page. We have to zero that data. 
868101720Sphk */ 869248084Sattilio VM_OBJECT_WLOCK(mreq->object); 870101720Sphk if (mreq->valid) { 871101720Sphk if (mreq->valid != VM_PAGE_BITS_ALL) 872101720Sphk vm_page_zero_invalid(mreq, TRUE); 873101720Sphk for (i = 0; i < pcount; i++) { 874101720Sphk if (i != ap->a_reqpage) { 875207669Salc vm_page_lock(ap->a_m[i]); 876101720Sphk vm_page_free(ap->a_m[i]); 877207669Salc vm_page_unlock(ap->a_m[i]); 878101720Sphk } 879101720Sphk } 880248084Sattilio VM_OBJECT_WUNLOCK(mreq->object); 881101720Sphk return VM_PAGER_OK; 882101720Sphk } 883248084Sattilio VM_OBJECT_WUNLOCK(mreq->object); 884101720Sphk 885135459Sphk return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 886135459Sphk ap->a_count, 887135459Sphk ap->a_reqpage); 888135459Sphk} 889101720Sphk 890101720Sphk 891101720Sphk/* 892102991Sphk * Extended attribute area reading. 893101720Sphk */ 894101720Sphkstatic int 895102090Sphkffs_extread(struct vnode *vp, struct uio *uio, int ioflag) 896101720Sphk{ 897101720Sphk struct inode *ip; 898101720Sphk struct ufs2_dinode *dp; 899101720Sphk struct fs *fs; 900101720Sphk struct buf *bp; 901101720Sphk ufs_lbn_t lbn, nextlbn; 902101720Sphk off_t bytesinfile; 903101720Sphk long size, xfersize, blkoffset; 904231949Skib ssize_t orig_resid; 905231949Skib int error; 906101720Sphk 907101720Sphk ip = VTOI(vp); 908101720Sphk fs = ip->i_fs; 909101720Sphk dp = ip->i_din2; 910101720Sphk 911173464Sobrien#ifdef INVARIANTS 912101720Sphk if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC) 913101720Sphk panic("ffs_extread: mode"); 914101720Sphk 915101720Sphk#endif 916101720Sphk orig_resid = uio->uio_resid; 917125079Sache KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0")); 918124857Sache if (orig_resid == 0) 919101720Sphk return (0); 920125079Sache KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0")); 921101720Sphk 922101720Sphk for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 923101720Sphk if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 
0) 924101720Sphk break; 925101720Sphk lbn = lblkno(fs, uio->uio_offset); 926101720Sphk nextlbn = lbn + 1; 927101720Sphk 928101720Sphk /* 929101720Sphk * size of buffer. The buffer representing the 930101720Sphk * end of the file is rounded up to the size of 931175053Sobrien * the block type ( fragment or full block, 932101720Sphk * depending ). 933101720Sphk */ 934101720Sphk size = sblksize(fs, dp->di_extsize, lbn); 935101720Sphk blkoffset = blkoff(fs, uio->uio_offset); 936175053Sobrien 937101720Sphk /* 938101720Sphk * The amount we want to transfer in this iteration is 939101720Sphk * one FS block less the amount of the data before 940101720Sphk * our startpoint (duh!) 941101720Sphk */ 942101720Sphk xfersize = fs->fs_bsize - blkoffset; 943101720Sphk 944101720Sphk /* 945101720Sphk * But if we actually want less than the block, 946101720Sphk * or the file doesn't have a whole block more of data, 947101720Sphk * then use the lesser number. 948101720Sphk */ 949101720Sphk if (uio->uio_resid < xfersize) 950101720Sphk xfersize = uio->uio_resid; 951101720Sphk if (bytesinfile < xfersize) 952101720Sphk xfersize = bytesinfile; 953101720Sphk 954101720Sphk if (lblktosize(fs, nextlbn) >= dp->di_extsize) { 955101720Sphk /* 956101720Sphk * Don't do readahead if this is the end of the info. 957101720Sphk */ 958101720Sphk error = bread(vp, -1 - lbn, size, NOCRED, &bp); 959101720Sphk } else { 960101720Sphk /* 961101720Sphk * If we have a second block, then 962101720Sphk * fire off a request for a readahead 963101720Sphk * as well as a read. Note that the 4th and 5th 964101720Sphk * arguments point to arrays of the size specified in 965101720Sphk * the 6th argument. 
966101720Sphk */ 967259223Spfg u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn); 968101720Sphk 969101720Sphk nextlbn = -1 - nextlbn; 970101720Sphk error = breadn(vp, -1 - lbn, 971101720Sphk size, &nextlbn, &nextsize, 1, NOCRED, &bp); 972101720Sphk } 973101720Sphk if (error) { 974101720Sphk brelse(bp); 975101720Sphk bp = NULL; 976101720Sphk break; 977101720Sphk } 978101720Sphk 979101720Sphk /* 980101720Sphk * If IO_DIRECT then set B_DIRECT for the buffer. This 981101720Sphk * will cause us to attempt to release the buffer later on 982101720Sphk * and will cause the buffer cache to attempt to free the 983101720Sphk * underlying pages. 984101720Sphk */ 985101720Sphk if (ioflag & IO_DIRECT) 986101720Sphk bp->b_flags |= B_DIRECT; 987101720Sphk 988101720Sphk /* 989101720Sphk * We should only get non-zero b_resid when an I/O error 990101720Sphk * has occurred, which should cause us to break above. 991101720Sphk * However, if the short read did not cause an error, 992101720Sphk * then we want to ensure that we do not uiomove bad 993101720Sphk * or uninitialized data. 994101720Sphk */ 995101720Sphk size -= bp->b_resid; 996101720Sphk if (size < xfersize) { 997101720Sphk if (size == 0) 998101720Sphk break; 999101720Sphk xfersize = size; 1000101720Sphk } 1001101720Sphk 1002101720Sphk error = uiomove((char *)bp->b_data + blkoffset, 1003101720Sphk (int)xfersize, uio); 1004101720Sphk if (error) 1005101720Sphk break; 1006101720Sphk 1007101720Sphk if ((ioflag & (IO_VMIO|IO_DIRECT)) && 1008168353Sdelphij (LIST_EMPTY(&bp->b_dep))) { 1009101720Sphk /* 1010101720Sphk * If there are no dependencies, and it's VMIO, 1011101720Sphk * then we don't need the buf, mark it available 1012221281Skib * for freeing. For non-direct VMIO reads, the VM 1013221261Skib * has the data. 
1014101720Sphk */ 1015101720Sphk bp->b_flags |= B_RELBUF; 1016101720Sphk brelse(bp); 1017101720Sphk } else { 1018101720Sphk /* 1019101720Sphk * Otherwise let whoever 1020101720Sphk * made the request take care of 1021101720Sphk * freeing it. We just queue 1022101720Sphk * it onto another list. 1023101720Sphk */ 1024101720Sphk bqrelse(bp); 1025101720Sphk } 1026101720Sphk } 1027101720Sphk 1028175053Sobrien /* 1029101720Sphk * This can only happen in the case of an error 1030101720Sphk * because the loop above resets bp to NULL on each iteration 1031101720Sphk * and on normal completion has not set a new value into it. 1032101720Sphk * so it must have come from a 'break' statement 1033101720Sphk */ 1034101720Sphk if (bp != NULL) { 1035101720Sphk if ((ioflag & (IO_VMIO|IO_DIRECT)) && 1036168353Sdelphij (LIST_EMPTY(&bp->b_dep))) { 1037101720Sphk bp->b_flags |= B_RELBUF; 1038101720Sphk brelse(bp); 1039101720Sphk } else { 1040101720Sphk bqrelse(bp); 1041101720Sphk } 1042101720Sphk } 1043101720Sphk return (error); 1044101720Sphk} 1045101720Sphk 1046101720Sphk/* 1047102991Sphk * Extended attribute area writing. 
1048101720Sphk */ 1049101720Sphkstatic int 1050101789Sphkffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred) 1051101720Sphk{ 1052101720Sphk struct inode *ip; 1053101720Sphk struct ufs2_dinode *dp; 1054101720Sphk struct fs *fs; 1055101720Sphk struct buf *bp; 1056101720Sphk ufs_lbn_t lbn; 1057101720Sphk off_t osize; 1058231949Skib ssize_t resid; 1059231949Skib int blkoffset, error, flags, size, xfersize; 1060101720Sphk 1061101720Sphk ip = VTOI(vp); 1062101720Sphk fs = ip->i_fs; 1063101720Sphk dp = ip->i_din2; 1064101720Sphk 1065173464Sobrien#ifdef INVARIANTS 1066101720Sphk if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC) 1067124856Sache panic("ffs_extwrite: mode"); 1068101720Sphk#endif 1069101720Sphk 1070101720Sphk if (ioflag & IO_APPEND) 1071101720Sphk uio->uio_offset = dp->di_extsize; 1072125079Sache KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0")); 1073125079Sache KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0")); 1074124856Sache if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize) 1075101720Sphk return (EFBIG); 1076101720Sphk 1077101720Sphk resid = uio->uio_resid; 1078101720Sphk osize = dp->di_extsize; 1079101720Sphk flags = IO_EXT; 1080101720Sphk if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 1081101720Sphk flags |= IO_SYNC; 1082101720Sphk 1083101720Sphk for (error = 0; uio->uio_resid > 0;) { 1084101720Sphk lbn = lblkno(fs, uio->uio_offset); 1085101720Sphk blkoffset = blkoff(fs, uio->uio_offset); 1086101720Sphk xfersize = fs->fs_bsize - blkoffset; 1087101720Sphk if (uio->uio_resid < xfersize) 1088101720Sphk xfersize = uio->uio_resid; 1089101720Sphk 1090175053Sobrien /* 1091101720Sphk * We must perform a read-before-write if the transfer size 1092101720Sphk * does not cover the entire buffer. 
1093262779Spfg */ 1094101720Sphk if (fs->fs_bsize > xfersize) 1095101720Sphk flags |= BA_CLRBUF; 1096101720Sphk else 1097101720Sphk flags &= ~BA_CLRBUF; 1098101720Sphk error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 1099101789Sphk ucred, flags, &bp); 1100101720Sphk if (error != 0) 1101101720Sphk break; 1102101720Sphk /* 1103101720Sphk * If the buffer is not valid we have to clear out any 1104101720Sphk * garbage data from the pages instantiated for the buffer. 1105101720Sphk * If we do not, a failed uiomove() during a write can leave 1106101720Sphk * the prior contents of the pages exposed to a userland 1107101720Sphk * mmap(). XXX deal with uiomove() errors a better way. 1108101720Sphk */ 1109101720Sphk if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) 1110101720Sphk vfs_bio_clrbuf(bp); 1111101720Sphk if (ioflag & IO_DIRECT) 1112101720Sphk bp->b_flags |= B_DIRECT; 1113101720Sphk 1114101720Sphk if (uio->uio_offset + xfersize > dp->di_extsize) 1115101720Sphk dp->di_extsize = uio->uio_offset + xfersize; 1116101720Sphk 1117101720Sphk size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid; 1118101720Sphk if (size < xfersize) 1119101720Sphk xfersize = size; 1120101720Sphk 1121101720Sphk error = 1122101720Sphk uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 1123101720Sphk if ((ioflag & (IO_VMIO|IO_DIRECT)) && 1124168353Sdelphij (LIST_EMPTY(&bp->b_dep))) { 1125101720Sphk bp->b_flags |= B_RELBUF; 1126101720Sphk } 1127101720Sphk 1128101720Sphk /* 1129101720Sphk * If IO_SYNC each buffer is written synchronously. Otherwise 1130175053Sobrien * if we have a severe page deficiency write the buffer 1131101720Sphk * asynchronously. Otherwise try to cluster, and if that 1132101720Sphk * doesn't do it then either do an async write (if O_DIRECT), 1133101720Sphk * or a delayed write (if not). 
1134101720Sphk */ 1135101720Sphk if (ioflag & IO_SYNC) { 1136101720Sphk (void)bwrite(bp); 1137101720Sphk } else if (vm_page_count_severe() || 1138101720Sphk buf_dirty_count_severe() || 1139101720Sphk xfersize + blkoffset == fs->fs_bsize || 1140101720Sphk (ioflag & (IO_ASYNC | IO_DIRECT))) 1141101720Sphk bawrite(bp); 1142101720Sphk else 1143101720Sphk bdwrite(bp); 1144101720Sphk if (error || xfersize == 0) 1145101720Sphk break; 1146187790Srwatson ip->i_flag |= IN_CHANGE; 1147101720Sphk } 1148101720Sphk /* 1149101720Sphk * If we successfully wrote any data, and we are not the superuser 1150101720Sphk * we clear the setuid and setgid bits as a precaution against 1151101720Sphk * tampering. 1152101720Sphk */ 1153167151Spjd if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) { 1154170587Srwatson if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) { 1155167151Spjd ip->i_mode &= ~(ISUID | ISGID); 1156167151Spjd dp->di_mode = ip->i_mode; 1157167151Spjd } 1158101720Sphk } 1159101720Sphk if (error) { 1160101720Sphk if (ioflag & IO_UNIT) { 1161141526Sphk (void)ffs_truncate(vp, osize, 1162234605Strasz IO_EXT | (ioflag&IO_SYNC), ucred); 1163101720Sphk uio->uio_offset -= resid - uio->uio_resid; 1164101720Sphk uio->uio_resid = resid; 1165101720Sphk } 1166101720Sphk } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) 1167141526Sphk error = ffs_update(vp, 1); 1168101720Sphk return (error); 1169101720Sphk} 1170101780Sphk 1171102090Sphk 1172101780Sphk/* 1173101780Sphk * Vnode operating to retrieve a named extended attribute. 1174102090Sphk * 1175102090Sphk * Locate a particular EA (nspace:name) in the area (ptr:length), and return 1176102090Sphk * the length of the EA, and possibly the pointer to the entry and to the data. 
1177101780Sphk */ 1178102090Sphkstatic int 1179118607Sjhbffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac) 1180102090Sphk{ 1181102090Sphk u_char *p, *pe, *pn, *p0; 1182102090Sphk int eapad1, eapad2, ealength, ealen, nlen; 1183102090Sphk uint32_t ul; 1184102090Sphk 1185102090Sphk pe = ptr + length; 1186102090Sphk nlen = strlen(name); 1187102090Sphk 1188102090Sphk for (p = ptr; p < pe; p = pn) { 1189102090Sphk p0 = p; 1190102090Sphk bcopy(p, &ul, sizeof(ul)); 1191102090Sphk pn = p + ul; 1192102090Sphk /* make sure this entry is complete */ 1193102090Sphk if (pn > pe) 1194102090Sphk break; 1195102090Sphk p += sizeof(uint32_t); 1196102090Sphk if (*p != nspace) 1197102090Sphk continue; 1198102090Sphk p++; 1199102090Sphk eapad2 = *p++; 1200102090Sphk if (*p != nlen) 1201102090Sphk continue; 1202102090Sphk p++; 1203102090Sphk if (bcmp(p, name, nlen)) 1204102090Sphk continue; 1205102090Sphk ealength = sizeof(uint32_t) + 3 + nlen; 1206102090Sphk eapad1 = 8 - (ealength % 8); 1207102090Sphk if (eapad1 == 8) 1208102090Sphk eapad1 = 0; 1209102090Sphk ealength += eapad1; 1210102090Sphk ealen = ul - ealength - eapad2; 1211102090Sphk p += nlen + eapad1; 1212102090Sphk if (eap != NULL) 1213102090Sphk *eap = p0; 1214102090Sphk if (eac != NULL) 1215102090Sphk *eac = p; 1216102090Sphk return (ealen); 1217102090Sphk } 1218102608Sphk return(-1); 1219102090Sphk} 1220102090Sphk 1221102090Sphkstatic int 1222102090Sphkffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra) 1223102090Sphk{ 1224102090Sphk struct inode *ip; 1225102090Sphk struct ufs2_dinode *dp; 1226171437Srodrigc struct fs *fs; 1227102090Sphk struct uio luio; 1228102090Sphk struct iovec liovec; 1229259223Spfg u_int easize; 1230259223Spfg int error; 1231102090Sphk u_char *eae; 1232102090Sphk 1233102090Sphk ip = VTOI(vp); 1234171437Srodrigc fs = ip->i_fs; 1235102090Sphk dp = ip->i_din2; 1236102090Sphk easize = dp->di_extsize; 1237171437Srodrigc if 
((uoff_t)easize + extra > NXADDR * fs->fs_bsize) 1238171437Srodrigc return (EFBIG); 1239102090Sphk 1240111119Simp eae = malloc(easize + extra, M_TEMP, M_WAITOK); 1241102090Sphk 1242102090Sphk liovec.iov_base = eae; 1243102090Sphk liovec.iov_len = easize; 1244102090Sphk luio.uio_iov = &liovec; 1245102090Sphk luio.uio_iovcnt = 1; 1246102090Sphk luio.uio_offset = 0; 1247102090Sphk luio.uio_resid = easize; 1248102090Sphk luio.uio_segflg = UIO_SYSSPACE; 1249102090Sphk luio.uio_rw = UIO_READ; 1250102090Sphk luio.uio_td = td; 1251102090Sphk 1252102090Sphk error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC); 1253102090Sphk if (error) { 1254102090Sphk free(eae, M_TEMP); 1255102090Sphk return(error); 1256102090Sphk } 1257102090Sphk *p = eae; 1258102090Sphk return (0); 1259102090Sphk} 1260102090Sphk 1261189737Skibstatic void 1262189737Skibffs_lock_ea(struct vnode *vp) 1263189737Skib{ 1264189737Skib struct inode *ip; 1265189737Skib 1266189737Skib ip = VTOI(vp); 1267189737Skib VI_LOCK(vp); 1268189737Skib while (ip->i_flag & IN_EA_LOCKED) { 1269189737Skib ip->i_flag |= IN_EA_LOCKWAIT; 1270189737Skib msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea", 1271189737Skib 0); 1272189737Skib } 1273189737Skib ip->i_flag |= IN_EA_LOCKED; 1274189737Skib VI_UNLOCK(vp); 1275189737Skib} 1276189737Skib 1277189737Skibstatic void 1278189737Skibffs_unlock_ea(struct vnode *vp) 1279189737Skib{ 1280189737Skib struct inode *ip; 1281189737Skib 1282189737Skib ip = VTOI(vp); 1283189737Skib VI_LOCK(vp); 1284189737Skib if (ip->i_flag & IN_EA_LOCKWAIT) 1285189737Skib wakeup(&ip->i_ea_refs); 1286189737Skib ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT); 1287189737Skib VI_UNLOCK(vp); 1288189737Skib} 1289189737Skib 1290102991Sphkstatic int 1291102991Sphkffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td) 1292102991Sphk{ 1293102991Sphk struct inode *ip; 1294102991Sphk struct ufs2_dinode *dp; 1295102991Sphk int error; 1296102991Sphk 1297102991Sphk ip = VTOI(vp); 1298102991Sphk 
1299189737Skib ffs_lock_ea(vp); 1300189737Skib if (ip->i_ea_area != NULL) { 1301189737Skib ip->i_ea_refs++; 1302189737Skib ffs_unlock_ea(vp); 1303189737Skib return (0); 1304189737Skib } 1305102991Sphk dp = ip->i_din2; 1306102991Sphk error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0); 1307189737Skib if (error) { 1308189737Skib ffs_unlock_ea(vp); 1309102991Sphk return (error); 1310189737Skib } 1311102991Sphk ip->i_ea_len = dp->di_extsize; 1312102991Sphk ip->i_ea_error = 0; 1313189737Skib ip->i_ea_refs++; 1314189737Skib ffs_unlock_ea(vp); 1315102991Sphk return (0); 1316102991Sphk} 1317102991Sphk 1318102090Sphk/* 1319102991Sphk * Vnode extattr transaction commit/abort 1320102090Sphk */ 1321102991Sphkstatic int 1322102991Sphkffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td) 1323102991Sphk{ 1324102991Sphk struct inode *ip; 1325102991Sphk struct uio luio; 1326102991Sphk struct iovec liovec; 1327102991Sphk int error; 1328102991Sphk struct ufs2_dinode *dp; 1329102991Sphk 1330102991Sphk ip = VTOI(vp); 1331189737Skib 1332189737Skib ffs_lock_ea(vp); 1333189737Skib if (ip->i_ea_area == NULL) { 1334189737Skib ffs_unlock_ea(vp); 1335102991Sphk return (EINVAL); 1336189737Skib } 1337102991Sphk dp = ip->i_din2; 1338102991Sphk error = ip->i_ea_error; 1339102991Sphk if (commit && error == 0) { 1340189737Skib ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit"); 1341104051Sphk if (cred == NOCRED) 1342104051Sphk cred = vp->v_mount->mnt_cred; 1343102991Sphk liovec.iov_base = ip->i_ea_area; 1344102991Sphk liovec.iov_len = ip->i_ea_len; 1345102991Sphk luio.uio_iov = &liovec; 1346102991Sphk luio.uio_iovcnt = 1; 1347102991Sphk luio.uio_offset = 0; 1348102991Sphk luio.uio_resid = ip->i_ea_len; 1349102991Sphk luio.uio_segflg = UIO_SYSSPACE; 1350102991Sphk luio.uio_rw = UIO_WRITE; 1351102991Sphk luio.uio_td = td; 1352102991Sphk /* XXX: I'm not happy about truncating to zero size */ 1353102991Sphk if (ip->i_ea_len < dp->di_extsize) 1354234605Strasz error = ffs_truncate(vp, 
0, IO_EXT, cred); 1355102991Sphk error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred); 1356102991Sphk } 1357189737Skib if (--ip->i_ea_refs == 0) { 1358189737Skib free(ip->i_ea_area, M_TEMP); 1359189737Skib ip->i_ea_area = NULL; 1360189737Skib ip->i_ea_len = 0; 1361189737Skib ip->i_ea_error = 0; 1362189737Skib } 1363189737Skib ffs_unlock_ea(vp); 1364102991Sphk return (error); 1365102991Sphk} 1366102991Sphk 1367102991Sphk/* 1368137035Sphk * Vnode extattr strategy routine for fifos. 1369105136Smckusick * 1370105136Smckusick * We need to check for a read or write of the external attributes. 1371105136Smckusick * Otherwise we just fall through and do the usual thing. 1372105136Smckusick */ 1373105136Smckusickstatic int 1374105136Smckusickffsext_strategy(struct vop_strategy_args *ap) 1375105136Smckusick/* 1376105136Smckusickstruct vop_strategy_args { 1377105136Smckusick struct vnodeop_desc *a_desc; 1378105136Smckusick struct vnode *a_vp; 1379105136Smckusick struct buf *a_bp; 1380105136Smckusick}; 1381105136Smckusick*/ 1382105136Smckusick{ 1383105136Smckusick struct vnode *vp; 1384105136Smckusick daddr_t lbn; 1385105136Smckusick 1386105136Smckusick vp = ap->a_vp; 1387105136Smckusick lbn = ap->a_bp->b_lblkno; 1388105136Smckusick if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC && 1389105136Smckusick lbn < 0 && lbn >= -NXADDR) 1390141520Sphk return (VOP_STRATEGY_APV(&ufs_vnodeops, ap)); 1391105136Smckusick if (vp->v_type == VFIFO) 1392141520Sphk return (VOP_STRATEGY_APV(&ufs_fifoops, ap)); 1393135877Sphk panic("spec nodes went here"); 1394105136Smckusick} 1395105136Smckusick 1396105136Smckusick/* 1397102991Sphk * Vnode extattr transaction commit/abort 1398102991Sphk */ 1399104094Sphkstatic int 1400102991Sphkffs_openextattr(struct vop_openextattr_args *ap) 1401102991Sphk/* 1402102991Sphkstruct vop_openextattr_args { 1403102991Sphk struct vnodeop_desc *a_desc; 1404102991Sphk struct vnode *a_vp; 1405102991Sphk IN struct ucred *a_cred; 1406102991Sphk IN struct thread *a_td; 
1407102991Sphk}; 1408102991Sphk*/ 1409102991Sphk{ 1410102991Sphk 1411195265Strasz if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1412115588Srwatson return (EOPNOTSUPP); 1413115588Srwatson 1414102991Sphk return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td)); 1415102991Sphk} 1416102991Sphk 1417102991Sphk 1418102991Sphk/* 1419102991Sphk * Vnode extattr transaction commit/abort 1420102991Sphk */ 1421104094Sphkstatic int 1422102991Sphkffs_closeextattr(struct vop_closeextattr_args *ap) 1423102991Sphk/* 1424102991Sphkstruct vop_closeextattr_args { 1425102991Sphk struct vnodeop_desc *a_desc; 1426102991Sphk struct vnode *a_vp; 1427102991Sphk int a_commit; 1428102991Sphk IN struct ucred *a_cred; 1429102991Sphk IN struct thread *a_td; 1430102991Sphk}; 1431102991Sphk*/ 1432102991Sphk{ 1433102991Sphk 1434195265Strasz if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1435115588Srwatson return (EOPNOTSUPP); 1436115588Srwatson 1437166864Smckusick if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) 1438166864Smckusick return (EROFS); 1439166864Smckusick 1440102991Sphk return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td)); 1441102991Sphk} 1442102991Sphk 1443118131Srwatson/* 1444118131Srwatson * Vnode operation to remove a named attribute. 
1445118131Srwatson */ 1446118131Srwatsonstatic int 1447118131Srwatsonffs_deleteextattr(struct vop_deleteextattr_args *ap) 1448118131Srwatson/* 1449118131Srwatsonvop_deleteextattr { 1450118131Srwatson IN struct vnode *a_vp; 1451118131Srwatson IN int a_attrnamespace; 1452118131Srwatson IN const char *a_name; 1453118131Srwatson IN struct ucred *a_cred; 1454118131Srwatson IN struct thread *a_td; 1455118131Srwatson}; 1456118131Srwatson*/ 1457118131Srwatson{ 1458118131Srwatson struct inode *ip; 1459118131Srwatson struct fs *fs; 1460118131Srwatson uint32_t ealength, ul; 1461118131Srwatson int ealen, olen, eapad1, eapad2, error, i, easize; 1462118131Srwatson u_char *eae, *p; 1463102991Sphk 1464118131Srwatson ip = VTOI(ap->a_vp); 1465118131Srwatson fs = ip->i_fs; 1466102991Sphk 1467195265Strasz if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1468118131Srwatson return (EOPNOTSUPP); 1469118131Srwatson 1470118131Srwatson if (strlen(ap->a_name) == 0) 1471118131Srwatson return (EINVAL); 1472118131Srwatson 1473166864Smckusick if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) 1474166864Smckusick return (EROFS); 1475166864Smckusick 1476118131Srwatson error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1477182721Strasz ap->a_cred, ap->a_td, VWRITE); 1478118131Srwatson if (error) { 1479189737Skib 1480189737Skib /* 1481189737Skib * ffs_lock_ea is not needed there, because the vnode 1482190469Skib * must be exclusively locked. 
1483189737Skib */ 1484118131Srwatson if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1485118131Srwatson ip->i_ea_error = error; 1486118131Srwatson return (error); 1487118131Srwatson } 1488118131Srwatson 1489189737Skib error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1490189737Skib if (error) 1491189737Skib return (error); 1492118131Srwatson 1493118131Srwatson ealength = eapad1 = ealen = eapad2 = 0; 1494118131Srwatson 1495118131Srwatson eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK); 1496118131Srwatson bcopy(ip->i_ea_area, eae, ip->i_ea_len); 1497118131Srwatson easize = ip->i_ea_len; 1498118131Srwatson 1499118131Srwatson olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, 1500118131Srwatson &p, NULL); 1501118131Srwatson if (olen == -1) { 1502118131Srwatson /* delete but nonexistent */ 1503118131Srwatson free(eae, M_TEMP); 1504189737Skib ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1505118131Srwatson return(ENOATTR); 1506118131Srwatson } 1507118131Srwatson bcopy(p, &ul, sizeof ul); 1508118131Srwatson i = p - eae + ul; 1509118131Srwatson if (ul != ealength) { 1510118131Srwatson bcopy(p + ul, p + ealength, easize - i); 1511118131Srwatson easize += (ealength - ul); 1512118131Srwatson } 1513118131Srwatson if (easize > NXADDR * fs->fs_bsize) { 1514118131Srwatson free(eae, M_TEMP); 1515189737Skib ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1516189737Skib if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1517118131Srwatson ip->i_ea_error = ENOSPC; 1518118131Srwatson return(ENOSPC); 1519118131Srwatson } 1520118131Srwatson p = ip->i_ea_area; 1521118131Srwatson ip->i_ea_area = eae; 1522118131Srwatson ip->i_ea_len = easize; 1523118131Srwatson free(p, M_TEMP); 1524189737Skib error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); 1525118131Srwatson return(error); 1526118131Srwatson} 1527118131Srwatson 1528102991Sphk/* 1529102991Sphk * Vnode operation to retrieve a named extended attribute. 
1530102991Sphk */ 1531104094Sphkstatic int 1532101780Sphkffs_getextattr(struct vop_getextattr_args *ap) 1533101780Sphk/* 1534101780Sphkvop_getextattr { 1535101780Sphk IN struct vnode *a_vp; 1536101780Sphk IN int a_attrnamespace; 1537101780Sphk IN const char *a_name; 1538101780Sphk INOUT struct uio *a_uio; 1539104346Sdd OUT size_t *a_size; 1540101780Sphk IN struct ucred *a_cred; 1541101780Sphk IN struct thread *a_td; 1542101780Sphk}; 1543101780Sphk*/ 1544101780Sphk{ 1545102090Sphk struct inode *ip; 1546115869Srwatson u_char *eae, *p; 1547115869Srwatson unsigned easize; 1548189737Skib int error, ealen; 1549115869Srwatson 1550115869Srwatson ip = VTOI(ap->a_vp); 1551115869Srwatson 1552195265Strasz if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1553115869Srwatson return (EOPNOTSUPP); 1554115869Srwatson 1555115869Srwatson error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1556182721Strasz ap->a_cred, ap->a_td, VREAD); 1557115869Srwatson if (error) 1558115869Srwatson return (error); 1559115869Srwatson 1560189737Skib error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1561189737Skib if (error) 1562189737Skib return (error); 1563189737Skib 1564115869Srwatson eae = ip->i_ea_area; 1565115869Srwatson easize = ip->i_ea_len; 1566115869Srwatson 1567115869Srwatson ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, 1568115869Srwatson NULL, &p); 1569115869Srwatson if (ealen >= 0) { 1570115869Srwatson error = 0; 1571115869Srwatson if (ap->a_size != NULL) 1572115869Srwatson *ap->a_size = ealen; 1573115869Srwatson else if (ap->a_uio != NULL) 1574115869Srwatson error = uiomove(p, ealen, ap->a_uio); 1575115869Srwatson } else 1576115869Srwatson error = ENOATTR; 1577189737Skib 1578189737Skib ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1579115869Srwatson return(error); 1580115869Srwatson} 1581115869Srwatson 1582115869Srwatson/* 1583115869Srwatson * Vnode operation to retrieve extended attributes on a vnode. 
1584115869Srwatson */ 1585115869Srwatsonstatic int 1586115869Srwatsonffs_listextattr(struct vop_listextattr_args *ap) 1587115869Srwatson/* 1588115869Srwatsonvop_listextattr { 1589115869Srwatson IN struct vnode *a_vp; 1590115869Srwatson IN int a_attrnamespace; 1591115869Srwatson INOUT struct uio *a_uio; 1592115869Srwatson OUT size_t *a_size; 1593115869Srwatson IN struct ucred *a_cred; 1594115869Srwatson IN struct thread *a_td; 1595115869Srwatson}; 1596115869Srwatson*/ 1597115869Srwatson{ 1598115869Srwatson struct inode *ip; 1599102175Sphk u_char *eae, *p, *pe, *pn; 1600102090Sphk unsigned easize; 1601102175Sphk uint32_t ul; 1602189737Skib int error, ealen; 1603101780Sphk 1604102090Sphk ip = VTOI(ap->a_vp); 1605102090Sphk 1606195265Strasz if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1607115588Srwatson return (EOPNOTSUPP); 1608115588Srwatson 1609102991Sphk error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1610182721Strasz ap->a_cred, ap->a_td, VREAD); 1611102090Sphk if (error) 1612102090Sphk return (error); 1613102991Sphk 1614189737Skib error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1615189737Skib if (error) 1616189737Skib return (error); 1617102991Sphk eae = ip->i_ea_area; 1618102991Sphk easize = ip->i_ea_len; 1619115869Srwatson 1620115869Srwatson error = 0; 1621115869Srwatson if (ap->a_size != NULL) 1622115869Srwatson *ap->a_size = 0; 1623115869Srwatson pe = eae + easize; 1624115869Srwatson for(p = eae; error == 0 && p < pe; p = pn) { 1625115869Srwatson bcopy(p, &ul, sizeof(ul)); 1626115869Srwatson pn = p + ul; 1627115869Srwatson if (pn > pe) 1628115869Srwatson break; 1629115869Srwatson p += sizeof(ul); 1630115869Srwatson if (*p++ != ap->a_attrnamespace) 1631115869Srwatson continue; 1632115869Srwatson p++; /* pad2 */ 1633115869Srwatson ealen = *p; 1634115869Srwatson if (ap->a_size != NULL) { 1635115869Srwatson *ap->a_size += ealen + 1; 1636115869Srwatson } else if (ap->a_uio != NULL) { 1637115869Srwatson error = uiomove(p, ealen + 1, 
ap->a_uio); 1638102175Sphk } 1639102090Sphk } 1640189737Skib ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1641102090Sphk return(error); 1642101780Sphk} 1643101780Sphk 1644101780Sphk/* 1645101780Sphk * Vnode operation to set a named attribute. 1646101780Sphk */ 1647104094Sphkstatic int 1648101780Sphkffs_setextattr(struct vop_setextattr_args *ap) 1649101780Sphk/* 1650101780Sphkvop_setextattr { 1651101780Sphk IN struct vnode *a_vp; 1652101780Sphk IN int a_attrnamespace; 1653101780Sphk IN const char *a_name; 1654101780Sphk INOUT struct uio *a_uio; 1655101780Sphk IN struct ucred *a_cred; 1656101780Sphk IN struct thread *a_td; 1657101780Sphk}; 1658101780Sphk*/ 1659101780Sphk{ 1660102090Sphk struct inode *ip; 1661102090Sphk struct fs *fs; 1662102090Sphk uint32_t ealength, ul; 1663237366Skib ssize_t ealen; 1664237366Skib int olen, eapad1, eapad2, error, i, easize; 1665102090Sphk u_char *eae, *p; 1666101780Sphk 1667102090Sphk ip = VTOI(ap->a_vp); 1668102090Sphk fs = ip->i_fs; 1669102090Sphk 1670195265Strasz if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1671115588Srwatson return (EOPNOTSUPP); 1672115588Srwatson 1673115869Srwatson if (strlen(ap->a_name) == 0) 1674115869Srwatson return (EINVAL); 1675115869Srwatson 1676118131Srwatson /* XXX Now unsupported API to delete EAs using NULL uio. */ 1677118131Srwatson if (ap->a_uio == NULL) 1678118131Srwatson return (EOPNOTSUPP); 1679118131Srwatson 1680166864Smckusick if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) 1681166864Smckusick return (EROFS); 1682166864Smckusick 1683237366Skib ealen = ap->a_uio->uio_resid; 1684237366Skib if (ealen < 0 || ealen > lblktosize(fs, NXADDR)) 1685237366Skib return (EINVAL); 1686237366Skib 1687102991Sphk error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1688182721Strasz ap->a_cred, ap->a_td, VWRITE); 1689102991Sphk if (error) { 1690189737Skib 1691189737Skib /* 1692189737Skib * ffs_lock_ea is not needed there, because the vnode 1693190469Skib * must be exclusively locked. 
1694189737Skib */ 1695102991Sphk if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1696102991Sphk ip->i_ea_error = error; 1697102991Sphk return (error); 1698102991Sphk } 1699102991Sphk 1700189737Skib error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1701189737Skib if (error) 1702189737Skib return (error); 1703102991Sphk 1704118131Srwatson ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); 1705118131Srwatson eapad1 = 8 - (ealength % 8); 1706118131Srwatson if (eapad1 == 8) 1707118131Srwatson eapad1 = 0; 1708118131Srwatson eapad2 = 8 - (ealen % 8); 1709118131Srwatson if (eapad2 == 8) 1710118131Srwatson eapad2 = 0; 1711118131Srwatson ealength += eapad1 + ealen + eapad2; 1712102090Sphk 1713111119Simp eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK); 1714102991Sphk bcopy(ip->i_ea_area, eae, ip->i_ea_len); 1715102991Sphk easize = ip->i_ea_len; 1716102090Sphk 1717102608Sphk olen = ffs_findextattr(eae, easize, 1718102090Sphk ap->a_attrnamespace, ap->a_name, &p, NULL); 1719102991Sphk if (olen == -1) { 1720102090Sphk /* new, append at end */ 1721102090Sphk p = eae + easize; 1722102090Sphk easize += ealength; 1723102608Sphk } else { 1724102090Sphk bcopy(p, &ul, sizeof ul); 1725102090Sphk i = p - eae + ul; 1726102608Sphk if (ul != ealength) { 1727102608Sphk bcopy(p + ul, p + ealength, easize - i); 1728102608Sphk easize += (ealength - ul); 1729102608Sphk } 1730102090Sphk } 1731237366Skib if (easize > lblktosize(fs, NXADDR)) { 1732102090Sphk free(eae, M_TEMP); 1733189737Skib ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1734189737Skib if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1735102991Sphk ip->i_ea_error = ENOSPC; 1736102090Sphk return(ENOSPC); 1737102090Sphk } 1738118131Srwatson bcopy(&ealength, p, sizeof(ealength)); 1739118131Srwatson p += sizeof(ealength); 1740118131Srwatson *p++ = ap->a_attrnamespace; 1741118131Srwatson *p++ = eapad2; 1742118131Srwatson *p++ = strlen(ap->a_name); 1743118131Srwatson strcpy(p, ap->a_name); 1744118131Srwatson p += 
strlen(ap->a_name); 1745118131Srwatson bzero(p, eapad1); 1746118131Srwatson p += eapad1; 1747118131Srwatson error = uiomove(p, ealen, ap->a_uio); 1748118131Srwatson if (error) { 1749118131Srwatson free(eae, M_TEMP); 1750189737Skib ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1751189737Skib if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1752118131Srwatson ip->i_ea_error = error; 1753118131Srwatson return(error); 1754102090Sphk } 1755118131Srwatson p += ealen; 1756118131Srwatson bzero(p, eapad2); 1757118131Srwatson 1758102991Sphk p = ip->i_ea_area; 1759102991Sphk ip->i_ea_area = eae; 1760102991Sphk ip->i_ea_len = easize; 1761102991Sphk free(p, M_TEMP); 1762189737Skib error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); 1763102090Sphk return(error); 1764101780Sphk} 1765166774Spjd 1766166774Spjd/* 1767166774Spjd * Vnode pointer to File handle 1768166774Spjd */ 1769166774Spjdstatic int 1770166774Spjdffs_vptofh(struct vop_vptofh_args *ap) 1771166774Spjd/* 1772166774Spjdvop_vptofh { 1773166774Spjd IN struct vnode *a_vp; 1774166774Spjd IN struct fid *a_fhp; 1775166774Spjd}; 1776166774Spjd*/ 1777166774Spjd{ 1778166774Spjd struct inode *ip; 1779166774Spjd struct ufid *ufhp; 1780166774Spjd 1781166774Spjd ip = VTOI(ap->a_vp); 1782166774Spjd ufhp = (struct ufid *)ap->a_fhp; 1783166774Spjd ufhp->ufid_len = sizeof(struct ufid); 1784166774Spjd ufhp->ufid_ino = ip->i_number; 1785166774Spjd ufhp->ufid_gen = ip->i_gen; 1786166774Spjd return (0); 1787166774Spjd} 1788