nfs_bio.c revision 45347
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
35 * 36 * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 37 * $Id: nfs_bio.c,v 1.67 1999/03/12 02:24:58 julian Exp $ 38 */ 39 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/resourcevar.h> 44#include <sys/signalvar.h> 45#include <sys/proc.h> 46#include <sys/buf.h> 47#include <sys/vnode.h> 48#include <sys/mount.h> 49#include <sys/kernel.h> 50 51#include <vm/vm.h> 52#include <vm/vm_extern.h> 53#include <vm/vm_prot.h> 54#include <vm/vm_page.h> 55#include <vm/vm_object.h> 56#include <vm/vm_pager.h> 57#include <vm/vnode_pager.h> 58 59#include <nfs/rpcv2.h> 60#include <nfs/nfsproto.h> 61#include <nfs/nfs.h> 62#include <nfs/nfsmount.h> 63#include <nfs/nqnfs.h> 64#include <nfs/nfsnode.h> 65 66static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, 67 struct proc *p)); 68static void nfs_prot_buf __P((struct buf *bp, int off, int n)); 69 70extern int nfs_numasync; 71extern int nfs_pbuf_freecnt; 72extern struct nfsstats nfsstats; 73 74/* 75 * Vnode op for VM getpages. 76 */ 77int 78nfs_getpages(ap) 79 struct vop_getpages_args /* { 80 struct vnode *a_vp; 81 vm_page_t *a_m; 82 int a_count; 83 int a_reqpage; 84 vm_ooffset_t a_offset; 85 } */ *ap; 86{ 87 int i, error, nextoff, size, toff, npages, count; 88 struct uio uio; 89 struct iovec iov; 90 vm_offset_t kva; 91 struct buf *bp; 92 struct vnode *vp; 93 struct proc *p; 94 struct ucred *cred; 95 struct nfsmount *nmp; 96 vm_page_t *pages; 97 98 vp = ap->a_vp; 99 p = curproc; /* XXX */ 100 cred = curproc->p_ucred; /* XXX */ 101 nmp = VFSTONFS(vp->v_mount); 102 pages = ap->a_m; 103 count = ap->a_count; 104 105 if (vp->v_object == NULL) { 106 printf("nfs_getpages: called with non-merged cache vnode??\n"); 107 return VM_PAGER_ERROR; 108 } 109 110 if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 111 (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 112 (void)nfs_fsinfo(nmp, vp, cred, p); 113 /* 114 * We use only the kva address for the buffer, but this is extremely 115 * convienient and fast. 
116 */ 117 bp = getpbuf(&nfs_pbuf_freecnt); 118 119 npages = btoc(count); 120 kva = (vm_offset_t) bp->b_data; 121 pmap_qenter(kva, pages, npages); 122 123 iov.iov_base = (caddr_t) kva; 124 iov.iov_len = count; 125 uio.uio_iov = &iov; 126 uio.uio_iovcnt = 1; 127 uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); 128 uio.uio_resid = count; 129 uio.uio_segflg = UIO_SYSSPACE; 130 uio.uio_rw = UIO_READ; 131 uio.uio_procp = p; 132 133 error = nfs_readrpc(vp, &uio, cred); 134 pmap_qremove(kva, npages); 135 136 relpbuf(bp, &nfs_pbuf_freecnt); 137 138 if (error && (uio.uio_resid == count)) { 139 printf("nfs_getpages: error %d\n", error); 140 for (i = 0; i < npages; ++i) { 141 if (i != ap->a_reqpage) 142 vnode_pager_freepage(pages[i]); 143 } 144 return VM_PAGER_ERROR; 145 } 146 147 /* 148 * Calculate the number of bytes read and validate only that number 149 * of bytes. Note that due to pending writes, size may be 0. This 150 * does not mean that the remaining data is invalid! 151 */ 152 153 size = count - uio.uio_resid; 154 155 for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { 156 vm_page_t m; 157 nextoff = toff + PAGE_SIZE; 158 m = pages[i]; 159 160 m->flags &= ~PG_ZERO; 161 162 if (nextoff <= size) { 163 /* 164 * Read operation filled an entire page 165 */ 166 m->valid = VM_PAGE_BITS_ALL; 167 m->dirty = 0; 168 } else if (size > toff) { 169 /* 170 * Read operation filled a partial page, set valid 171 * bits properly. validclean will zero out 172 * any cruft in the buffer when setting a valid bit, 173 * if the size is not DEV_BSIZE aligned. 174 */ 175 vm_page_set_validclean(m, 0, size - toff); 176 } 177 178 if (i != ap->a_reqpage) { 179 /* 180 * Whether or not to leave the page activated is up in 181 * the air, but we should put the page on a page queue 182 * somewhere (it already is in the object). Result: 183 * It appears that emperical results show that 184 * deactivating pages is best. 
185 */ 186 187 /* 188 * Just in case someone was asking for this page we 189 * now tell them that it is ok to use. 190 */ 191 if (!error) { 192 if (m->flags & PG_WANTED) 193 vm_page_activate(m); 194 else 195 vm_page_deactivate(m); 196 vm_page_wakeup(m); 197 } else { 198 vnode_pager_freepage(m); 199 } 200 } else { 201 /* 202 * This page is being mapped, clear out any other 203 * cruft in the invalid areas of the page. 204 */ 205 if (m->valid && m->valid != VM_PAGE_BITS_ALL) 206 vm_page_zero_invalid(m, FALSE); 207 } 208 } 209 return 0; 210} 211 212/* 213 * Vnode op for VM putpages. 214 */ 215int 216nfs_putpages(ap) 217 struct vop_putpages_args /* { 218 struct vnode *a_vp; 219 vm_page_t *a_m; 220 int a_count; 221 int a_sync; 222 int *a_rtvals; 223 vm_ooffset_t a_offset; 224 } */ *ap; 225{ 226 struct uio uio; 227 struct iovec iov; 228 vm_offset_t kva; 229 struct buf *bp; 230 int iomode, must_commit, i, error, npages, count; 231 int *rtvals; 232 struct vnode *vp; 233 struct proc *p; 234 struct ucred *cred; 235 struct nfsmount *nmp; 236 vm_page_t *pages; 237 238 vp = ap->a_vp; 239 p = curproc; /* XXX */ 240 cred = curproc->p_ucred; /* XXX */ 241 nmp = VFSTONFS(vp->v_mount); 242 pages = ap->a_m; 243 count = ap->a_count; 244 rtvals = ap->a_rtvals; 245 npages = btoc(count); 246 247 if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 248 (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 249 (void)nfs_fsinfo(nmp, vp, cred, p); 250 251 for (i = 0; i < npages; i++) { 252 rtvals[i] = VM_PAGER_AGAIN; 253 } 254 255 /* 256 * We use only the kva address for the buffer, but this is extremely 257 * convienient and fast. 
258 */ 259 bp = getpbuf(&nfs_pbuf_freecnt); 260 261 kva = (vm_offset_t) bp->b_data; 262 pmap_qenter(kva, pages, npages); 263 264 iov.iov_base = (caddr_t) kva; 265 iov.iov_len = count; 266 uio.uio_iov = &iov; 267 uio.uio_iovcnt = 1; 268 uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); 269 uio.uio_resid = count; 270 uio.uio_segflg = UIO_SYSSPACE; 271 uio.uio_rw = UIO_WRITE; 272 uio.uio_procp = p; 273 274 if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) 275 iomode = NFSV3WRITE_UNSTABLE; 276 else 277 iomode = NFSV3WRITE_FILESYNC; 278 279 error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); 280 281 pmap_qremove(kva, npages); 282 relpbuf(bp, &nfs_pbuf_freecnt); 283 284 if (!error) { 285 int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; 286 for (i = 0; i < nwritten; i++) { 287 rtvals[i] = VM_PAGER_OK; 288 pages[i]->dirty = 0; 289 } 290 if (must_commit) 291 nfs_clearcommit(vp->v_mount); 292 } 293 return rtvals[0]; 294} 295 296/* 297 * Vnode op for read using bio 298 */ 299int 300nfs_bioread(vp, uio, ioflag, cred, getpages) 301 register struct vnode *vp; 302 register struct uio *uio; 303 int ioflag; 304 struct ucred *cred; 305 int getpages; 306{ 307 register struct nfsnode *np = VTONFS(vp); 308 register int biosize, i; 309 off_t diff; 310 struct buf *bp = 0, *rabp; 311 struct vattr vattr; 312 struct proc *p; 313 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 314 daddr_t lbn, rabn; 315 int bufsize; 316 int nra, error = 0, n = 0, on = 0, not_readin; 317 318#ifdef DIAGNOSTIC 319 if (uio->uio_rw != UIO_READ) 320 panic("nfs_read mode"); 321#endif 322 if (uio->uio_resid == 0) 323 return (0); 324 if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */ 325 return (EINVAL); 326 p = uio->uio_procp; 327 if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 328 (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 329 (void)nfs_fsinfo(nmp, vp, cred, p); 330 if (vp->v_type != VDIR && 331 (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) 332 return (EFBIG); 333 biosize = 
vp->v_mount->mnt_stat.f_iosize; 334 /* 335 * For nfs, cache consistency can only be maintained approximately. 336 * Although RFC1094 does not specify the criteria, the following is 337 * believed to be compatible with the reference port. 338 * For nqnfs, full cache consistency is maintained within the loop. 339 * For nfs: 340 * If the file's modify time on the server has changed since the 341 * last read rpc or you have written to the file, 342 * you may have lost data cache consistency with the 343 * server, so flush all of the file's data out of the cache. 344 * Then force a getattr rpc to ensure that you have up to date 345 * attributes. 346 * NB: This implies that cache data can be read when up to 347 * NFS_ATTRTIMEO seconds out of date. If you find that you need current 348 * attributes this could be forced by setting n_attrstamp to 0 before 349 * the VOP_GETATTR() call. 350 */ 351 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { 352 if (np->n_flag & NMODIFIED) { 353 if (vp->v_type != VREG) { 354 if (vp->v_type != VDIR) 355 panic("nfs: bioread, not dir"); 356 nfs_invaldir(vp); 357 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 358 if (error) 359 return (error); 360 } 361 np->n_attrstamp = 0; 362 error = VOP_GETATTR(vp, &vattr, cred, p); 363 if (error) 364 return (error); 365 np->n_mtime = vattr.va_mtime.tv_sec; 366 } else { 367 error = VOP_GETATTR(vp, &vattr, cred, p); 368 if (error) 369 return (error); 370 if (np->n_mtime != vattr.va_mtime.tv_sec) { 371 if (vp->v_type == VDIR) 372 nfs_invaldir(vp); 373 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 374 if (error) 375 return (error); 376 np->n_mtime = vattr.va_mtime.tv_sec; 377 } 378 } 379 } 380 do { 381 382 /* 383 * Get a valid lease. If cached data is stale, flush it. 
384 */ 385 if (nmp->nm_flag & NFSMNT_NQNFS) { 386 if (NQNFS_CKINVALID(vp, np, ND_READ)) { 387 do { 388 error = nqnfs_getlease(vp, ND_READ, cred, p); 389 } while (error == NQNFS_EXPIRED); 390 if (error) 391 return (error); 392 if (np->n_lrev != np->n_brev || 393 (np->n_flag & NQNFSNONCACHE) || 394 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 395 if (vp->v_type == VDIR) 396 nfs_invaldir(vp); 397 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 398 if (error) 399 return (error); 400 np->n_brev = np->n_lrev; 401 } 402 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { 403 nfs_invaldir(vp); 404 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 405 if (error) 406 return (error); 407 } 408 } 409 if (np->n_flag & NQNFSNONCACHE) { 410 switch (vp->v_type) { 411 case VREG: 412 return (nfs_readrpc(vp, uio, cred)); 413 case VLNK: 414 return (nfs_readlinkrpc(vp, uio, cred)); 415 case VDIR: 416 break; 417 default: 418 printf(" NQNFSNONCACHE: type %x unexpected\n", 419 vp->v_type); 420 }; 421 } 422 switch (vp->v_type) { 423 case VREG: 424 nfsstats.biocache_reads++; 425 lbn = uio->uio_offset / biosize; 426 on = uio->uio_offset & (biosize - 1); 427 not_readin = 1; 428 429 /* 430 * Start the read ahead(s), as required. 431 */ 432 if (nfs_numasync > 0 && nmp->nm_readahead > 0) { 433 for (nra = 0; nra < nmp->nm_readahead && 434 (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { 435 rabn = lbn + 1 + nra; 436 if (!incore(vp, rabn)) { 437 rabp = nfs_getcacheblk(vp, rabn, biosize, p); 438 if (!rabp) 439 return (EINTR); 440 if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 441 rabp->b_flags |= (B_READ | B_ASYNC); 442 rabp->b_flags &= ~B_DONE; 443 vfs_busy_pages(rabp, 0); 444 if (nfs_asyncio(rabp, cred)) { 445 rabp->b_flags |= B_INVAL|B_ERROR; 446 vfs_unbusy_pages(rabp); 447 brelse(rabp); 448 } 449 } else 450 brelse(rabp); 451 } 452 } 453 } 454 455 /* 456 * If the block is in the cache and has the required data 457 * in a valid region, just copy it out. 
458 * Otherwise, get the block and write back/read in, 459 * as required. 460 */ 461again: 462 bufsize = biosize; 463 if ((off_t)(lbn + 1) * biosize > np->n_size && 464 (off_t)(lbn + 1) * biosize - np->n_size < biosize) { 465 bufsize = np->n_size - (off_t)lbn * biosize; 466 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 467 } 468 bp = nfs_getcacheblk(vp, lbn, bufsize, p); 469 if (!bp) 470 return (EINTR); 471 472 /* 473 * If we are being called from nfs_getpages, we must 474 * make sure the buffer is a vmio buffer. The vp will 475 * already be setup for vmio but there may be some old 476 * non-vmio buffers attached to it. 477 */ 478 if (getpages && !(bp->b_flags & B_VMIO)) { 479#ifdef DIAGNOSTIC 480 printf("nfs_bioread: non vmio buf found, discarding\n"); 481#endif 482 bp->b_flags |= B_NOCACHE; 483 bp->b_flags |= B_INVAFTERWRITE; 484 if (bp->b_dirtyend > 0) { 485 if ((bp->b_flags & B_DELWRI) == 0) 486 panic("nfsbioread"); 487 if (VOP_BWRITE(bp) == EINTR) 488 return (EINTR); 489 } else 490 brelse(bp); 491 goto again; 492 } 493 if ((bp->b_flags & B_CACHE) == 0) { 494 bp->b_flags |= B_READ; 495 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 496 not_readin = 0; 497 vfs_busy_pages(bp, 0); 498 error = nfs_doio(bp, cred, p); 499 if (error) { 500 brelse(bp); 501 return (error); 502 } 503 } 504 if (bufsize > on) { 505 n = min((unsigned)(bufsize - on), uio->uio_resid); 506 } else { 507 n = 0; 508 } 509 diff = np->n_size - uio->uio_offset; 510 if (diff < n) 511 n = diff; 512 if (not_readin && n > 0) { 513 if (on < bp->b_validoff || (on + n) > bp->b_validend) { 514 bp->b_flags |= B_NOCACHE; 515 bp->b_flags |= B_INVAFTERWRITE; 516 if (bp->b_dirtyend > 0) { 517 if ((bp->b_flags & B_DELWRI) == 0) 518 panic("nfsbioread"); 519 if (VOP_BWRITE(bp) == EINTR) 520 return (EINTR); 521 } else 522 brelse(bp); 523 goto again; 524 } 525 } 526 vp->v_lastr = lbn; 527 diff = (on >= bp->b_validend) ? 
0 : (bp->b_validend - on); 528 if (diff < n) 529 n = diff; 530 break; 531 case VLNK: 532 nfsstats.biocache_readlinks++; 533 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); 534 if (!bp) 535 return (EINTR); 536 if ((bp->b_flags & B_CACHE) == 0) { 537 bp->b_flags |= B_READ; 538 bp->b_flags &= ~B_DONE; 539 vfs_busy_pages(bp, 0); 540 error = nfs_doio(bp, cred, p); 541 if (error) { 542 bp->b_flags |= B_ERROR; 543 brelse(bp); 544 return (error); 545 } 546 } 547 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 548 on = 0; 549 break; 550 case VDIR: 551 nfsstats.biocache_readdirs++; 552 if (np->n_direofoffset 553 && uio->uio_offset >= np->n_direofoffset) { 554 return (0); 555 } 556 lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ; 557 on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); 558 bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); 559 if (!bp) 560 return (EINTR); 561 if ((bp->b_flags & B_CACHE) == 0) { 562 bp->b_flags |= B_READ; 563 bp->b_flags &= ~B_DONE; 564 vfs_busy_pages(bp, 0); 565 error = nfs_doio(bp, cred, p); 566 if (error) { 567 brelse(bp); 568 } 569 while (error == NFSERR_BAD_COOKIE) { 570 nfs_invaldir(vp); 571 error = nfs_vinvalbuf(vp, 0, cred, p, 1); 572 /* 573 * Yuck! The directory has been modified on the 574 * server. The only way to get the block is by 575 * reading from the beginning to get all the 576 * offset cookies. 
577 */ 578 for (i = 0; i <= lbn && !error; i++) { 579 if (np->n_direofoffset 580 && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) 581 return (0); 582 bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); 583 if (!bp) 584 return (EINTR); 585 if ((bp->b_flags & B_DONE) == 0) { 586 bp->b_flags |= B_READ; 587 bp->b_flags &= ~B_DONE; 588 vfs_busy_pages(bp, 0); 589 error = nfs_doio(bp, cred, p); 590 if (error == 0 && (bp->b_flags & B_INVAL)) 591 break; 592 if (error) { 593 brelse(bp); 594 } else if (i < lbn) { 595 brelse(bp); 596 } 597 } 598 } 599 } 600 if (error) 601 return (error); 602 } 603 604 /* 605 * If not eof and read aheads are enabled, start one. 606 * (You need the current block first, so that you have the 607 * directory offset cookie of the next block.) 608 */ 609 if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 610 (bp->b_flags & B_INVAL) == 0 && 611 (np->n_direofoffset == 0 || 612 (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && 613 !(np->n_flag & NQNFSNONCACHE) && 614 !incore(vp, lbn + 1)) { 615 rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); 616 if (rabp) { 617 if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 618 rabp->b_flags |= (B_READ | B_ASYNC); 619 rabp->b_flags &= ~B_DONE; 620 vfs_busy_pages(rabp, 0); 621 if (nfs_asyncio(rabp, cred)) { 622 rabp->b_flags |= B_INVAL|B_ERROR; 623 vfs_unbusy_pages(rabp); 624 brelse(rabp); 625 } 626 } else { 627 brelse(rabp); 628 } 629 } 630 } 631 /* 632 * Make sure we use a signed variant of min() since 633 * the second term may be negative. 
634 */ 635 n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); 636 break; 637 default: 638 printf(" nfs_bioread: type %x unexpected\n",vp->v_type); 639 break; 640 }; 641 642 if (n > 0) { 643 error = uiomove(bp->b_data + on, (int)n, uio); 644 } 645 switch (vp->v_type) { 646 case VREG: 647 break; 648 case VLNK: 649 n = 0; 650 break; 651 case VDIR: 652 if (np->n_flag & NQNFSNONCACHE) 653 bp->b_flags |= B_INVAL; 654 break; 655 default: 656 printf(" nfs_bioread: type %x unexpected\n",vp->v_type); 657 } 658 brelse(bp); 659 } while (error == 0 && uio->uio_resid > 0 && n > 0); 660 return (error); 661} 662 663static void 664nfs_prot_buf(bp, off, n) 665 struct buf *bp; 666 int off; 667 int n; 668{ 669 int pindex, boff, end; 670 671 if ((bp->b_flags & B_VMIO) == 0) 672 return; 673 674 end = round_page(off + n); 675 for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) { 676 pindex = boff >> PAGE_SHIFT; 677 vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE); 678 } 679} 680 681/* 682 * Vnode op for write using bio 683 */ 684int 685nfs_write(ap) 686 struct vop_write_args /* { 687 struct vnode *a_vp; 688 struct uio *a_uio; 689 int a_ioflag; 690 struct ucred *a_cred; 691 } */ *ap; 692{ 693 register int biosize; 694 register struct uio *uio = ap->a_uio; 695 struct proc *p = uio->uio_procp; 696 register struct vnode *vp = ap->a_vp; 697 struct nfsnode *np = VTONFS(vp); 698 register struct ucred *cred = ap->a_cred; 699 int ioflag = ap->a_ioflag; 700 struct buf *bp; 701 struct vattr vattr; 702 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 703 daddr_t lbn; 704 int bufsize; 705 int n, on, error = 0, iomode, must_commit; 706 707#ifdef DIAGNOSTIC 708 if (uio->uio_rw != UIO_WRITE) 709 panic("nfs_write mode"); 710 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 711 panic("nfs_write proc"); 712#endif 713 if (vp->v_type != VREG) 714 return (EIO); 715 if (np->n_flag & NWRITEERR) { 716 np->n_flag &= ~NWRITEERR; 717 return (np->n_error); 718 } 719 if 
((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 720 (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 721 (void)nfs_fsinfo(nmp, vp, cred, p); 722 if (ioflag & (IO_APPEND | IO_SYNC)) { 723 if (np->n_flag & NMODIFIED) { 724 np->n_attrstamp = 0; 725 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 726 if (error) 727 return (error); 728 } 729 if (ioflag & IO_APPEND) { 730 np->n_attrstamp = 0; 731 error = VOP_GETATTR(vp, &vattr, cred, p); 732 if (error) 733 return (error); 734 uio->uio_offset = np->n_size; 735 } 736 } 737 if (uio->uio_offset < 0) 738 return (EINVAL); 739 if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) 740 return (EFBIG); 741 if (uio->uio_resid == 0) 742 return (0); 743 /* 744 * Maybe this should be above the vnode op call, but so long as 745 * file servers have no limits, i don't think it matters 746 */ 747 if (p && uio->uio_offset + uio->uio_resid > 748 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 749 psignal(p, SIGXFSZ); 750 return (EFBIG); 751 } 752 /* 753 * I use nm_rsize, not nm_wsize so that all buffer cache blocks 754 * will be the same size within a filesystem. nfs_writerpc will 755 * still use nm_wsize when sizing the rpc's. 756 */ 757 biosize = vp->v_mount->mnt_stat.f_iosize; 758 do { 759 /* 760 * Check for a valid write lease. 
761 */ 762 if ((nmp->nm_flag & NFSMNT_NQNFS) && 763 NQNFS_CKINVALID(vp, np, ND_WRITE)) { 764 do { 765 error = nqnfs_getlease(vp, ND_WRITE, cred, p); 766 } while (error == NQNFS_EXPIRED); 767 if (error) 768 return (error); 769 if (np->n_lrev != np->n_brev || 770 (np->n_flag & NQNFSNONCACHE)) { 771 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 772 if (error) 773 return (error); 774 np->n_brev = np->n_lrev; 775 } 776 } 777 if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { 778 iomode = NFSV3WRITE_FILESYNC; 779 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); 780 if (must_commit) 781 nfs_clearcommit(vp->v_mount); 782 return (error); 783 } 784 nfsstats.biocache_writes++; 785 lbn = uio->uio_offset / biosize; 786 on = uio->uio_offset & (biosize-1); 787 n = min((unsigned)(biosize - on), uio->uio_resid); 788again: 789 if (uio->uio_offset + n > np->n_size) { 790 np->n_size = uio->uio_offset + n; 791 np->n_flag |= NMODIFIED; 792 vnode_pager_setsize(vp, np->n_size); 793 } 794 bufsize = biosize; 795 if ((off_t)(lbn + 1) * biosize > np->n_size) { 796 bufsize = np->n_size - (off_t)lbn * biosize; 797 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 798 } 799 bp = nfs_getcacheblk(vp, lbn, bufsize, p); 800 if (!bp) 801 return (EINTR); 802 if (bp->b_wcred == NOCRED) { 803 crhold(cred); 804 bp->b_wcred = cred; 805 } 806 np->n_flag |= NMODIFIED; 807 808 /* 809 * If dirtyend exceeds file size, chop it down. If this 810 * creates a reverse-indexed or degenerate situation with 811 * dirtyoff/end, 0 them. 812 */ 813 814 if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size) 815 bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE; 816 if (bp->b_dirtyoff >= bp->b_dirtyend) 817 bp->b_dirtyoff = bp->b_dirtyend = 0; 818 819 /* 820 * If the new write will leave a contiguous dirty 821 * area, just update the b_dirtyoff and b_dirtyend, 822 * otherwise force a write rpc of the old dirty area. 
823 */ 824 825 if (bp->b_dirtyend > 0 && 826 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 827 bp->b_proc = p; 828 if (VOP_BWRITE(bp) == EINTR) 829 return (EINTR); 830 goto again; 831 } 832 833 /* 834 * Check for valid write lease and get one as required. 835 * In case getblk() and/or bwrite() delayed us. 836 */ 837 if ((nmp->nm_flag & NFSMNT_NQNFS) && 838 NQNFS_CKINVALID(vp, np, ND_WRITE)) { 839 do { 840 error = nqnfs_getlease(vp, ND_WRITE, cred, p); 841 } while (error == NQNFS_EXPIRED); 842 if (error) { 843 brelse(bp); 844 return (error); 845 } 846 if (np->n_lrev != np->n_brev || 847 (np->n_flag & NQNFSNONCACHE)) { 848 brelse(bp); 849 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 850 if (error) 851 return (error); 852 np->n_brev = np->n_lrev; 853 goto again; 854 } 855 } 856 857 error = uiomove((char *)bp->b_data + on, n, uio); 858 bp->b_flags &= ~B_NEEDCOMMIT; 859 if (error) { 860 bp->b_flags |= B_ERROR; 861 brelse(bp); 862 return (error); 863 } 864 865 /* 866 * This will keep the buffer and mmaped regions more coherent. 867 */ 868 nfs_prot_buf(bp, on, n); 869 870 /* 871 * Only update dirtyoff/dirtyend if not a degenerate 872 * condition. 873 */ 874 if (n) { 875 if (bp->b_dirtyend > 0) { 876 bp->b_dirtyoff = min(on, bp->b_dirtyoff); 877 bp->b_dirtyend = max((on + n), bp->b_dirtyend); 878 } else { 879 bp->b_dirtyoff = on; 880 bp->b_dirtyend = on + n; 881 } 882 } 883 884 /* 885 * To avoid code complexity, we may have to throw away 886 * previously valid ranges when merging the new dirty range 887 * into the valid range. As long as we do not *ADD* an 888 * invalid valid range, we are ok. 
889 */ 890 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 891 bp->b_validoff > bp->b_dirtyend) { 892 bp->b_validoff = bp->b_dirtyoff; 893 bp->b_validend = bp->b_dirtyend; 894 } else { 895 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 896 bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 897 } 898 899 /* 900 * Since this block is being modified, it must be written 901 * again and not just committed. 902 */ 903 bp->b_flags &= ~B_NEEDCOMMIT; 904 905 /* 906 * If the lease is non-cachable or IO_SYNC do bwrite(). 907 */ 908 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 909 bp->b_proc = p; 910 if (ioflag & IO_INVAL) 911 bp->b_flags |= B_INVAL; 912 error = VOP_BWRITE(bp); 913 if (error) 914 return (error); 915 if (np->n_flag & NQNFSNONCACHE) { 916 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 917 if (error) 918 return (error); 919 } 920 } else if ((n + on) == biosize && 921 (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 922 bp->b_proc = (struct proc *)0; 923 bp->b_flags |= B_ASYNC; 924 (void)nfs_writebp(bp, 0); 925 } else 926 bdwrite(bp); 927 } while (uio->uio_resid > 0 && n > 0); 928 return (0); 929} 930 931/* 932 * Get an nfs cache block. 933 * Allocate a new one if the block isn't currently in the cache 934 * and return the block marked busy. If the calling process is 935 * interrupted by a signal for an interruptible mount point, return 936 * NULL. 
937 */ 938static struct buf * 939nfs_getcacheblk(vp, bn, size, p) 940 struct vnode *vp; 941 daddr_t bn; 942 int size; 943 struct proc *p; 944{ 945 register struct buf *bp; 946 struct mount *mp; 947 struct nfsmount *nmp; 948 949 mp = vp->v_mount; 950 nmp = VFSTONFS(mp); 951 952 if (nmp->nm_flag & NFSMNT_INT) { 953 bp = getblk(vp, bn, size, PCATCH, 0); 954 while (bp == (struct buf *)0) { 955 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 956 return ((struct buf *)0); 957 bp = getblk(vp, bn, size, 0, 2 * hz); 958 } 959 } else 960 bp = getblk(vp, bn, size, 0, 0); 961 962 if (vp->v_type == VREG) { 963 int biosize; 964 biosize = mp->mnt_stat.f_iosize; 965 bp->b_blkno = bn * (biosize / DEV_BSIZE); 966 } 967 968 return (bp); 969} 970 971/* 972 * Flush and invalidate all dirty buffers. If another process is already 973 * doing the flush, just wait for completion. 974 */ 975int 976nfs_vinvalbuf(vp, flags, cred, p, intrflg) 977 struct vnode *vp; 978 int flags; 979 struct ucred *cred; 980 struct proc *p; 981 int intrflg; 982{ 983 register struct nfsnode *np = VTONFS(vp); 984 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 985 int error = 0, slpflag, slptimeo; 986 987 if (vp->v_flag & VXLOCK) { 988 return (0); 989 } 990 991 if ((nmp->nm_flag & NFSMNT_INT) == 0) 992 intrflg = 0; 993 if (intrflg) { 994 slpflag = PCATCH; 995 slptimeo = 2 * hz; 996 } else { 997 slpflag = 0; 998 slptimeo = 0; 999 } 1000 /* 1001 * First wait for any other process doing a flush to complete. 1002 */ 1003 while (np->n_flag & NFLUSHINPROG) { 1004 np->n_flag |= NFLUSHWANT; 1005 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 1006 slptimeo); 1007 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 1008 return (EINTR); 1009 } 1010 1011 /* 1012 * Now, flush as required. 
1013 */ 1014 np->n_flag |= NFLUSHINPROG; 1015 error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 1016 while (error) { 1017 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 1018 np->n_flag &= ~NFLUSHINPROG; 1019 if (np->n_flag & NFLUSHWANT) { 1020 np->n_flag &= ~NFLUSHWANT; 1021 wakeup((caddr_t)&np->n_flag); 1022 } 1023 return (EINTR); 1024 } 1025 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 1026 } 1027 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 1028 if (np->n_flag & NFLUSHWANT) { 1029 np->n_flag &= ~NFLUSHWANT; 1030 wakeup((caddr_t)&np->n_flag); 1031 } 1032 return (0); 1033} 1034 1035/* 1036 * Initiate asynchronous I/O. Return an error if no nfsiods are available. 1037 * This is mainly to avoid queueing async I/O requests when the nfsiods 1038 * are all hung on a dead server. 1039 */ 1040int 1041nfs_asyncio(bp, cred) 1042 register struct buf *bp; 1043 struct ucred *cred; 1044{ 1045 struct nfsmount *nmp; 1046 int i; 1047 int gotiod; 1048 int slpflag = 0; 1049 int slptimeo = 0; 1050 int error; 1051 1052 if (nfs_numasync == 0) 1053 return (EIO); 1054 1055 nmp = VFSTONFS(bp->b_vp->v_mount); 1056again: 1057 if (nmp->nm_flag & NFSMNT_INT) 1058 slpflag = PCATCH; 1059 gotiod = FALSE; 1060 1061 /* 1062 * Find a free iod to process this request. 1063 */ 1064 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 1065 if (nfs_iodwant[i]) { 1066 /* 1067 * Found one, so wake it up and tell it which 1068 * mount to process. 1069 */ 1070 NFS_DPF(ASYNCIO, 1071 ("nfs_asyncio: waking iod %d for mount %p\n", 1072 i, nmp)); 1073 nfs_iodwant[i] = (struct proc *)0; 1074 nfs_iodmount[i] = nmp; 1075 nmp->nm_bufqiods++; 1076 wakeup((caddr_t)&nfs_iodwant[i]); 1077 gotiod = TRUE; 1078 break; 1079 } 1080 1081 /* 1082 * If none are free, we may already have an iod working on this mount 1083 * point. If so, it will process our request. 
1084 */ 1085 if (!gotiod) { 1086 if (nmp->nm_bufqiods > 0) { 1087 NFS_DPF(ASYNCIO, 1088 ("nfs_asyncio: %d iods are already processing mount %p\n", 1089 nmp->nm_bufqiods, nmp)); 1090 gotiod = TRUE; 1091 } 1092 } 1093 1094 /* 1095 * If we have an iod which can process the request, then queue 1096 * the buffer. 1097 */ 1098 if (gotiod) { 1099 /* 1100 * Ensure that the queue never grows too large. 1101 */ 1102 while (nmp->nm_bufqlen >= 2*nfs_numasync) { 1103 NFS_DPF(ASYNCIO, 1104 ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); 1105 nmp->nm_bufqwant = TRUE; 1106 error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, 1107 "nfsaio", slptimeo); 1108 if (error) { 1109 if (nfs_sigintr(nmp, NULL, bp->b_proc)) 1110 return (EINTR); 1111 if (slpflag == PCATCH) { 1112 slpflag = 0; 1113 slptimeo = 2 * hz; 1114 } 1115 } 1116 /* 1117 * We might have lost our iod while sleeping, 1118 * so check and loop if nescessary. 1119 */ 1120 if (nmp->nm_bufqiods == 0) { 1121 NFS_DPF(ASYNCIO, 1122 ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); 1123 goto again; 1124 } 1125 } 1126 1127 if (bp->b_flags & B_READ) { 1128 if (bp->b_rcred == NOCRED && cred != NOCRED) { 1129 crhold(cred); 1130 bp->b_rcred = cred; 1131 } 1132 } else { 1133 bp->b_flags |= B_WRITEINPROG; 1134 if (bp->b_wcred == NOCRED && cred != NOCRED) { 1135 crhold(cred); 1136 bp->b_wcred = cred; 1137 } 1138 } 1139 1140 TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); 1141 nmp->nm_bufqlen++; 1142 return (0); 1143 } 1144 1145 /* 1146 * All the iods are busy on other mounts, so return EIO to 1147 * force the caller to process the i/o synchronously. 1148 */ 1149 NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); 1150 return (EIO); 1151} 1152 1153/* 1154 * Do an I/O operation to/from a cache block. This may be called 1155 * synchronously or from an nfsiod. 
 */
/*
 * nfs_doio(bp, cr, p):
 *
 *	Execute the NFS RPC described by the buffer bp, then complete the
 *	buffer with biodone() (except on the interrupted/uncommitted-write
 *	path, where the buffer is redirtied instead of being completed with
 *	an error).  Called either synchronously by the buffer cache code or
 *	asynchronously from an nfsiod kernel process.
 *
 *	bp	the buffer to be read or written; b_flags selects the
 *		operation (B_READ, B_PHYS) and b_vp names the vnode.
 *	cr	credentials used for the RPC.
 *	p	calling process (used for uio_procp and for the modified-
 *		text-file SIGKILL check); may be NULL for an async daemon.
 *
 *	Returns 0 on success, otherwise an errno from the underlying RPC.
 */
int
nfs_doio(bp, cr, p)
	struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);

	/*
	 * Build a single-segment kernel-space uio describing the buffer's
	 * data area; the direction and offset are filled in per-case below.
	 */
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/* A buffer handed to us must not already be completed. */
	KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
	    /*
	     * ...though reading /dev/drum still gets us here.
	     */
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    /* mapping was done by vmapbuf() */
	    io.iov_base = bp->b_data;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
	    if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop, cr);
	    } else {
		/*
		 * com only receives the must_commit result from
		 * nfs_writerpc and is discarded; physio writes go out
		 * DATASYNC so no later commit bookkeeping is needed here.
		 */
		int com;

		iomode = NFSV3WRITE_DATASYNC;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else if (bp->b_flags & B_READ) {
	    /* Cache-block read: dispatch the RPC on the vnode type. */
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    io.iov_base = bp->b_data;
	    uiop->uio_rw = UIO_READ;
	    switch (vp->v_type) {
	    case VREG:
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop, cr);
		if (!error) {
		    bp->b_validoff = 0;
		    if (uiop->uio_resid) {
			/*
			 * If len > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet.
			 * Just zero fill the rest of the valid area.
			 */
			diff = bp->b_bcount - uiop->uio_resid;
			len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
				+ diff);
			if (len > 0) {
			    len = min(len, uiop->uio_resid);
			    bzero((char *)bp->b_data + diff, len);
			    bp->b_validend = diff + len;
			} else
			    bp->b_validend = diff;
		    } else
			bp->b_validend = bp->b_bcount;
		}
		/*
		 * If this vnode is an executing text image and the file
		 * has been modified behind our back (lease invalid under
		 * NQNFS, or mtime mismatch otherwise), kill the process:
		 * its mapped pages no longer match the file.
		 * NOTE(review): P_NOSWAP presumably keeps the dying
		 * process resident until the signal is taken -- confirm.
		 */
		if (p && (vp->v_flag & VTEXT) &&
			(((nmp->nm_flag & NFSMNT_NQNFS) &&
			  NQNFS_CKINVALID(vp, np, ND_READ) &&
			  np->n_lrev != np->n_brev) ||
			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
			uprintf("Process killed due to text file modification\n");
			psignal(p, SIGKILL);
			p->p_flag |= P_NOSWAP;
		}
		break;
	    case VLNK:
		/* Symlink contents are a single block at offset 0. */
		uiop->uio_offset = (off_t)0;
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, cr);
		break;
	    case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
		/*
		 * Prefer READDIRPLUS; fall back permanently to plain
		 * READDIR if the server reports it is unsupported.
		 */
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop, cr);
			if (error == NFSERR_NOTSUPP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop, cr);
		/*
		 * A successful reply that transferred nothing leaves the
		 * block empty; invalidate it rather than cache it.
		 */
		if (error == 0 && uiop->uio_resid == bp->b_bcount)
			bp->b_flags |= B_INVAL;
		break;
	    default:
		printf("nfs_doio: type %x unexpected\n",vp->v_type);
		break;
	    };
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else {
	    /*
	     * Cache-block write.  First clamp the dirty region so we never
	     * write past the (locally known) end of file.
	     */
	    if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
		bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;

	    if (bp->b_dirtyend > bp->b_dirtyoff) {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;

		/*
		 * Plain async writes (not already awaiting commit, not
		 * B_NOCACHE, not part of a cluster) may go out UNSTABLE
		 * and be committed later; everything else is written
		 * FILESYNC so it hits stable storage immediately.
		 */
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
		    iomode = NFSV3WRITE_UNSTABLE;
		else
		    iomode = NFSV3WRITE_FILESYNC;

		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
		    /*
		     * Unstable data needs a later COMMIT; a fully dirty
		     * buffer is additionally eligible for clustering.
		     */
		    bp->b_flags |= B_NEEDCOMMIT;
		    if (bp->b_dirtyoff == 0
			&& bp->b_dirtyend == bp->b_bufsize)
			bp->b_flags |= B_CLUSTEROK;
		} else {
		    bp->b_flags &= ~B_NEEDCOMMIT;
		}
		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid
		 * and the write hasn't been pushed to the server yet,
		 * so we can't set B_ERROR and report the interruption
		 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
		 * is not relevant, so the rpc attempt is essentially
		 * a noop. For the case of a V3 write rpc not being
		 * committed to stable storage, the block is still
		 * dirty and requires either a commit rpc or another
		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
		 * the block is reused. This is indicated by setting
		 * the B_DELWRI and B_NEEDCOMMIT flags.
		 *
		 * If the buffer is marked B_PAGING, it does not reside on
		 * the vp's paging queues so we cannot call bdirty(). The
		 * bp in this case is not an NFS cache block so we should
		 * be safe. XXX
		 */
		if (error == EINTR
		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			int s;

			/* Redirty the buffer at splbio so the flag
			 * updates are atomic with respect to biodone. */
			s = splbio();
			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
			if ((bp->b_flags & B_PAGING) == 0) {
			    bdirty(bp);
			    bp->b_flags &= ~B_DONE;
			}
			if ((bp->b_flags & B_ASYNC) == 0)
			    bp->b_flags |= B_EINTR;
			splx(s);
		} else {
			/*
			 * Hard write error: latch it on the nfsnode so a
			 * later close/fsync can report it, and clear the
			 * dirty region since we will not retry here.
			 */
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	    } else {
		/* Nothing dirty after clamping: complete immediately. */
		bp->b_resid = 0;
		biodone(bp);
		return (0);
	    }
	}
	bp->b_resid = uiop->uio_resid;
	/*
	 * must_commit was set by nfs_writerpc; NOTE(review): presumably the
	 * server's write verifier changed (e.g. a reboot), so all cached
	 * commit state for this mount must be discarded -- confirm against
	 * nfs_clearcommit().
	 */
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}