nfs_bio.c revision 8692
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94 37 * $Id: nfs_bio.c,v 1.12 1995/04/16 05:05:25 davidg Exp $ 38 */ 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/resourcevar.h> 43#include <sys/signalvar.h> 44#include <sys/proc.h> 45#include <sys/buf.h> 46#include <sys/vnode.h> 47#include <sys/mount.h> 48#include <sys/kernel.h> 49 50#include <vm/vm.h> 51 52#include <nfs/nfsnode.h> 53#include <nfs/rpcv2.h> 54#include <nfs/nfsv2.h> 55#include <nfs/nfs.h> 56#include <nfs/nfsmount.h> 57#include <nfs/nqnfs.h> 58 59struct buf *nfs_getcacheblk(); 60extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; 61extern int nfs_numasync; 62 63/* 64 * Vnode op for read using bio 65 * Any similarity to readip() is purely coincidental 66 */ 67int 68nfs_bioread(vp, uio, ioflag, cred) 69 register struct vnode *vp; 70 register struct uio *uio; 71 int ioflag; 72 struct ucred *cred; 73{ 74 register struct nfsnode *np = VTONFS(vp); 75 register int biosize, diff; 76 struct buf *bp = 0, *rabp; 77 struct vattr vattr; 78 struct proc *p; 79 struct nfsmount *nmp; 80 daddr_t lbn, rabn; 81 int bufsize; 82 int nra, error = 0, n = 0, on = 0, not_readin; 83 84#ifdef lint 85 ioflag = ioflag; 86#endif /* lint */ 87#ifdef DIAGNOSTIC 88 if (uio->uio_rw != UIO_READ) 89 panic("nfs_read mode"); 90#endif 91 if (uio->uio_resid == 0) 92 return (0); 93 if (uio->uio_offset < 0 && vp->v_type != VDIR) 94 return (EINVAL); 95 nmp = VFSTONFS(vp->v_mount); 96 biosize = NFS_MAXDGRAMDATA; 97 p = uio->uio_procp; 98 /* 99 * For nfs, cache consistency can only be maintained approximately. 100 * Although RFC1094 does not specify the criteria, the following is 101 * believed to be compatible with the reference port. 102 * For nqnfs, full cache consistency is maintained within the loop. 103 * For nfs: 104 * If the file's modify time on the server has changed since the 105 * last read rpc or you have written to the file, 106 * you may have lost data cache consistency with the 107 * server, so flush all of the file's data out of the cache. 108 * Then force a getattr rpc to ensure that you have up to date 109 * attributes. 110 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 111 * the ones changing the modify time. 112 * NB: This implies that cache data can be read when up to 113 * NFS_ATTRTIMEO seconds out of date. If you find that you need current 114 * attributes this could be forced by setting n_attrstamp to 0 before 115 * the VOP_GETATTR() call. 116 */ 117 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 118 if (np->n_flag & NMODIFIED) { 119 if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 120 vp->v_type != VREG) { 121 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 122 if (error) 123 return (error); 124 } 125 np->n_attrstamp = 0; 126 np->n_direofoffset = 0; 127 error = VOP_GETATTR(vp, &vattr, cred, p); 128 if (error) 129 return (error); 130 np->n_mtime = vattr.va_mtime.ts_sec; 131 } else { 132 error = VOP_GETATTR(vp, &vattr, cred, p); 133 if (error) 134 return (error); 135 if (np->n_mtime != vattr.va_mtime.ts_sec) { 136 np->n_direofoffset = 0; 137 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 138 if (error) 139 return (error); 140 np->n_mtime = vattr.va_mtime.ts_sec; 141 } 142 } 143 } 144 do { 145 146 /* 147 * Get a valid lease. If cached data is stale, flush it. 148 */ 149 if (nmp->nm_flag & NFSMNT_NQNFS) { 150 if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 151 do { 152 error = nqnfs_getlease(vp, NQL_READ, cred, p); 153 } while (error == NQNFS_EXPIRED); 154 if (error) 155 return (error); 156 if (np->n_lrev != np->n_brev || 157 (np->n_flag & NQNFSNONCACHE) || 158 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 159 if (vp->v_type == VDIR) { 160 np->n_direofoffset = 0; 161 cache_purge(vp); 162 } 163 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 164 if (error) 165 return (error); 166 np->n_brev = np->n_lrev; 167 } 168 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { 169 np->n_direofoffset = 0; 170 cache_purge(vp); 171 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 172 if (error) 173 return (error); 174 } 175 } 176 if (np->n_flag & NQNFSNONCACHE) { 177 switch (vp->v_type) { 178 case VREG: 179 error = nfs_readrpc(vp, uio, cred); 180 break; 181 case VLNK: 182 error = nfs_readlinkrpc(vp, uio, cred); 183 break; 184 case VDIR: 185 error = nfs_readdirrpc(vp, uio, cred); 186 break; 187 default: 188 printf(" NQNFSNONCACHE: type %x unexpected\n", 189 vp->v_type); 190 break; 191 }; 192 return (error); 193 } 194 switch (vp->v_type) { 195 case VREG: 196 nfsstats.biocache_reads++; 197 lbn = uio->uio_offset / biosize; 198 on = uio->uio_offset & (biosize-1); 199 not_readin = 1; 200 201 /* 202 * Start the read ahead(s), as required. 203 */ 204 if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 205 lbn == vp->v_lastr + 1) { 206 for (nra = 0; nra < nmp->nm_readahead && 207 (lbn + 1 + nra) * biosize < np->n_size; nra++) { 208 rabn = lbn + 1 + nra; 209 if (!incore(vp, rabn)) { 210 rabp = nfs_getcacheblk(vp, rabn, biosize, p); 211 if (!rabp) 212 return (EINTR); 213 if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 214 rabp->b_flags |= (B_READ | B_ASYNC); 215 vfs_busy_pages(rabp, 0); 216 if (nfs_asyncio(rabp, cred)) { 217 rabp->b_flags |= B_INVAL|B_ERROR; 218 vfs_unbusy_pages(rabp); 219 brelse(rabp); 220 } 221 } else { 222 brelse(rabp); 223 } 224 } 225 } 226 } 227 228 /* 229 * If the block is in the cache and has the required data 230 * in a valid region, just copy it out. 231 * Otherwise, get the block and write back/read in, 232 * as required. 233 */ 234again: 235 bufsize = biosize; 236 if ((lbn + 1) * biosize > np->n_size) { 237 bufsize = np->n_size - lbn * biosize; 238 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 239 } 240 bp = nfs_getcacheblk(vp, lbn, bufsize, p); 241 if (!bp) 242 return (EINTR); 243 if ((bp->b_flags & B_CACHE) == 0) { 244 bp->b_flags |= B_READ; 245 not_readin = 0; 246 vfs_busy_pages(bp, 0); 247 error = nfs_doio(bp, cred, p); 248 if (error) { 249 brelse(bp); 250 return (error); 251 } 252 } 253 if (bufsize > on) { 254 n = min((unsigned)(bufsize - on), uio->uio_resid); 255 } else { 256 n = 0; 257 } 258 diff = np->n_size - uio->uio_offset; 259 if (diff < n) 260 n = diff; 261 if (not_readin && n > 0) { 262 if (on < bp->b_validoff || (on + n) > bp->b_validend) { 263 bp->b_flags |= B_NOCACHE; 264 if (bp->b_dirtyend > 0) { 265 if ((bp->b_flags & B_DELWRI) == 0) 266 panic("nfsbioread"); 267 if (VOP_BWRITE(bp) == EINTR) 268 return (EINTR); 269 } else 270 brelse(bp); 271 goto again; 272 } 273 } 274 vp->v_lastr = lbn; 275 diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); 276 if (diff < n) 277 n = diff; 278 break; 279 case VLNK: 280 nfsstats.biocache_readlinks++; 281 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); 282 if (!bp) 283 return (EINTR); 284 if ((bp->b_flags & B_CACHE) == 0) { 285 bp->b_flags |= B_READ; 286 vfs_busy_pages(bp, 0); 287 error = nfs_doio(bp, cred, p); 288 if (error) { 289 bp->b_flags |= B_ERROR; 290 brelse(bp); 291 return (error); 292 } 293 } 294 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 295 on = 0; 296 break; 297 case VDIR: 298 nfsstats.biocache_readdirs++; 299 lbn = (daddr_t)uio->uio_offset; 300 bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); 301 if (!bp) 302 return (EINTR); 303 304 if ((bp->b_flags & B_CACHE) == 0) { 305 bp->b_flags |= B_READ; 306 vfs_busy_pages(bp, 0); 307 error = nfs_doio(bp, cred, p); 308 if (error) { 309 bp->b_flags |= B_ERROR; 310 brelse(bp); 311 return (error); 312 } 313 } 314 315 /* 316 * If not eof and read aheads are enabled, start one. 317 * (You need the current block first, so that you have the 318 * directory offset cookie of the next block. 319 */ 320 rabn = bp->b_blkno; 321 if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 322 rabn != 0 && rabn != np->n_direofoffset && 323 !incore(vp, rabn)) { 324 rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p); 325 if (rabp) { 326 if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 327 rabp->b_flags |= (B_READ | B_ASYNC); 328 vfs_busy_pages(rabp, 0); 329 if (nfs_asyncio(rabp, cred)) { 330 rabp->b_flags |= B_INVAL|B_ERROR; 331 vfs_unbusy_pages(rabp); 332 brelse(rabp); 333 } 334 } else { 335 brelse(rabp); 336 } 337 } 338 } 339 on = 0; 340 n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 341 break; 342 default: 343 printf(" nfsbioread: type %x unexpected\n",vp->v_type); 344 break; 345 }; 346 347 if (n > 0) { 348 error = uiomove(bp->b_data + on, (int)n, uio); 349 } 350 switch (vp->v_type) { 351 case VREG: 352 break; 353 case VLNK: 354 n = 0; 355 break; 356 case VDIR: 357 uio->uio_offset = bp->b_blkno; 358 break; 359 default: 360 printf(" nfsbioread: type %x unexpected\n",vp->v_type); 361 break; 362 } 363 brelse(bp); 364 } while (error == 0 && uio->uio_resid > 0 && n > 0); 365 return (error); 366} 367 368/* 369 * Vnode op for write using bio 370 */ 371int 372nfs_write(ap) 373 struct vop_write_args /* { 374 struct vnode *a_vp; 375 struct uio *a_uio; 376 int a_ioflag; 377 struct ucred *a_cred; 378 } */ *ap; 379{ 380 register int biosize; 381 register struct uio *uio = ap->a_uio; 382 struct proc *p = uio->uio_procp; 383 register struct vnode *vp = ap->a_vp; 384 struct nfsnode *np = VTONFS(vp); 385 register struct ucred *cred = ap->a_cred; 386 int ioflag = ap->a_ioflag; 387 struct buf *bp; 388 struct vattr vattr; 389 struct nfsmount *nmp; 390 daddr_t lbn; 391 int bufsize; 392 int n, on, error = 0; 393 394#ifdef DIAGNOSTIC 395 if (uio->uio_rw != UIO_WRITE) 396 panic("nfs_write mode"); 397 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 398 panic("nfs_write proc"); 399#endif 400 if (vp->v_type != VREG) 401 return (EIO); 402 if (np->n_flag & NWRITEERR) { 403 np->n_flag &= ~NWRITEERR; 404 return (np->n_error); 405 } 406 if (ioflag & (IO_APPEND | IO_SYNC)) { 407 if (np->n_flag & NMODIFIED) { 408 np->n_attrstamp = 0; 409 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 410 if (error) 411 return (error); 412 } 413 if (ioflag & IO_APPEND) { 414 np->n_attrstamp = 0; 415 error = VOP_GETATTR(vp, &vattr, cred, p); 416 if (error) 417 return (error); 418 uio->uio_offset = np->n_size; 419 } 420 } 421 nmp = VFSTONFS(vp->v_mount); 422 if (uio->uio_offset < 0) 423 return (EINVAL); 424 if (uio->uio_resid == 0) 425 return (0); 426 /* 427 * Maybe this should be above the vnode op call, but so long as 428 * file servers have no limits, i don't think it matters 429 */ 430 if (p && uio->uio_offset + uio->uio_resid > 431 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 432 psignal(p, SIGXFSZ); 433 return (EFBIG); 434 } 435 /* 436 * I use nm_rsize, not nm_wsize so that all buffer cache blocks 437 * will be the same size within a filesystem. nfs_writerpc will 438 * still use nm_wsize when sizing the rpc's. 439 */ 440 biosize = NFS_MAXDGRAMDATA; 441 do { 442 443 /* 444 * XXX make sure we aren't cached in the VM page cache 445 */ 446 /* 447 * Check for a valid write lease. 448 * If non-cachable, just do the rpc 449 */ 450 if ((nmp->nm_flag & NFSMNT_NQNFS) && 451 NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 452 do { 453 error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 454 } while (error == NQNFS_EXPIRED); 455 if (error) 456 return (error); 457 if (np->n_lrev != np->n_brev || 458 (np->n_flag & NQNFSNONCACHE)) { 459 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 460 if (error) 461 return (error); 462 np->n_brev = np->n_lrev; 463 } 464 } 465 if (np->n_flag & NQNFSNONCACHE) 466 return (nfs_writerpc(vp, uio, cred, ioflag)); 467 nfsstats.biocache_writes++; 468 lbn = uio->uio_offset / biosize; 469 on = uio->uio_offset & (biosize-1); 470 n = min((unsigned)(biosize - on), uio->uio_resid); 471again: 472 if (uio->uio_offset + n > np->n_size) { 473 np->n_size = uio->uio_offset + n; 474 vnode_pager_setsize(vp, (u_long)np->n_size); 475 } 476 bufsize = biosize; 477 if ((lbn + 1) * biosize > np->n_size) { 478 bufsize = np->n_size - lbn * biosize; 479 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 480 } 481 bp = nfs_getcacheblk(vp, lbn, bufsize, p); 482 if (!bp) 483 return (EINTR); 484 if (bp->b_wcred == NOCRED) { 485 crhold(cred); 486 bp->b_wcred = cred; 487 } 488 np->n_flag |= NMODIFIED; 489 490 if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { 491 bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); 492 } 493 494 /* 495 * If the new write will leave a contiguous dirty 496 * area, just update the b_dirtyoff and b_dirtyend, 497 * otherwise force a write rpc of the old dirty area. 498 */ 499 if (bp->b_dirtyend > 0 && 500 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 501 bp->b_proc = p; 502 if (VOP_BWRITE(bp) == EINTR) 503 return (EINTR); 504 goto again; 505 } 506 507 /* 508 * Check for valid write lease and get one as required. 509 * In case getblk() and/or bwrite() delayed us. 510 */ 511 if ((nmp->nm_flag & NFSMNT_NQNFS) && 512 NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 513 do { 514 error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 515 } while (error == NQNFS_EXPIRED); 516 if (error) { 517 brelse(bp); 518 return (error); 519 } 520 if (np->n_lrev != np->n_brev || 521 (np->n_flag & NQNFSNONCACHE)) { 522 brelse(bp); 523 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 524 if (error) 525 return (error); 526 np->n_brev = np->n_lrev; 527 goto again; 528 } 529 } 530 error = uiomove((char *)bp->b_data + on, n, uio); 531 if (error) { 532 bp->b_flags |= B_ERROR; 533 brelse(bp); 534 return (error); 535 } 536 if (bp->b_dirtyend > 0) { 537 bp->b_dirtyoff = min(on, bp->b_dirtyoff); 538 bp->b_dirtyend = max((on + n), bp->b_dirtyend); 539 } else { 540 bp->b_dirtyoff = on; 541 bp->b_dirtyend = on + n; 542 } 543#ifndef notdef 544 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 545 bp->b_validoff > bp->b_dirtyend) { 546 bp->b_validoff = bp->b_dirtyoff; 547 bp->b_validend = bp->b_dirtyend; 548 } else { 549 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 550 bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 551 } 552#else 553 bp->b_validoff = bp->b_dirtyoff; 554 bp->b_validend = bp->b_dirtyend; 555#endif 556 if (ioflag & IO_APPEND) 557 bp->b_flags |= B_APPENDWRITE; 558 559 /* 560 * If the lease is non-cachable or IO_SYNC do bwrite(). 561 */ 562 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 563 bp->b_proc = p; 564 error = VOP_BWRITE(bp); 565 if (error) 566 return (error); 567 } else if ((n + on) == biosize && 568 (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 569 bp->b_proc = (struct proc *)0; 570 bawrite(bp); 571 } else 572 bdwrite(bp); 573 } while (uio->uio_resid > 0 && n > 0); 574 return (0); 575} 576 577/* 578 * Get an nfs cache block. 579 * Allocate a new one if the block isn't currently in the cache 580 * and return the block marked busy. If the calling process is 581 * interrupted by a signal for an interruptible mount point, return 582 * NULL. 583 */ 584struct buf * 585nfs_getcacheblk(vp, bn, size, p) 586 struct vnode *vp; 587 daddr_t bn; 588 int size; 589 struct proc *p; 590{ 591 register struct buf *bp; 592 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 593 594 if (nmp->nm_flag & NFSMNT_INT) { 595 bp = getblk(vp, bn, size, PCATCH, 0); 596 while (bp == (struct buf *)0) { 597 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 598 return ((struct buf *)0); 599 bp = getblk(vp, bn, size, 0, 2 * hz); 600 } 601 } else 602 bp = getblk(vp, bn, size, 0, 0); 603 604 if( vp->v_type == VREG) 605 bp->b_blkno = (bn * NFS_MAXDGRAMDATA) / DEV_BSIZE; 606 607 return (bp); 608} 609 610/* 611 * Flush and invalidate all dirty buffers. If another process is already 612 * doing the flush, just wait for completion. 613 */ 614int 615nfs_vinvalbuf(vp, flags, cred, p, intrflg) 616 struct vnode *vp; 617 int flags; 618 struct ucred *cred; 619 struct proc *p; 620 int intrflg; 621{ 622 register struct nfsnode *np = VTONFS(vp); 623 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 624 int error = 0, slpflag, slptimeo; 625 626 if ((nmp->nm_flag & NFSMNT_INT) == 0) 627 intrflg = 0; 628 if (intrflg) { 629 slpflag = PCATCH; 630 slptimeo = 2 * hz; 631 } else { 632 slpflag = 0; 633 slptimeo = 0; 634 } 635 /* 636 * First wait for any other process doing a flush to complete. 637 */ 638 while (np->n_flag & NFLUSHINPROG) { 639 np->n_flag |= NFLUSHWANT; 640 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 641 slptimeo); 642 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 643 return (EINTR); 644 } 645 646 /* 647 * Now, flush as required. 648 */ 649 np->n_flag |= NFLUSHINPROG; 650 error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 651 while (error) { 652 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 653 np->n_flag &= ~NFLUSHINPROG; 654 if (np->n_flag & NFLUSHWANT) { 655 np->n_flag &= ~NFLUSHWANT; 656 wakeup((caddr_t)&np->n_flag); 657 } 658 return (EINTR); 659 } 660 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 661 } 662 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 663 if (np->n_flag & NFLUSHWANT) { 664 np->n_flag &= ~NFLUSHWANT; 665 wakeup((caddr_t)&np->n_flag); 666 } 667 return (0); 668} 669 670/* 671 * Initiate asynchronous I/O. Return an error if no nfsiods are available. 672 * This is mainly to avoid queueing async I/O requests when the nfsiods 673 * are all hung on a dead server. 674 */ 675int 676nfs_asyncio(bp, cred) 677 register struct buf *bp; 678 struct ucred *cred; 679{ 680 register int i; 681 682 if (nfs_numasync == 0) 683 return (EIO); 684 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 685 if (nfs_iodwant[i]) { 686 if (bp->b_flags & B_READ) { 687 if (bp->b_rcred == NOCRED && cred != NOCRED) { 688 crhold(cred); 689 bp->b_rcred = cred; 690 } 691 } else { 692 if (bp->b_wcred == NOCRED && cred != NOCRED) { 693 crhold(cred); 694 bp->b_wcred = cred; 695 } 696 } 697 698 TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); 699 nfs_iodwant[i] = (struct proc *)0; 700 wakeup((caddr_t)&nfs_iodwant[i]); 701 return (0); 702 } 703 return (EIO); 704} 705 706/* 707 * Do an I/O operation to/from a cache block. This may be called 708 * synchronously or from an nfsiod. 709 */ 710int 711nfs_doio(bp, cr, p) 712 register struct buf *bp; 713 struct ucred *cr; 714 struct proc *p; 715{ 716 register struct uio *uiop; 717 register struct vnode *vp; 718 struct nfsnode *np; 719 struct nfsmount *nmp; 720 int error = 0, diff, len; 721 struct uio uio; 722 struct iovec io; 723 724 vp = bp->b_vp; 725 np = VTONFS(vp); 726 nmp = VFSTONFS(vp->v_mount); 727 uiop = &uio; 728 uiop->uio_iov = &io; 729 uiop->uio_iovcnt = 1; 730 uiop->uio_segflg = UIO_SYSSPACE; 731 uiop->uio_procp = p; 732 733 /* 734 * Historically, paging was done with physio, but no more. 735 */ 736 if (bp->b_flags & B_PHYS) { 737 /* 738 * ...though reading /dev/drum still gets us here. 739 */ 740 io.iov_len = uiop->uio_resid = bp->b_bcount; 741 /* mapping was done by vmapbuf() */ 742 io.iov_base = bp->b_data; 743 uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 744 if (bp->b_flags & B_READ) { 745 uiop->uio_rw = UIO_READ; 746 nfsstats.read_physios++; 747 error = nfs_readrpc(vp, uiop, cr); 748 } else { 749 uiop->uio_rw = UIO_WRITE; 750 nfsstats.write_physios++; 751 error = nfs_writerpc(vp, uiop, cr,0); 752 } 753 if (error) { 754 bp->b_flags |= B_ERROR; 755 bp->b_error = error; 756 } 757 } else if (bp->b_flags & B_READ) { 758 io.iov_len = uiop->uio_resid = bp->b_bcount; 759 io.iov_base = bp->b_data; 760 uiop->uio_rw = UIO_READ; 761 switch (vp->v_type) { 762 case VREG: 763 uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 764 nfsstats.read_bios++; 765 error = nfs_readrpc(vp, uiop, cr); 766 if (!error) { 767 bp->b_validoff = 0; 768 if (uiop->uio_resid) { 769 /* 770 * If len > 0, there is a hole in the file and 771 * no writes after the hole have been pushed to 772 * the server yet. 773 * Just zero fill the rest of the valid area. 774 */ 775 diff = bp->b_bcount - uiop->uio_resid; 776 len = np->n_size - (bp->b_blkno * DEV_BSIZE 777 + diff); 778 if (len > 0) { 779 len = min(len, uiop->uio_resid); 780 bzero((char *)bp->b_data + diff, len); 781 bp->b_validend = diff + len; 782 } else 783 bp->b_validend = diff; 784 } else 785 bp->b_validend = bp->b_bcount; 786 } 787 if (p && (vp->v_flag & VTEXT) && 788 (((nmp->nm_flag & NFSMNT_NQNFS) && 789 NQNFS_CKINVALID(vp, np, NQL_READ) && 790 np->n_lrev != np->n_brev) || 791 (!(nmp->nm_flag & NFSMNT_NQNFS) && 792 np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { 793 uprintf("Process killed due to text file modification\n"); 794 psignal(p, SIGKILL); 795 p->p_flag |= P_NOSWAP; 796 } 797 break; 798 case VLNK: 799 uiop->uio_offset = 0; 800 nfsstats.readlink_bios++; 801 error = nfs_readlinkrpc(vp, uiop, cr); 802 break; 803 case VDIR: 804 uiop->uio_offset = bp->b_lblkno; 805 nfsstats.readdir_bios++; 806 if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) 807 error = nfs_readdirlookrpc(vp, uiop, cr); 808 else 809 error = nfs_readdirrpc(vp, uiop, cr); 810 /* 811 * Save offset cookie in b_blkno. 812 */ 813 bp->b_blkno = uiop->uio_offset; 814 break; 815 default: 816 printf("nfs_doio: type %x unexpected\n",vp->v_type); 817 break; 818 }; 819 if (error) { 820 bp->b_flags |= B_ERROR; 821 bp->b_error = error; 822 } 823 } else { 824 825 if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) 826 bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); 827 828 if (bp->b_dirtyend > bp->b_dirtyoff) { 829 io.iov_len = uiop->uio_resid = bp->b_dirtyend 830 - bp->b_dirtyoff; 831 uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) 832 + bp->b_dirtyoff; 833 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; 834 uiop->uio_rw = UIO_WRITE; 835 nfsstats.write_bios++; 836 if (bp->b_flags & B_APPENDWRITE) 837 error = nfs_writerpc(vp, uiop, cr, IO_APPEND); 838 else 839 error = nfs_writerpc(vp, uiop, cr, 0); 840 bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); 841 842 /* 843 * For an interrupted write, the buffer is still valid and the 844 * write hasn't been pushed to the server yet, so we can't set 845 * B_ERROR and report the interruption by setting B_EINTR. For 846 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 847 * is essentially a noop. 848 */ 849 if (error == EINTR) { 850 bp->b_flags &= ~(B_INVAL|B_NOCACHE); 851 bp->b_flags |= B_DELWRI; 852 853 /* 854 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 855 * buffer to the clean list, we have to reassign it back to the 856 * dirty one. Ugh. 857 */ 858 if (bp->b_flags & B_ASYNC) 859 reassignbuf(bp, vp); 860 else 861 bp->b_flags |= B_EINTR; 862 } else { 863 if (error) { 864 bp->b_flags |= B_ERROR; 865 bp->b_error = np->n_error = error; 866 np->n_flag |= NWRITEERR; 867 } 868 bp->b_dirtyoff = bp->b_dirtyend = 0; 869 } 870 } else { 871 bp->b_resid = 0; 872 biodone(bp); 873 return (0); 874 } 875 } 876 bp->b_resid = uiop->uio_resid; 877 biodone(bp); 878 return (error); 879} 880