nfs_bio.c revision 1549
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

/* K&R forward declarations; nfs_iodwant[] is the table of idle nfsiods. */
struct buf *incore(), *nfs_getcacheblk();
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 *
 * Reads a regular file, symlink or directory through the buffer cache,
 * copying data out to "uio".  Returns 0 on success or an errno; EINTR
 * is returned when a signal interrupts a buffer wait on an
 * interruptible (NFSMNT_INT) mount.
 *
 *	vp	- the nfs vnode being read
 *	uio	- describes the user's buffer, offset and residual count
 *	ioflag	- unused here (kept for the vnode-op interface)
 *	cred	- credentials charged for the RPCs
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = 0, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	/*
	 * got_buf    - nonzero when bp is busy/held and must be brelse'd
	 * not_readin - nonzero when this pass did not itself read the
	 *              block, so the valid window must be re-checked
	 */
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	/* Directory "offsets" are opaque cookies, so they may look negative. */
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			/* We dirtied the file: flush unless our own writes
			 * are known to be the only source of mtime changes. */
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			     vp->v_type != VREG) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			/* Not locally modified: flush only if the server's
			 * mtime has moved since our cached copy was read. */
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
		    do {
			error = nqnfs_getlease(vp, NQL_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    /* Server's lease revision moved, lease is non-cachable,
		     * or we modified a directory: dump cached data. */
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
			    np->n_direofoffset = 0;
			    cache_purge(vp);
			}
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    np->n_direofoffset = 0;
		    cache_purge(vp);
		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
			return (error);
		}
	    }
	    /* Non-cachable lease: bypass the buffer cache entirely. */
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		};
		return (error);
	    }
	    baddr = (caddr_t)0;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		/* Split the offset into a logical block number and an
		 * offset within that block; bn is in DEV_BSIZE units. */
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		bn = lbn * (biosize / DEV_BSIZE);
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 * Only when reading sequentially (lbn follows v_lastr)
		 * and async daemons are available.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    lbn == vp->v_lastr + 1) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				/* No nfsiod free: drop the readahead. */
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    }
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
		if ((bp = incore(vp, bn)) &&
		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
		    (B_BUSY | B_WRITEINPROG))
			got_buf = 0;
		else {
again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}
		}
		/* Clamp the transfer to the block and to the file size. */
		n = min((unsigned)(biosize - on), uio->uio_resid);
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		/*
		 * We did not read the block ourselves this pass, so the
		 * valid window may not cover [on, on+n).  If it doesn't,
		 * push any dirty data and re-read the block.
		 */
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				if (!got_buf) {
					bp = nfs_getcacheblk(vp, bn, biosize, p);
					if (!bp)
						return (EINTR);
					got_buf = 1;
				}
				bp->b_flags |= B_INVAL;
				if (bp->b_dirtyend > 0) {
					if ((bp->b_flags & B_DELWRI) == 0)
						panic("nfsbioread");
					if (VOP_BWRITE(bp) == EINTR)
						return (EINTR);
				} else
					brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;
		/* Re-clamp n to the valid portion of the buffer. */
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		/* Whole symlink target lives in one block at offset 0. */
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (error = nfs_doio(bp, cred, p)) {
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    case VDIR:
		/* The directory offset is really an opaque server cookie
		 * used directly as the block number. */
		nfsstats.biocache_readdirs++;
		bn = (daddr_t)uio->uio_offset;
		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (error = nfs_doio(bp, cred, p)) {
				brelse(bp);
				return (error);
			}
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 * directory offset cookie of the next block.
		 */
		rabn = bp->b_blkno;
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    rabn != 0 && rabn != np->n_direofoffset &&
		    !incore(vp, rabn)) {
			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    }
			}
		}
		on = 0;
		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		got_buf = 1;
		break;
	    };

	    if (n > 0) {
		/* baddr is only ever bp->b_data here (vestigial variable). */
		if (!baddr)
			baddr = bp->b_data;
		error = uiomove(baddr + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		/* Fully consumed or at EOF: age the buffer for reuse. */
		if (n + on == biosize || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		break;
	    case VLNK:
		/* A symlink is read in a single pass; force loop exit. */
		n = 0;
		break;
	    case VDIR:
		/* Advance to the next directory cookie saved in b_blkno. */
		uio->uio_offset = bp->b_blkno;
		break;
	    };
	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode
 * op for write using bio
 *
 * Writes user data into buffer cache blocks, coalescing the new bytes
 * into the block's dirty region and deciding per-block whether to do a
 * synchronous write, an async write-behind, or a delayed write.
 * Returns 0 on success or an errno; EINTR when a signal interrupts a
 * buffer wait on an interruptible mount.
 */
int
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	/* Report (once) any write error saved by a previous async write. */
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		/* Flush cached writes so the server's size/data are current. */
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
				return (error);
		}
		if (ioflag & IO_APPEND) {
			/* Get a fresh size from the server to append at. */
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		/* Logical block, offset within it, bytes this pass, and
		 * the block number in DEV_BSIZE units. */
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		/* Extend the cached file size (and VM object) if writing
		 * past the current end of file. */
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		/* Merge [on, on+n) into the block's dirty region. */
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
#ifndef notdef
		/* Grow the valid region to cover the dirty region; if they
		 * were disjoint, the valid region is simply replaced. */
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 * A fully-written block goes out async (write-behind);
		 * anything else becomes a delayed write.
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			if (error = VOP_BWRITE(bp))
				return (error);
		} else if ((n + on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
544 */ 545struct buf * 546nfs_getcacheblk(vp, bn, size, p) 547 struct vnode *vp; 548 daddr_t bn; 549 int size; 550 struct proc *p; 551{ 552 register struct buf *bp; 553 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 554 555 if (nmp->nm_flag & NFSMNT_INT) { 556 bp = getblk(vp, bn, size, PCATCH, 0); 557 while (bp == (struct buf *)0) { 558 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 559 return ((struct buf *)0); 560 bp = getblk(vp, bn, size, 0, 2 * hz); 561 } 562 } else 563 bp = getblk(vp, bn, size, 0, 0); 564 return (bp); 565} 566 567/* 568 * Flush and invalidate all dirty buffers. If another process is already 569 * doing the flush, just wait for completion. 570 */ 571int 572nfs_vinvalbuf(vp, flags, cred, p, intrflg) 573 struct vnode *vp; 574 int flags; 575 struct ucred *cred; 576 struct proc *p; 577 int intrflg; 578{ 579 register struct nfsnode *np = VTONFS(vp); 580 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 581 int error = 0, slpflag, slptimeo; 582 583 if ((nmp->nm_flag & NFSMNT_INT) == 0) 584 intrflg = 0; 585 if (intrflg) { 586 slpflag = PCATCH; 587 slptimeo = 2 * hz; 588 } else { 589 slpflag = 0; 590 slptimeo = 0; 591 } 592 /* 593 * First wait for any other process doing a flush to complete. 594 */ 595 while (np->n_flag & NFLUSHINPROG) { 596 np->n_flag |= NFLUSHWANT; 597 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 598 slptimeo); 599 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 600 return (EINTR); 601 } 602 603 /* 604 * Now, flush as required. 
605 */ 606 np->n_flag |= NFLUSHINPROG; 607 error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 608 while (error) { 609 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 610 np->n_flag &= ~NFLUSHINPROG; 611 if (np->n_flag & NFLUSHWANT) { 612 np->n_flag &= ~NFLUSHWANT; 613 wakeup((caddr_t)&np->n_flag); 614 } 615 return (EINTR); 616 } 617 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 618 } 619 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 620 if (np->n_flag & NFLUSHWANT) { 621 np->n_flag &= ~NFLUSHWANT; 622 wakeup((caddr_t)&np->n_flag); 623 } 624 return (0); 625} 626 627/* 628 * Initiate asynchronous I/O. Return an error if no nfsiods are available. 629 * This is mainly to avoid queueing async I/O requests when the nfsiods 630 * are all hung on a dead server. 631 */ 632int 633nfs_asyncio(bp, cred) 634 register struct buf *bp; 635 struct ucred *cred; 636{ 637 register int i; 638 639 if (nfs_numasync == 0) 640 return (EIO); 641 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 642 if (nfs_iodwant[i]) { 643 if (bp->b_flags & B_READ) { 644 if (bp->b_rcred == NOCRED && cred != NOCRED) { 645 crhold(cred); 646 bp->b_rcred = cred; 647 } 648 } else { 649 if (bp->b_wcred == NOCRED && cred != NOCRED) { 650 crhold(cred); 651 bp->b_wcred = cred; 652 } 653 } 654 655 TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); 656 nfs_iodwant[i] = (struct proc *)0; 657 wakeup((caddr_t)&nfs_iodwant[i]); 658 return (0); 659 } 660 return (EIO); 661} 662 663/* 664 * Do an I/O operation to/from a cache block. This may be called 665 * synchronously or from an nfsiod. 
666 */ 667int 668nfs_doio(bp, cr, p) 669 register struct buf *bp; 670 struct cred *cr; 671 struct proc *p; 672{ 673 register struct uio *uiop; 674 register struct vnode *vp; 675 struct nfsnode *np; 676 struct nfsmount *nmp; 677 int error = 0, diff, len; 678 struct uio uio; 679 struct iovec io; 680 681 vp = bp->b_vp; 682 np = VTONFS(vp); 683 nmp = VFSTONFS(vp->v_mount); 684 uiop = &uio; 685 uiop->uio_iov = &io; 686 uiop->uio_iovcnt = 1; 687 uiop->uio_segflg = UIO_SYSSPACE; 688 uiop->uio_procp = p; 689 690 /* 691 * Historically, paging was done with physio, but no more. 692 */ 693 if (bp->b_flags & B_PHYS) 694 panic("doio phys"); 695 if (bp->b_flags & B_READ) { 696 io.iov_len = uiop->uio_resid = bp->b_bcount; 697 io.iov_base = bp->b_data; 698 uiop->uio_rw = UIO_READ; 699 switch (vp->v_type) { 700 case VREG: 701 uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 702 nfsstats.read_bios++; 703 error = nfs_readrpc(vp, uiop, cr); 704 if (!error) { 705 bp->b_validoff = 0; 706 if (uiop->uio_resid) { 707 /* 708 * If len > 0, there is a hole in the file and 709 * no writes after the hole have been pushed to 710 * the server yet. 711 * Just zero fill the rest of the valid area. 
712 */ 713 diff = bp->b_bcount - uiop->uio_resid; 714 len = np->n_size - (bp->b_blkno * DEV_BSIZE 715 + diff); 716 if (len > 0) { 717 len = min(len, uiop->uio_resid); 718 bzero((char *)bp->b_data + diff, len); 719 bp->b_validend = diff + len; 720 } else 721 bp->b_validend = diff; 722 } else 723 bp->b_validend = bp->b_bcount; 724 } 725 if (p && (vp->v_flag & VTEXT) && 726 (((nmp->nm_flag & NFSMNT_NQNFS) && 727 np->n_lrev != np->n_brev) || 728 (!(nmp->nm_flag & NFSMNT_NQNFS) && 729 np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { 730 uprintf("Process killed due to text file modification\n"); 731 psignal(p, SIGKILL); 732 p->p_flag |= P_NOSWAP; 733 } 734 break; 735 case VLNK: 736 uiop->uio_offset = 0; 737 nfsstats.readlink_bios++; 738 error = nfs_readlinkrpc(vp, uiop, cr); 739 break; 740 case VDIR: 741 uiop->uio_offset = bp->b_lblkno; 742 nfsstats.readdir_bios++; 743 if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) 744 error = nfs_readdirlookrpc(vp, uiop, cr); 745 else 746 error = nfs_readdirrpc(vp, uiop, cr); 747 /* 748 * Save offset cookie in b_blkno. 749 */ 750 bp->b_blkno = uiop->uio_offset; 751 break; 752 }; 753 if (error) { 754 bp->b_flags |= B_ERROR; 755 bp->b_error = error; 756 } 757 } else { 758 io.iov_len = uiop->uio_resid = bp->b_dirtyend 759 - bp->b_dirtyoff; 760 uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) 761 + bp->b_dirtyoff; 762 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; 763 uiop->uio_rw = UIO_WRITE; 764 nfsstats.write_bios++; 765 if (bp->b_flags & B_APPENDWRITE) 766 error = nfs_writerpc(vp, uiop, cr, IO_APPEND); 767 else 768 error = nfs_writerpc(vp, uiop, cr, 0); 769 bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); 770 771 /* 772 * For an interrupted write, the buffer is still valid and the 773 * write hasn't been pushed to the server yet, so we can't set 774 * B_ERROR and report the interruption by setting B_EINTR. For 775 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 776 * is essentially a noop. 
777 */ 778 if (error == EINTR) { 779 bp->b_flags &= ~B_INVAL; 780 bp->b_flags |= B_DELWRI; 781 782 /* 783 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 784 * buffer to the clean list, we have to reassign it back to the 785 * dirty one. Ugh. 786 */ 787 if (bp->b_flags & B_ASYNC) 788 reassignbuf(bp, vp); 789 else 790 bp->b_flags |= B_EINTR; 791 } else { 792 if (error) { 793 bp->b_flags |= B_ERROR; 794 bp->b_error = np->n_error = error; 795 np->n_flag |= NWRITEERR; 796 } 797 bp->b_dirtyoff = bp->b_dirtyend = 0; 798 } 799 } 800 bp->b_resid = uiop->uio_resid; 801 biodone(bp); 802 return (error); 803} 804