/*	$OpenBSD: nfs_bio.c,v 1.86 2024/05/01 13:15:59 jsg Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;
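/*
 * Queue of buffers handed off to the nfsiod daemons for asynchronous
 * I/O, along with its limit and current length (see nfs_asyncio()).
 */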
struct nfs_bufqhead nfs_bufq;
uint32_t nfs_bufqmax, nfs_bufqlen;

struct buf *nfs_getcacheblk(struct vnode *, daddr_t, int, struct proc *);

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC 1094 does not specify the criteria, the following
	 * is believed to be compatible with the reference port.
	 * If the file's modify time on the server has changed since the
	 * last read rpc, or you have written to the file, you may have
	 * lost data cache consistency with the server, so flush all of
	 * the file's data out of the cache.  Then force a getattr rpc to
	 * ensure that you have up to date attributes.
	 */
	if (np->n_flag & NMODIFIED) {
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
	    if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
		    return (nfs_readlinkrpc(vp, uio, cred));
	    }
	    baddr = NULL;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
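		/*
		 * Map the file offset onto a cache block: lbn is the
		 * logical block number, on is the offset within that
		 * block, and bn is the block address in the DEV_BSIZE
		 * units used by the buffer cache.
		 */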
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		bn = lbn * (biosize / DEV_BSIZE);
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, 1)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    } else
				brelse(rabp);
			}
		    }
		}

again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		got_buf = 1;
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			not_readin = 0;
			error = nfs_doio(bp, p);
			if (error) {
			    brelse(bp);
			    return (error);
			}
		}
		n = ulmin(biosize - on, uio->uio_resid);
		offdiff = np->n_size - uio->uio_offset;
		if (offdiff < (off_t)n)
			n = (int)offdiff;
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				bp->b_flags |= B_INVAFTERWRITE;
				if (bp->b_dirtyend > 0) {
				    if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				    if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
				} else
				    brelse(bp);
				goto again;
			}
		}
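		/*
		 * Clamp the transfer so it does not extend past the
		 * valid region of the buffer.
		 */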
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			error = nfs_doio(bp, p);
			if (error) {
				brelse(bp);
				return (error);
			}
		}
		n = ulmin(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    default:
		panic("nfsbioread: type %x unexpected", vp->v_type);
		break;
	    }

	    if (n > 0) {
		if (!baddr)
			baddr = bp->b_data;
		error = uiomove(baddr + on, n, uio);
	    }

	    if (vp->v_type == VLNK)
		n = 0;

	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(void *v)
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;
	ssize_t overrun;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	/* do the filesize rlimit check */
	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
		return (error);

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
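		/*
		 * As in nfs_bioread(), map the file offset onto a cache
		 * block and clamp the transfer to the remainder of the
		 * block.
		 */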
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = ulmin(biosize - on, uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp) {
			error = EINTR;
			goto out;
		}
		np->n_flag |= NMODIFIED;
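		/*
		 * If the write reaches past the current EOF, grow both
		 * our notion of the size and uvm's; the extended and
		 * truncated flags feed the kevent notification after
		 * the loop.
		 */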
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR) {
				error = EINTR;
				goto out;
			}
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			goto out;
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

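		/*
		 * Push the buffer: synchronously for IO_SYNC writes,
		 * asynchronously when the write extends to the end of
		 * the block, and as a delayed write otherwise, in the
		 * hope that more of the block is dirtied before it has
		 * to go to the server.
		 */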
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				goto out;
		} else if ((n + on) == biosize) {
			bp->b_proc = NULL;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

/*out: XXX belongs here??? */
	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

out:
	/* correct the result for writes clamped by vn_fsizechk() */
	uio->uio_resid += overrun;

	return (error);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct proc *p)
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

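	/*
	 * On interruptible mounts, sleep with PCATCH so a signal can
	 * break the wait; getblk() then returns NULL, and we check for
	 * a pending signal before retrying with a two second timeout.
	 */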
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, INFSLP);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, p))
				return (NULL);
			bp = getblk(vp, bn, size, 0, SEC_TO_NSEC(2));
		}
	} else
		bp = getblk(vp, bn, size, 0, INFSLP);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	struct nfsmount		*nmp = VFSTONFS(vp->v_mount);
	struct nfsnode		*np = VTONFS(vp);
	uint64_t		 stimeo;
	int			 error, sintr;

	stimeo = INFSLP;
	error = sintr = 0;

	if (ISSET(nmp->nm_flag, NFSMNT_INT)) {
		sintr = PCATCH;
		stimeo = SEC_TO_NSEC(2);
	}

	/* First wait for any other process doing a flush to complete. */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep_nsec(&np->n_flag, PRIBIO|sintr, "nfsvinval",
		    stimeo);
		if (error && sintr && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/* Now, flush as required. */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, sintr, INFSLP);
	while (error) {
		if (sintr && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup(&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, stimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(struct buf *bp, int readahead)
{
	if (nfs_numasync == 0)
		goto out;

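	/*
	 * If the queue is over its limit, drop readahead requests (they
	 * are only an optimization) but let writes wait for room.
	 */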
	while (nfs_bufqlen > nfs_bufqmax)
		if (readahead)
			goto out;
		else
			tsleep_nsec(&nfs_bufqlen, PRIBIO, "nfs_bufq", INFSLP);

	if ((bp->b_flags & B_READ) == 0) {
		bp->b_flags |= B_WRITEINPROG;
	}

	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
	nfs_bufqlen++;

	wakeup_one(&nfs_bufq);
	return (0);

out:
	nfsstats.forcedsync++;
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(struct buf *bp, struct proc *p)
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
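	/*
	 * Build an on-stack uio/iovec describing the buffer's data;
	 * the nfs_*rpc() routines below consume it like any other uio.
	 */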
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    /* mapping was done by vmapbuf() */
	    io.iov_base = bp->b_data;
	    uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
	    if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop);
	    } else {
		iomode = NFSV3WRITE_DATASYNC;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else if (bp->b_flags & B_READ) {
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    io.iov_base = bp->b_data;
	    uiop->uio_rw = UIO_READ;
	    switch (vp->v_type) {
	    case VREG:
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		nfsstats.read_bios++;
		bcstats.pendingreads++;
		bcstats.numreads++;
		error = nfs_readrpc(vp, uiop);
		if (!error) {
		    bp->b_validoff = 0;
		    if (uiop->uio_resid) {
			/*
			 * If len > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet.
			 * Just zero fill the rest of the valid area.
			 */
			diff = bp->b_bcount - uiop->uio_resid;
			len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
				+ diff);
			if (len > 0) {
			    len = ulmin(len, uiop->uio_resid);
			    memset((char *)bp->b_data + diff, 0, len);
			    bp->b_validend = diff + len;
			} else
			    bp->b_validend = diff;
		    } else
			bp->b_validend = bp->b_bcount;
		}
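		/*
		 * If the file is being executed and its modification
		 * time changed on the server, the cached text no longer
		 * matches what is running; kill the process.
		 */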
		if (p && (vp->v_flag & VTEXT) &&
		    (timespeccmp(&np->n_mtime, &np->n_vattr.va_mtime, !=))) {
			uprintf("Process killed due to text file modification\n");
			psignal(p, SIGKILL);
		}
		break;
	    case VLNK:
		uiop->uio_offset = (off_t)0;
		nfsstats.readlink_bios++;
		bcstats.pendingreads++;
		bcstats.numreads++;
		error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
		break;
	    default:
		panic("nfs_doio: type %x unexpected", vp->v_type);
		break;
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else {
	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
		- bp->b_dirtyoff;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		+ bp->b_dirtyoff;
	    io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
	    uiop->uio_rw = UIO_WRITE;
	    nfsstats.write_bios++;
	    bcstats.pendingwrites++;
	    bcstats.numwrites++;
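	    /*
	     * Plain async writes may go out UNSTABLE and be committed
	     * later; everything else (sync, nocache, or already needing
	     * a commit) is written FILESYNC so the server puts it on
	     * stable storage at once.
	     */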
	    if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
		iomode = NFSV3WRITE_UNSTABLE;
	    else
		iomode = NFSV3WRITE_FILESYNC;
	    bp->b_flags |= B_WRITEINPROG;
	    error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

	    rw_enter_write(&np->n_commitlock);
	    if (!error && iomode == NFSV3WRITE_UNSTABLE) {
		bp->b_flags |= B_NEEDCOMMIT;
		nfs_add_tobecommitted_range(vp, bp);
	    } else {
		bp->b_flags &= ~B_NEEDCOMMIT;
		nfs_del_committed_range(vp, bp);
	    }
	    rw_exit_write(&np->n_commitlock);

	    bp->b_flags &= ~B_WRITEINPROG;

	    /*
	     * For an interrupted write, the buffer is still valid and the
	     * write hasn't been pushed to the server yet, so we can't set
	     * B_ERROR; instead, report the interruption by setting B_EINTR.
	     * For the B_ASYNC case, B_EINTR is not relevant, so the rpc
	     * attempt is essentially a noop.
	     * For the case of a V3 write rpc not being committed to stable
	     * storage, the block is still dirty and requires either a commit
	     * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
	     * before the block is reused. This is indicated by setting the
	     * B_DELWRI and B_NEEDCOMMIT flags.
	     */
	    if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
		    s = splbio();
		    buf_dirty(bp);
		    splx(s);

		    if (!(bp->b_flags & B_ASYNC) && error)
			    bp->b_flags |= B_EINTR;
	    } else {
		if (error) {
		    bp->b_flags |= B_ERROR;
		    bp->b_error = np->n_error = error;
		    np->n_flag |= NWRITEERR;
		}
		bp->b_dirtyoff = bp->b_dirtyend = 0;
	    }
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}