nfs_bio.c revision 19449
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 * $Id: nfs_bio.c,v 1.28 1996/10/21 10:07:48 dfr Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>

static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
					struct proc *p));

extern int nfs_numasync;
extern struct nfsstats nfsstats;

/*
 * Ifdefs for FreeBSD-current's merged VM/buffer cache. It is unfortunate
 * that this isn't done inside getblk() and brelse() so these calls
 * wouldn't need to be here.
 */
#ifdef B_VMIO
#define vnode_pager_uncache(vp)
#else
#define vfs_busy_pages(bp, f)
#define vfs_unbusy_pages(bp)
#define vfs_dirty_pages(bp)
#endif

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff, i;
	struct buf *bp = 0, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, rabn;
	int bufsize;
	int nra, error = 0, n = 0, on = 0, not_readin;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = vp->v_mount->mnt_stat.f_iosize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR)
					nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
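	/*
	 * Main loop: handle one logical (bio-sized) block per pass, copy
	 * the requested bytes out with uiomove() and continue until the
	 * request is satisfied, EOF is reached (n == 0) or an error occurs.
	 */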
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
		    do {
			error = nqnfs_getlease(vp, ND_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR)
			    nfs_invaldir(vp);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    nfs_invaldir(vp);
		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		    if (error)
			return (error);
		}
	    }
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			return (nfs_readrpc(vp, uio, cred));
		case VLNK:
			return (nfs_readlinkrpc(vp, uio, cred));
		case VDIR:
			break;
		default:
			printf(" NQNFSNONCACHE: type %x unexpected\n",
				vp->v_type);
		};
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
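		/*
		 * Each read-ahead block that is not already in the cache is
		 * handed to an nfsiod via nfs_asyncio(); if no iod can take
		 * it, the buffer is simply invalidated and released rather
		 * than read synchronously here.
		 */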
		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = lbn + 1 + nra;
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL|B_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
again:
		bufsize = biosize;
		if ((off_t)(lbn + 1) * biosize > np->n_size &&
		    (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
			bufsize = np->n_size - lbn * biosize;
			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
		}
		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
			not_readin = 0;
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
			    brelse(bp);
			    return (error);
			}
		}
		if (bufsize > on) {
			n = min((unsigned)(bufsize - on), uio->uio_resid);
		} else {
			n = 0;
		}
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
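		/*
		 * If the buffer came straight from the cache (no read rpc
		 * was done above) but the bytes we want are not entirely
		 * inside its valid region, get rid of it (pushing any dirty
		 * data first) and retry with B_NOCACHE set so that a fresh
		 * read rpc is forced.
		 */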
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				bp->b_flags |= B_NOCACHE;
				if (bp->b_dirtyend > 0) {
				    if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				    if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
				} else
				    brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
				bp->b_flags |= B_ERROR;
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		on = 0;
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
		lbn = uio->uio_offset / NFS_DIRBLKSIZ;
		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
		if (!bp)
		    return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
		    bp->b_flags |= B_READ;
		    vfs_busy_pages(bp, 0);
		    error = nfs_doio(bp, cred, p);
		    if (error) {
		        vfs_unbusy_pages(bp);
			brelse(bp);
			while (error == NFSERR_BAD_COOKIE) {
			    nfs_invaldir(vp);
			    error = nfs_vinvalbuf(vp, 0, cred, p, 1);
			    /*
			     * Yuck! The directory has been modified on the
			     * server. The only way to get the block is by
			     * reading from the beginning to get all the
			     * offset cookies.
			     */
			    for (i = 0; i <= lbn && !error; i++) {
				bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
				if (!bp)
				    return (EINTR);
				if ((bp->b_flags & B_DONE) == 0) {
				    bp->b_flags |= B_READ;
				    vfs_busy_pages(bp, 0);
				    error = nfs_doio(bp, cred, p);
				    if (error) {
					vfs_unbusy_pages(bp);
					brelse(bp);
				    } else if (i < lbn)
					brelse(bp);
				}
			    }
			}
			if (error)
			    return (error);
		    }
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 *  directory offset cookie of the next block.)
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    (np->n_direofoffset == 0 ||
		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
		    !(np->n_flag & NQNFSNONCACHE) &&
		    !incore(vp, lbn + 1)) {
			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL|B_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		}
		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
		break;
	    default:
		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
		break;
	    };

	    if (n > 0) {
		error = uiomove(bp->b_data + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		n = 0;
		break;
	    case VDIR:
		if (np->n_flag & NQNFSNONCACHE)
			bp->b_flags |= B_INVAL;
		break;
	    default:
		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
	    }
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn;
	int bufsize;
	int n, on, error = 0, iomode, must_commit;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
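	/*
	 * For synchronous or appending writes, push any locally modified
	 * data back first; an append also needs fresh attributes from the
	 * server so that uio_offset can be set to the true end of file.
	 */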
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = vp->v_mount->mnt_stat.f_iosize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
		    iomode = NFSV3WRITE_FILESYNC;
		    error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
		    if (must_commit)
			nfs_clearcommit(vp->v_mount);
		    return (error);
		}
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
again:
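		/*
		 * If this write extends the file, bump np->n_size now and
		 * tell the VM layer about the new size so the page cache
		 * stays consistent with what the buffer code believes.
		 */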
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}
		bufsize = biosize;
		if ((lbn + 1) * biosize > np->n_size) {
			bufsize = np->n_size - lbn * biosize;
			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
		}
		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;

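		/*
		 * Don't let a stale dirty region extend past the current
		 * end of file (b_blkno is in DEV_BSIZE units).
		 */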
		if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
			bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
			if (np->n_flag & NQNFSNONCACHE) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		} else if ((n + on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
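			/*
			 * A completely filled bio-sized block can be pushed
			 * to the server asynchronously right away; partially
			 * filled blocks fall through to bdwrite() below so
			 * later writes get a chance to coalesce with them.
			 */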
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
static struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int biosize = vp->v_mount->mnt_stat.f_iosize;

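	/*
	 * For interruptible mounts, sleep in getblk() with PCATCH so a
	 * signal can wake us.  If getblk() returns NULL, give up when a
	 * signal is pending, otherwise retry with a two second timeout.
	 */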
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);

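	/*
	 * For regular files, map the logical block number to a DEV_BSIZE
	 * based block number so that b_blkno * DEV_BSIZE is the buffer's
	 * byte offset within the file.
	 */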
	if( vp->v_type == VREG)
		bp->b_blkno = (bn * biosize) / DEV_BSIZE;

	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
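	/*
	 * If vinvalbuf() fails (e.g. its sleep was interrupted), bail out
	 * with EINTR when a signal is pending on an interruptible mount,
	 * waking any waiter; otherwise keep retrying without PCATCH.
	 */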
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	struct nfsmount *nmp;
	int i;
	int gotiod;
	int slpflag = 0;
	int slptimeo = 0;
	int error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waking iod %d for mount %p\n",
				 i, nmp));
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}

	/*
	 * If none are free, we may already have an iod working on this mount
	 * point.  If so, it will process our request.
	 */
	if (!gotiod) {
		if (nmp->nm_bufqiods > 0) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: %d iods are already processing mount %p\n",
				 nmp->nm_bufqiods, nmp));
			gotiod = TRUE;
		}
	}

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
				       "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0) {
				NFS_DPF(ASYNCIO,
					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
				goto again;
			}
		}

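		/*
		 * Queue the buffer with a reference to the proper credential
		 * held (read vs. write cred), so the nfsiod can issue the
		 * rpc on the original caller's behalf.
		 */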
		if (bp->b_flags & B_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
	    /*
	     * ...though reading /dev/drum still gets us here.
	     */
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    /* mapping was done by vmapbuf() */
	    io.iov_base = bp->b_data;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
	    if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop, cr);
	    } else {
		int com;

		iomode = NFSV3WRITE_DATASYNC;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else if (bp->b_flags & B_READ) {
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    io.iov_base = bp->b_data;
	    uiop->uio_rw = UIO_READ;
	    switch (vp->v_type) {
	    case VREG:
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop, cr);
		if (!error) {
		    bp->b_validoff = 0;
		    if (uiop->uio_resid) {
			/*
			 * If len > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet.
			 * Just zero fill the rest of the valid area.
			 */
			diff = bp->b_bcount - uiop->uio_resid;
			len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
				+ diff);
			if (len > 0) {
			    len = min(len, uiop->uio_resid);
			    bzero((char *)bp->b_data + diff, len);
			    bp->b_validend = diff + len;
			} else
			    bp->b_validend = diff;
		    } else
			bp->b_validend = bp->b_bcount;
		}
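		/*
		 * If the text of a running executable has changed on the
		 * server (lease revision bumped under nqnfs, or mtime no
		 * longer matches for plain nfs), the process can no longer
		 * safely page in from this file, so kill it and keep it
		 * from being swapped out while the signal is delivered.
		 */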
		if (p && (vp->v_flag & VTEXT) &&
			(((nmp->nm_flag & NFSMNT_NQNFS) &&
			  NQNFS_CKINVALID(vp, np, ND_READ) &&
			  np->n_lrev != np->n_brev) ||
			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
			uprintf("Process killed due to text file modification\n");
			psignal(p, SIGKILL);
#ifdef __NetBSD__
			p->p_holdcnt++;
#else
			p->p_flag |= P_NOSWAP;
#endif
		}
		break;
	    case VLNK:
		uiop->uio_offset = (off_t)0;
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, cr);
		break;
	    case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop, cr);
			if (error == NFSERR_NOTSUPP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop, cr);
		break;
	    default:
		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
		break;
	    };
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else {
	    if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
		bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);

	    if (bp->b_dirtyend > bp->b_dirtyoff) {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
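		/*
		 * Plain async writes go out as UNSTABLE (the server may
		 * cache them and they must be committed later, marked via
		 * B_NEEDCOMMIT below); everything else uses FILESYNC so the
		 * data is on stable storage when the rpc completes.
		 */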
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
		    iomode = NFSV3WRITE_UNSTABLE;
		else
		    iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
		if (!error && iomode == NFSV3WRITE_UNSTABLE)
		    bp->b_flags |= B_NEEDCOMMIT;
		else
		    bp->b_flags &= ~B_NEEDCOMMIT;
		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid
		 * and the write hasn't been pushed to the server yet,
		 * so we can't set B_ERROR and report the interruption
		 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
		 * is not relevant, so the rpc attempt is essentially
		 * a noop.  For the case of a V3 write rpc not being
		 * committed to stable storage, the block is still
		 * dirty and requires either a commit rpc or another
		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
		 * the block is reused. This is indicated by setting
		 * the B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR
		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
			bp->b_flags |= B_DELWRI;

		/*
		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
		 * buffer to the clean list, we have to reassign it back to the
		 * dirty one. Ugh.
		 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	    } else {
		bp->b_resid = 0;
		biodone(bp);
		return (0);
	    }
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}