vfs_bio.c revision 6948
1/*
2 * Copyright (c) 1994 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice immediately at the beginning of the file, without modification,
10 *    this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 *    John S. Dyson.
16 * 4. This work was done expressly for inclusion into FreeBSD.  Other use
17 *    is allowed if this notation is included.
18 * 5. Modifications may be freely made to this file if the above conditions
19 *    are met.
20 *
21 * $Id: vfs_bio.c,v 1.34 1995/03/04 15:16:07 davidg Exp $
22 */
23
24/*
25 * this file contains a new buffer I/O scheme implementing a coherent
26 * VM object and buffer cache scheme.  Pains have been taken to make
27 * sure that the performance degradation often associated with such
28 * schemes is not incurred.
29 *
30 * Author:  John S. Dyson
31 * Significant help during the development and debugging phases
32 * was provided by David Greenman, also of the FreeBSD core team.
33 */
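
/*
 * Overview of the scheme (the routines below are authoritative):
 *
 * A buffer header describes up to MAXBSIZE bytes of kernel virtual
 * address space.  Two kinds of buffers exist here:
 *
 *  - B_VMIO buffers: the pages behind bp->b_data belong to the VM object
 *    of the associated vnode (vp->v_vmdata) and are mapped into the
 *    buffer's KVA by allocbuf() with pmap_qenter(), so the data stays
 *    coherent with the VM page cache.
 *
 *  - non-VMIO buffers: anonymous wired pages are placed into the
 *    buffer's KVA by vm_hold_load_pages() and released again by
 *    vm_hold_free_pages().
 */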
34
35#define VMIO
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/proc.h>
40#include <sys/vnode.h>
41#include <vm/vm.h>
42#include <vm/vm_pageout.h>
43#include <vm/vm_page.h>
44#include <vm/vm_object.h>
45#include <sys/buf.h>
46#include <sys/mount.h>
47#include <sys/malloc.h>
48#include <sys/resourcevar.h>
49#include <sys/proc.h>
50
51#include <miscfs/specfs/specdev.h>
52
53struct buf *buf;		/* buffer header pool */
54int nbuf;			/* number of buffer headers calculated
55				 * elsewhere */
56struct swqueue bswlist;
57
58extern vm_map_t buffer_map, io_map, kernel_map, pager_map;
59
60void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
61void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
62void vfs_dirty_pages(struct buf * bp);
63void vfs_busy_pages(struct buf *, int clear_modify);
64
65int needsbuffer;
66
67/*
68 * Internal update daemon, process 3
69 *	The variable vfs_update_wakeup allows for internal syncs.
70 */
71int vfs_update_wakeup;
72
73
74/*
75 * buffers base kva
76 */
77caddr_t buffers_kva;
78
79/*
80 * bogus page -- for I/O to/from partially complete buffers
81 * This is a temporary solution to the problem, but it is not
82 * really that bad.  It would be better to split the buffer
83 * for input in the case of buffers partially already in memory,
84 * but the code is intricate enough already.
85 */
86vm_page_t bogus_page;
87vm_offset_t bogus_offset;
88
89int bufspace, maxbufspace;
90
91/*
92 * advisory minimum for size of LRU queue or VMIO queue
93 */
94int minbuf;
95
96/*
97 * Initialize buffer headers and related structures.
98 */
99void
100bufinit()
101{
102	struct buf *bp;
103	int i;
104
105	TAILQ_INIT(&bswlist);
106	LIST_INIT(&invalhash);
107
108	/* first, make a null hash table */
109	for (i = 0; i < BUFHSZ; i++)
110		LIST_INIT(&bufhashtbl[i]);
111
112	/* next, make a null set of free lists */
113	for (i = 0; i < BUFFER_QUEUES; i++)
114		TAILQ_INIT(&bufqueues[i]);
115
116	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
117	/* finally, initialize each buffer header and stick on empty q */
118	for (i = 0; i < nbuf; i++) {
119		bp = &buf[i];
120		bzero(bp, sizeof *bp);
121		bp->b_flags = B_INVAL;	/* we're just an empty header */
122		bp->b_dev = NODEV;
123		bp->b_vp = NULL;
124		bp->b_rcred = NOCRED;
125		bp->b_wcred = NOCRED;
126		bp->b_qindex = QUEUE_EMPTY;
127		bp->b_vnbufs.le_next = NOLIST;
128		bp->b_data = buffers_kva + i * MAXBSIZE;
129		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
130		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
131	}
132/*
133 * this will change later!!!
134 */
135	minbuf = nbuf / 3;
136	maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;
137
138	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
139	bogus_page = vm_page_alloc(kernel_object,
140			bogus_offset - VM_MIN_KERNEL_ADDRESS, VM_ALLOC_NORMAL);
141
142}
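
/*
 * Note on the layout established above: buffer i owns the MAXBSIZE-byte
 * KVA window starting at buffers_kva + i * MAXBSIZE (for example, if
 * MAXBSIZE is 65536, the third header, i == 2, maps its pages at
 * buffers_kva + 131072).  allocbuf() maps only as many pages into that
 * window as the current b_bufsize requires.
 */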
143
144/*
145 * remove the buffer from the appropriate free list
146 */
147void
148bremfree(struct buf * bp)
149{
150	int s = splbio();
151
152	if (bp->b_qindex != QUEUE_NONE) {
153		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
154		bp->b_qindex = QUEUE_NONE;
155	} else {
156		panic("bremfree: removing a buffer when not on a queue");
157	}
158	splx(s);
159}
160
161/*
162 * Get a buffer with the specified data.  Look in the cache first.
163 */
164int
165bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
166    struct buf ** bpp)
167{
168	struct buf *bp;
169
170	bp = getblk(vp, blkno, size, 0, 0);
171	*bpp = bp;
172
173	/* if not found in cache, do some I/O */
174	if ((bp->b_flags & B_CACHE) == 0) {
175		if (curproc && curproc->p_stats)	/* count block I/O */
176			curproc->p_stats->p_ru.ru_inblock++;
177		bp->b_flags |= B_READ;
178		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
179		if (bp->b_rcred == NOCRED) {
180			if (cred != NOCRED)
181				crhold(cred);
182			bp->b_rcred = cred;
183		}
184		vfs_busy_pages(bp, 0);
185		VOP_STRATEGY(bp);
186		return (biowait(bp));
187	}
188	return (0);
189}
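
/*
 * Typical bread() usage by a filesystem (an illustrative sketch only;
 * the vnode "vp", logical block "lbn" and size "bsize" are hypothetical):
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lbn, bsize, NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	(use bp->b_data, then...)
 *	brelse(bp);	(or bdwrite()/bwrite() if the data was modified)
 *
 * bread() always hands back a B_BUSY buffer, even on error, so the
 * caller is responsible for releasing it.
 */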
190
191/*
192 * Operates like bread, but also starts asynchronous I/O on
193 * read-ahead blocks.
194 */
195int
196breadn(struct vnode * vp, daddr_t blkno, int size,
197    daddr_t * rablkno, int *rabsize,
198    int cnt, struct ucred * cred, struct buf ** bpp)
199{
200	struct buf *bp, *rabp;
201	int i;
202	int rv = 0, readwait = 0;
203
204	*bpp = bp = getblk(vp, blkno, size, 0, 0);
205
206	/* if not found in cache, do some I/O */
207	if ((bp->b_flags & B_CACHE) == 0) {
208		if (curproc && curproc->p_stats)	/* count block I/O */
209			curproc->p_stats->p_ru.ru_inblock++;
210		bp->b_flags |= B_READ;
211		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
212		if (bp->b_rcred == NOCRED) {
213			if (cred != NOCRED)
214				crhold(cred);
215			bp->b_rcred = cred;
216		}
217		vfs_busy_pages(bp, 0);
218		VOP_STRATEGY(bp);
219		++readwait;
220	}
221	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
222		if (inmem(vp, *rablkno))
223			continue;
224		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);
225
226		if ((rabp->b_flags & B_CACHE) == 0) {
227			if (curproc && curproc->p_stats)
228				curproc->p_stats->p_ru.ru_inblock++;
229			rabp->b_flags |= B_READ | B_ASYNC;
230			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
231			if (rabp->b_rcred == NOCRED) {
232				if (cred != NOCRED)
233					crhold(cred);
234				rabp->b_rcred = cred;
235			}
236			vfs_busy_pages(rabp, 0);
237			VOP_STRATEGY(rabp);
238		} else {
239			brelse(rabp);
240		}
241	}
242
243	if (readwait) {
244		rv = biowait(bp);
245	}
246	return (rv);
247}
248
249/*
250 * Write, release buffer on completion.  (Done by iodone
251 * if async.)
252 */
253int
254bwrite(struct buf * bp)
255{
256	int oldflags = bp->b_flags;
257
258	if (bp->b_flags & B_INVAL) {
259		brelse(bp);
260		return (0);
261	}
262	if (!(bp->b_flags & B_BUSY))
263		panic("bwrite: buffer is not busy???");
264
265	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
266	bp->b_flags |= B_WRITEINPROG;
267
268	if (oldflags & B_ASYNC) {
269		if (oldflags & B_DELWRI) {
270			reassignbuf(bp, bp->b_vp);
271		} else if (curproc) {
272			++curproc->p_stats->p_ru.ru_oublock;
273		}
274	}
275	bp->b_vp->v_numoutput++;
276	vfs_busy_pages(bp, 1);
277	VOP_STRATEGY(bp);
278
279	if ((oldflags & B_ASYNC) == 0) {
280		int rtval = biowait(bp);
281
282		if (oldflags & B_DELWRI) {
283			reassignbuf(bp, bp->b_vp);
284		} else if (curproc) {
285			++curproc->p_stats->p_ru.ru_oublock;
286		}
287		brelse(bp);
288		return (rtval);
289	}
290	return (0);
291}
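
/*
 * Write path summary: when B_ASYNC is set by the caller, bwrite() returns
 * 0 as soon as the I/O has been handed to VOP_STRATEGY(); the buffer is
 * released later by brelse() from biodone().  For synchronous writes the
 * routine sleeps in biowait(), releases the buffer itself, and returns
 * the error status of the I/O.
 */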
292
293int
294vn_bwrite(ap)
295	struct vop_bwrite_args *ap;
296{
297	return (bwrite(ap->a_bp));
298}
299
300/*
301 * Delayed write. (Buffer is marked dirty).
302 */
303void
304bdwrite(struct buf * bp)
305{
306
307	if ((bp->b_flags & B_BUSY) == 0) {
308		panic("bdwrite: buffer is not busy");
309	}
310	if (bp->b_flags & B_INVAL) {
311		brelse(bp);
312		return;
313	}
314	if (bp->b_flags & B_TAPE) {
315		bawrite(bp);
316		return;
317	}
318	bp->b_flags &= ~B_READ;
319	vfs_dirty_pages(bp);
320	if ((bp->b_flags & B_DELWRI) == 0) {
321		if (curproc)
322			++curproc->p_stats->p_ru.ru_oublock;
323		bp->b_flags |= B_DONE | B_DELWRI;
324		reassignbuf(bp, bp->b_vp);
325	}
326	if (bp->b_lblkno == bp->b_blkno) {
327		VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
328	}
329	brelse(bp);
330	return;
331}
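
/*
 * A delayed write leaves the buffer on a free list with B_DELWRI set;
 * the data reaches the disk later, either when the buffer is reclaimed
 * by getnewbuf() (which converts it to an async write via
 * vfs_bio_awrite()) or when the update daemon syncs.  The VOP_BMAP()
 * call above resolves the physical block number early so that the
 * deferred write can later be clustered with its neighbors.
 */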
332
333/*
334 * Asynchronous write.
335 * Start output on a buffer, but do not wait for it to complete.
336 * The buffer is released when the output completes.
337 */
338void
339bawrite(struct buf * bp)
340{
341	struct vnode *vp;
342	vp = bp->b_vp;
343	bp->b_flags |= B_ASYNC;
344	(void) bwrite(bp);
345}
346
347/*
348 * Release a buffer.
349 */
350void
351brelse(struct buf * bp)
352{
353	int s;
354
355	if (bp->b_flags & B_CLUSTER) {
356		relpbuf(bp);
357		return;
358	}
359	/* anyone need a "free" block? */
360	s = splbio();
361
362	if (needsbuffer) {
363		needsbuffer = 0;
364		wakeup((caddr_t) &needsbuffer);
365	}
366
367	/* anyone need this block? */
368	if (bp->b_flags & B_WANTED) {
369		bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE);
370		wakeup((caddr_t) bp);
371	} else if (bp->b_flags & B_VMIO) {
372		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
373		wakeup((caddr_t) bp);
374	}
375	if (bp->b_flags & B_LOCKED)
376		bp->b_flags &= ~B_ERROR;
377
378	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
379	    (bp->b_bufsize <= 0)) {
380		bp->b_flags |= B_INVAL;
381		bp->b_flags &= ~(B_DELWRI | B_CACHE);
382		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
383			brelvp(bp);
384	}
385
386	/*
387	 * VMIO buffer rundown.  It is not strictly necessary to keep a VMIO buffer
388	 * constituted, so the B_INVAL flag is used to *invalidate* the buffer,
389	 * but the VM object is kept around.  The B_NOCACHE flag is used to
390	 * invalidate the pages in the VM object.
391	 */
392	if (bp->b_flags & B_VMIO) {
393		vm_offset_t foff;
394		vm_object_t obj;
395		int i, resid;
396		vm_page_t m;
397		int iototal = bp->b_bufsize;
398
399		foff = 0;
400		obj = 0;
401		if (bp->b_npages) {
402			if (bp->b_vp && bp->b_vp->v_mount) {
403				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
404			} else {
405				/*
406				 * vnode pointer has been ripped away --
407				 * probably file gone...
408				 */
409				foff = bp->b_pages[0]->offset;
410			}
411		}
412		for (i = 0; i < bp->b_npages; i++) {
413			m = bp->b_pages[i];
414			if (m == bogus_page) {
415				panic("brelse: bogus page found");
416			}
417			resid = (m->offset + PAGE_SIZE) - foff;
418			if (resid > iototal)
419				resid = iototal;
420			if (resid > 0) {
421				if (bp->b_flags & (B_ERROR | B_NOCACHE)) {
422					vm_page_set_invalid(m, foff, resid);
423				} else if ((bp->b_flags & B_DELWRI) == 0) {
424					vm_page_set_clean(m, foff, resid);
425					vm_page_set_valid(m, foff, resid);
426				}
427			} else {
428				vm_page_test_dirty(m);
429			}
430			foff += resid;
431			iototal -= resid;
432		}
433
434		if (bp->b_flags & B_INVAL) {
435			for (i = 0; i < bp->b_npages; i++) {
436				m = bp->b_pages[i];
437				--m->bmapped;
438				if (m->bmapped == 0) {
439					PAGE_WAKEUP(m);
440					if (m->valid == 0) {
441						vm_page_protect(m, VM_PROT_NONE);
442						vm_page_free(m);
443					}
444#if 1
445					else if ((m->dirty & m->valid) == 0 &&
446						(m->flags & PG_REFERENCED) == 0 &&
447							!pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
448						vm_page_cache(m);
449#endif
450					else if ((m->flags & PG_ACTIVE) == 0) {
451						vm_page_activate(m);
452						m->act_count = 0;
453					}
454				}
455			}
456			bufspace -= bp->b_bufsize;
457			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
458			bp->b_npages = 0;
459			bp->b_bufsize = 0;
460			bp->b_flags &= ~B_VMIO;
461			if (bp->b_vp)
462				brelvp(bp);
463		}
464	}
465	if (bp->b_qindex != QUEUE_NONE)
466		panic("brelse: free buffer onto another queue???");
467
468	/* enqueue */
469	/* buffers with no memory */
470	if (bp->b_bufsize == 0) {
471		bp->b_qindex = QUEUE_EMPTY;
472		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
473		LIST_REMOVE(bp, b_hash);
474		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
475		bp->b_dev = NODEV;
476		/* buffers with junk contents */
477	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) {
478		bp->b_qindex = QUEUE_AGE;
479		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
480		LIST_REMOVE(bp, b_hash);
481		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
482		bp->b_dev = NODEV;
483		/* buffers that are locked */
484	} else if (bp->b_flags & B_LOCKED) {
485		bp->b_qindex = QUEUE_LOCKED;
486		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
487		/* buffers with stale but valid contents */
488	} else if (bp->b_flags & B_AGE) {
489		bp->b_qindex = QUEUE_AGE;
490		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
491		/* buffers with valid and quite potentially reusable contents */
492	} else {
493		bp->b_qindex = QUEUE_LRU;
494		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
495	}
496
497	/* unlock */
498	bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE);
499	splx(s);
500}
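
/*
 * Queue placement performed above:
 *
 *	QUEUE_EMPTY	headers with no memory attached
 *	QUEUE_AGE	buffers with invalid or stale contents
 *	QUEUE_LOCKED	buffers that may not be reused (B_LOCKED)
 *	QUEUE_LRU	buffers with valid, potentially reusable contents
 *
 * getnewbuf() reclaims from EMPTY first, then AGE, then LRU.
 */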
501
502/*
503 * This routine implements clustered async writes for
504 * clearing out B_DELWRI buffers.  This is much better
505 * than the old way of writing only one buffer at a time.
506 */
507void
508vfs_bio_awrite(struct buf * bp)
509{
510	int i;
511	daddr_t lblkno = bp->b_lblkno;
512	struct vnode *vp = bp->b_vp;
513	int s;
514	int ncl;
515	struct buf *bpa;
516
517	s = splbio();
518	if (vp->v_mount && (vp->v_flag & VVMIO) &&
519	    (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
520		int size = vp->v_mount->mnt_stat.f_iosize;
521
522		for (i = 1; i < MAXPHYS / size; i++) {
523			if ((bpa = incore(vp, lblkno + i)) &&
524			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) &&
525			    (bpa->b_bufsize == size)) {
526				if ((bpa->b_blkno == bpa->b_lblkno) ||
527				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
528					break;
529			} else {
530				break;
531			}
532		}
533		ncl = i;
534		/*
535		 * this is a possible cluster write
536		 */
537		if (ncl != 1) {
538			bremfree(bp);
539			cluster_wbuild(vp, bp, size, lblkno, ncl, -1);
540			splx(s);
541			return;
542		}
543	}
544	/*
545	 * default (old) behavior, writing out only one block
546	 */
547	bremfree(bp);
548	bp->b_flags |= B_BUSY | B_ASYNC;
549	bwrite(bp);
550	splx(s);
551}
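
/*
 * The contiguity test above requires candidate block i to start at
 * bp->b_blkno + (i * size) / DEV_BSIZE.  With illustrative numbers, for
 * a filesystem block size of 8192 and DEV_BSIZE of 512, block i must
 * begin exactly 16 * i sectors past the first block; any gap, or a still
 * unresolved b_blkno, ends the cluster.
 */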
552
553
554/*
555 * Find a buffer header which is available for use.
556 */
557struct buf *
558getnewbuf(int slpflag, int slptimeo, int doingvmio)
559{
560	struct buf *bp;
561	int s;
562	int firstbp = 1;
563
564	s = splbio();
565start:
566	if (bufspace >= maxbufspace)
567		goto trytofreespace;
568
569	/* can we constitute a new buffer? */
570	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
571		if (bp->b_qindex != QUEUE_EMPTY)
572			panic("getnewbuf: inconsistent EMPTY queue");
573		bremfree(bp);
574		goto fillbuf;
575	}
576trytofreespace:
577	/*
578	 * We keep the file I/O from hogging metadata I/O.
579	 * This is desirable because file data is cached in the
580	 * VM/Buffer cache even if a buffer is freed.
581	 */
582	if ((bp = bufqueues[QUEUE_AGE].tqh_first)) {
583		if (bp->b_qindex != QUEUE_AGE)
584			panic("getnewbuf: inconsistent AGE queue");
585	} else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
586		if (bp->b_qindex != QUEUE_LRU)
587			panic("getnewbuf: inconsistent LRU queue");
588	}
589	if (!bp) {
590		/* wait for a free buffer of any kind */
591		needsbuffer = 1;
592		tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
593		splx(s);
594		return (0);
595	}
596
597	/* if we are a delayed write, convert to an async write */
598	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
599		vfs_bio_awrite(bp);
600		if (!slpflag && !slptimeo) {
601			splx(s);
602			return (0);
603		}
604		goto start;
605	}
606
607	if (bp->b_flags & B_WANTED) {
608		bp->b_flags &= ~(B_WANTED|B_PDWANTED);
609		wakeup((caddr_t) bp);
610	}
611	bremfree(bp);
612
613	if (bp->b_flags & B_VMIO) {
614		bp->b_flags |= B_INVAL | B_BUSY;
615		brelse(bp);
616		bremfree(bp);
617	}
618
619	if (bp->b_vp)
620		brelvp(bp);
621
622	/* we are not free, nor do we contain interesting data */
623	if (bp->b_rcred != NOCRED)
624		crfree(bp->b_rcred);
625	if (bp->b_wcred != NOCRED)
626		crfree(bp->b_wcred);
627fillbuf:
628	bp->b_flags |= B_BUSY;
629	LIST_REMOVE(bp, b_hash);
630	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
631	splx(s);
632	if (bp->b_bufsize) {
633		allocbuf(bp, 0, 0);
634	}
635	bp->b_flags = B_BUSY;
636	bp->b_dev = NODEV;
637	bp->b_vp = NULL;
638	bp->b_blkno = bp->b_lblkno = 0;
639	bp->b_iodone = 0;
640	bp->b_error = 0;
641	bp->b_resid = 0;
642	bp->b_bcount = 0;
643	bp->b_npages = 0;
644	bp->b_wcred = bp->b_rcred = NOCRED;
645	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
646	bp->b_dirtyoff = bp->b_dirtyend = 0;
647	bp->b_validoff = bp->b_validend = 0;
648	if (bufspace >= maxbufspace) {
649		s = splbio();
650		bp->b_flags |= B_INVAL;
651		brelse(bp);
652		goto trytofreespace;
653	}
654	return (bp);
655}
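
/*
 * Reclaim order used above: an EMPTY header is taken first while
 * bufspace is below maxbufspace; otherwise the AGE queue is scanned
 * before the LRU queue.  A delayed-write buffer found this way is not
 * reused directly: it is pushed out with vfs_bio_awrite() and the
 * routine either restarts the scan or returns NULL so that the caller
 * can retry.
 */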
656
657/*
658 * Check to see if a block is currently memory resident.
659 */
660struct buf *
661incore(struct vnode * vp, daddr_t blkno)
662{
663	struct buf *bp;
664	struct bufhashhdr *bh;
665
666	int s = splbio();
667
668	bh = BUFHASH(vp, blkno);
669	bp = bh->lh_first;
670
671	/* Search hash chain */
672	while (bp) {
673		/* hit */
674		if (bp->b_lblkno == blkno && bp->b_vp == vp
675		    && (bp->b_flags & B_INVAL) == 0) {
676			splx(s);
677			return (bp);
678		}
679		bp = bp->b_hash.le_next;
680	}
681	splx(s);
682
683	return (0);
684}
685
686/*
687 * Returns true if no I/O is needed to access the
688 * associated VM object.  This is like incore except
689 * it also hunts around in the VM system for the data.
690 */
691
692int
693inmem(struct vnode * vp, daddr_t blkno)
694{
695	vm_object_t obj;
696	vm_offset_t off, toff, tinc;
697	vm_page_t m;
698
699	if (incore(vp, blkno))
700		return 1;
701	if (vp->v_mount == 0)
702		return 0;
703	if ((vp->v_vmdata == 0) || (vp->v_flag & VVMIO) == 0)
704		return 0;
705
706	obj = (vm_object_t) vp->v_vmdata;
707	tinc = PAGE_SIZE;
708	if (tinc > vp->v_mount->mnt_stat.f_iosize)
709		tinc = vp->v_mount->mnt_stat.f_iosize;
710	off = blkno * vp->v_mount->mnt_stat.f_iosize;
711
712	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
713		int mask;
714
715		m = vm_page_lookup(obj, trunc_page(toff + off));
716		if (!m)
717			return 0;
718		if (vm_page_is_valid(m, toff + off, tinc) == 0)
719			return 0;
720	}
721	return 1;
722}
723
724/*
725 * Get a block given a specified block and offset into a file/device.
726 */
727struct buf *
728getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
729{
730	struct buf *bp;
731	int s;
732	struct bufhashhdr *bh;
733	vm_offset_t off;
734	int nleft;
735
736	s = splbio();
737loop:
738	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_cache_min)
739		pagedaemon_wakeup();
740
741	if ((bp = incore(vp, blkno)) != NULL) {
742		if (bp->b_flags & B_BUSY) {
743			bp->b_flags |= B_WANTED;
744			if (curproc == pageproc) {
745				bp->b_flags |= B_PDWANTED;
746				wakeup((caddr_t) &cnt.v_free_count);
747			}
748			if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo))
749				goto loop;
750
751			splx(s);
752			return (struct buf *) NULL;
753		}
754		bp->b_flags |= B_BUSY | B_CACHE;
755		bremfree(bp);
756		/*
757		 * check for size inconsistencies
758		 */
759		if (bp->b_bcount != size) {
760#if defined(VFS_BIO_DEBUG)
761			printf("getblk: invalid buffer size: %ld\n", bp->b_bcount);
762#endif
763			bp->b_flags |= B_INVAL;
764			bwrite(bp);
765			goto loop;
766		}
767		splx(s);
768		return (bp);
769	} else {
770		vm_object_t obj;
771		int doingvmio;
772
773		if ((obj = (vm_object_t) vp->v_vmdata) && (vp->v_flag & VVMIO)) {
774			doingvmio = 1;
775		} else {
776			doingvmio = 0;
777		}
778		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
779			if (slpflag || slptimeo)
780				return NULL;
781			goto loop;
782		}
783		/*
784		 * It is possible that another buffer has been constituted
785		 * during the time that getnewbuf is blocked.  This checks
786		 * for this possibility, and handles it.
787		 */
788		if (incore(vp, blkno)) {
789			bp->b_flags |= B_INVAL;
790			brelse(bp);
791			goto loop;
792		}
793		/*
794		 * Insert the buffer into the hash, so that it can
795		 * be found by incore.
796		 */
797		bp->b_blkno = bp->b_lblkno = blkno;
798		bgetvp(vp, bp);
799		LIST_REMOVE(bp, b_hash);
800		bh = BUFHASH(vp, blkno);
801		LIST_INSERT_HEAD(bh, bp, b_hash);
802
803		if (doingvmio) {
804			bp->b_flags |= (B_VMIO | B_CACHE);
805#if defined(VFS_BIO_DEBUG)
806			if (vp->v_type != VREG)
807				printf("getblk: vmioing file type %d???\n", vp->v_type);
808#endif
809		} else {
810			bp->b_flags &= ~B_VMIO;
811		}
812		splx(s);
813
814		if (!allocbuf(bp, size, 1)) {
815			s = splbio();
816			goto loop;
817		}
818		return (bp);
819	}
820}
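
/*
 * getblk() contract, as implemented above: the returned buffer is B_BUSY
 * and exclusively owned by the caller; B_CACHE is set only when the
 * contents are already valid.  An illustrative caller (names are
 * hypothetical):
 *
 *	bp = getblk(vp, lbn, bsize, 0, 0);
 *	if ((bp->b_flags & B_CACHE) == 0) {
 *		(read the block in, or zero-fill it for a new allocation)
 *	}
 */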
821
822/*
823 * Get an empty, disassociated buffer of given size.
824 */
825struct buf *
826geteblk(int size)
827{
828	struct buf *bp;
829
830	while ((bp = getnewbuf(0, 0, 0)) == 0);
831	allocbuf(bp, size, 0);
832	bp->b_flags |= B_INVAL;
833	return (bp);
834}
835
836/*
837 * This code constitutes the buffer memory from either anonymous system
838 * memory (in the case of non-VMIO operations) or from an associated
839 * VM object (in the case of VMIO operations).
840 *
841 * Note that this code is tricky, and has many complications to resolve
842 * deadlock or inconsistent data situations.  Tread lightly!!!
843 *
844 * Modify the length of a buffer's underlying buffer storage without
845 * destroying information (unless, of course the buffer is shrinking).
846 */
847int
848allocbuf(struct buf * bp, int size, int vmio)
849{
850
851	int s;
852	int newbsize, mbsize;
853	int i;
854
855	if ((bp->b_flags & B_VMIO) == 0) {
856		/*
857		 * Just get anonymous memory from the kernel
858		 */
859		mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
860		newbsize = round_page(size);
861
862		if (newbsize == bp->b_bufsize) {
863			bp->b_bcount = size;
864			return 1;
865		} else if (newbsize < bp->b_bufsize) {
866			vm_hold_free_pages(
867			    bp,
868			    (vm_offset_t) bp->b_data + newbsize,
869			    (vm_offset_t) bp->b_data + bp->b_bufsize);
870			bufspace -= (bp->b_bufsize - newbsize);
871		} else if (newbsize > bp->b_bufsize) {
872			vm_hold_load_pages(
873			    bp,
874			    (vm_offset_t) bp->b_data + bp->b_bufsize,
875			    (vm_offset_t) bp->b_data + newbsize);
876			bufspace += (newbsize - bp->b_bufsize);
877		}
878	} else {
879		vm_page_t m;
880		int desiredpages;
881
882		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
883		desiredpages = round_page(newbsize) / PAGE_SIZE;
884
885		if (newbsize == bp->b_bufsize) {
886			bp->b_bcount = size;
887			return 1;
888		} else if (newbsize < bp->b_bufsize) {
889			if (desiredpages < bp->b_npages) {
890				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
891				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
892				for (i = desiredpages; i < bp->b_npages; i++) {
893					m = bp->b_pages[i];
894					s = splhigh();
895					while ((m->flags & PG_BUSY) || (m->busy != 0)) {
896						m->flags |= PG_WANTED;
897						tsleep(m, PVM, "biodep", 0);
898					}
899					splx(s);
900
901					if (m->bmapped == 0) {
902						printf("allocbuf: bmapped is zero for page %d\n", i);
903						panic("allocbuf: error");
904					}
905					--m->bmapped;
906					if (m->bmapped == 0) {
907						PAGE_WAKEUP(m);
908						if (m->valid == 0) {
909							vm_page_protect(m, VM_PROT_NONE);
910							vm_page_free(m);
911						}
912					}
913					bp->b_pages[i] = NULL;
914				}
915				bp->b_npages = desiredpages;
916				bufspace -= (bp->b_bufsize - newbsize);
917			}
918		} else {
919			vm_object_t obj;
920			vm_offset_t tinc, off, toff, objoff;
921			int pageindex, curbpnpages;
922			struct vnode *vp;
923			int bsize;
924
925			vp = bp->b_vp;
926			bsize = vp->v_mount->mnt_stat.f_iosize;
927
928			if (bp->b_npages < desiredpages) {
929				obj = (vm_object_t) vp->v_vmdata;
930				tinc = PAGE_SIZE;
931				if (tinc > bsize)
932					tinc = bsize;
933				off = bp->b_lblkno * bsize;
934				curbpnpages = bp->b_npages;
935		doretry:
936				for (toff = 0; toff < newbsize; toff += tinc) {
937					int mask;
938					int bytesinpage;
939
940					pageindex = toff / PAGE_SIZE;
941					objoff = trunc_page(toff + off);
942					if (pageindex < curbpnpages) {
943						int pb;
944
945						m = bp->b_pages[pageindex];
946						if (m->offset != objoff)
947							panic("allocbuf: page changed offset??!!!?");
948						bytesinpage = tinc;
949						if (tinc > (newbsize - toff))
950							bytesinpage = newbsize - toff;
951						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
952							bp->b_flags &= ~B_CACHE;
953						}
954						if ((m->flags & PG_ACTIVE) == 0) {
955							vm_page_activate(m);
956							m->act_count = 0;
957						}
958						continue;
959					}
960					m = vm_page_lookup(obj, objoff);
961					if (!m) {
962						m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
963						if (!m) {
964							int j;
965
966							for (j = bp->b_npages; j < pageindex; j++) {
967								vm_page_t mt = bp->b_pages[j];
968
969								PAGE_WAKEUP(mt);
970								if (mt->valid == 0 && mt->bmapped == 0) {
971									vm_page_free(mt);
972								}
973							}
974							VM_WAIT;
975							if (vmio && (bp->b_flags & B_PDWANTED)) {
976								bp->b_flags |= B_INVAL;
977								brelse(bp);
978								return 0;
979							}
980							curbpnpages = bp->b_npages;
981							goto doretry;
982						}
983						m->valid = 0;
984						vm_page_activate(m);
985						m->act_count = 0;
986					} else if ((m->valid == 0) || (m->flags & PG_BUSY)) {
987						int j;
988						int bufferdestroyed = 0;
989
990						for (j = bp->b_npages; j < pageindex; j++) {
991							vm_page_t mt = bp->b_pages[j];
992
993							PAGE_WAKEUP(mt);
994							if (mt->valid == 0 && mt->bmapped == 0) {
995								vm_page_free(mt);
996							}
997						}
998						if (vmio && (bp->b_flags & B_PDWANTED)) {
999							bp->b_flags |= B_INVAL;
1000							brelse(bp);
1001							VM_WAIT;
1002							bufferdestroyed = 1;
1003						}
1004						s = splbio();
1005						if (m->flags & PG_BUSY) {
1006							m->flags |= PG_WANTED;
1007							tsleep(m, PRIBIO, "pgtblk", 0);
1008						} else if (m->valid == 0 && m->bmapped == 0) {
1009							vm_page_free(m);
1010						}
1011						splx(s);
1012						if (bufferdestroyed)
1013							return 0;
1014						curbpnpages = bp->b_npages;
1015						goto doretry;
1016					} else {
1017						int pb;
1018
1019						if ((m->flags & PG_CACHE) &&
1020						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
1021							int j;
1022
1023							for (j = bp->b_npages; j < pageindex; j++) {
1024								vm_page_t mt = bp->b_pages[j];
1025
1026								PAGE_WAKEUP(mt);
1027								if (mt->valid == 0 && mt->bmapped == 0) {
1028									vm_page_free(mt);
1029								}
1030							}
1031							VM_WAIT;
1032							if (vmio && (bp->b_flags & B_PDWANTED)) {
1033								bp->b_flags |= B_INVAL;
1034								brelse(bp);
1035								return 0;
1036							}
1037							curbpnpages = bp->b_npages;
1038							goto doretry;
1039						}
1040						bytesinpage = tinc;
1041						if (tinc > (newbsize - toff))
1042							bytesinpage = newbsize - toff;
1043						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
1044							bp->b_flags &= ~B_CACHE;
1045						}
1046						if ((m->flags & PG_ACTIVE) == 0) {
1047							vm_page_activate(m);
1048							m->act_count = 0;
1049						}
1050						m->flags |= PG_BUSY;
1051					}
1052					bp->b_pages[pageindex] = m;
1053					curbpnpages = pageindex + 1;
1054				}
1055				if (bsize >= PAGE_SIZE) {
1056					for (i = bp->b_npages; i < curbpnpages; i++) {
1057						m = bp->b_pages[i];
1058						if (m->valid == 0) {
1059							bp->b_flags &= ~B_CACHE;
1060						}
1061						m->bmapped++;
1062						PAGE_WAKEUP(m);
1063					}
1064				} else {
1065					if (!vm_page_is_valid(bp->b_pages[0], off, bsize))
1066						bp->b_flags &= ~B_CACHE;
1067					bp->b_pages[0]->bmapped++;
1068					PAGE_WAKEUP(bp->b_pages[0]);
1069				}
1070				bp->b_npages = curbpnpages;
1071				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
1072				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
1073				bp->b_data += off % PAGE_SIZE;
1074			}
1075			bufspace += (newbsize - bp->b_bufsize);
1076		}
1077	}
1078	bp->b_bufsize = newbsize;
1079	bp->b_bcount = size;
1080	return 1;
1081}
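
/*
 * allocbuf() in brief: for non-VMIO buffers it simply wires anonymous
 * pages into, or frees them from, the buffer's KVA window.  For VMIO
 * buffers it looks the pages up in (or adds them to) the vnode's VM
 * object, bumps their bmapped count, maps them with pmap_qenter(), and
 * clears B_CACHE if any part of the new range is not already valid so
 * that the caller knows a read is still required.
 */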
1082
1083/*
1084 * Wait for buffer I/O completion, returning error status.
1085 */
1086int
1087biowait(register struct buf * bp)
1088{
1089	int s;
1090
1091	s = splbio();
1092	while ((bp->b_flags & B_DONE) == 0)
1093		tsleep((caddr_t) bp, PRIBIO, "biowait", 0);
1094	if ((bp->b_flags & B_ERROR) || bp->b_error) {
1095		if ((bp->b_flags & B_INVAL) == 0) {
1096			bp->b_flags |= B_INVAL;
1097			bp->b_dev = NODEV;
1098			LIST_REMOVE(bp, b_hash);
1099			LIST_INSERT_HEAD(&invalhash, bp, b_hash);
1100			wakeup((caddr_t) bp);
1101		}
1102		if (!bp->b_error)
1103			bp->b_error = EIO;
1104		else
1105			bp->b_flags |= B_ERROR;
1106		splx(s);
1107		return (bp->b_error);
1108	} else {
1109		splx(s);
1110		return (0);
1111	}
1112}
1113
1114/*
1115 * Finish I/O on a buffer, calling an optional function.
1116 * This is usually called from interrupt level, so process blocking
1117 * is not *a good idea*.
1118 */
1119void
1120biodone(register struct buf * bp)
1121{
1122	int s;
1123
1124	s = splbio();
1125	if (bp->b_flags & B_DONE) {
1126		splx(s);
1127		printf("biodone: buffer already done\n");
1128		return;
1129	}
1130	bp->b_flags |= B_DONE;
1131
1132	if ((bp->b_flags & B_READ) == 0) {
1133		struct vnode *vp = bp->b_vp;
1134		vwakeup(bp);
1135	}
1136#ifdef BOUNCE_BUFFERS
1137	if (bp->b_flags & B_BOUNCE)
1138		vm_bounce_free(bp);
1139#endif
1140
1141	/* call optional completion function if requested */
1142	if (bp->b_flags & B_CALL) {
1143		bp->b_flags &= ~B_CALL;
1144		(*bp->b_iodone) (bp);
1145		splx(s);
1146		return;
1147	}
1148	if (bp->b_flags & B_VMIO) {
1149		int i, resid;
1150		vm_offset_t foff;
1151		vm_page_t m;
1152		vm_object_t obj;
1153		int iosize;
1154		struct vnode *vp = bp->b_vp;
1155
1156		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1157		obj = (vm_object_t) vp->v_vmdata;
1158		if (!obj) {
1159			panic("biodone: missing VM object");
1160		}
1161#if defined(VFS_BIO_DEBUG)
1162		if (obj->paging_in_progress < bp->b_npages) {
1163			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
1164			    obj->paging_in_progress, bp->b_npages);
1165		}
1166#endif
1167		iosize = bp->b_bufsize;
1168		for (i = 0; i < bp->b_npages; i++) {
1169			m = bp->b_pages[i];
1170			if (m == bogus_page) {
1171				m = vm_page_lookup(obj, foff);
1172				if (!m) {
1173#if defined(VFS_BIO_DEBUG)
1174					printf("biodone: page disappeared\n");
1175#endif
1176					--obj->paging_in_progress;
1177					continue;
1178				}
1179				bp->b_pages[i] = m;
1180				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1181			}
1182#if defined(VFS_BIO_DEBUG)
1183			if (trunc_page(foff) != m->offset) {
1184				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
1185			}
1186#endif
1187			resid = (m->offset + PAGE_SIZE) - foff;
1188			if (resid > iosize)
1189				resid = iosize;
1190			if (resid > 0) {
1191				vm_page_set_valid(m, foff, resid);
1192				vm_page_set_clean(m, foff, resid);
1193			}
1194
1195			/*
1196			 * When debugging new filesystems or buffer I/O methods, this
1197			 * is the most common error that pops up.  If you see this, you
1198			 * have not set the page busy flag correctly!!!
1199			 */
1200			if (m->busy == 0) {
1201				printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n",
1202				    m->offset, foff, resid, i);
1203				printf(" iosize: %d, lblkno: %d\n",
1204				    bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno);
1205				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
1206				    m->valid, m->dirty, m->bmapped);
1207				panic("biodone: page busy < 0\n");
1208			}
1209			--m->busy;
1210			PAGE_WAKEUP(m);
1211			--obj->paging_in_progress;
1212			foff += resid;
1213			iosize -= resid;
1214		}
1215		if (obj && obj->paging_in_progress == 0 &&
1216		    (obj->flags & OBJ_PIPWNT)) {
1217			obj->flags &= ~OBJ_PIPWNT;
1218			wakeup((caddr_t) obj);
1219		}
1220	}
1221	/*
1222	 * For asynchronous completions, release the buffer now. The brelse
1223	 * checks for B_WANTED and will do the wakeup there if necessary - so
1224	 * no need to do a wakeup here in the async case.
1225	 */
1226
1227	if (bp->b_flags & B_ASYNC) {
1228		brelse(bp);
1229	} else {
1230		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
1231		wakeup((caddr_t) bp);
1232	}
1233	splx(s);
1234}
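
/*
 * Callers that need a completion callback set B_CALL and b_iodone before
 * starting the I/O; biodone() then invokes that function instead of the
 * normal wakeup/brelse processing.  An illustrative sketch (the handler
 * name is hypothetical):
 *
 *	bp->b_flags |= B_CALL;
 *	bp->b_iodone = my_iodone_handler;
 *	VOP_STRATEGY(bp);
 */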
1235
1236int
1237count_lock_queue()
1238{
1239	int count;
1240	struct buf *bp;
1241
1242	count = 0;
1243	for (bp = bufqueues[QUEUE_LOCKED].tqh_first;
1244	    bp != NULL;
1245	    bp = bp->b_freelist.tqe_next)
1246		count++;
1247	return (count);
1248}
1249
1250int vfs_update_interval = 30;
1251
1252void
1253vfs_update()
1254{
1255	(void) spl0();
1256	while (1) {
1257		tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update",
1258		    hz * vfs_update_interval);
1259		vfs_update_wakeup = 0;
1260		sync(curproc, NULL, NULL);
1261	}
1262}
1263
1264/*
1265 * This routine is called in lieu of iodone in the case of
1266 * incomplete I/O.  This keeps the busy status for pages
1267 * consistent.
1268 */
1269void
1270vfs_unbusy_pages(struct buf * bp)
1271{
1272	int i;
1273
1274	if (bp->b_flags & B_VMIO) {
1275		struct vnode *vp = bp->b_vp;
1276		vm_object_t obj = (vm_object_t) vp->v_vmdata;
1277		vm_offset_t foff;
1278
1279		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1280
1281		for (i = 0; i < bp->b_npages; i++) {
1282			vm_page_t m = bp->b_pages[i];
1283
1284			if (m == bogus_page) {
1285				m = vm_page_lookup(obj, foff);
1286				if (!m) {
1287					panic("vfs_unbusy_pages: page missing\n");
1288				}
1289				bp->b_pages[i] = m;
1290				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1291			}
1292			--obj->paging_in_progress;
1293			--m->busy;
1294			PAGE_WAKEUP(m);
1295		}
1296		if (obj->paging_in_progress == 0 &&
1297		    (obj->flags & OBJ_PIPWNT)) {
1298			obj->flags &= ~OBJ_PIPWNT;
1299			wakeup((caddr_t) obj);
1300		}
1301	}
1302}
1303
1304/*
1305 * This routine is called before a device strategy routine.
1306 * It is used to tell the VM system that paging I/O is in
1307 * progress, and treat the pages associated with the buffer
1308 * almost as being PG_BUSY.  Also the object paging_in_progress
1309 * flag is handled to make sure that the object doesn't become
1310 * inconsistent.
1311 */
1312void
1313vfs_busy_pages(struct buf * bp, int clear_modify)
1314{
1315	int i;
1316
1317	if (bp->b_flags & B_VMIO) {
1318		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
1319		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1320		int iocount = bp->b_bufsize;
1321
1322		for (i = 0; i < bp->b_npages; i++) {
1323			vm_page_t m = bp->b_pages[i];
1324			int resid = (m->offset + PAGE_SIZE) - foff;
1325
1326			if (resid > iocount)
1327				resid = iocount;
1328			obj->paging_in_progress++;
1329			m->busy++;
1330			if (clear_modify) {
1331				vm_page_test_dirty(m);
1332				vm_page_protect(m, VM_PROT_READ);
1333			} else if (bp->b_bcount >= PAGE_SIZE) {
1334				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
1335					bp->b_pages[i] = bogus_page;
1336					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1337				}
1338			}
1339			foff += resid;
1340			iocount -= resid;
1341		}
1342	}
1343}
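
/*
 * The bogus_page substitution above covers the read case where a page is
 * already valid but the buffer as a whole is not (B_CACHE clear): the
 * device overwrites the throwaway bogus_page instead of the good data.
 * biodone() and vfs_unbusy_pages() look the real page up again and
 * re-enter it in the buffer's page list when the I/O finishes.
 */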
1344
1345/*
1346 * Tell the VM system that the pages associated with this buffer
1347 * are dirty.  This is in case of the unlikely circumstance that
1348 * a buffer has to be destroyed before it is flushed.
1349 */
1350void
1351vfs_dirty_pages(struct buf * bp)
1352{
1353	int i;
1354
1355	if (bp->b_flags & B_VMIO) {
1356		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1357		int iocount = bp->b_bufsize;
1358
1359		for (i = 0; i < bp->b_npages; i++) {
1360			vm_page_t m = bp->b_pages[i];
1361			int resid = (m->offset + PAGE_SIZE) - foff;
1362
1363			if (resid > iocount)
1364				resid = iocount;
1365			if (resid > 0) {
1366				vm_page_set_valid(m, foff, resid);
1367				vm_page_set_dirty(m, foff, resid);
1368			}
1369			PAGE_WAKEUP(m);
1370			foff += resid;
1371			iocount -= resid;
1372		}
1373	}
1374}
1375/*
1376 * vm_hold_load_pages and vm_hold_free_pages get pages into and out of
1377 * a buffer's address space.  The pages are anonymous and are
1378 * not associated with a file object.
1379 */
1380void
1381vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
1382{
1383	vm_offset_t pg;
1384	vm_page_t p;
1385	vm_offset_t from = round_page(froma);
1386	vm_offset_t to = round_page(toa);
1387
1388	for (pg = from; pg < to; pg += PAGE_SIZE) {
1389
1390tryagain:
1391
1392		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS,
1393		    VM_ALLOC_NORMAL);
1394		if (!p) {
1395			VM_WAIT;
1396			goto tryagain;
1397		}
1398		vm_page_wire(p);
1399		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
1400		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
1401		PAGE_WAKEUP(p);
1402		bp->b_npages++;
1403	}
1404}
1405
1406void
1407vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
1408{
1409	vm_offset_t pg;
1410	vm_page_t p;
1411	vm_offset_t from = round_page(froma);
1412	vm_offset_t to = round_page(toa);
1413
1414	for (pg = from; pg < to; pg += PAGE_SIZE) {
1415		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
1416		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
1417		pmap_kremove(pg);
1418		vm_page_free(p);
1419		--bp->b_npages;
1420	}
1421}
1422
1423void
1424bufstats()
1425{
1426}
1427