vfs_bio.c revision 6619
1/*
2 * Copyright (c) 1994 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice immediately at the beginning of the file, without modification,
10 *    this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 *    John S. Dyson.
16 * 4. This work was done expressly for inclusion into FreeBSD.  Other use
17 *    is allowed if this notation is included.
18 * 5. Modifications may be freely made to this file if the above conditions
19 *    are met.
20 *
21 * $Id: vfs_bio.c,v 1.28 1995/02/18 02:55:09 davidg Exp $
22 */
23
24/*
25 * This file contains a new buffer I/O scheme implementing a coherent
26 * VM object and buffer cache.  Pains have been taken to make sure
27 * that the performance degradation usually associated with such
28 * schemes is not realized.
29 *
30 * Author:  John S. Dyson
31 * Significant help during the development and debugging phases
32 * has been provided by David Greenman, also of the FreeBSD core team.
33 */
34
35#define VMIO
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/proc.h>
40#include <sys/vnode.h>
41#include <vm/vm.h>
42#include <vm/vm_pageout.h>
43#include <vm/vm_page.h>
44#include <vm/vm_object.h>
45#include <sys/buf.h>
46#include <sys/mount.h>
47#include <sys/malloc.h>
48#include <sys/resourcevar.h>
49#include <sys/proc.h>
50
51#include <miscfs/specfs/specdev.h>
52
53struct buf *buf;		/* buffer header pool */
54int nbuf;			/* number of buffer headers calculated
55				 * elsewhere */
56struct swqueue bswlist;
57int nvmio, nlru;
58
59extern vm_map_t buffer_map, io_map, kernel_map, pager_map;
60
61void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
62void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
63void vfs_dirty_pages(struct buf * bp);
64void vfs_busy_pages(struct buf *, int clear_modify);
65
66int needsbuffer;
67
68/*
69 * Internal update daemon, process 3
70 *	The variable vfs_update_wakeup allows for internal syncs.
71 */
72int vfs_update_wakeup;
73
74
75/*
76 * buffers base kva
77 */
78caddr_t buffers_kva;
79
80/*
81 * bogus page -- for I/O to/from partially complete buffers
82 * this is a temporary solution to the problem, but it is not
83 * really that bad.  it would be better to split the buffer
84 * for input when the buffer is already partially in memory,
85 * but the code is intricate enough already.
86 */
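/*
 * (In practice: vfs_busy_pages() substitutes bogus_page for pages that are
 * already valid when a read is started on a not-fully-cached buffer, so the
 * device transfer cannot clobber good data; biodone() and vfs_unbusy_pages()
 * look the real pages back up and re-enter them into the buffer's KVA.)
 */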
87vm_page_t bogus_page;
88vm_offset_t bogus_offset;
89
90int bufspace, maxbufspace;
91
92/*
93 * Initialize buffer headers and related structures.
94 */
95void
96bufinit()
97{
98	struct buf *bp;
99	int i;
100
101	TAILQ_INIT(&bswlist);
102	LIST_INIT(&invalhash);
103
104	/* first, make a null hash table */
105	for (i = 0; i < BUFHSZ; i++)
106		LIST_INIT(&bufhashtbl[i]);
107
108	/* next, make a null set of free lists */
109	for (i = 0; i < BUFFER_QUEUES; i++)
110		TAILQ_INIT(&bufqueues[i]);
111
112	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
113	/* finally, initialize each buffer header and stick on empty q */
114	for (i = 0; i < nbuf; i++) {
115		bp = &buf[i];
116		bzero(bp, sizeof *bp);
117		bp->b_flags = B_INVAL;	/* we're just an empty header */
118		bp->b_dev = NODEV;
119		bp->b_vp = NULL;
120		bp->b_rcred = NOCRED;
121		bp->b_wcred = NOCRED;
122		bp->b_qindex = QUEUE_EMPTY;
123		bp->b_vnbufs.le_next = NOLIST;
124		bp->b_data = buffers_kva + i * MAXBSIZE;
125		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
126		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
127	}
128/*
129 * this will change later!!!
130 */
131	maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;
132
133	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
134	bogus_page = vm_page_alloc(kernel_object, bogus_offset - VM_MIN_KERNEL_ADDRESS, 0);
135
136}
137
138/*
139 * remove the buffer from the appropriate free list
140 */
141void
142bremfree(struct buf * bp)
143{
144	int s = splbio();
145
146	if (bp->b_qindex != QUEUE_NONE) {
147		if (bp->b_qindex == QUEUE_LRU)
148			--nlru;
149		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
150		bp->b_qindex = QUEUE_NONE;
151	} else {
152		panic("bremfree: removing a buffer when not on a queue");
153	}
154	splx(s);
155}
156
157/*
158 * Get a buffer with the specified data.  Look in the cache first.
159 */
160int
161bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
162    struct buf ** bpp)
163{
164	struct buf *bp;
165
166	bp = getblk(vp, blkno, size, 0, 0);
167	*bpp = bp;
168
169	/* if not found in cache, do some I/O */
170	if ((bp->b_flags & B_CACHE) == 0) {
171		if (curproc && curproc->p_stats)	/* count block I/O */
172			curproc->p_stats->p_ru.ru_inblock++;
173		bp->b_flags |= B_READ;
174		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
175		if (bp->b_rcred == NOCRED) {
176			if (cred != NOCRED)
177				crhold(cred);
178			bp->b_rcred = cred;
179		}
180		vfs_busy_pages(bp, 0);
181		VOP_STRATEGY(bp);
182		return (biowait(bp));
183	} else if (bp->b_lblkno == bp->b_blkno) {
184		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
185		    &bp->b_blkno, (int *) 0);
186	}
187	return (0);
188}
189
190/*
191 * Operates like bread, but also starts asynchronous I/O on
192 * read-ahead blocks.
193 */
194int
195breadn(struct vnode * vp, daddr_t blkno, int size,
196    daddr_t * rablkno, int *rabsize,
197    int cnt, struct ucred * cred, struct buf ** bpp)
198{
199	struct buf *bp, *rabp;
200	int i;
201	int rv = 0, readwait = 0;
202
203	*bpp = bp = getblk(vp, blkno, size, 0, 0);
204
205	/* if not found in cache, do some I/O */
206	if ((bp->b_flags & B_CACHE) == 0) {
207		if (curproc && curproc->p_stats)	/* count block I/O */
208			curproc->p_stats->p_ru.ru_inblock++;
209		bp->b_flags |= B_READ;
210		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
211		if (bp->b_rcred == NOCRED) {
212			if (cred != NOCRED)
213				crhold(cred);
214			bp->b_rcred = cred;
215		}
216		vfs_busy_pages(bp, 0);
217		VOP_STRATEGY(bp);
218		++readwait;
219	} else if (bp->b_lblkno == bp->b_blkno) {
220		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
221		    &bp->b_blkno, (int *) 0);
222	}
223	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
224		if (inmem(vp, *rablkno))
225			continue;
226		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);
227
228		if ((rabp->b_flags & B_CACHE) == 0) {
229			if (curproc && curproc->p_stats)
230				curproc->p_stats->p_ru.ru_inblock++;
231			rabp->b_flags |= B_READ | B_ASYNC;
232			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
233			if (rabp->b_rcred == NOCRED) {
234				if (cred != NOCRED)
235					crhold(cred);
236				rabp->b_rcred = cred;
237			}
238			vfs_busy_pages(rabp, 0);
239			VOP_STRATEGY(rabp);
240		} else {
241			brelse(rabp);
242		}
243	}
244
245	if (readwait) {
246		rv = biowait(bp);
247	}
248	return (rv);
249}
250
251/*
252 * Write, release buffer on completion.  (Done by iodone
253 * if async.)
254 */
255int
256bwrite(struct buf * bp)
257{
258	int oldflags = bp->b_flags;
259
260	if (bp->b_flags & B_INVAL) {
261		brelse(bp);
262		return (0);
263	}
264	if (!(bp->b_flags & B_BUSY))
265		panic("bwrite: buffer is not busy???");
266
267	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
268	bp->b_flags |= B_WRITEINPROG;
269
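	/*
	 * Statistics: a delayed write was already charged to ru_oublock by
	 * bdwrite() when it was first marked B_DELWRI, so only charge the
	 * block here if the write was never delayed; a formerly delayed
	 * buffer just needs reassignbuf() now that B_DELWRI is cleared.
	 */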
270	if (oldflags & B_ASYNC) {
271		if (oldflags & B_DELWRI) {
272			reassignbuf(bp, bp->b_vp);
273		} else if (curproc) {
274			++curproc->p_stats->p_ru.ru_oublock;
275		}
276	}
277	bp->b_vp->v_numoutput++;
278	vfs_busy_pages(bp, 1);
279	VOP_STRATEGY(bp);
280
281	if ((oldflags & B_ASYNC) == 0) {
282		int rtval = biowait(bp);
283
284		if (oldflags & B_DELWRI) {
285			reassignbuf(bp, bp->b_vp);
286		} else if (curproc) {
287			++curproc->p_stats->p_ru.ru_oublock;
288		}
289		brelse(bp);
290		return (rtval);
291	}
292	return (0);
293}
294
295int
296vn_bwrite(struct vop_bwrite_args *ap)
298{
299	return (bwrite(ap->a_bp));
300}
301
302/*
303 * Delayed write. (Buffer is marked dirty).
304 */
305void
306bdwrite(struct buf * bp)
307{
308
309	if ((bp->b_flags & B_BUSY) == 0) {
310		panic("bdwrite: buffer is not busy");
311	}
312	if (bp->b_flags & B_INVAL) {
313		brelse(bp);
314		return;
315	}
316	if (bp->b_flags & B_TAPE) {
317		bawrite(bp);
318		return;
319	}
320	bp->b_flags &= ~B_READ;
321	vfs_dirty_pages(bp);
322	if ((bp->b_flags & B_DELWRI) == 0) {
323		if (curproc)
324			++curproc->p_stats->p_ru.ru_oublock;
325		bp->b_flags |= B_DONE | B_DELWRI;
326		reassignbuf(bp, bp->b_vp);
327	}
328	brelse(bp);
329	return;
330}
331
332/*
333 * Asynchronous write.
334 * Start output on a buffer, but do not wait for it to complete.
335 * The buffer is released when the output completes.
336 */
337void
338bawrite(struct buf * bp)
339{
340#ifdef EVILFORNOW
341	/*
342	 * #ifdef EXTRA_DEADLOCKS is appropriate for this code for now :-)
343	 */
344	if (((bp->b_flags & B_DELWRI) == 0) && (bp->b_vp->v_numoutput > 24)) {
345		int s = splbio();
346
347		while (bp->b_vp->v_numoutput > 16) {
348			bp->b_vp->v_flag |= VBWAIT;
349			tsleep((caddr_t) &bp->b_vp->v_numoutput, PRIBIO, "bawnmo", 0);
350		}
351		splx(s);
352	}
353#endif
354	bp->b_flags |= B_ASYNC;
355	(void) bwrite(bp);
356}
357
358/*
359 * Release a buffer.
360 */
361void
362brelse(struct buf * bp)
363{
364	int s;
365
366	if (bp->b_flags & B_CLUSTER) {
367		relpbuf(bp);
368		return;
369	}
370	/* anyone need a "free" block? */
371	s = splbio();
372
373	if (needsbuffer) {
374		needsbuffer = 0;
375		wakeup((caddr_t) &needsbuffer);
376	}
377
378	/* anyone need this block? */
379	if (bp->b_flags & B_WANTED) {
380		bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE);
381		wakeup((caddr_t) bp);
382	} else if (bp->b_flags & B_VMIO) {
383		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
384		wakeup((caddr_t) bp);
385	}
386	if (bp->b_flags & B_LOCKED)
387		bp->b_flags &= ~B_ERROR;
388
389	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
390	    (bp->b_bufsize <= 0)) {
391		bp->b_flags |= B_INVAL;
392		bp->b_flags &= ~(B_DELWRI | B_CACHE);
393		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
394			brelvp(bp);
395	}
396
397	/*
398	 * VMIO buffer rundown.  There is little need to keep a VMIO buffer
399	 * constituted, so the B_INVAL flag is used to *invalidate* the buffer
400	 * while the VM object itself is kept around.  The B_NOCACHE flag is
401	 * used to invalidate the pages in the VM object as well.
402	 */
403	if (bp->b_flags & B_VMIO) {
404		vm_offset_t foff;
405		vm_object_t obj;
406		int i, resid;
407		vm_page_t m;
408		int iototal = bp->b_bufsize;
409
410		foff = 0;
411		obj = 0;
412		if (bp->b_npages) {
413			if (bp->b_vp && bp->b_vp->v_mount) {
414				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
415			} else {
416				/*
417				 * vnode pointer has been ripped away --
418				 * probably file gone...
419				 */
420				foff = bp->b_pages[0]->offset;
421			}
422		}
423		for (i = 0; i < bp->b_npages; i++) {
424			m = bp->b_pages[i];
425			if (m == bogus_page) {
426				panic("brelse: bogus page found");
427			}
428			resid = (m->offset + PAGE_SIZE) - foff;
429			if (resid > iototal)
430				resid = iototal;
431			if (resid > 0) {
432				if (bp->b_flags & (B_ERROR | B_NOCACHE)) {
433					vm_page_set_invalid(m, foff, resid);
434				} else if ((bp->b_flags & B_DELWRI) == 0) {
435					vm_page_set_clean(m, foff, resid);
436					vm_page_set_valid(m, foff, resid);
437				}
438			} else {
439				vm_page_test_dirty(m);
440			}
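			/*
			 * bmapped counts the buffers currently mapping this
			 * page; the page may only be freed or moved to the
			 * cache once no buffer references it.
			 */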
441			if (bp->b_flags & B_INVAL) {
442				if (m->bmapped == 0) {
443					panic("brelse: bmapped is zero for page");
444				}
445				--m->bmapped;
446				if (m->bmapped == 0) {
447					PAGE_WAKEUP(m);
448					if (m->valid == 0) {
449						pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
450						vm_page_free(m);
451					} else if ((m->dirty & m->valid) == 0 &&
452						(m->flags & PG_REFERENCED) == 0 &&
453							!pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
454						vm_page_cache(m);
455					else if( (m->flags & PG_ACTIVE) == 0)
456						vm_page_activate(m);
457				}
458			}
459			foff += resid;
460			iototal -= resid;
461		}
462
463		if (bp->b_flags & B_INVAL) {
464			bufspace -= bp->b_bufsize;
465			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
466			bp->b_npages = 0;
467			bp->b_bufsize = 0;
468			bp->b_flags &= ~B_VMIO;
469			if (bp->b_vp)
470				brelvp(bp);
471			--nvmio;
472		}
473	}
474	if (bp->b_qindex != QUEUE_NONE)
475		panic("brelse: free buffer onto another queue???");
476
477	/* enqueue */
478	/* buffers with no memory */
479	if (bp->b_bufsize == 0) {
480		bp->b_qindex = QUEUE_EMPTY;
481		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
482		LIST_REMOVE(bp, b_hash);
483		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
484		bp->b_dev = NODEV;
485		/* buffers with junk contents */
486	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) {
487		bp->b_qindex = QUEUE_AGE;
488		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
489		LIST_REMOVE(bp, b_hash);
490		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
491		bp->b_dev = NODEV;
492		/* buffers that are locked */
493	} else if (bp->b_flags & B_LOCKED) {
494		bp->b_qindex = QUEUE_LOCKED;
495		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
496		/* buffers with stale but valid contents */
497	} else if (bp->b_flags & B_AGE) {
498		bp->b_qindex = QUEUE_AGE;
499		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
500		/* buffers with valid and quite potentially reusable contents */
501	} else {
502		if (bp->b_flags & B_VMIO)
503			bp->b_qindex = QUEUE_VMIO;
504		else {
505			bp->b_qindex = QUEUE_LRU;
506			++nlru;
507		}
508		TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
509	}
510
511	/* unlock */
512	bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE);
513	splx(s);
514}
515
516/*
517 * this routine implements clustered async writes for
518 * clearing out B_DELWRI buffers...  This is much better
519 * than the old way of writing only one buffer at a time.
520 */
521void
522vfs_bio_awrite(struct buf * bp)
523{
524	int i;
525	daddr_t lblkno = bp->b_lblkno;
526	struct vnode *vp = bp->b_vp;
527	int s;
528	int ncl;
529	struct buf *bpa;
530
531	s = splbio();
532	if( vp->v_mount && (vp->v_flag & VVMIO) &&
533	    	(bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
534		int size = vp->v_mount->mnt_stat.f_iosize;
535
536		for (i = 1; i < MAXPHYS / size; i++) {
537			if ((bpa = incore(vp, lblkno + i)) &&
538			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) &&
539			    (bpa->b_bufsize == size)) {
540				if ((bpa->b_blkno == bpa->b_lblkno) ||
541				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
542					break;
543			} else {
544				break;
545			}
546		}
547		ncl = i;
548		/*
549		 * this is a possible cluster write
550		 */
551		if (ncl != 1) {
552			cluster_wbuild(vp, NULL, size, lblkno, ncl, -1);
553			splx(s);
554			return;
555		}
556	}
557	/*
558	 * default (old) behavior, writing out only one block
559	 */
560	bremfree(bp);
561	bp->b_flags |= B_BUSY | B_ASYNC;
562	bwrite(bp);
563	splx(s);
564}
565
566
567/*
568 * Find a buffer header which is available for use.
569 */
570struct buf *
571getnewbuf(int slpflag, int slptimeo, int doingvmio)
572{
573	struct buf *bp;
574	int s;
575	int firstbp = 1;
576
577	s = splbio();
578start:
579	if (bufspace >= maxbufspace)
580		goto trytofreespace;
581
582	/* can we constitute a new buffer? */
583	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
584		if (bp->b_qindex != QUEUE_EMPTY)
585			panic("getnewbuf: inconsistent EMPTY queue");
586		bremfree(bp);
587		goto fillbuf;
588	}
589trytofreespace:
590	/*
591	 * we keep the file I/O from hogging metadata I/O
592	 */
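	/*
	 * Order of preference: the AGE queue first; the VMIO queue once more
	 * than half of the buffers are VMIO backed (nvmio > nbuf/2); and the
	 * LRU queue when we are not constituting a VMIO buffer or when LRU
	 * buffers dominate (nlru > nbuf/2).
	 */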
593	if ((bp = bufqueues[QUEUE_AGE].tqh_first) != NULL) {
594		if (bp->b_qindex != QUEUE_AGE)
595			panic("getnewbuf: inconsistent AGE queue");
596	} else if ((nvmio > (nbuf / 2))
597	    && (bp = bufqueues[QUEUE_VMIO].tqh_first)) {
598		if (bp->b_qindex != QUEUE_VMIO)
599			panic("getnewbuf: inconsistent VMIO queue");
600	} else if ((!doingvmio || (nlru > (nbuf / 2))) &&
601	    (bp = bufqueues[QUEUE_LRU].tqh_first)) {
602		if (bp->b_qindex != QUEUE_LRU)
603			panic("getnewbuf: inconsistent LRU queue");
604	}
605	if (!bp) {
606		if (doingvmio) {
607			if ((bp = bufqueues[QUEUE_VMIO].tqh_first) != NULL) {
608				if (bp->b_qindex != QUEUE_VMIO)
609					panic("getnewbuf: inconsistent VMIO queue");
610			} else if ((bp = bufqueues[QUEUE_LRU].tqh_first) != NULL) {
611				if (bp->b_qindex != QUEUE_LRU)
612					panic("getnewbuf: inconsistent LRU queue");
613			}
614		} else {
615			if ((bp = bufqueues[QUEUE_LRU].tqh_first) != NULL) {
616				if (bp->b_qindex != QUEUE_LRU)
617					panic("getnewbuf: inconsistent LRU queue");
618			} else if ((bp = bufqueues[QUEUE_VMIO].tqh_first) != NULL) {
619				if (bp->b_qindex != QUEUE_VMIO)
620					panic("getnewbuf: inconsistent VMIO queue");
621			}
622		}
623	}
624	if (!bp) {
625		/* wait for a free buffer of any kind */
626		needsbuffer = 1;
627		tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
628		splx(s);
629		return (0);
630	}
631	/* if we are a delayed write, convert to an async write */
632	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
633		vfs_bio_awrite(bp);
634		if (!slpflag && !slptimeo) {
635			splx(s);
636			return (0);
637		}
638		goto start;
639	}
640	bremfree(bp);
641
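	/*
	 * For a VMIO buffer, brelse() with B_INVAL set tears down the VM
	 * backing and requeues the buffer, so pull it off the free list
	 * again before reusing it.
	 */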
642	if (bp->b_flags & B_VMIO) {
643		bp->b_flags |= B_INVAL | B_BUSY;
644		brelse(bp);
645		bremfree(bp);
646	}
647	if (bp->b_vp)
648		brelvp(bp);
649
650	/* we are not free, nor do we contain interesting data */
651	if (bp->b_rcred != NOCRED)
652		crfree(bp->b_rcred);
653	if (bp->b_wcred != NOCRED)
654		crfree(bp->b_wcred);
655fillbuf:
656	bp->b_flags |= B_BUSY;
657	LIST_REMOVE(bp, b_hash);
658	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
659	splx(s);
660	if (bp->b_bufsize) {
661		allocbuf(bp, 0, 0);
662	}
663	bp->b_flags = B_BUSY;
664	bp->b_dev = NODEV;
665	bp->b_vp = NULL;
666	bp->b_blkno = bp->b_lblkno = 0;
667	bp->b_iodone = 0;
668	bp->b_error = 0;
669	bp->b_resid = 0;
670	bp->b_bcount = 0;
671	bp->b_npages = 0;
672	bp->b_wcred = bp->b_rcred = NOCRED;
673	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
674	bp->b_dirtyoff = bp->b_dirtyend = 0;
675	bp->b_validoff = bp->b_validend = 0;
676	if (bufspace >= maxbufspace) {
677		s = splbio();
678		bp->b_flags |= B_INVAL;
679		brelse(bp);
680		goto trytofreespace;
681	}
682	return (bp);
683}
684
685/*
686 * Check to see if a block is currently memory resident.
687 */
688struct buf *
689incore(struct vnode * vp, daddr_t blkno)
690{
691	struct buf *bp;
692	struct bufhashhdr *bh;
693
694	int s = splbio();
695
696	bh = BUFHASH(vp, blkno);
697	bp = bh->lh_first;
698
699	/* Search hash chain */
700	while (bp) {
701		/* hit */
702		if (bp->b_lblkno == blkno && bp->b_vp == vp
703		    && (bp->b_flags & B_INVAL) == 0) {
704			splx(s);
705			return (bp);
706		}
707		bp = bp->b_hash.le_next;
708	}
709	splx(s);
710
711	return (0);
712}
713
714/*
715 * Returns true if no I/O is needed to access the
716 * associated VM object.  This is like incore except
717 * it also hunts around in the VM system for the data.
718 */
719
720int
721inmem(struct vnode * vp, daddr_t blkno)
722{
723	vm_object_t obj;
724	vm_offset_t off, toff, tinc;
725	vm_page_t m;
726
727	if (incore(vp, blkno))
728		return 1;
729	if (vp->v_mount == 0)
730		return 0;
731	if ((vp->v_vmdata == 0) || (vp->v_flag & VVMIO) == 0)
732		return 0;
733
734	obj = (vm_object_t) vp->v_vmdata;
735	tinc = PAGE_SIZE;
736	if (tinc > vp->v_mount->mnt_stat.f_iosize)
737		tinc = vp->v_mount->mnt_stat.f_iosize;
738	off = blkno * vp->v_mount->mnt_stat.f_iosize;
739
740	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
743		m = vm_page_lookup(obj, trunc_page(toff + off));
744		if (!m)
745			return 0;
746		if (vm_page_is_valid(m, toff + off, tinc) == 0)
747			return 0;
748	}
749	return 1;
750}
751
752/*
753 * Get a block given a specified block and offset into a file/device.
754 */
755struct buf *
756getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
757{
758	struct buf *bp;
759	int s;
760	struct bufhashhdr *bh;
761	vm_offset_t off;
762	int nleft;
763
764	s = splbio();
765loop:
766	if ((cnt.v_free_count + cnt.v_cache_count) <
767	    cnt.v_free_reserved + MAXBSIZE / PAGE_SIZE)
768		wakeup((caddr_t) &vm_pages_needed);
769	if ((bp = incore(vp, blkno)) != NULL) {
770		if (bp->b_flags & B_BUSY) {
771			bp->b_flags |= B_WANTED;
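			/*
			 * Give the pagedaemon priority: B_PDWANTED makes
			 * allocbuf() release the buffer rather than block
			 * the daemon.
			 */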
772			if (curproc == pageproc) {
773				bp->b_flags |= B_PDWANTED;
774				wakeup((caddr_t) &cnt.v_free_count);
775			}
776			if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo))
777				goto loop;
778			splx(s);
779			return (struct buf *) NULL;
780		}
781		bp->b_flags |= B_BUSY | B_CACHE;
782		bremfree(bp);
783		/*
784		 * check for size inconsistencies
785		 */
786		if (bp->b_bcount != size) {
787#if defined(VFS_BIO_DEBUG)
788			printf("getblk: invalid buffer size: %ld\n", bp->b_bcount);
789#endif
790			bp->b_flags |= B_INVAL;
791			bwrite(bp);
792			goto loop;
793		}
794		splx(s);
795		return (bp);
796	} else {
797		vm_object_t obj;
798		int doingvmio;
799
800		if ((obj = (vm_object_t) vp->v_vmdata) && (vp->v_flag & VVMIO)) {
801			doingvmio = 1;
802		} else {
803			doingvmio = 0;
804		}
805		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
806			if (slpflag || slptimeo)
807				return NULL;
808			goto loop;
809		}
810		if (incore(vp, blkno)) {
811			bp->b_flags |= B_INVAL;
812			brelse(bp);
813			goto loop;
814		}
815		bp->b_blkno = bp->b_lblkno = blkno;
816		bgetvp(vp, bp);
817		LIST_REMOVE(bp, b_hash);
818		bh = BUFHASH(vp, blkno);
819		LIST_INSERT_HEAD(bh, bp, b_hash);
820		if (doingvmio) {
821			bp->b_flags |= (B_VMIO | B_CACHE);
822#if defined(VFS_BIO_DEBUG)
823			if (vp->v_type != VREG)
824				printf("getblk: vmioing file type %d???\n", vp->v_type);
825#endif
826			++nvmio;
827		} else {
828			if (bp->b_flags & B_VMIO)
829				--nvmio;
830			bp->b_flags &= ~B_VMIO;
831		}
832		splx(s);
833		if (!allocbuf(bp, size, 1)) {
834			s = splbio();
835			goto loop;
836		}
837		return (bp);
838	}
839}
840
841/*
842 * Get an empty, disassociated buffer of given size.
843 */
844struct buf *
845geteblk(int size)
846{
847	struct buf *bp;
848
849	while ((bp = getnewbuf(0, 0, 0)) == 0);
850	allocbuf(bp, size, 0);
851	bp->b_flags |= B_INVAL;
852	return (bp);
853}
854
855/*
856 * This code constitutes the buffer memory from either anonymous system
857 * memory (in the case of non-VMIO operations) or from an associated
858 * VM object (in the case of VMIO operations).
859 *
860 * Note that this code is tricky, and has many complications to resolve
861 * deadlock or inconsistent data situations.  Tread lightly!!!
862 *
863 * Modify the length of a buffer's underlying buffer storage without
864 * destroying information (unless, of course the buffer is shrinking).
865 */
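/*
 * Returns 1 on success.  Returns 0 only when the buffer had to be released
 * before it could be fully constituted (for example when the pagedaemon
 * wants it, B_PDWANTED); callers such as getblk() simply retry in that case.
 */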
866int
867allocbuf(struct buf * bp, int size, int vmio)
868{
869
870	int s;
871	int newbsize, mbsize;
872	int i;
873
874	if ((bp->b_flags & B_VMIO) == 0) {
875		/*
876		 * Just get anonymous memory from the kernel
877		 */
878		mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
879		newbsize = round_page(size);
880
881		if (newbsize == bp->b_bufsize) {
882			bp->b_bcount = size;
883			return 1;
884		} else if (newbsize < bp->b_bufsize) {
885			vm_hold_free_pages(
886			    bp,
887			    (vm_offset_t) bp->b_data + newbsize,
888			    (vm_offset_t) bp->b_data + bp->b_bufsize);
889			bufspace -= (bp->b_bufsize - newbsize);
890		} else if (newbsize > bp->b_bufsize) {
891			vm_hold_load_pages(
892			    bp,
893			    (vm_offset_t) bp->b_data + bp->b_bufsize,
894			    (vm_offset_t) bp->b_data + newbsize);
895			bufspace += (newbsize - bp->b_bufsize);
896		}
897	} else {
898		vm_page_t m;
899		int desiredpages;
900
901		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
902		desiredpages = round_page(newbsize) / PAGE_SIZE;
903
904		if (newbsize == bp->b_bufsize) {
905			bp->b_bcount = size;
906			return 1;
907		} else if (newbsize < bp->b_bufsize) {
908			if (desiredpages < bp->b_npages) {
909				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
910				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
911				for (i = desiredpages; i < bp->b_npages; i++) {
912					m = bp->b_pages[i];
913					s = splhigh();
914					while ((m->flags & PG_BUSY) || (m->busy != 0)) {
915						m->flags |= PG_WANTED;
916						tsleep(m, PVM, "biodep", 0);
917					}
918					splx(s);
919
920					if (m->bmapped == 0) {
921						printf("allocbuf: bmapped is zero for page %d\n", i);
922						panic("allocbuf: error");
923					}
924					--m->bmapped;
925					if (m->bmapped == 0) {
926						PAGE_WAKEUP(m);
927						if (m->valid == 0) {
928							pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
929							vm_page_free(m);
930						}
931					}
932					bp->b_pages[i] = NULL;
933				}
934				bp->b_npages = desiredpages;
935				bufspace -= (bp->b_bufsize - newbsize);
936			}
937		} else {
938			vm_object_t obj;
939			vm_offset_t tinc, off, toff, objoff;
940			int pageindex, curbpnpages;
941			struct vnode *vp;
942			int bsize;
943
944			vp = bp->b_vp;
945			bsize = vp->v_mount->mnt_stat.f_iosize;
946
947			if (bp->b_npages < desiredpages) {
948				obj = (vm_object_t) vp->v_vmdata;
949				tinc = PAGE_SIZE;
950				if (tinc > bsize)
951					tinc = bsize;
952				off = bp->b_lblkno * bsize;
953				curbpnpages = bp->b_npages;
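				/*
				 * Walk the new size one chunk (tinc) at a time:
				 * pages already attached to the buffer are reused,
				 * the rest are looked up in (or allocated from) the
				 * VM object.  Whenever we have to sleep, the object
				 * may have changed underneath us, so back out any
				 * partially collected pages and restart at doretry.
				 */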
954		doretry:
955				for (toff = 0; toff < newbsize; toff += tinc) {
957					int bytesinpage;
958
959					pageindex = toff / PAGE_SIZE;
960					objoff = trunc_page(toff + off);
961					if (pageindex < curbpnpages) {
964						m = bp->b_pages[pageindex];
965						if (m->offset != objoff)
966							panic("allocbuf: page changed offset");
967						bytesinpage = tinc;
968						if (tinc > (newbsize - toff))
969							bytesinpage = newbsize - toff;
970						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
971							bp->b_flags &= ~B_CACHE;
972						}
973						if ((m->flags & PG_ACTIVE) == 0)
974							vm_page_activate(m);
975						continue;
976					}
977					m = vm_page_lookup(obj, objoff);
978					if (!m) {
979						m = vm_page_alloc(obj, objoff, 0);
980						if (!m) {
981							int j;
982
983							for (j = bp->b_npages; j < pageindex; j++) {
984								vm_page_t mt = bp->b_pages[j];
985
986								PAGE_WAKEUP(mt);
987								if (mt->valid == 0 && mt->bmapped == 0) {
988									vm_page_free(mt);
989								}
990							}
991							VM_WAIT;
992							if (vmio && (bp->b_flags & B_PDWANTED)) {
993								bp->b_flags |= B_INVAL;
994								brelse(bp);
995								return 0;
996							}
997							curbpnpages = bp->b_npages;
998							goto doretry;
999						}
1000						m->valid = 0;
1001						vm_page_activate(m);
1002					} else if ((m->valid == 0) || (m->flags & PG_BUSY)) {
1003						int j;
1004						int bufferdestroyed = 0;
1005
1006						for (j = bp->b_npages; j < pageindex; j++) {
1007							vm_page_t mt = bp->b_pages[j];
1008
1009							PAGE_WAKEUP(mt);
1010							if (mt->valid == 0 && mt->bmapped == 0) {
1011								vm_page_free(mt);
1012							}
1013						}
1014						if (vmio && (bp->b_flags & B_PDWANTED)) {
1015							bp->b_flags |= B_INVAL;
1016							brelse(bp);
1017							VM_WAIT;
1018							bufferdestroyed = 1;
1019						}
1020						s = splbio();
1021						if (m->flags & PG_BUSY) {
1022							m->flags |= PG_WANTED;
1023							tsleep(m, PRIBIO, "pgtblk", 0);
1024						} else if( m->valid == 0 && m->bmapped == 0) {
1025							vm_page_free(m);
1026						}
1027						splx(s);
1028						if (bufferdestroyed)
1029							return 0;
1030						curbpnpages = bp->b_npages;
1031						goto doretry;
1032					} else {
1035						if ((m->flags & PG_CACHE) &&
1036						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) {
1037							int j;
1038
1039							for (j = bp->b_npages; j < pageindex; j++) {
1040								vm_page_t mt = bp->b_pages[j];
1041
1042								PAGE_WAKEUP(mt);
1043								if (mt->valid == 0 && mt->bmapped == 0) {
1044									vm_page_free(mt);
1045								}
1046							}
1047							VM_WAIT;
1048							if (vmio && (bp->b_flags & B_PDWANTED)) {
1049								bp->b_flags |= B_INVAL;
1050								brelse(bp);
1051								return 0;
1052							}
1053							curbpnpages = bp->b_npages;
1054							goto doretry;
1055						}
1056						bytesinpage = tinc;
1057						if (tinc > (newbsize - toff))
1058							bytesinpage = newbsize - toff;
1059						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
1060							bp->b_flags &= ~B_CACHE;
1061						}
1062						if ((m->flags & PG_ACTIVE) == 0)
1063							vm_page_activate(m);
1064						m->flags |= PG_BUSY;
1065					}
1066					bp->b_pages[pageindex] = m;
1067					curbpnpages = pageindex + 1;
1068				}
1069				if (bsize >= PAGE_SIZE) {
1070					for (i = bp->b_npages; i < curbpnpages; i++) {
1071						m = bp->b_pages[i];
1072						if (m->valid == 0) {
1073							bp->b_flags &= ~B_CACHE;
1074						}
1075						m->bmapped++;
1076						PAGE_WAKEUP(m);
1077					}
1078#if 0
1079					if( bp->b_flags & B_CACHE) {
1080						for (i = bp->b_npages; i < curbpnpages; i++) {
1081							bp->b_pages[i]->flags |= PG_REFERENCED;
1082						}
1083					}
1084#endif
1085				} else {
1086					if (!vm_page_is_valid(bp->b_pages[0], off, bsize))
1087						bp->b_flags &= ~B_CACHE;
1088					bp->b_pages[0]->bmapped++;
1089					PAGE_WAKEUP(bp->b_pages[0]);
1090				}
1091				bp->b_npages = curbpnpages;
1092				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
1093				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
1094				bp->b_data += off % PAGE_SIZE;
1095			}
1096			bufspace += (newbsize - bp->b_bufsize);
1097		}
1098	}
1099	bp->b_bufsize = newbsize;
1100	bp->b_bcount = size;
1101	return 1;
1102}
1103
1104/*
1105 * Wait for buffer I/O completion, returning error status.
1106 */
1107int
1108biowait(register struct buf * bp)
1109{
1110	int s;
1111
1112	s = splbio();
1113	while ((bp->b_flags & B_DONE) == 0)
1114		tsleep((caddr_t) bp, PRIBIO, "biowait", 0);
1115	if ((bp->b_flags & B_ERROR) || bp->b_error) {
1116		if ((bp->b_flags & B_INVAL) == 0) {
1117			bp->b_flags |= B_INVAL;
1118			bp->b_dev = NODEV;
1119			LIST_REMOVE(bp, b_hash);
1120			LIST_INSERT_HEAD(&invalhash, bp, b_hash);
1121			wakeup((caddr_t) bp);
1122		}
1123		if (!bp->b_error)
1124			bp->b_error = EIO;
1125		else
1126			bp->b_flags |= B_ERROR;
1127		splx(s);
1128		return (bp->b_error);
1129	} else {
1130		splx(s);
1131		return (0);
1132	}
1133}
1134
1135/*
1136 * Finish I/O on a buffer, calling an optional function.
1137 * This is usually called from interrupt level, so process blocking
1138 * is not *a good idea*.
1139 */
1140void
1141biodone(register struct buf * bp)
1142{
1143	int s;
1144
1145	s = splbio();
1146	if (bp->b_flags & B_DONE)
1147		printf("biodone: buffer already done\n");
1148	bp->b_flags |= B_DONE;
1149
1150	if ((bp->b_flags & B_READ) == 0) {
1151		vwakeup(bp);
1152	}
1153#ifdef BOUNCE_BUFFERS
1154	if (bp->b_flags & B_BOUNCE)
1155		vm_bounce_free(bp);
1156#endif
1157
1158	/* call optional completion function if requested */
1159	if (bp->b_flags & B_CALL) {
1160		bp->b_flags &= ~B_CALL;
1161		(*bp->b_iodone) (bp);
1162		splx(s);
1163		return;
1164	}
1165	if (bp->b_flags & B_VMIO) {
1166		int i, resid;
1167		vm_offset_t foff;
1168		vm_page_t m;
1169		vm_object_t obj;
1170		int iosize;
1171		struct vnode *vp = bp->b_vp;
1172
1173		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1174		obj = (vm_object_t) vp->v_vmdata;
1175		if (!obj) {
1176			splx(s); return;	/* restore spl before bailing out */
1177		}
1178#if defined(VFS_BIO_DEBUG)
1179		if (obj->paging_in_progress < bp->b_npages) {
1180			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
1181			    obj->paging_in_progress, bp->b_npages);
1182		}
1183#endif
1184		iosize = bp->b_bufsize;
1185		for (i = 0; i < bp->b_npages; i++) {
1186			m = bp->b_pages[i];
1187			if (m == bogus_page) {
1188				m = vm_page_lookup(obj, foff);
1189				if (!m) {
1190#if defined(VFS_BIO_DEBUG)
1191					printf("biodone: page disappeared\n");
1192#endif
1193					--obj->paging_in_progress;
1194					continue;
1195				}
1196				bp->b_pages[i] = m;
1197				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1198			}
1199#if defined(VFS_BIO_DEBUG)
1200			if (trunc_page(foff) != m->offset) {
1201				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
1202			}
1203#endif
1204			resid = (m->offset + PAGE_SIZE) - foff;
1205			if (resid > iosize)
1206				resid = iosize;
1207			if (resid > 0) {
1208				vm_page_set_valid(m, foff, resid);
1209				vm_page_set_clean(m, foff, resid);
1210			}
1211
1212			/*
1213			 * when debugging new filesystems or buffer I/O methods, this
1214			 * is the most common error that pops up.  if you see this, you
1215			 * have not set the page busy flag correctly!!!
1216			 */
1217			if (m->busy == 0) {
1218				printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n",
1219				    m->offset, foff, resid, i);
1220				printf(" iosize: %d, lblkno: %d\n",
1221				    bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno);
1222				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
1223				    m->valid, m->dirty, m->bmapped);
1224				panic("biodone: page busy < 0");
1225			}
1226			--m->busy;
1227			PAGE_WAKEUP(m);
1228			--obj->paging_in_progress;
1229			foff += resid;
1230			iosize -= resid;
1231		}
1232		if (obj && obj->paging_in_progress == 0 &&
1233		    (obj->flags & OBJ_PIPWNT)) {
1234			obj->flags &= ~OBJ_PIPWNT;
1235			wakeup((caddr_t) obj);
1236		}
1237	}
1238	/*
1239	 * For asynchronous completions, release the buffer now. The brelse
1240	 * checks for B_WANTED and will do the wakeup there if necessary - so
1241	 * no need to do a wakeup here in the async case.
1242	 */
1243
1244	if (bp->b_flags & B_ASYNC) {
1245		brelse(bp);
1246	} else {
1247		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
1248		wakeup((caddr_t) bp);
1249	}
1250	splx(s);
1251}
1252
1253int
1254count_lock_queue()
1255{
1256	int count;
1257	struct buf *bp;
1258
1259	count = 0;
1260	for (bp = bufqueues[QUEUE_LOCKED].tqh_first;
1261	    bp != NULL;
1262	    bp = bp->b_freelist.tqe_next)
1263		count++;
1264	return (count);
1265}
1266
1267int vfs_update_interval = 30;
1268
1269void
1270vfs_update()
1271{
1272	(void) spl0();
1273	while (1) {
1274		tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update",
1275		    hz * vfs_update_interval);
1276		vfs_update_wakeup = 0;
1277		sync(curproc, NULL, NULL);
1278	}
1279}
1280
1281/*
1282 * This routine is called in lieu of biodone in the case of
1283 * incomplete I/O.  This keeps the busy status for pages
1284 * consistent.
1285 */
1286void
1287vfs_unbusy_pages(struct buf * bp)
1288{
1289	int i;
1290
1291	if (bp->b_flags & B_VMIO) {
1292		struct vnode *vp = bp->b_vp;
1293		vm_object_t obj = (vm_object_t) vp->v_vmdata;
1294		vm_offset_t foff;
1295
1296		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1297
1298		for (i = 0; i < bp->b_npages; i++) {
1299			vm_page_t m = bp->b_pages[i];
1300
1301			if (m == bogus_page) {
1302				m = vm_page_lookup(obj, foff);
1303				if (!m) {
1304					panic("vfs_unbusy_pages: page missing");
1305				}
1306				bp->b_pages[i] = m;
1307				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1308			}
1309			--obj->paging_in_progress;
1310			--m->busy;
1311			PAGE_WAKEUP(m);
1312		}
1313		if (obj->paging_in_progress == 0 &&
1314		    (obj->flags & OBJ_PIPWNT)) {
1315			obj->flags &= ~OBJ_PIPWNT;
1316			wakeup((caddr_t) obj);
1317		}
1318	}
1319}
1320
1321/*
1322 * This routine is called before a device strategy routine.
1323 * It is used to tell the VM system that paging I/O is in
1324 * progress, and treat the pages associated with the buffer
1325 * almost as being PG_BUSY.  Also the object paging_in_progress
1326 * count is maintained to make sure that the object doesn't become
1327 * inconsistent.
1328 */
1329void
1330vfs_busy_pages(struct buf * bp, int clear_modify)
1331{
1332	int i;
1333
1334	if (bp->b_flags & B_VMIO) {
1335		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
1336		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1337		int iocount = bp->b_bufsize;
1338
1339		for (i = 0; i < bp->b_npages; i++) {
1340			vm_page_t m = bp->b_pages[i];
1341			int resid = (m->offset + PAGE_SIZE) - foff;
1342
1343			if (resid > iocount)
1344				resid = iocount;
1345			obj->paging_in_progress++;
1346			m->busy++;
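			/*
			 * For a write (clear_modify), capture any pending
			 * modifications and write-protect the page.  For a
			 * read, pages that are already valid are replaced by
			 * bogus_page so the device transfer cannot overwrite
			 * good data.
			 */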
1347			if (clear_modify) {
1348				vm_page_test_dirty(m);
1349				pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
1350			} else if (bp->b_bcount >= PAGE_SIZE) {
1351				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
1352					bp->b_pages[i] = bogus_page;
1353					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1354				}
1355			}
1356			foff += resid;
1357			iocount -= resid;
1358		}
1359	}
1360}
1361
1362/*
1363 * Tell the VM system that the pages associated with this buffer
1364 * are dirty.  This is in case of the unlikely circumstance that
1365 * a buffer has to be destroyed before it is flushed.
1366 */
1367void
1368vfs_dirty_pages(struct buf * bp)
1369{
1370	int i;
1371
1372	if (bp->b_flags & B_VMIO) {
1373		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1374		int iocount = bp->b_bufsize;
1375
1376		for (i = 0; i < bp->b_npages; i++) {
1377			vm_page_t m = bp->b_pages[i];
1378			int resid = (m->offset + PAGE_SIZE) - foff;
1379
1380			if (resid > iocount)
1381				resid = iocount;
1382			if (resid > 0) {
1383				vm_page_set_valid(m, foff, resid);
1384				vm_page_set_dirty(m, foff, resid);
1385			}
1386			PAGE_WAKEUP(m);
1387			foff += resid;
1388			iocount -= resid;
1389		}
1390	}
1391}
1392/*
1393 * vm_hold_load_pages and vm_hold_free_pages get pages into
1394 * a buffer's address space.  The pages are anonymous and are
1395 * not associated with a file object.
1396 */
1397void
1398vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
1399{
1400	vm_offset_t pg;
1401	vm_page_t p;
1402	vm_offset_t from = round_page(froma);
1403	vm_offset_t to = round_page(toa);
1404
1405tryagain0:
1406	if ((curproc != pageproc) && ((cnt.v_free_count + cnt.v_cache_count) <=
1407		cnt.v_free_reserved + (toa - froma) / PAGE_SIZE)) {
1408		VM_WAIT;
1409		goto tryagain0;
1410	}
1411	for (pg = from; pg < to; pg += PAGE_SIZE) {
1412
1413tryagain:
1414
1415		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 0);
1416		if (!p) {
1417			VM_WAIT;
1418			goto tryagain;
1419		}
1420		vm_page_wire(p);
1421		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
1422		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
1423		PAGE_WAKEUP(p);
1424		bp->b_npages++;
1425	}
1426}
1427
1428void
1429vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
1430{
1431	vm_offset_t pg;
1432	vm_page_t p;
1433	vm_offset_t from = round_page(froma);
1434	vm_offset_t to = round_page(toa);
1435
1436	for (pg = from; pg < to; pg += PAGE_SIZE) {
1437		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
1438		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
1439		pmap_kremove(pg);
1440		vm_page_free(p);
1441		--bp->b_npages;
1442	}
1443}
1444
1445void
1446bufstats()
1447{
1448}
1449