vfs_bio.c revision 6619
1/* 2 * Copyright (c) 1994 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. This work was done expressly for inclusion into FreeBSD. Other use 17 * is allowed if this notation is included. 18 * 5. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * 21 * $Id: vfs_bio.c,v 1.28 1995/02/18 02:55:09 davidg Exp $ 22 */ 23 24/* 25 * this file contains a new buffer I/O scheme implementing a coherent 26 * VM object and buffer cache scheme. Pains have been taken to make 27 * sure that the performance degradation associated with schemes such 28 * as this is not realized. 29 * 30 * Author: John S. Dyson 31 * Significant help during the development and debugging phases 32 * had been provided by David Greenman, also of the FreeBSD core team. 
33 */ 34 35#define VMIO 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/kernel.h> 39#include <sys/proc.h> 40#include <sys/vnode.h> 41#include <vm/vm.h> 42#include <vm/vm_pageout.h> 43#include <vm/vm_page.h> 44#include <vm/vm_object.h> 45#include <sys/buf.h> 46#include <sys/mount.h> 47#include <sys/malloc.h> 48#include <sys/resourcevar.h> 49#include <sys/proc.h> 50 51#include <miscfs/specfs/specdev.h> 52 53struct buf *buf; /* buffer header pool */ 54int nbuf; /* number of buffer headers calculated 55 * elsewhere */ 56struct swqueue bswlist; 57int nvmio, nlru; 58 59extern vm_map_t buffer_map, io_map, kernel_map, pager_map; 60 61void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); 62void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); 63void vfs_dirty_pages(struct buf * bp); 64void vfs_busy_pages(struct buf *, int clear_modify); 65 66int needsbuffer; 67 68/* 69 * Internal update daemon, process 3 70 * The variable vfs_update_wakeup allows for internal syncs. 71 */ 72int vfs_update_wakeup; 73 74 75/* 76 * buffers base kva 77 */ 78caddr_t buffers_kva; 79 80/* 81 * bogus page -- for I/O to/from partially complete buffers 82 * this is a temporary solution to the problem, but it is not 83 * really that bad. it would be better to split the buffer 84 * for input in the case of buffers partially already in memory, 85 * but the code is intricate enough already. 86 */ 87vm_page_t bogus_page; 88vm_offset_t bogus_offset; 89 90int bufspace, maxbufspace; 91 92/* 93 * Initialize buffer headers and related structures. 
 */

/*
 * bufinit:
 *	One-time initialization of the buffer cache, called at boot.
 *	Sets up the empty free-list/hash structures, carves a pageable
 *	KVA window large enough for nbuf buffers of MAXBSIZE each, and
 *	threads every buffer header onto the EMPTY queue and the
 *	invalid-buffer hash chain.  Also allocates the "bogus" page used
 *	as a placeholder during partial-buffer I/O.
 */
void
bufinit()
{
	struct buf *bp;
	int i;

	TAILQ_INIT(&bswlist);
	LIST_INIT(&invalhash);

	/* first, make a null hash table */
	for (i = 0; i < BUFHSZ; i++)
		LIST_INIT(&bufhashtbl[i]);

	/* next, make a null set of free lists */
	for (i = 0; i < BUFFER_QUEUES; i++)
		TAILQ_INIT(&bufqueues[i]);

	/*
	 * Reserve one contiguous pageable KVA region; buffer i's data
	 * always lives at buffers_kva + i * MAXBSIZE (see getnewbuf).
	 */
	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
	/* finally, initialize each buffer header and stick on empty q */
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero(bp, sizeof *bp);
		bp->b_flags = B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = NULL;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_qindex = QUEUE_EMPTY;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers_kva + i * MAXBSIZE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	}
/*
 * this will change later!!!
 */
	/* soft cap on total buffer memory; enforced in getnewbuf/brelse */
	maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;

	/*
	 * bogus_page substitutes for valid-but-uncached pages during I/O
	 * so a device read does not clobber already-valid data (see
	 * vfs_busy_pages / biodone).
	 */
	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	bogus_page = vm_page_alloc(kernel_object, bogus_offset - VM_MIN_KERNEL_ADDRESS, 0);

}

/*
 * bremfree:
 *	Remove the buffer from the free list recorded in b_qindex,
 *	keeping the LRU population count (nlru) in sync.  Panics if the
 *	buffer is not on any queue.  Runs at splbio to keep the queues
 *	consistent against interrupt-time brelse.
 */
void
bremfree(struct buf * bp)
{
	int s = splbio();

	if (bp->b_qindex != QUEUE_NONE) {
		if (bp->b_qindex == QUEUE_LRU)
			--nlru;
		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
		bp->b_qindex = QUEUE_NONE;
	} else {
		panic("bremfree: removing a buffer when not on a queue");
	}
	splx(s);
}

/*
 * Get a buffer with the specified data.  Look in the cache first.
 */

/*
 * bread:
 *	Synchronous read of logical block blkno of vp into a buffer.
 *	On a cache miss the read is started via VOP_STRATEGY and we wait
 *	in biowait; on a hit the buffer is returned immediately.  The
 *	buffer (busy) is handed back through *bpp either way; the caller
 *	owns it and must brelse/bwrite it.  Returns 0 or an errno from
 *	biowait.
 */
int
bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
    struct buf ** bpp)
{
	struct buf *bp;

	bp = getblk(vp, blkno, size, 0, 0);
	*bpp = bp;

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		/* cache the read credential on the buffer for later retries */
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		return (biowait(bp));
	} else if (bp->b_lblkno == bp->b_blkno) {
		/*
		 * Cache hit but the buffer was never translated: fill in
		 * the physical block number so later writes need no bmap.
		 */
		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
		    &bp->b_blkno, (int *) 0);
	}
	return (0);
}

/*
 * breadn:
 *	Operates like bread, but also starts asynchronous I/O on the cnt
 *	read-ahead blocks listed in rablkno[]/rabsize[].  Only the primary
 *	block is waited for; read-ahead buffers complete (and are released)
 *	on their own via biodone's B_ASYNC handling.
 */
int
breadn(struct vnode * vp, daddr_t blkno, int size,
    daddr_t * rablkno, int *rabsize,
    int cnt, struct ucred * cred, struct buf ** bpp)
{
	struct buf *bp, *rabp;
	int i;
	int rv = 0, readwait = 0;

	*bpp = bp = getblk(vp, blkno, size, 0, 0);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		++readwait;	/* remember to biowait below */
	} else if (bp->b_lblkno == bp->b_blkno) {
		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
		    &bp->b_blkno, (int *) 0);
	}
	/* fire off asynchronous reads for each read-ahead block not in memory */
	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
		if (inmem(vp, *rablkno))
			continue;
		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);

		if ((rabp->b_flags & B_CACHE) == 0) {
			if (curproc && curproc->p_stats)
				curproc->p_stats->p_ru.ru_inblock++;
			rabp->b_flags |= B_READ | B_ASYNC;
			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
			if (rabp->b_rcred == NOCRED) {
				if (cred != NOCRED)
					crhold(cred);
				rabp->b_rcred = cred;
			}
			vfs_busy_pages(rabp, 0);
			VOP_STRATEGY(rabp);
		} else {
			/* already cached: just give the buffer back */
			brelse(rabp);
		}
	}

	if (readwait) {
		rv = biowait(bp);
	}
	return (rv);
}

/*
 * bwrite:
 *	Write, release buffer on completion.  (Done by iodone if async.)
 *	For a synchronous write the caller gets the I/O status from
 *	biowait and the buffer is released here; for B_ASYNC the buffer
 *	is released by biodone.  Write accounting (ru_oublock) is charged
 *	only for writes that were not already counted as delayed writes.
 */
int
bwrite(struct buf * bp)
{
	int oldflags = bp->b_flags;

	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}
	if (!(bp->b_flags & B_BUSY))
		panic("bwrite: buffer is not busy???");

	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	bp->b_flags |= B_WRITEINPROG;

	if (oldflags & B_ASYNC) {
		if (oldflags & B_DELWRI) {
			/* was dirty: move it off the vnode's dirty list now */
			reassignbuf(bp, bp->b_vp);
		} else if (curproc) {
			++curproc->p_stats->p_ru.ru_oublock;
		}
	}
	bp->b_vp->v_numoutput++;
	vfs_busy_pages(bp, 1);
	VOP_STRATEGY(bp);

	if ((oldflags & B_ASYNC) == 0) {
		int rtval = biowait(bp);

		if (oldflags & B_DELWRI) {
			reassignbuf(bp, bp->b_vp);
		} else if (curproc) {
			++curproc->p_stats->p_ru.ru_oublock;
		}
		brelse(bp);
		return (rtval);
	}
	return (0);
}

/*
 * vn_bwrite:
 *	VOP_BWRITE glue: unwrap the vnode-op argument structure and call
 *	the plain bwrite above.
 */
int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.  (Buffer is marked dirty).
304 */ 305void 306bdwrite(struct buf * bp) 307{ 308 309 if ((bp->b_flags & B_BUSY) == 0) { 310 panic("bdwrite: buffer is not busy"); 311 } 312 if (bp->b_flags & B_INVAL) { 313 brelse(bp); 314 return; 315 } 316 if (bp->b_flags & B_TAPE) { 317 bawrite(bp); 318 return; 319 } 320 bp->b_flags &= ~B_READ; 321 vfs_dirty_pages(bp); 322 if ((bp->b_flags & B_DELWRI) == 0) { 323 if (curproc) 324 ++curproc->p_stats->p_ru.ru_oublock; 325 bp->b_flags |= B_DONE | B_DELWRI; 326 reassignbuf(bp, bp->b_vp); 327 } 328 brelse(bp); 329 return; 330} 331 332/* 333 * Asynchronous write. 334 * Start output on a buffer, but do not wait for it to complete. 335 * The buffer is released when the output completes. 336 */ 337void 338bawrite(struct buf * bp) 339{ 340#ifdef EVILFORNOW 341 /* 342 * #ifdef EXTRA_DEADLOCKS is appropriate for this code for now :-) 343 */ 344 if (((bp->b_flags & B_DELWRI) == 0) && (bp->b_vp->v_numoutput > 24)) { 345 int s = splbio(); 346 347 while (bp->b_vp->v_numoutput > 16) { 348 bp->b_vp->v_flag |= VBWAIT; 349 tsleep((caddr_t) &bp->b_vp->v_numoutput, PRIBIO, "bawnmo", 0); 350 } 351 splx(s); 352 } 353#endif 354 bp->b_flags |= B_ASYNC; 355 (void) bwrite(bp); 356} 357 358/* 359 * Release a buffer. 360 */ 361void 362brelse(struct buf * bp) 363{ 364 int s; 365 366 if (bp->b_flags & B_CLUSTER) { 367 relpbuf(bp); 368 return; 369 } 370 /* anyone need a "free" block? */ 371 s = splbio(); 372 373 if (needsbuffer) { 374 needsbuffer = 0; 375 wakeup((caddr_t) &needsbuffer); 376 } 377 378 /* anyone need this block? 
*/ 379 if (bp->b_flags & B_WANTED) { 380 bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE); 381 wakeup((caddr_t) bp); 382 } else if (bp->b_flags & B_VMIO) { 383 bp->b_flags &= ~(B_WANTED | B_PDWANTED); 384 wakeup((caddr_t) bp); 385 } 386 if (bp->b_flags & B_LOCKED) 387 bp->b_flags &= ~B_ERROR; 388 389 if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || 390 (bp->b_bufsize <= 0)) { 391 bp->b_flags |= B_INVAL; 392 bp->b_flags &= ~(B_DELWRI | B_CACHE); 393 if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) 394 brelvp(bp); 395 } 396 397 /* 398 * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer 399 * constituted, so the B_INVAL flag is used to *invalidate* the buffer, 400 * but the VM object is kept around. The B_NOCACHE flag is used to 401 * invalidate the pages in the VM object. 402 */ 403 if (bp->b_flags & B_VMIO) { 404 vm_offset_t foff; 405 vm_object_t obj; 406 int i, resid; 407 vm_page_t m; 408 int iototal = bp->b_bufsize; 409 410 foff = 0; 411 obj = 0; 412 if (bp->b_npages) { 413 if (bp->b_vp && bp->b_vp->v_mount) { 414 foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 415 } else { 416 /* 417 * vnode pointer has been ripped away -- 418 * probably file gone... 
419 */ 420 foff = bp->b_pages[0]->offset; 421 } 422 } 423 for (i = 0; i < bp->b_npages; i++) { 424 m = bp->b_pages[i]; 425 if (m == bogus_page) { 426 panic("brelse: bogus page found"); 427 } 428 resid = (m->offset + PAGE_SIZE) - foff; 429 if (resid > iototal) 430 resid = iototal; 431 if (resid > 0) { 432 if (bp->b_flags & (B_ERROR | B_NOCACHE)) { 433 vm_page_set_invalid(m, foff, resid); 434 } else if ((bp->b_flags & B_DELWRI) == 0) { 435 vm_page_set_clean(m, foff, resid); 436 vm_page_set_valid(m, foff, resid); 437 } 438 } else { 439 vm_page_test_dirty(m); 440 } 441 if (bp->b_flags & B_INVAL) { 442 if (m->bmapped == 0) { 443 panic("brelse: bmapped is zero for page\n"); 444 } 445 --m->bmapped; 446 if (m->bmapped == 0) { 447 PAGE_WAKEUP(m); 448 if (m->valid == 0) { 449 pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 450 vm_page_free(m); 451 } else if ((m->dirty & m->valid) == 0 && 452 (m->flags & PG_REFERENCED) == 0 && 453 !pmap_is_referenced(VM_PAGE_TO_PHYS(m))) 454 vm_page_cache(m); 455 else if( (m->flags & PG_ACTIVE) == 0) 456 vm_page_activate(m); 457 } 458 } 459 foff += resid; 460 iototal -= resid; 461 } 462 463 if (bp->b_flags & B_INVAL) { 464 bufspace -= bp->b_bufsize; 465 pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); 466 bp->b_npages = 0; 467 bp->b_bufsize = 0; 468 bp->b_flags &= ~B_VMIO; 469 if (bp->b_vp) 470 brelvp(bp); 471 --nvmio; 472 } 473 } 474 if (bp->b_qindex != QUEUE_NONE) 475 panic("brelse: free buffer onto another queue???"); 476 477 /* enqueue */ 478 /* buffers with no memory */ 479 if (bp->b_bufsize == 0) { 480 bp->b_qindex = QUEUE_EMPTY; 481 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 482 LIST_REMOVE(bp, b_hash); 483 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 484 bp->b_dev = NODEV; 485 /* buffers with junk contents */ 486 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) { 487 bp->b_qindex = QUEUE_AGE; 488 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); 489 LIST_REMOVE(bp, b_hash); 490 
LIST_INSERT_HEAD(&invalhash, bp, b_hash); 491 bp->b_dev = NODEV; 492 /* buffers that are locked */ 493 } else if (bp->b_flags & B_LOCKED) { 494 bp->b_qindex = QUEUE_LOCKED; 495 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); 496 /* buffers with stale but valid contents */ 497 } else if (bp->b_flags & B_AGE) { 498 bp->b_qindex = QUEUE_AGE; 499 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); 500 /* buffers with valid and quite potentially reuseable contents */ 501 } else { 502 if (bp->b_flags & B_VMIO) 503 bp->b_qindex = QUEUE_VMIO; 504 else { 505 bp->b_qindex = QUEUE_LRU; 506 ++nlru; 507 } 508 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); 509 } 510 511 /* unlock */ 512 bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE); 513 splx(s); 514} 515 516/* 517 * this routine implements clustered async writes for 518 * clearing out B_DELWRI buffers... This is much better 519 * than the old way of writing only one buffer at a time. 520 */ 521void 522vfs_bio_awrite(struct buf * bp) 523{ 524 int i; 525 daddr_t lblkno = bp->b_lblkno; 526 struct vnode *vp = bp->b_vp; 527 int s; 528 int ncl; 529 struct buf *bpa; 530 531 s = splbio(); 532 if( vp->v_mount && (vp->v_flag & VVMIO) && 533 (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { 534 int size = vp->v_mount->mnt_stat.f_iosize; 535 536 for (i = 1; i < MAXPHYS / size; i++) { 537 if ((bpa = incore(vp, lblkno + i)) && 538 ((bpa->b_flags & (B_BUSY | B_DELWRI | B_BUSY | B_CLUSTEROK | B_INVAL)) == B_DELWRI | B_CLUSTEROK) && 539 (bpa->b_bufsize == size)) { 540 if ((bpa->b_blkno == bpa->b_lblkno) || 541 (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE)) 542 break; 543 } else { 544 break; 545 } 546 } 547 ncl = i; 548 /* 549 * this is a possible cluster write 550 */ 551 if (ncl != 1) { 552 cluster_wbuild(vp, NULL, size, lblkno, ncl, -1); 553 splx(s); 554 return; 555 } 556 } 557 /* 558 * default (old) behavior, writing out only one block 559 */ 560 bremfree(bp); 
561 bp->b_flags |= B_BUSY | B_ASYNC; 562 bwrite(bp); 563 splx(s); 564} 565 566 567/* 568 * Find a buffer header which is available for use. 569 */ 570struct buf * 571getnewbuf(int slpflag, int slptimeo, int doingvmio) 572{ 573 struct buf *bp; 574 int s; 575 int firstbp = 1; 576 577 s = splbio(); 578start: 579 if (bufspace >= maxbufspace) 580 goto trytofreespace; 581 582 /* can we constitute a new buffer? */ 583 if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) { 584 if (bp->b_qindex != QUEUE_EMPTY) 585 panic("getnewbuf: inconsistent EMPTY queue"); 586 bremfree(bp); 587 goto fillbuf; 588 } 589trytofreespace: 590 /* 591 * we keep the file I/O from hogging metadata I/O 592 */ 593 if (bp = bufqueues[QUEUE_AGE].tqh_first) { 594 if (bp->b_qindex != QUEUE_AGE) 595 panic("getnewbuf: inconsistent AGE queue"); 596 } else if ((nvmio > (nbuf / 2)) 597 && (bp = bufqueues[QUEUE_VMIO].tqh_first)) { 598 if (bp->b_qindex != QUEUE_VMIO) 599 panic("getnewbuf: inconsistent VMIO queue"); 600 } else if ((!doingvmio || (nlru > (nbuf / 2))) && 601 (bp = bufqueues[QUEUE_LRU].tqh_first)) { 602 if (bp->b_qindex != QUEUE_LRU) 603 panic("getnewbuf: inconsistent LRU queue"); 604 } 605 if (!bp) { 606 if (doingvmio) { 607 if (bp = bufqueues[QUEUE_VMIO].tqh_first) { 608 if (bp->b_qindex != QUEUE_VMIO) 609 panic("getnewbuf: inconsistent VMIO queue"); 610 } else if (bp = bufqueues[QUEUE_LRU].tqh_first) { 611 if (bp->b_qindex != QUEUE_LRU) 612 panic("getnewbuf: inconsistent LRU queue"); 613 } 614 } else { 615 if (bp = bufqueues[QUEUE_LRU].tqh_first) { 616 if (bp->b_qindex != QUEUE_LRU) 617 panic("getnewbuf: inconsistent LRU queue"); 618 } else if (bp = bufqueues[QUEUE_VMIO].tqh_first) { 619 if (bp->b_qindex != QUEUE_VMIO) 620 panic("getnewbuf: inconsistent VMIO queue"); 621 } 622 } 623 } 624 if (!bp) { 625 /* wait for a free buffer of any kind */ 626 needsbuffer = 1; 627 tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo); 628 splx(s); 629 return (0); 630 } 631 /* if we are a delayed 
write, convert to an async write */ 632 if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { 633 vfs_bio_awrite(bp); 634 if (!slpflag && !slptimeo) { 635 splx(s); 636 return (0); 637 } 638 goto start; 639 } 640 bremfree(bp); 641 642 if (bp->b_flags & B_VMIO) { 643 bp->b_flags |= B_INVAL | B_BUSY; 644 brelse(bp); 645 bremfree(bp); 646 } 647 if (bp->b_vp) 648 brelvp(bp); 649 650 /* we are not free, nor do we contain interesting data */ 651 if (bp->b_rcred != NOCRED) 652 crfree(bp->b_rcred); 653 if (bp->b_wcred != NOCRED) 654 crfree(bp->b_wcred); 655fillbuf: 656 bp->b_flags |= B_BUSY; 657 LIST_REMOVE(bp, b_hash); 658 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 659 splx(s); 660 if (bp->b_bufsize) { 661 allocbuf(bp, 0, 0); 662 } 663 bp->b_flags = B_BUSY; 664 bp->b_dev = NODEV; 665 bp->b_vp = NULL; 666 bp->b_blkno = bp->b_lblkno = 0; 667 bp->b_iodone = 0; 668 bp->b_error = 0; 669 bp->b_resid = 0; 670 bp->b_bcount = 0; 671 bp->b_npages = 0; 672 bp->b_wcred = bp->b_rcred = NOCRED; 673 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 674 bp->b_dirtyoff = bp->b_dirtyend = 0; 675 bp->b_validoff = bp->b_validend = 0; 676 if (bufspace >= maxbufspace) { 677 s = splbio(); 678 bp->b_flags |= B_INVAL; 679 brelse(bp); 680 goto trytofreespace; 681 } 682 return (bp); 683} 684 685/* 686 * Check to see if a block is currently memory resident. 687 */ 688struct buf * 689incore(struct vnode * vp, daddr_t blkno) 690{ 691 struct buf *bp; 692 struct bufhashhdr *bh; 693 694 int s = splbio(); 695 696 bh = BUFHASH(vp, blkno); 697 bp = bh->lh_first; 698 699 /* Search hash chain */ 700 while (bp) { 701 /* hit */ 702 if (bp->b_lblkno == blkno && bp->b_vp == vp 703 && (bp->b_flags & B_INVAL) == 0) { 704 splx(s); 705 return (bp); 706 } 707 bp = bp->b_hash.le_next; 708 } 709 splx(s); 710 711 return (0); 712} 713 714/* 715 * Returns true if no I/O is needed to access the 716 * associated VM object. This is like incore except 717 * it also hunts around in the VM system for the data. 
718 */ 719 720int 721inmem(struct vnode * vp, daddr_t blkno) 722{ 723 vm_object_t obj; 724 vm_offset_t off, toff, tinc; 725 vm_page_t m; 726 727 if (incore(vp, blkno)) 728 return 1; 729 if (vp->v_mount == 0) 730 return 0; 731 if ((vp->v_vmdata == 0) || (vp->v_flag & VVMIO) == 0) 732 return 0; 733 734 obj = (vm_object_t) vp->v_vmdata; 735 tinc = PAGE_SIZE; 736 if (tinc > vp->v_mount->mnt_stat.f_iosize) 737 tinc = vp->v_mount->mnt_stat.f_iosize; 738 off = blkno * vp->v_mount->mnt_stat.f_iosize; 739 740 for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { 741 int mask; 742 743 m = vm_page_lookup(obj, trunc_page(toff + off)); 744 if (!m) 745 return 0; 746 if (vm_page_is_valid(m, toff + off, tinc) == 0) 747 return 0; 748 } 749 return 1; 750} 751 752/* 753 * Get a block given a specified block and offset into a file/device. 754 */ 755struct buf * 756getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) 757{ 758 struct buf *bp; 759 int s; 760 struct bufhashhdr *bh; 761 vm_offset_t off; 762 int nleft; 763 764 s = splbio(); 765loop: 766 if ((cnt.v_free_count + cnt.v_cache_count) < 767 cnt.v_free_reserved + MAXBSIZE / PAGE_SIZE) 768 wakeup((caddr_t) &vm_pages_needed); 769 if (bp = incore(vp, blkno)) { 770 if (bp->b_flags & B_BUSY) { 771 bp->b_flags |= B_WANTED; 772 if (curproc == pageproc) { 773 bp->b_flags |= B_PDWANTED; 774 wakeup((caddr_t) &cnt.v_free_count); 775 } 776 if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo)) 777 goto loop; 778 splx(s); 779 return (struct buf *) NULL; 780 } 781 bp->b_flags |= B_BUSY | B_CACHE; 782 bremfree(bp); 783 /* 784 * check for size inconsistancies 785 */ 786 if (bp->b_bcount != size) { 787#if defined(VFS_BIO_DEBUG) 788 printf("getblk: invalid buffer size: %ld\n", bp->b_bcount); 789#endif 790 bp->b_flags |= B_INVAL; 791 bwrite(bp); 792 goto loop; 793 } 794 splx(s); 795 return (bp); 796 } else { 797 vm_object_t obj; 798 int doingvmio; 799 800 if ((obj = (vm_object_t) vp->v_vmdata) && 
(vp->v_flag & VVMIO)) { 801 doingvmio = 1; 802 } else { 803 doingvmio = 0; 804 } 805 if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) { 806 if (slpflag || slptimeo) 807 return NULL; 808 goto loop; 809 } 810 if (incore(vp, blkno)) { 811 bp->b_flags |= B_INVAL; 812 brelse(bp); 813 goto loop; 814 } 815 bp->b_blkno = bp->b_lblkno = blkno; 816 bgetvp(vp, bp); 817 LIST_REMOVE(bp, b_hash); 818 bh = BUFHASH(vp, blkno); 819 LIST_INSERT_HEAD(bh, bp, b_hash); 820 if (doingvmio) { 821 bp->b_flags |= (B_VMIO | B_CACHE); 822#if defined(VFS_BIO_DEBUG) 823 if (vp->v_type != VREG) 824 printf("getblk: vmioing file type %d???\n", vp->v_type); 825#endif 826 ++nvmio; 827 } else { 828 if (bp->b_flags & B_VMIO) 829 --nvmio; 830 bp->b_flags &= ~B_VMIO; 831 } 832 splx(s); 833 if (!allocbuf(bp, size, 1)) { 834 s = splbio(); 835 goto loop; 836 } 837 return (bp); 838 } 839} 840 841/* 842 * Get an empty, disassociated buffer of given size. 843 */ 844struct buf * 845geteblk(int size) 846{ 847 struct buf *bp; 848 849 while ((bp = getnewbuf(0, 0, 0)) == 0); 850 allocbuf(bp, size, 0); 851 bp->b_flags |= B_INVAL; 852 return (bp); 853} 854 855/* 856 * This code constitutes the buffer memory from either anonymous system 857 * memory (in the case of non-VMIO operations) or from an associated 858 * VM object (in the case of VMIO operations). 859 * 860 * Note that this code is tricky, and has many complications to resolve 861 * deadlock or inconsistant data situations. Tread lightly!!! 862 * 863 * Modify the length of a buffer's underlying buffer storage without 864 * destroying information (unless, of course the buffer is shrinking). 
865 */ 866int 867allocbuf(struct buf * bp, int size, int vmio) 868{ 869 870 int s; 871 int newbsize, mbsize; 872 int i; 873 874 if ((bp->b_flags & B_VMIO) == 0) { 875 /* 876 * Just get anonymous memory from the kernel 877 */ 878 mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE; 879 newbsize = round_page(size); 880 881 if (newbsize == bp->b_bufsize) { 882 bp->b_bcount = size; 883 return 1; 884 } else if (newbsize < bp->b_bufsize) { 885 vm_hold_free_pages( 886 bp, 887 (vm_offset_t) bp->b_data + newbsize, 888 (vm_offset_t) bp->b_data + bp->b_bufsize); 889 bufspace -= (bp->b_bufsize - newbsize); 890 } else if (newbsize > bp->b_bufsize) { 891 vm_hold_load_pages( 892 bp, 893 (vm_offset_t) bp->b_data + bp->b_bufsize, 894 (vm_offset_t) bp->b_data + newbsize); 895 bufspace += (newbsize - bp->b_bufsize); 896 } 897 } else { 898 vm_page_t m; 899 int desiredpages; 900 901 newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE; 902 desiredpages = round_page(newbsize) / PAGE_SIZE; 903 904 if (newbsize == bp->b_bufsize) { 905 bp->b_bcount = size; 906 return 1; 907 } else if (newbsize < bp->b_bufsize) { 908 if (desiredpages < bp->b_npages) { 909 pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + 910 desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages)); 911 for (i = desiredpages; i < bp->b_npages; i++) { 912 m = bp->b_pages[i]; 913 s = splhigh(); 914 while ((m->flags & PG_BUSY) || (m->busy != 0)) { 915 m->flags |= PG_WANTED; 916 tsleep(m, PVM, "biodep", 0); 917 } 918 splx(s); 919 920 if (m->bmapped == 0) { 921 printf("allocbuf: bmapped is zero for page %d\n", i); 922 panic("allocbuf: error"); 923 } 924 --m->bmapped; 925 if (m->bmapped == 0) { 926 PAGE_WAKEUP(m); 927 if (m->valid == 0) { 928 pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 929 vm_page_free(m); 930 } 931 } 932 bp->b_pages[i] = NULL; 933 } 934 bp->b_npages = desiredpages; 935 bufspace -= (bp->b_bufsize - newbsize); 936 } 937 } else { 938 vm_object_t obj; 939 vm_offset_t tinc, off, toff, objoff; 
940 int pageindex, curbpnpages; 941 struct vnode *vp; 942 int bsize; 943 944 vp = bp->b_vp; 945 bsize = vp->v_mount->mnt_stat.f_iosize; 946 947 if (bp->b_npages < desiredpages) { 948 obj = (vm_object_t) vp->v_vmdata; 949 tinc = PAGE_SIZE; 950 if (tinc > bsize) 951 tinc = bsize; 952 off = bp->b_lblkno * bsize; 953 curbpnpages = bp->b_npages; 954 doretry: 955 for (toff = 0; toff < newbsize; toff += tinc) { 956 int mask; 957 int bytesinpage; 958 959 pageindex = toff / PAGE_SIZE; 960 objoff = trunc_page(toff + off); 961 if (pageindex < curbpnpages) { 962 int pb; 963 964 m = bp->b_pages[pageindex]; 965 if (m->offset != objoff) 966 panic("allocbuf: page changed offset??!!!?"); 967 bytesinpage = tinc; 968 if (tinc > (newbsize - toff)) 969 bytesinpage = newbsize - toff; 970 if (!vm_page_is_valid(m, toff + off, bytesinpage)) { 971 bp->b_flags &= ~B_CACHE; 972 } 973 if ((m->flags & PG_ACTIVE) == 0) 974 vm_page_activate(m); 975 continue; 976 } 977 m = vm_page_lookup(obj, objoff); 978 if (!m) { 979 m = vm_page_alloc(obj, objoff, 0); 980 if (!m) { 981 int j; 982 983 for (j = bp->b_npages; j < pageindex; j++) { 984 vm_page_t mt = bp->b_pages[j]; 985 986 PAGE_WAKEUP(mt); 987 if (mt->valid == 0 && mt->bmapped == 0) { 988 vm_page_free(mt); 989 } 990 } 991 VM_WAIT; 992 if (vmio && (bp->b_flags & B_PDWANTED)) { 993 bp->b_flags |= B_INVAL; 994 brelse(bp); 995 return 0; 996 } 997 curbpnpages = bp->b_npages; 998 goto doretry; 999 } 1000 m->valid = 0; 1001 vm_page_activate(m); 1002 } else if ((m->valid == 0) || (m->flags & PG_BUSY)) { 1003 int j; 1004 int bufferdestroyed = 0; 1005 1006 for (j = bp->b_npages; j < pageindex; j++) { 1007 vm_page_t mt = bp->b_pages[j]; 1008 1009 PAGE_WAKEUP(mt); 1010 if (mt->valid == 0 && mt->bmapped == 0) { 1011 vm_page_free(mt); 1012 } 1013 } 1014 if (vmio && (bp->b_flags & B_PDWANTED)) { 1015 bp->b_flags |= B_INVAL; 1016 brelse(bp); 1017 VM_WAIT; 1018 bufferdestroyed = 1; 1019 } 1020 s = splbio(); 1021 if (m->flags & PG_BUSY) { 1022 m->flags |= PG_WANTED; 
1023 tsleep(m, PRIBIO, "pgtblk", 0); 1024 } else if( m->valid == 0 && m->bmapped == 0) { 1025 vm_page_free(m); 1026 } 1027 splx(s); 1028 if (bufferdestroyed) 1029 return 0; 1030 curbpnpages = bp->b_npages; 1031 goto doretry; 1032 } else { 1033 int pb; 1034 1035 if ((m->flags & PG_CACHE) && 1036 (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { 1037 int j; 1038 1039 for (j = bp->b_npages; j < pageindex; j++) { 1040 vm_page_t mt = bp->b_pages[j]; 1041 1042 PAGE_WAKEUP(mt); 1043 if (mt->valid == 0 && mt->bmapped == 0) { 1044 vm_page_free(mt); 1045 } 1046 } 1047 VM_WAIT; 1048 if (vmio && (bp->b_flags & B_PDWANTED)) { 1049 bp->b_flags |= B_INVAL; 1050 brelse(bp); 1051 return 0; 1052 } 1053 curbpnpages = bp->b_npages; 1054 goto doretry; 1055 } 1056 bytesinpage = tinc; 1057 if (tinc > (newbsize - toff)) 1058 bytesinpage = newbsize - toff; 1059 if (!vm_page_is_valid(m, toff + off, bytesinpage)) { 1060 bp->b_flags &= ~B_CACHE; 1061 } 1062 if ((m->flags & PG_ACTIVE) == 0) 1063 vm_page_activate(m); 1064 m->flags |= PG_BUSY; 1065 } 1066 bp->b_pages[pageindex] = m; 1067 curbpnpages = pageindex + 1; 1068 } 1069 if (bsize >= PAGE_SIZE) { 1070 for (i = bp->b_npages; i < curbpnpages; i++) { 1071 m = bp->b_pages[i]; 1072 if (m->valid == 0) { 1073 bp->b_flags &= ~B_CACHE; 1074 } 1075 m->bmapped++; 1076 PAGE_WAKEUP(m); 1077 } 1078#if 0 1079 if( bp->b_flags & B_CACHE) { 1080 for (i = bp->b_npages; i < curbpnpages; i++) { 1081 bp->b_pages[i]->flags |= PG_REFERENCED; 1082 } 1083 } 1084#endif 1085 } else { 1086 if (!vm_page_is_valid(bp->b_pages[0], off, bsize)) 1087 bp->b_flags &= ~B_CACHE; 1088 bp->b_pages[0]->bmapped++; 1089 PAGE_WAKEUP(bp->b_pages[0]); 1090 } 1091 bp->b_npages = curbpnpages; 1092 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 1093 pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); 1094 bp->b_data += off % PAGE_SIZE; 1095 } 1096 bufspace += (newbsize - bp->b_bufsize); 1097 } 1098 } 1099 bp->b_bufsize = newbsize; 1100 bp->b_bcount = size; 
1101 return 1; 1102} 1103 1104/* 1105 * Wait for buffer I/O completion, returning error status. 1106 */ 1107int 1108biowait(register struct buf * bp) 1109{ 1110 int s; 1111 1112 s = splbio(); 1113 while ((bp->b_flags & B_DONE) == 0) 1114 tsleep((caddr_t) bp, PRIBIO, "biowait", 0); 1115 if ((bp->b_flags & B_ERROR) || bp->b_error) { 1116 if ((bp->b_flags & B_INVAL) == 0) { 1117 bp->b_flags |= B_INVAL; 1118 bp->b_dev = NODEV; 1119 LIST_REMOVE(bp, b_hash); 1120 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 1121 wakeup((caddr_t) bp); 1122 } 1123 if (!bp->b_error) 1124 bp->b_error = EIO; 1125 else 1126 bp->b_flags |= B_ERROR; 1127 splx(s); 1128 return (bp->b_error); 1129 } else { 1130 splx(s); 1131 return (0); 1132 } 1133} 1134 1135/* 1136 * Finish I/O on a buffer, calling an optional function. 1137 * This is usually called from interrupt level, so process blocking 1138 * is not *a good idea*. 1139 */ 1140void 1141biodone(register struct buf * bp) 1142{ 1143 int s; 1144 1145 s = splbio(); 1146 if (bp->b_flags & B_DONE) 1147 printf("biodone: buffer already done\n"); 1148 bp->b_flags |= B_DONE; 1149 1150 if ((bp->b_flags & B_READ) == 0) { 1151 vwakeup(bp); 1152 } 1153#ifdef BOUNCE_BUFFERS 1154 if (bp->b_flags & B_BOUNCE) 1155 vm_bounce_free(bp); 1156#endif 1157 1158 /* call optional completion function if requested */ 1159 if (bp->b_flags & B_CALL) { 1160 bp->b_flags &= ~B_CALL; 1161 (*bp->b_iodone) (bp); 1162 splx(s); 1163 return; 1164 } 1165 if (bp->b_flags & B_VMIO) { 1166 int i, resid; 1167 vm_offset_t foff; 1168 vm_page_t m; 1169 vm_object_t obj; 1170 int iosize; 1171 struct vnode *vp = bp->b_vp; 1172 1173 foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1174 obj = (vm_object_t) vp->v_vmdata; 1175 if (!obj) { 1176 return; 1177 } 1178#if defined(VFS_BIO_DEBUG) 1179 if (obj->paging_in_progress < bp->b_npages) { 1180 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", 1181 obj->paging_in_progress, bp->b_npages); 1182 } 1183#endif 1184 iosize = bp->b_bufsize; 
1185 for (i = 0; i < bp->b_npages; i++) { 1186 m = bp->b_pages[i]; 1187 if (m == bogus_page) { 1188 m = vm_page_lookup(obj, foff); 1189 if (!m) { 1190#if defined(VFS_BIO_DEBUG) 1191 printf("biodone: page disappeared\n"); 1192#endif 1193 --obj->paging_in_progress; 1194 continue; 1195 } 1196 bp->b_pages[i] = m; 1197 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1198 } 1199#if defined(VFS_BIO_DEBUG) 1200 if (trunc_page(foff) != m->offset) { 1201 printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset); 1202 } 1203#endif 1204 resid = (m->offset + PAGE_SIZE) - foff; 1205 if (resid > iosize) 1206 resid = iosize; 1207 if (resid > 0) { 1208 vm_page_set_valid(m, foff, resid); 1209 vm_page_set_clean(m, foff, resid); 1210 } 1211 1212 /* 1213 * when debugging new filesystems or buffer I/O methods, this 1214 * is the most common error that pops up. if you see this, you 1215 * have not set the page busy flag correctly!!! 1216 */ 1217 if (m->busy == 0) { 1218 printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n", 1219 m->offset, foff, resid, i); 1220 printf(" iosize: %d, lblkno: %d\n", 1221 bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno); 1222 printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n", 1223 m->valid, m->dirty, m->bmapped); 1224 panic("biodone: page busy < 0\n"); 1225 } 1226 --m->busy; 1227 PAGE_WAKEUP(m); 1228 --obj->paging_in_progress; 1229 foff += resid; 1230 iosize -= resid; 1231 } 1232 if (obj && obj->paging_in_progress == 0 && 1233 (obj->flags & OBJ_PIPWNT)) { 1234 obj->flags &= ~OBJ_PIPWNT; 1235 wakeup((caddr_t) obj); 1236 } 1237 } 1238 /* 1239 * For asynchronous completions, release the buffer now. The brelse 1240 * checks for B_WANTED and will do the wakeup there if necessary - so 1241 * no need to do a wakeup here in the async case. 
1242 */ 1243 1244 if (bp->b_flags & B_ASYNC) { 1245 brelse(bp); 1246 } else { 1247 bp->b_flags &= ~(B_WANTED | B_PDWANTED); 1248 wakeup((caddr_t) bp); 1249 } 1250 splx(s); 1251} 1252 1253int 1254count_lock_queue() 1255{ 1256 int count; 1257 struct buf *bp; 1258 1259 count = 0; 1260 for (bp = bufqueues[QUEUE_LOCKED].tqh_first; 1261 bp != NULL; 1262 bp = bp->b_freelist.tqe_next) 1263 count++; 1264 return (count); 1265} 1266 1267int vfs_update_interval = 30; 1268 1269void 1270vfs_update() 1271{ 1272 (void) spl0(); 1273 while (1) { 1274 tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update", 1275 hz * vfs_update_interval); 1276 vfs_update_wakeup = 0; 1277 sync(curproc, NULL, NULL); 1278 } 1279} 1280 1281/* 1282 * This routine is called in lieu of iodone in the case of 1283 * incomplete I/O. This keeps the busy status for pages 1284 * consistant. 1285 */ 1286void 1287vfs_unbusy_pages(struct buf * bp) 1288{ 1289 int i; 1290 1291 if (bp->b_flags & B_VMIO) { 1292 struct vnode *vp = bp->b_vp; 1293 vm_object_t obj = (vm_object_t) vp->v_vmdata; 1294 vm_offset_t foff; 1295 1296 foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1297 1298 for (i = 0; i < bp->b_npages; i++) { 1299 vm_page_t m = bp->b_pages[i]; 1300 1301 if (m == bogus_page) { 1302 m = vm_page_lookup(obj, foff); 1303 if (!m) { 1304 panic("vfs_unbusy_pages: page missing\n"); 1305 } 1306 bp->b_pages[i] = m; 1307 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1308 } 1309 --obj->paging_in_progress; 1310 --m->busy; 1311 PAGE_WAKEUP(m); 1312 } 1313 if (obj->paging_in_progress == 0 && 1314 (obj->flags & OBJ_PIPWNT)) { 1315 obj->flags &= ~OBJ_PIPWNT; 1316 wakeup((caddr_t) obj); 1317 } 1318 } 1319} 1320 1321/* 1322 * This routine is called before a device strategy routine. 1323 * It is used to tell the VM system that paging I/O is in 1324 * progress, and treat the pages associated with the buffer 1325 * almost as being PG_BUSY. 
 Also the object paging_in_progress
 * flag is handled to make sure that the object doesn't become
 * inconsistant.
 *
 * When clear_modify is set (a write), each page's dirty state is captured
 * and the page is write-protected; otherwise (a read into a buffer that is
 * not fully cached) partially-valid pages are replaced by bogus_page so the
 * device I/O cannot clobber the valid portions -- biodone()/vfs_unbusy_pages()
 * restore the real pages afterwards.
 */
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
		/* byte offset of the buffer within the file */
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			/* portion of this page covered by the buffer */
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			/* paired with the decrements in biodone()/vfs_unbusy_pages() */
			obj->paging_in_progress++;
			m->busy++;
			if (clear_modify) {
				/* capture dirty bits, then block further writes */
				vm_page_test_dirty(m);
				pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
			} else if (bp->b_bcount >= PAGE_SIZE) {
				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
					/*
					 * Page has valid data but the buffer is not
					 * fully cached: swap in bogus_page and remap
					 * so the read doesn't overwrite valid data.
					 */
					bp->b_pages[i] = bogus_page;
					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
				}
			}
			foff += resid;
			iocount -= resid;
		}
	}
}

/*
 * Tell the VM system that the pages associated with this buffer
 * are dirty.  This is in case of the unlikely circumstance that
 * a buffer has to be destroyed before it is flushed.
 */
void
vfs_dirty_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		/* byte offset of the buffer within the file */
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			if (resid > 0) {
				/* mark the covered span both valid and dirty */
				vm_page_set_valid(m, foff, resid);
				vm_page_set_dirty(m, foff, resid);
			}
			/*
			 * NOTE(review): PAGE_WAKEUP is issued unconditionally here,
			 * without a matching busy decrement in this function --
			 * presumably the caller holds the pages busy; verify.
			 */
			PAGE_WAKEUP(m);
			foff += resid;
			iocount -= resid;
		}
	}
}
/*
 * vm_hold_load_pages and vm_hold_unload pages get pages into
 * a buffers address space.
 The pages are anonymous and are
 * not associated with a file object.
 *
 * vm_hold_load_pages backs the kva range [froma, toa) of the buffer with
 * freshly allocated, wired kernel_object pages and records them in
 * bp->b_pages; vm_hold_free_pages reverses the operation.
 */
void
vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

tryagain0:
	/*
	 * Unless we are the pageout daemon itself, wait until enough free
	 * (or cache) pages exist above the reserve to satisfy the request,
	 * so we don't eat into pages reserved for the pageout daemon.
	 */
	if ((curproc != pageproc) && ((cnt.v_free_count + cnt.v_cache_count) <=
		cnt.v_free_reserved + (toa - froma) / PAGE_SIZE)) {
		VM_WAIT;
		goto tryagain0;
	}
	for (pg = from; pg < to; pg += PAGE_SIZE) {

tryagain:

		/* anonymous page in kernel_object, keyed by kva offset */
		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 0);
		if (!p) {
			VM_WAIT;
			goto tryagain;
		}
		vm_page_wire(p);
		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
		/* remember the page at its index within the buffer's kva */
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
		PAGE_WAKEUP(p);
		bp->b_npages++;
	}
}

/*
 * Release the anonymous pages backing the kva range [froma, toa):
 * unmap each page, drop it from bp->b_pages, and free it.
 */
void
vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

	for (pg = from; pg < to; pg += PAGE_SIZE) {
		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
		pmap_kremove(pg);
		vm_page_free(p);
		--bp->b_npages;
	}
}

/*
 * Placeholder for buffer statistics reporting -- intentionally empty.
 */
void
bufstats()
{
}