vfs_bio.c revision 10228
1/* 2 * Copyright (c) 1994 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. This work was done expressly for inclusion into FreeBSD. Other use 17 * is allowed if this notation is included. 18 * 5. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * 21 * $Id: vfs_bio.c,v 1.58 1995/08/24 13:28:16 davidg Exp $ 22 */ 23 24/* 25 * this file contains a new buffer I/O scheme implementing a coherent 26 * VM object and buffer cache scheme. Pains have been taken to make 27 * sure that the performance degradation associated with schemes such 28 * as this is not realized. 29 * 30 * Author: John S. Dyson 31 * Significant help during the development and debugging phases 32 * had been provided by David Greenman, also of the FreeBSD core team. 33 */ 34 35#define VMIO 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/kernel.h> 39#include <sys/proc.h> 40#include <sys/vnode.h> 41#include <vm/vm.h> 42#include <vm/vm_kern.h> 43#include <vm/vm_pageout.h> 44#include <vm/vm_page.h> 45#include <vm/vm_object.h> 46#include <sys/buf.h> 47#include <sys/mount.h> 48#include <sys/malloc.h> 49#include <sys/resourcevar.h> 50#include <sys/proc.h> 51 52#include <miscfs/specfs/specdev.h> 53 54struct buf *buf; /* buffer header pool */ 55struct swqueue bswlist; 56 57void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); 58void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); 59void vfs_clean_pages(struct buf * bp); 60static void vfs_setdirty(struct buf *bp); 61 62int needsbuffer; 63 64/* 65 * Internal update daemon, process 3 66 * The variable vfs_update_wakeup allows for internal syncs. 67 */ 68int vfs_update_wakeup; 69 70 71/* 72 * buffers base kva 73 */ 74caddr_t buffers_kva; 75 76/* 77 * bogus page -- for I/O to/from partially complete buffers 78 * this is a temporary solution to the problem, but it is not 79 * really that bad. it would be better to split the buffer 80 * for input in the case of buffers partially already in memory, 81 * but the code is intricate enough already. 82 */ 83vm_page_t bogus_page; 84vm_offset_t bogus_offset; 85 86int bufspace, maxbufspace; 87 88/* 89 * advisory minimum for size of LRU queue or VMIO queue 90 */ 91int minbuf; 92 93struct bufhashhdr bufhashtbl[BUFHSZ], invalhash; 94struct bqueues bufqueues[BUFFER_QUEUES]; 95 96/* 97 * Initialize buffer headers and related structures. 98 */ 99void 100bufinit() 101{ 102 struct buf *bp; 103 int i; 104 105 TAILQ_INIT(&bswlist); 106 LIST_INIT(&invalhash); 107 108 /* first, make a null hash table */ 109 for (i = 0; i < BUFHSZ; i++) 110 LIST_INIT(&bufhashtbl[i]); 111 112 /* next, make a null set of free lists */ 113 for (i = 0; i < BUFFER_QUEUES; i++) 114 TAILQ_INIT(&bufqueues[i]); 115 116 buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf); 117 /* finally, initialize each buffer header and stick on empty q */ 118 for (i = 0; i < nbuf; i++) { 119 bp = &buf[i]; 120 bzero(bp, sizeof *bp); 121 bp->b_flags = B_INVAL; /* we're just an empty header */ 122 bp->b_dev = NODEV; 123 bp->b_rcred = NOCRED; 124 bp->b_wcred = NOCRED; 125 bp->b_qindex = QUEUE_EMPTY; 126 bp->b_vnbufs.le_next = NOLIST; 127 bp->b_data = buffers_kva + i * MAXBSIZE; 128 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 129 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 130 } 131/* 132 * maxbufspace is currently calculated to support all filesystem blocks 133 * to be 8K. If you happen to use a 16K filesystem, the size of the buffer 134 * cache is still the same as it would be for 8K filesystems. This 135 * keeps the size of the buffer cache "in check" for big block filesystems. 136 */ 137 minbuf = nbuf / 3; 138 maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE; 139 140 bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); 141 bogus_page = vm_page_alloc(kernel_object, 142 bogus_offset - VM_MIN_KERNEL_ADDRESS, VM_ALLOC_NORMAL); 143 144} 145 146/* 147 * remove the buffer from the appropriate free list 148 */ 149void 150bremfree(struct buf * bp) 151{ 152 int s = splbio(); 153 154 if (bp->b_qindex != QUEUE_NONE) { 155 TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); 156 bp->b_qindex = QUEUE_NONE; 157 } else { 158 panic("bremfree: removing a buffer when not on a queue"); 159 } 160 splx(s); 161} 162 163/* 164 * Get a buffer with the specified data. Look in the cache first. 165 */ 166int 167bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, 168 struct buf ** bpp) 169{ 170 struct buf *bp; 171 172 bp = getblk(vp, blkno, size, 0, 0); 173 *bpp = bp; 174 175 /* if not found in cache, do some I/O */ 176 if ((bp->b_flags & B_CACHE) == 0) { 177 if (curproc != NULL) 178 curproc->p_stats->p_ru.ru_inblock++; 179 bp->b_flags |= B_READ; 180 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 181 if (bp->b_rcred == NOCRED) { 182 if (cred != NOCRED) 183 crhold(cred); 184 bp->b_rcred = cred; 185 } 186 vfs_busy_pages(bp, 0); 187 VOP_STRATEGY(bp); 188 return (biowait(bp)); 189 } 190 return (0); 191} 192 193/* 194 * Operates like bread, but also starts asynchronous I/O on 195 * read-ahead blocks. 196 */ 197int 198breadn(struct vnode * vp, daddr_t blkno, int size, 199 daddr_t * rablkno, int *rabsize, 200 int cnt, struct ucred * cred, struct buf ** bpp) 201{ 202 struct buf *bp, *rabp; 203 int i; 204 int rv = 0, readwait = 0; 205 206 *bpp = bp = getblk(vp, blkno, size, 0, 0); 207 208 /* if not found in cache, do some I/O */ 209 if ((bp->b_flags & B_CACHE) == 0) { 210 if (curproc != NULL) 211 curproc->p_stats->p_ru.ru_inblock++; 212 bp->b_flags |= B_READ; 213 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 214 if (bp->b_rcred == NOCRED) { 215 if (cred != NOCRED) 216 crhold(cred); 217 bp->b_rcred = cred; 218 } 219 vfs_busy_pages(bp, 0); 220 VOP_STRATEGY(bp); 221 ++readwait; 222 } 223 for (i = 0; i < cnt; i++, rablkno++, rabsize++) { 224 if (inmem(vp, *rablkno)) 225 continue; 226 rabp = getblk(vp, *rablkno, *rabsize, 0, 0); 227 228 if ((rabp->b_flags & B_CACHE) == 0) { 229 if (curproc != NULL) 230 curproc->p_stats->p_ru.ru_inblock++; 231 rabp->b_flags |= B_READ | B_ASYNC; 232 rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 233 if (rabp->b_rcred == NOCRED) { 234 if (cred != NOCRED) 235 crhold(cred); 236 rabp->b_rcred = cred; 237 } 238 vfs_busy_pages(rabp, 0); 239 VOP_STRATEGY(rabp); 240 } else { 241 brelse(rabp); 242 } 243 } 244 245 if (readwait) { 246 rv = biowait(bp); 247 } 248 return (rv); 249} 250 251/* 252 * Write, release buffer on completion. (Done by iodone 253 * if async.) 254 */ 255int 256bwrite(struct buf * bp) 257{ 258 int oldflags = bp->b_flags; 259 260 if (bp->b_flags & B_INVAL) { 261 brelse(bp); 262 return (0); 263 } 264 if (!(bp->b_flags & B_BUSY)) 265 panic("bwrite: buffer is not busy???"); 266 267 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 268 bp->b_flags |= B_WRITEINPROG; 269 270 if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { 271 reassignbuf(bp, bp->b_vp); 272 } 273 274 bp->b_vp->v_numoutput++; 275 vfs_busy_pages(bp, 1); 276 if (curproc != NULL) 277 curproc->p_stats->p_ru.ru_oublock++; 278 VOP_STRATEGY(bp); 279 280 if ((oldflags & B_ASYNC) == 0) { 281 int rtval = biowait(bp); 282 283 if (oldflags & B_DELWRI) { 284 reassignbuf(bp, bp->b_vp); 285 } 286 brelse(bp); 287 return (rtval); 288 } 289 return (0); 290} 291 292int 293vn_bwrite(ap) 294 struct vop_bwrite_args *ap; 295{ 296 return (bwrite(ap->a_bp)); 297} 298 299/* 300 * Delayed write. (Buffer is marked dirty). 301 */ 302void 303bdwrite(struct buf * bp) 304{ 305 306 if ((bp->b_flags & B_BUSY) == 0) { 307 panic("bdwrite: buffer is not busy"); 308 } 309 if (bp->b_flags & B_INVAL) { 310 brelse(bp); 311 return; 312 } 313 if (bp->b_flags & B_TAPE) { 314 bawrite(bp); 315 return; 316 } 317 bp->b_flags &= ~(B_READ|B_RELBUF); 318 if ((bp->b_flags & B_DELWRI) == 0) { 319 bp->b_flags |= B_DONE | B_DELWRI; 320 reassignbuf(bp, bp->b_vp); 321 } 322 323 /* 324 * This bmap keeps the system from needing to do the bmap later, 325 * perhaps when the system is attempting to do a sync. Since it 326 * is likely that the indirect block -- or whatever other datastructure 327 * that the filesystem needs is still in memory now, it is a good 328 * thing to do this. Note also, that if the pageout daemon is 329 * requesting a sync -- there might not be enough memory to do 330 * the bmap then... So, this is important to do. 331 */ 332 if( bp->b_lblkno == bp->b_blkno) { 333 VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); 334 } 335 336 /* 337 * Set the *dirty* buffer range based upon the VM system dirty pages. 338 */ 339 vfs_setdirty(bp); 340 341 /* 342 * We need to do this here to satisfy the vnode_pager and the 343 * pageout daemon, so that it thinks that the pages have been 344 * "cleaned". Note that since the pages are in a delayed write 345 * buffer -- the VFS layer "will" see that the pages get written 346 * out on the next sync, or perhaps the cluster will be completed. 347 */ 348 vfs_clean_pages(bp); 349 brelse(bp); 350 return; 351} 352 353/* 354 * Asynchronous write. 355 * Start output on a buffer, but do not wait for it to complete. 356 * The buffer is released when the output completes. 357 */ 358void 359bawrite(struct buf * bp) 360{ 361 bp->b_flags |= B_ASYNC; 362 (void) VOP_BWRITE(bp); 363} 364 365/* 366 * Release a buffer. 367 */ 368void 369brelse(struct buf * bp) 370{ 371 int s; 372 373 if (bp->b_flags & B_CLUSTER) { 374 relpbuf(bp); 375 return; 376 } 377 /* anyone need a "free" block? */ 378 s = splbio(); 379 380 if (needsbuffer) { 381 needsbuffer = 0; 382 wakeup(&needsbuffer); 383 } 384 385 /* anyone need this block? */ 386 if (bp->b_flags & B_WANTED) { 387 bp->b_flags &= ~(B_WANTED | B_AGE); 388 wakeup(bp); 389 } else if (bp->b_flags & B_VMIO) { 390 bp->b_flags &= ~B_WANTED; 391 wakeup(bp); 392 } 393 if (bp->b_flags & B_LOCKED) 394 bp->b_flags &= ~B_ERROR; 395 396 if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || 397 (bp->b_bufsize <= 0)) { 398 bp->b_flags |= B_INVAL; 399 bp->b_flags &= ~(B_DELWRI | B_CACHE); 400 if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) 401 brelvp(bp); 402 } 403 404 /* 405 * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer 406 * constituted, so the B_INVAL flag is used to *invalidate* the buffer, 407 * but the VM object is kept around. The B_NOCACHE flag is used to 408 * invalidate the pages in the VM object. 409 */ 410 if (bp->b_flags & B_VMIO) { 411 vm_offset_t foff; 412 vm_object_t obj; 413 int i, resid; 414 vm_page_t m; 415 int iototal = bp->b_bufsize; 416 417 foff = 0; 418 obj = 0; 419 if (bp->b_npages) { 420 if (bp->b_vp && bp->b_vp->v_mount) { 421 foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 422 } else { 423 /* 424 * vnode pointer has been ripped away -- 425 * probably file gone... 426 */ 427 foff = bp->b_pages[0]->offset; 428 } 429 } 430 for (i = 0; i < bp->b_npages; i++) { 431 m = bp->b_pages[i]; 432 if (m == bogus_page) { 433 m = vm_page_lookup(obj, foff); 434 if (!m) { 435 panic("brelse: page missing\n"); 436 } 437 bp->b_pages[i] = m; 438 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 439 } 440 resid = (m->offset + PAGE_SIZE) - foff; 441 if (resid > iototal) 442 resid = iototal; 443 if (resid > 0) { 444 /* 445 * Don't invalidate the page if the local machine has already 446 * modified it. This is the lesser of two evils, and should 447 * be fixed. 448 */ 449 if (bp->b_flags & (B_NOCACHE | B_ERROR)) { 450 vm_page_test_dirty(m); 451 if (m->dirty == 0) { 452 vm_page_set_invalid(m, foff, resid); 453 if (m->valid == 0) 454 vm_page_protect(m, VM_PROT_NONE); 455 } 456 } 457 } 458 foff += resid; 459 iototal -= resid; 460 } 461 462 if (bp->b_flags & (B_INVAL | B_RELBUF)) { 463 for(i=0;i<bp->b_npages;i++) { 464 m = bp->b_pages[i]; 465 --m->bmapped; 466 if (m->bmapped == 0) { 467 if (m->flags & PG_WANTED) { 468 wakeup(m); 469 m->flags &= ~PG_WANTED; 470 } 471 vm_page_test_dirty(m); 472 if ((m->dirty & m->valid) == 0 && 473 (m->flags & PG_REFERENCED) == 0 && 474 !pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { 475 vm_page_cache(m); 476 } else if ((m->flags & PG_ACTIVE) == 0) { 477 vm_page_activate(m); 478 m->act_count = 0; 479 } 480 } 481 } 482 bufspace -= bp->b_bufsize; 483 pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); 484 bp->b_npages = 0; 485 bp->b_bufsize = 0; 486 bp->b_flags &= ~B_VMIO; 487 if (bp->b_vp) 488 brelvp(bp); 489 } 490 } 491 if (bp->b_qindex != QUEUE_NONE) 492 panic("brelse: free buffer onto another queue???"); 493 494 /* enqueue */ 495 /* buffers with no memory */ 496 if (bp->b_bufsize == 0) { 497 bp->b_qindex = QUEUE_EMPTY; 498 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 499 LIST_REMOVE(bp, b_hash); 500 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 501 bp->b_dev = NODEV; 502 /* buffers with junk contents */ 503 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) { 504 bp->b_qindex = QUEUE_AGE; 505 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); 506 LIST_REMOVE(bp, b_hash); 507 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 508 bp->b_dev = NODEV; 509 /* buffers that are locked */ 510 } else if (bp->b_flags & B_LOCKED) { 511 bp->b_qindex = QUEUE_LOCKED; 512 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); 513 /* buffers with stale but valid contents */ 514 } else if (bp->b_flags & B_AGE) { 515 bp->b_qindex = QUEUE_AGE; 516 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); 517 /* buffers with valid and quite potentially reuseable contents */ 518 } else { 519 bp->b_qindex = QUEUE_LRU; 520 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); 521 } 522 523 /* unlock */ 524 bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); 525 splx(s); 526} 527 528/* 529 * this routine implements clustered async writes for 530 * clearing out B_DELWRI buffers... This is much better 531 * than the old way of writing only one buffer at a time. 532 */ 533void 534vfs_bio_awrite(struct buf * bp) 535{ 536 int i; 537 daddr_t lblkno = bp->b_lblkno; 538 struct vnode *vp = bp->b_vp; 539 int s; 540 int ncl; 541 struct buf *bpa; 542 543 s = splbio(); 544 if (vp->v_mount && (vp->v_flag & VVMIO) && 545 (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { 546 int size = vp->v_mount->mnt_stat.f_iosize; 547 int maxcl = MAXPHYS / size; 548 549 for (i = 1; i < maxcl; i++) { 550 if ((bpa = incore(vp, lblkno + i)) && 551 ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == 552 (B_DELWRI | B_CLUSTEROK)) && 553 (bpa->b_bufsize == size)) { 554 if ((bpa->b_blkno == bpa->b_lblkno) || 555 (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE)) 556 break; 557 } else { 558 break; 559 } 560 } 561 ncl = i; 562 /* 563 * this is a possible cluster write 564 */ 565 if (ncl != 1) { 566 bremfree(bp); 567 cluster_wbuild(vp, bp, size, lblkno, ncl, -1); 568 splx(s); 569 return; 570 } 571 } 572 /* 573 * default (old) behavior, writing out only one block 574 */ 575 bremfree(bp); 576 bp->b_flags |= B_BUSY | B_ASYNC; 577 (void) VOP_BWRITE(bp); 578 splx(s); 579} 580 581 582/* 583 * Find a buffer header which is available for use. 584 */ 585static struct buf * 586getnewbuf(int slpflag, int slptimeo, int doingvmio) 587{ 588 struct buf *bp; 589 int s; 590 int firstbp = 1; 591 592 s = splbio(); 593start: 594 if (bufspace >= maxbufspace) 595 goto trytofreespace; 596 597 /* can we constitute a new buffer? */ 598 if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) { 599 if (bp->b_qindex != QUEUE_EMPTY) 600 panic("getnewbuf: inconsistent EMPTY queue"); 601 bremfree(bp); 602 goto fillbuf; 603 } 604trytofreespace: 605 /* 606 * We keep the file I/O from hogging metadata I/O 607 * This is desirable because file data is cached in the 608 * VM/Buffer cache even if a buffer is freed. 609 */ 610 if ((bp = bufqueues[QUEUE_AGE].tqh_first)) { 611 if (bp->b_qindex != QUEUE_AGE) 612 panic("getnewbuf: inconsistent AGE queue"); 613 } else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) { 614 if (bp->b_qindex != QUEUE_LRU) 615 panic("getnewbuf: inconsistent LRU queue"); 616 } 617 if (!bp) { 618 /* wait for a free buffer of any kind */ 619 needsbuffer = 1; 620 tsleep(&needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo); 621 splx(s); 622 return (0); 623 } 624 625 /* if we are a delayed write, convert to an async write */ 626 if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { 627 vfs_bio_awrite(bp); 628 if (!slpflag && !slptimeo) { 629 splx(s); 630 return (0); 631 } 632 goto start; 633 } 634 635 if (bp->b_flags & B_WANTED) { 636 bp->b_flags &= ~B_WANTED; 637 wakeup(bp); 638 } 639 bremfree(bp); 640 641 if (bp->b_flags & B_VMIO) { 642 bp->b_flags |= B_RELBUF | B_BUSY | B_DONE; 643 brelse(bp); 644 bremfree(bp); 645 } 646 647 if (bp->b_vp) 648 brelvp(bp); 649 650 /* we are not free, nor do we contain interesting data */ 651 if (bp->b_rcred != NOCRED) 652 crfree(bp->b_rcred); 653 if (bp->b_wcred != NOCRED) 654 crfree(bp->b_wcred); 655fillbuf: 656 bp->b_flags |= B_BUSY; 657 LIST_REMOVE(bp, b_hash); 658 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 659 splx(s); 660 if (bp->b_bufsize) { 661 allocbuf(bp, 0); 662 } 663 bp->b_flags = B_BUSY; 664 bp->b_dev = NODEV; 665 bp->b_vp = NULL; 666 bp->b_blkno = bp->b_lblkno = 0; 667 bp->b_iodone = 0; 668 bp->b_error = 0; 669 bp->b_resid = 0; 670 bp->b_bcount = 0; 671 bp->b_npages = 0; 672 bp->b_wcred = bp->b_rcred = NOCRED; 673 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 674 bp->b_dirtyoff = bp->b_dirtyend = 0; 675 bp->b_validoff = bp->b_validend = 0; 676 if (bufspace >= maxbufspace) { 677 s = splbio(); 678 bp->b_flags |= B_INVAL; 679 brelse(bp); 680 goto trytofreespace; 681 } 682 return (bp); 683} 684 685/* 686 * Check to see if a block is currently memory resident. 687 */ 688struct buf * 689incore(struct vnode * vp, daddr_t blkno) 690{ 691 struct buf *bp; 692 struct bufhashhdr *bh; 693 694 int s = splbio(); 695 696 bh = BUFHASH(vp, blkno); 697 bp = bh->lh_first; 698 699 /* Search hash chain */ 700 while (bp != NULL) { 701 /* hit */ 702 if (bp->b_vp == vp && bp->b_lblkno == blkno && 703 (bp->b_flags & B_INVAL) == 0) { 704 splx(s); 705 return (bp); 706 } 707 bp = bp->b_hash.le_next; 708 } 709 splx(s); 710 711 return (NULL); 712} 713 714/* 715 * Returns true if no I/O is needed to access the 716 * associated VM object. This is like incore except 717 * it also hunts around in the VM system for the data. 718 */ 719 720int 721inmem(struct vnode * vp, daddr_t blkno) 722{ 723 vm_object_t obj; 724 vm_offset_t off, toff, tinc; 725 vm_page_t m; 726 727 if (incore(vp, blkno)) 728 return 1; 729 if (vp->v_mount == NULL) 730 return 0; 731 if ((vp->v_object == NULL) || (vp->v_flag & VVMIO) == 0) 732 return 0; 733 734 obj = vp->v_object; 735 tinc = PAGE_SIZE; 736 if (tinc > vp->v_mount->mnt_stat.f_iosize) 737 tinc = vp->v_mount->mnt_stat.f_iosize; 738 off = blkno * vp->v_mount->mnt_stat.f_iosize; 739 740 for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { 741 int mask; 742 743 m = vm_page_lookup(obj, trunc_page(toff + off)); 744 if (!m) 745 return 0; 746 if (vm_page_is_valid(m, toff + off, tinc) == 0) 747 return 0; 748 } 749 return 1; 750} 751 752/* 753 * now we set the dirty range for the buffer -- 754 * for NFS -- if the file is mapped and pages have 755 * been written to, let it know. We want the 756 * entire range of the buffer to be marked dirty if 757 * any of the pages have been written to for consistancy 758 * with the b_validoff, b_validend set in the nfs write 759 * code, and used by the nfs read code. 760 */ 761static void 762vfs_setdirty(struct buf *bp) { 763 int i; 764 vm_object_t object; 765 vm_offset_t boffset, offset; 766 /* 767 * We qualify the scan for modified pages on whether the 768 * object has been flushed yet. The OBJ_WRITEABLE flag 769 * is not cleared simply by protecting pages off. 770 */ 771 if ((bp->b_flags & B_VMIO) && 772 ((object = bp->b_pages[0]->object)->flags & OBJ_WRITEABLE)) { 773 /* 774 * test the pages to see if they have been modified directly 775 * by users through the VM system. 776 */ 777 for (i = 0; i < bp->b_npages; i++) 778 vm_page_test_dirty(bp->b_pages[i]); 779 780 /* 781 * scan forwards for the first page modified 782 */ 783 for (i = 0; i < bp->b_npages; i++) { 784 if (bp->b_pages[i]->dirty) { 785 break; 786 } 787 } 788 boffset = i * PAGE_SIZE; 789 if (boffset < bp->b_dirtyoff) { 790 bp->b_dirtyoff = boffset; 791 } 792 793 /* 794 * scan backwards for the last page modified 795 */ 796 for (i = bp->b_npages - 1; i >= 0; --i) { 797 if (bp->b_pages[i]->dirty) { 798 break; 799 } 800 } 801 boffset = (i + 1) * PAGE_SIZE; 802 offset = boffset + bp->b_pages[0]->offset; 803 if (offset >= object->size) { 804 boffset = object->size - bp->b_pages[0]->offset; 805 } 806 if (bp->b_dirtyend < boffset) { 807 bp->b_dirtyend = boffset; 808 } 809 } 810} 811 812/* 813 * Get a block given a specified block and offset into a file/device. 814 */ 815struct buf * 816getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) 817{ 818 struct buf *bp; 819 int s; 820 struct bufhashhdr *bh; 821 vm_offset_t off; 822 int nleft; 823 824 s = splbio(); 825loop: 826 if (bp = incore(vp, blkno)) { 827 if (bp->b_flags & B_BUSY) { 828 bp->b_flags |= B_WANTED; 829 if (!tsleep(bp, PRIBIO | slpflag, "getblk", slptimeo)) 830 goto loop; 831 832 splx(s); 833 return (struct buf *) NULL; 834 } 835 bp->b_flags |= B_BUSY | B_CACHE; 836 bremfree(bp); 837 /* 838 * check for size inconsistancies 839 */ 840 if (bp->b_bcount != size) { 841 allocbuf(bp, size); 842 } 843 splx(s); 844 return (bp); 845 } else { 846 vm_object_t obj; 847 int doingvmio; 848 849 if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) { 850 doingvmio = 1; 851 } else { 852 doingvmio = 0; 853 } 854 if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) { 855 if (slpflag || slptimeo) 856 return NULL; 857 goto loop; 858 } 859 860 /* 861 * This code is used to make sure that a buffer is not 862 * created while the getnewbuf routine is blocked. 863 * Normally the vnode is locked so this isn't a problem. 864 * VBLK type I/O requests, however, don't lock the vnode. 865 */ 866 if (!VOP_ISLOCKED(vp) && incore(vp, blkno)) { 867 bp->b_flags |= B_INVAL; 868 brelse(bp); 869 goto loop; 870 } 871 872 /* 873 * Insert the buffer into the hash, so that it can 874 * be found by incore. 875 */ 876 bp->b_blkno = bp->b_lblkno = blkno; 877 bgetvp(vp, bp); 878 LIST_REMOVE(bp, b_hash); 879 bh = BUFHASH(vp, blkno); 880 LIST_INSERT_HEAD(bh, bp, b_hash); 881 882 if (doingvmio) { 883 bp->b_flags |= (B_VMIO | B_CACHE); 884#if defined(VFS_BIO_DEBUG) 885 if (vp->v_type != VREG) 886 printf("getblk: vmioing file type %d???\n", vp->v_type); 887#endif 888 } else { 889 bp->b_flags &= ~B_VMIO; 890 } 891 splx(s); 892 893 allocbuf(bp, size); 894 return (bp); 895 } 896} 897 898/* 899 * Get an empty, disassociated buffer of given size. 900 */ 901struct buf * 902geteblk(int size) 903{ 904 struct buf *bp; 905 906 while ((bp = getnewbuf(0, 0, 0)) == 0); 907 allocbuf(bp, size); 908 bp->b_flags |= B_INVAL; 909 return (bp); 910} 911 912/* 913 * This code constitutes the buffer memory from either anonymous system 914 * memory (in the case of non-VMIO operations) or from an associated 915 * VM object (in the case of VMIO operations). 916 * 917 * Note that this code is tricky, and has many complications to resolve 918 * deadlock or inconsistant data situations. Tread lightly!!! 919 * 920 * Modify the length of a buffer's underlying buffer storage without 921 * destroying information (unless, of course the buffer is shrinking). 922 */ 923int 924allocbuf(struct buf * bp, int size) 925{ 926 927 int s; 928 int newbsize; 929 int i; 930 931 if (!(bp->b_flags & B_BUSY)) 932 panic("allocbuf: buffer not busy"); 933 934 if ((bp->b_flags & B_VMIO) == 0) { 935 /* 936 * Just get anonymous memory from the kernel 937 */ 938 newbsize = round_page(size); 939 940 if (newbsize < bp->b_bufsize) { 941 vm_hold_free_pages( 942 bp, 943 (vm_offset_t) bp->b_data + newbsize, 944 (vm_offset_t) bp->b_data + bp->b_bufsize); 945 } else if (newbsize > bp->b_bufsize) { 946 vm_hold_load_pages( 947 bp, 948 (vm_offset_t) bp->b_data + bp->b_bufsize, 949 (vm_offset_t) bp->b_data + newbsize); 950 } 951 } else { 952 vm_page_t m; 953 int desiredpages; 954 955 newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE; 956 desiredpages = round_page(newbsize) / PAGE_SIZE; 957 958 if (newbsize < bp->b_bufsize) { 959 if (desiredpages < bp->b_npages) { 960 pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + 961 desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages)); 962 for (i = desiredpages; i < bp->b_npages; i++) { 963 m = bp->b_pages[i]; 964 s = splhigh(); 965 while ((m->flags & PG_BUSY) || (m->busy != 0)) { 966 m->flags |= PG_WANTED; 967 tsleep(m, PVM, "biodep", 0); 968 } 969 splx(s); 970 971 if (m->bmapped == 0) { 972 printf("allocbuf: bmapped is zero for page %d\n", i); 973 panic("allocbuf: error"); 974 } 975 --m->bmapped; 976 if (m->bmapped == 0) { 977 vm_page_protect(m, VM_PROT_NONE); 978 vm_page_free(m); 979 } 980 bp->b_pages[i] = NULL; 981 } 982 bp->b_npages = desiredpages; 983 } 984 } else if (newbsize > bp->b_bufsize) { 985 vm_object_t obj; 986 vm_offset_t tinc, off, toff, objoff; 987 int pageindex, curbpnpages; 988 struct vnode *vp; 989 int bsize; 990 991 vp = bp->b_vp; 992 bsize = vp->v_mount->mnt_stat.f_iosize; 993 994 if (bp->b_npages < desiredpages) { 995 obj = vp->v_object; 996 tinc = PAGE_SIZE; 997 if (tinc > bsize) 998 tinc = bsize; 999 off = bp->b_lblkno * bsize; 1000 doretry: 1001 curbpnpages = bp->b_npages; 1002 bp->b_flags |= B_CACHE; 1003 for (toff = 0; toff < newbsize; toff += tinc) { 1004 int mask; 1005 int bytesinpage; 1006 1007 pageindex = toff / PAGE_SIZE; 1008 objoff = trunc_page(toff + off); 1009 if (pageindex < curbpnpages) { 1010 int pb; 1011 1012 m = bp->b_pages[pageindex]; 1013 if (m->offset != objoff) 1014 panic("allocbuf: page changed offset??!!!?"); 1015 bytesinpage = tinc; 1016 if (tinc > (newbsize - toff)) 1017 bytesinpage = newbsize - toff; 1018 if (!vm_page_is_valid(m, toff + off, bytesinpage)) { 1019 bp->b_flags &= ~B_CACHE; 1020 } 1021 if ((m->flags & PG_ACTIVE) == 0) { 1022 vm_page_activate(m); 1023 m->act_count = 0; 1024 } 1025 continue; 1026 } 1027 m = vm_page_lookup(obj, objoff); 1028 if (!m) { 1029 m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); 1030 if (!m) { 1031 int j; 1032 1033 for (j = bp->b_npages; j < pageindex; j++) { 1034 PAGE_WAKEUP(bp->b_pages[j]); 1035 } 1036 VM_WAIT; 1037 goto doretry; 1038 } 1039 vm_page_activate(m); 1040 m->act_count = 0; 1041 m->valid = 0; 1042 bp->b_flags &= ~B_CACHE; 1043 } else if (m->flags & PG_BUSY) { 1044 int j; 1045 1046 for (j = bp->b_npages; j < pageindex; j++) { 1047 PAGE_WAKEUP(bp->b_pages[j]); 1048 } 1049 1050 s = splbio(); 1051 m->flags |= PG_WANTED; 1052 tsleep(m, PRIBIO, "pgtblk", 0); 1053 splx(s); 1054 1055 goto doretry; 1056 } else { 1057 int pb; 1058 if ((curproc != pageproc) && 1059 (m->flags & PG_CACHE) && 1060 (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { 1061 pagedaemon_wakeup(); 1062 } 1063 bytesinpage = tinc; 1064 if (tinc > (newbsize - toff)) 1065 bytesinpage = newbsize - toff; 1066 if (!vm_page_is_valid(m, toff + off, bytesinpage)) { 1067 bp->b_flags &= ~B_CACHE; 1068 } 1069 if ((m->flags & PG_ACTIVE) == 0) { 1070 vm_page_activate(m); 1071 m->act_count = 0; 1072 } 1073 m->flags |= PG_BUSY; 1074 } 1075 bp->b_pages[pageindex] = m; 1076 curbpnpages = pageindex + 1; 1077 } 1078 for (i = bp->b_npages; i < curbpnpages; i++) { 1079 m = bp->b_pages[i]; 1080 m->bmapped++; 1081 PAGE_WAKEUP(m); 1082 } 1083 bp->b_npages = curbpnpages; 1084 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 1085 pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); 1086 bp->b_data += off % PAGE_SIZE; 1087 } 1088 } 1089 } 1090 bufspace += (newbsize - bp->b_bufsize); 1091 bp->b_bufsize = newbsize; 1092 bp->b_bcount = size; 1093 return 1; 1094} 1095 1096/* 1097 * Wait for buffer I/O completion, returning error status. 1098 */ 1099int 1100biowait(register struct buf * bp) 1101{ 1102 int s; 1103 1104 s = splbio(); 1105 while ((bp->b_flags & B_DONE) == 0) 1106 tsleep(bp, PRIBIO, "biowait", 0); 1107 splx(s); 1108 if (bp->b_flags & B_EINTR) { 1109 bp->b_flags &= ~B_EINTR; 1110 return (EINTR); 1111 } 1112 if (bp->b_flags & B_ERROR) { 1113 return (bp->b_error ? bp->b_error : EIO); 1114 } else { 1115 return (0); 1116 } 1117} 1118 1119/* 1120 * Finish I/O on a buffer, calling an optional function. 1121 * This is usually called from interrupt level, so process blocking 1122 * is not *a good idea*. 1123 */ 1124void 1125biodone(register struct buf * bp) 1126{ 1127 int s; 1128 1129 s = splbio(); 1130 if (!(bp->b_flags & B_BUSY)) 1131 panic("biodone: buffer not busy"); 1132 1133 if (bp->b_flags & B_DONE) { 1134 splx(s); 1135 printf("biodone: buffer already done\n"); 1136 return; 1137 } 1138 bp->b_flags |= B_DONE; 1139 1140 if ((bp->b_flags & B_READ) == 0) { 1141 struct vnode *vp = bp->b_vp; 1142 vwakeup(bp); 1143 } 1144#ifdef BOUNCE_BUFFERS 1145 if (bp->b_flags & B_BOUNCE) 1146 vm_bounce_free(bp); 1147#endif 1148 1149 /* call optional completion function if requested */ 1150 if (bp->b_flags & B_CALL) { 1151 bp->b_flags &= ~B_CALL; 1152 (*bp->b_iodone) (bp); 1153 splx(s); 1154 return; 1155 } 1156 if (bp->b_flags & B_VMIO) { 1157 int i, resid; 1158 vm_offset_t foff; 1159 vm_page_t m; 1160 vm_object_t obj; 1161 int iosize; 1162 struct vnode *vp = bp->b_vp; 1163 1164 foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1165 obj = vp->v_object; 1166 if (!obj) { 1167 panic("biodone: no object"); 1168 } 1169#if defined(VFS_BIO_DEBUG) 1170 if (obj->paging_in_progress < bp->b_npages) { 1171 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", 1172 obj->paging_in_progress, bp->b_npages); 1173 } 1174#endif 1175 iosize = bp->b_bufsize; 1176 for (i = 0; i < bp->b_npages; i++) { 1177 int bogusflag = 0; 1178 m = bp->b_pages[i]; 1179 if (m == bogus_page) { 1180 bogusflag = 1; 1181 m = vm_page_lookup(obj, foff); 1182 if (!m) { 1183#if defined(VFS_BIO_DEBUG) 1184 printf("biodone: page disappeared\n"); 1185#endif 1186 --obj->paging_in_progress; 1187 continue; 1188 } 1189 bp->b_pages[i] = m; 1190 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1191 } 1192#if defined(VFS_BIO_DEBUG) 1193 if (trunc_page(foff) != m->offset) { 1194 printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset); 1195 } 1196#endif 1197 resid = (m->offset + PAGE_SIZE) - foff; 1198 if (resid > iosize) 1199 resid = iosize; 1200 /* 1201 * In the write case, the valid and clean bits are 1202 * already changed correctly, so we only need to do this 1203 * here in the read case. 1204 */ 1205 if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { 1206 vm_page_set_valid(m, foff & (PAGE_SIZE-1), resid); 1207 vm_page_set_clean(m, foff & (PAGE_SIZE-1), resid); 1208 } 1209 1210 /* 1211 * when debugging new filesystems or buffer I/O methods, this 1212 * is the most common error that pops up. if you see this, you 1213 * have not set the page busy flag correctly!!! 1214 */ 1215 if (m->busy == 0) { 1216 printf("biodone: page busy < 0, " 1217 "off: %ld, foff: %ld, " 1218 "resid: %d, index: %d\n", 1219 m->offset, foff, resid, i); 1220 printf(" iosize: %ld, lblkno: %ld, flags: 0x%x, npages: %d\n", 1221 bp->b_vp->v_mount->mnt_stat.f_iosize, 1222 bp->b_lblkno, bp->b_flags, bp->b_npages); 1223 printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n", 1224 m->valid, m->dirty, m->bmapped); 1225 panic("biodone: page busy < 0\n"); 1226 } 1227 --m->busy; 1228 if ((m->busy == 0) && (m->flags & PG_WANTED)) { 1229 m->flags &= ~PG_WANTED; 1230 wakeup(m); 1231 } 1232 --obj->paging_in_progress; 1233 foff += resid; 1234 iosize -= resid; 1235 } 1236 if (obj && obj->paging_in_progress == 0 && 1237 (obj->flags & OBJ_PIPWNT)) { 1238 obj->flags &= ~OBJ_PIPWNT; 1239 wakeup(obj); 1240 } 1241 } 1242 /* 1243 * For asynchronous completions, release the buffer now. The brelse 1244 * checks for B_WANTED and will do the wakeup there if necessary - so 1245 * no need to do a wakeup here in the async case. 1246 */ 1247 1248 if (bp->b_flags & B_ASYNC) { 1249 brelse(bp); 1250 } else { 1251 bp->b_flags &= ~B_WANTED; 1252 wakeup(bp); 1253 } 1254 splx(s); 1255} 1256 1257int 1258count_lock_queue() 1259{ 1260 int count; 1261 struct buf *bp; 1262 1263 count = 0; 1264 for (bp = bufqueues[QUEUE_LOCKED].tqh_first; 1265 bp != NULL; 1266 bp = bp->b_freelist.tqe_next) 1267 count++; 1268 return (count); 1269} 1270 1271int vfs_update_interval = 30; 1272 1273void 1274vfs_update() 1275{ 1276 (void) spl0(); 1277 while (1) { 1278 tsleep(&vfs_update_wakeup, PRIBIO, "update", 1279 hz * vfs_update_interval); 1280 vfs_update_wakeup = 0; 1281 sync(curproc, NULL, NULL); 1282 } 1283} 1284 1285/* 1286 * This routine is called in lieu of iodone in the case of 1287 * incomplete I/O. This keeps the busy status for pages 1288 * consistant. 1289 */ 1290void 1291vfs_unbusy_pages(struct buf * bp) 1292{ 1293 int i; 1294 1295 if (bp->b_flags & B_VMIO) { 1296 struct vnode *vp = bp->b_vp; 1297 vm_object_t obj = vp->v_object; 1298 vm_offset_t foff; 1299 1300 foff = trunc_page(vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno); 1301 1302 for (i = 0; i < bp->b_npages; i++) { 1303 vm_page_t m = bp->b_pages[i]; 1304 1305 if (m == bogus_page) { 1306 m = vm_page_lookup(obj, foff + i * PAGE_SIZE); 1307 if (!m) { 1308 panic("vfs_unbusy_pages: page missing\n"); 1309 } 1310 bp->b_pages[i] = m; 1311 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1312 } 1313 --obj->paging_in_progress; 1314 --m->busy; 1315 if ((m->busy == 0) && (m->flags & PG_WANTED)) { 1316 m->flags &= ~PG_WANTED; 1317 wakeup(m); 1318 } 1319 } 1320 if (obj->paging_in_progress == 0 && 1321 (obj->flags & OBJ_PIPWNT)) { 1322 obj->flags &= ~OBJ_PIPWNT; 1323 wakeup(obj); 1324 } 1325 } 1326} 1327 1328/* 1329 * This routine is called before a device strategy routine. 1330 * It is used to tell the VM system that paging I/O is in 1331 * progress, and treat the pages associated with the buffer 1332 * almost as being PG_BUSY. Also the object paging_in_progress 1333 * flag is handled to make sure that the object doesn't become 1334 * inconsistant. 1335 */ 1336void 1337vfs_busy_pages(struct buf * bp, int clear_modify) 1338{ 1339 int i; 1340 1341 if (bp->b_flags & B_VMIO) { 1342 vm_object_t obj = bp->b_vp->v_object; 1343 vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1344 int iocount = bp->b_bufsize; 1345 1346 vfs_setdirty(bp); 1347 for (i = 0; i < bp->b_npages; i++) { 1348 vm_page_t m = bp->b_pages[i]; 1349 int resid = (m->offset + PAGE_SIZE) - foff; 1350 1351 if (resid > iocount) 1352 resid = iocount; 1353 obj->paging_in_progress++; 1354 m->busy++; 1355 if (clear_modify) { 1356 vm_page_protect(m, VM_PROT_READ); 1357 vm_page_set_valid(m, 1358 foff & (PAGE_SIZE-1), resid); 1359 vm_page_set_clean(m, 1360 foff & (PAGE_SIZE-1), resid); 1361 } else if (bp->b_bcount >= PAGE_SIZE) { 1362 if (m->valid && (bp->b_flags & B_CACHE) == 0) { 1363 bp->b_pages[i] = bogus_page; 1364 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1365 } 1366 } 1367 foff += resid; 1368 iocount -= resid; 1369 } 1370 } 1371} 1372 1373/* 1374 * Tell the VM system that the pages associated with this buffer 1375 * are clean. This is used for delayed writes where the data is 1376 * going to go to disk eventually without additional VM intevention. 1377 */ 1378void 1379vfs_clean_pages(struct buf * bp) 1380{ 1381 int i; 1382 1383 if (bp->b_flags & B_VMIO) { 1384 vm_offset_t foff = 1385 bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1386 int iocount = bp->b_bufsize; 1387 1388 for (i = 0; i < bp->b_npages; i++) { 1389 vm_page_t m = bp->b_pages[i]; 1390 int resid = (m->offset + PAGE_SIZE) - foff; 1391 1392 if (resid > iocount) 1393 resid = iocount; 1394 if (resid > 0) { 1395 vm_page_set_valid(m, 1396 foff & (PAGE_SIZE-1), resid); 1397 vm_page_set_clean(m, 1398 foff & (PAGE_SIZE-1), resid); 1399 } 1400 foff += resid; 1401 iocount -= resid; 1402 } 1403 } 1404} 1405 1406void 1407vfs_bio_clrbuf(struct buf *bp) { 1408 int i; 1409 if( bp->b_flags & B_VMIO) { 1410 if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) { 1411 int j; 1412 if( bp->b_pages[0]->valid != VM_PAGE_BITS_ALL) { 1413 for(j=0; j < bp->b_bufsize / DEV_BSIZE;j++) { 1414 bzero(bp->b_data + j * DEV_BSIZE, DEV_BSIZE); 1415 } 1416 } 1417 bp->b_resid = 0; 1418 return; 1419 } 1420 for(i=0;i<bp->b_npages;i++) { 1421 if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL) 1422 continue; 1423 if( bp->b_pages[i]->valid == 0) { 1424 bzero(bp->b_data + i * PAGE_SIZE, PAGE_SIZE); 1425 } else { 1426 int j; 1427 for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) { 1428 if( (bp->b_pages[i]->valid & (1<<j)) == 0) 1429 bzero(bp->b_data + i * PAGE_SIZE + j * DEV_BSIZE, DEV_BSIZE); 1430 } 1431 } 1432 bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; 1433 } 1434 bp->b_resid = 0; 1435 } else { 1436 clrbuf(bp); 1437 } 1438} 1439 1440/* 1441 * vm_hold_load_pages and vm_hold_unload pages get pages into 1442 * a buffers address space. The pages are anonymous and are 1443 * not associated with a file object. 1444 */ 1445void 1446vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa) 1447{ 1448 vm_offset_t pg; 1449 vm_page_t p; 1450 vm_offset_t from = round_page(froma); 1451 vm_offset_t to = round_page(toa); 1452 1453 for (pg = from; pg < to; pg += PAGE_SIZE) { 1454 1455tryagain: 1456 1457 p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 1458 VM_ALLOC_NORMAL); 1459 if (!p) { 1460 VM_WAIT; 1461 goto tryagain; 1462 } 1463 vm_page_wire(p); 1464 pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); 1465 bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p; 1466 PAGE_WAKEUP(p); 1467 bp->b_npages++; 1468 } 1469} 1470 1471void 1472vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa) 1473{ 1474 vm_offset_t pg; 1475 vm_page_t p; 1476 vm_offset_t from = round_page(froma); 1477 vm_offset_t to = round_page(toa); 1478 1479 for (pg = from; pg < to; pg += PAGE_SIZE) { 1480 p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE]; 1481 bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0; 1482 pmap_kremove(pg); 1483 vm_page_free(p); 1484 --bp->b_npages; 1485 } 1486} 1487