vfs_bio.c revision 10358
/*
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. This work was done expressly for inclusion into FreeBSD.  Other use
 *    is allowed if this notation is included.
 * 5. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $Id: vfs_bio.c,v 1.59 1995/08/24 13:59:14 davidg Exp $
 */

/*
 * This file contains a new buffer I/O scheme implementing a coherent
 * VM object and buffer cache scheme.  Pains have been taken to make
 * sure that the performance degradation associated with schemes such
 * as this is not realized.
 *
 * Author: John S. Dyson
 * Significant help during the development and debugging phases
 * was provided by David Greenman, also of the FreeBSD core team.
 */

#define VMIO
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

#include <miscfs/specfs/specdev.h>

/*
 * System initialization
 */

static void vfs_update __P((void));
struct proc *updateproc;

static struct kproc_desc up_kp = {
	"update",
	vfs_update,
	&updateproc
};
SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, (caddr_t)&up_kp)


struct buf *buf;		/* buffer header pool */
struct swqueue bswlist;

void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vfs_clean_pages(struct buf * bp);
static void vfs_setdirty(struct buf *bp);

int needsbuffer;

/*
 * Internal update daemon, process 3
 *	The variable vfs_update_wakeup allows for internal syncs.
 */
int vfs_update_wakeup;


/*
 * buffers base kva
 */
caddr_t buffers_kva;

/*
 * bogus page -- for I/O to/from partially complete buffers.
 * This is a temporary solution to the problem, but it is not
 * really that bad.  It would be better to split the buffer
 * for input in the case of buffers partially already in memory,
 * but the code is intricate enough already.
 */
vm_page_t bogus_page;
vm_offset_t bogus_offset;
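/*
 * Illustrative sketch of how bogus_page is used (see vfs_busy_pages()
 * and biodone() below).  Before a read that would otherwise overwrite
 * a partially valid page, the real page is swapped out of b_pages[]
 * and the buffer's kva is remapped:
 *
 *	bp->b_pages[i] = bogus_page;
 *	pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
 *
 * On completion, biodone()/vfs_unbusy_pages() find the real page again
 * with vm_page_lookup() on the object and redo the pmap_qenter(), so
 * the device scribbles on the shared bogus page instead of on data
 * that is already valid.
 */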
int bufspace, maxbufspace;

/*
 * advisory minimum for size of LRU queue or VMIO queue
 */
int minbuf;

struct bufhashhdr bufhashtbl[BUFHSZ], invalhash;
struct bqueues bufqueues[BUFFER_QUEUES];

/*
 * Initialize buffer headers and related structures.
 */
void
bufinit()
{
	struct buf *bp;
	int i;

	TAILQ_INIT(&bswlist);
	LIST_INIT(&invalhash);

	/* first, make a null hash table */
	for (i = 0; i < BUFHSZ; i++)
		LIST_INIT(&bufhashtbl[i]);

	/* next, make a null set of free lists */
	for (i = 0; i < BUFFER_QUEUES; i++)
		TAILQ_INIT(&bufqueues[i]);

	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
	/* finally, initialize each buffer header and stick on empty q */
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero(bp, sizeof *bp);
		bp->b_flags = B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_qindex = QUEUE_EMPTY;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers_kva + i * MAXBSIZE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	}
/*
 * maxbufspace is currently calculated to support all filesystem blocks
 * being 8K.  If you happen to use a 16K filesystem, the size of the buffer
 * cache is still the same as it would be for 8K filesystems.  This
 * keeps the size of the buffer cache "in check" for big block filesystems.
 */
	minbuf = nbuf / 3;
	maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;

	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	bogus_page = vm_page_alloc(kernel_object,
	    bogus_offset - VM_MIN_KERNEL_ADDRESS, VM_ALLOC_NORMAL);

}

/*
 * remove the buffer from the appropriate free list
 */
void
bremfree(struct buf * bp)
{
	int s = splbio();

	if (bp->b_qindex != QUEUE_NONE) {
		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
		bp->b_qindex = QUEUE_NONE;
	} else {
		panic("bremfree: removing a buffer when not on a queue");
	}
	splx(s);
}

/*
 * Get a buffer with the specified data.  Look in the cache first.
 */
int
bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
    struct buf ** bpp)
{
	struct buf *bp;

	bp = getblk(vp, blkno, size, 0, 0);
	*bpp = bp;

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc != NULL)
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		return (biowait(bp));
	}
	return (0);
}
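/*
 * Illustrative example of the usual calling pattern for bread(); the
 * locals here are hypothetical, but the shape matches the filesystem
 * code that calls it:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lblkno, bsize, NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... use bp->b_data ...
 *	brelse(bp);		(or bdwrite(bp) if modified)
 *
 * Note that a buffer is returned through bpp even on error, so the
 * caller still owns it and must release it.
 */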
/*
 * Operates like bread, but also starts asynchronous I/O on
 * read-ahead blocks.
 */
int
breadn(struct vnode * vp, daddr_t blkno, int size,
    daddr_t * rablkno, int *rabsize,
    int cnt, struct ucred * cred, struct buf ** bpp)
{
	struct buf *bp, *rabp;
	int i;
	int rv = 0, readwait = 0;

	*bpp = bp = getblk(vp, blkno, size, 0, 0);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc != NULL)
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		++readwait;
	}
	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
		if (inmem(vp, *rablkno))
			continue;
		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);

		if ((rabp->b_flags & B_CACHE) == 0) {
			if (curproc != NULL)
				curproc->p_stats->p_ru.ru_inblock++;
			rabp->b_flags |= B_READ | B_ASYNC;
			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
			if (rabp->b_rcred == NOCRED) {
				if (cred != NOCRED)
					crhold(cred);
				rabp->b_rcred = cred;
			}
			vfs_busy_pages(rabp, 0);
			VOP_STRATEGY(rabp);
		} else {
			brelse(rabp);
		}
	}

	if (readwait) {
		rv = biowait(bp);
	}
	return (rv);
}

/*
 * Write, release buffer on completion.  (Done by iodone
 * if async.)
 */
int
bwrite(struct buf * bp)
{
	int oldflags = bp->b_flags;

	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}
	if (!(bp->b_flags & B_BUSY))
		panic("bwrite: buffer is not busy???");

	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	bp->b_flags |= B_WRITEINPROG;

	if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) {
		reassignbuf(bp, bp->b_vp);
	}

	bp->b_vp->v_numoutput++;
	vfs_busy_pages(bp, 1);
	if (curproc != NULL)
		curproc->p_stats->p_ru.ru_oublock++;
	VOP_STRATEGY(bp);

	if ((oldflags & B_ASYNC) == 0) {
		int rtval = biowait(bp);

		if (oldflags & B_DELWRI) {
			reassignbuf(bp, bp->b_vp);
		}
		brelse(bp);
		return (rtval);
	}
	return (0);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}
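/*
 * Illustrative summary of the three write paths above and below:
 *
 *	bwrite(bp)	synchronous unless B_ASYNC was already set;
 *			returns the error collected by biowait()
 *	bdwrite(bp)	marks the buffer B_DELWRI and releases it; the
 *			data reaches disk on a later sync or cluster
 *	bawrite(bp)	sets B_ASYNC and calls VOP_BWRITE(), so the
 *			write starts now and biodone()/brelse() finish
 *			it at interrupt time
 *
 * A caller that does not need the write ordered with anything else
 * would typically just bdwrite() and let the update daemon push the
 * block out.
 */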
/*
 * Delayed write.  (Buffer is marked dirty).
 */
void
bdwrite(struct buf * bp)
{

	if ((bp->b_flags & B_BUSY) == 0) {
		panic("bdwrite: buffer is not busy");
	}
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if (bp->b_flags & B_TAPE) {
		bawrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_RELBUF);
	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DONE | B_DELWRI;
		reassignbuf(bp, bp->b_vp);
	}

	/*
	 * This bmap keeps the system from needing to do the bmap later,
	 * perhaps when the system is attempting to do a sync.  Since it
	 * is likely that the indirect block -- or whatever other data
	 * structure the filesystem needs -- is still in memory now, it
	 * is a good thing to do this.  Note also that if the pageout
	 * daemon is requesting a sync, there might not be enough memory
	 * to do the bmap then...  So, this is important to do.
	 */
	if (bp->b_lblkno == bp->b_blkno) {
		VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
	}

	/*
	 * Set the *dirty* buffer range based upon the VM system dirty pages.
	 */
	vfs_setdirty(bp);

	/*
	 * We need to do this here to satisfy the vnode_pager and the
	 * pageout daemon, so that it thinks that the pages have been
	 * "cleaned".  Note that since the pages are in a delayed write
	 * buffer -- the VFS layer "will" see that the pages get written
	 * out on the next sync, or perhaps the cluster will be completed.
	 */
	vfs_clean_pages(bp);
	brelse(bp);
	return;
}
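/*
 * Illustrative note on the VOP_BMAP() call above: a freshly created
 * buffer has b_blkno == b_lblkno (getblk() sets both to the same
 * block number), which is exactly what the test keys on.  The effect
 * of the translation, with made-up numbers:
 *
 *	before:	bp->b_lblkno == 5, bp->b_blkno == 5	(untranslated)
 *	after:	bp->b_lblkno == 5, bp->b_blkno == 1234	(device block)
 */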
/*
 * Asynchronous write.
 * Start output on a buffer, but do not wait for it to complete.
 * The buffer is released when the output completes.
 */
void
bawrite(struct buf * bp)
{
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Release a buffer.
 */
void
brelse(struct buf * bp)
{
	int s;

	if (bp->b_flags & B_CLUSTER) {
		relpbuf(bp);
		return;
	}
	/* anyone need a "free" block? */
	s = splbio();

	if (needsbuffer) {
		needsbuffer = 0;
		wakeup(&needsbuffer);
	}

	/* anyone need this block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~(B_WANTED | B_AGE);
		wakeup(bp);
	} else if (bp->b_flags & B_VMIO) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}
	if (bp->b_flags & B_LOCKED)
		bp->b_flags &= ~B_ERROR;

	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
	    (bp->b_bufsize <= 0)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI | B_CACHE);
		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
			brelvp(bp);
	}

	/*
	 * VMIO buffer rundown.  It is not necessary to keep a VMIO buffer
	 * constituted, so the B_INVAL flag is used to *invalidate* the
	 * buffer while the VM object is kept around.  The B_NOCACHE flag
	 * is used to invalidate the pages in the VM object as well.
	 */
	if (bp->b_flags & B_VMIO) {
		vm_offset_t foff;
		vm_object_t obj;
		int i, resid;
		vm_page_t m;
		int iototal = bp->b_bufsize;

		foff = 0;
		obj = 0;
		if (bp->b_npages) {
			if (bp->b_vp && bp->b_vp->v_mount) {
				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
			} else {
				/*
				 * vnode pointer has been ripped away --
				 * probably file gone...
				 */
				foff = bp->b_pages[0]->offset;
			}
		}
		for (i = 0; i < bp->b_npages; i++) {
			m = bp->b_pages[i];
			if (m == bogus_page) {
				m = vm_page_lookup(obj, foff);
				if (!m) {
					panic("brelse: page missing");
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
			resid = (m->offset + PAGE_SIZE) - foff;
			if (resid > iototal)
				resid = iototal;
			if (resid > 0) {
				/*
				 * Don't invalidate the page if the local
				 * machine has already modified it.  This is
				 * the lesser of two evils, and should be
				 * fixed.
				 */
				if (bp->b_flags & (B_NOCACHE | B_ERROR)) {
					vm_page_test_dirty(m);
					if (m->dirty == 0) {
						vm_page_set_invalid(m, foff, resid);
						if (m->valid == 0)
							vm_page_protect(m, VM_PROT_NONE);
					}
				}
			}
			foff += resid;
			iototal -= resid;
		}

		if (bp->b_flags & (B_INVAL | B_RELBUF)) {
			for (i = 0; i < bp->b_npages; i++) {
				m = bp->b_pages[i];
				--m->bmapped;
				if (m->bmapped == 0) {
					if (m->flags & PG_WANTED) {
						wakeup(m);
						m->flags &= ~PG_WANTED;
					}
					vm_page_test_dirty(m);
					if ((m->dirty & m->valid) == 0 &&
					    (m->flags & PG_REFERENCED) == 0 &&
					    !pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
						vm_page_cache(m);
					} else if ((m->flags & PG_ACTIVE) == 0) {
						vm_page_activate(m);
						m->act_count = 0;
					}
				}
			}
			bufspace -= bp->b_bufsize;
			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
			bp->b_npages = 0;
			bp->b_bufsize = 0;
			bp->b_flags &= ~B_VMIO;
			if (bp->b_vp)
				brelvp(bp);
		}
	}
	if (bp->b_qindex != QUEUE_NONE)
		panic("brelse: free buffer onto another queue???");

	/* enqueue */
	/* buffers with no memory */
	if (bp->b_bufsize == 0) {
		bp->b_qindex = QUEUE_EMPTY;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
		bp->b_dev = NODEV;
		/* buffers with junk contents */
	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) {
		bp->b_qindex = QUEUE_AGE;
		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
		bp->b_dev = NODEV;
		/* buffers that are locked */
	} else if (bp->b_flags & B_LOCKED) {
		bp->b_qindex = QUEUE_LOCKED;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
		/* buffers with stale but valid contents */
	} else if (bp->b_flags & B_AGE) {
		bp->b_qindex = QUEUE_AGE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
		/* buffers with valid and quite potentially reusable contents */
	} else {
		bp->b_qindex = QUEUE_LRU;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
	}

	/* unlock */
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
	splx(s);
}
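/*
 * Illustrative summary of the queue placement at the tail of brelse()
 * above:
 *
 *	b_bufsize == 0				QUEUE_EMPTY
 *	B_ERROR|B_INVAL|B_NOCACHE|B_RELBUF	head of QUEUE_AGE
 *	B_LOCKED				QUEUE_LOCKED
 *	B_AGE					tail of QUEUE_AGE
 *	otherwise				tail of QUEUE_LRU
 *
 * getnewbuf() below consumes QUEUE_EMPTY first, then QUEUE_AGE, then
 * QUEUE_LRU, so junk buffers are recycled before potentially useful
 * ones.
 */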
/*
 * This routine implements clustered async writes for
 * clearing out B_DELWRI buffers...  This is much better
 * than the old way of writing only one buffer at a time.
 */
void
vfs_bio_awrite(struct buf * bp)
{
	int i;
	daddr_t lblkno = bp->b_lblkno;
	struct vnode *vp = bp->b_vp;
	int s;
	int ncl;
	struct buf *bpa;

	s = splbio();
	if (vp->v_mount && (vp->v_flag & VVMIO) &&
	    (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
		int size = vp->v_mount->mnt_stat.f_iosize;
		int maxcl = MAXPHYS / size;

		for (i = 1; i < maxcl; i++) {
			if ((bpa = incore(vp, lblkno + i)) &&
			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
			    (B_DELWRI | B_CLUSTEROK)) &&
			    (bpa->b_bufsize == size)) {
				if ((bpa->b_blkno == bpa->b_lblkno) ||
				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
					break;
			} else {
				break;
			}
		}
		ncl = i;
		/*
		 * this is a possible cluster write
		 */
		if (ncl != 1) {
			bremfree(bp);
			cluster_wbuild(vp, bp, size, lblkno, ncl, -1);
			splx(s);
			return;
		}
	}
	/*
	 * default (old) behavior, writing out only one block
	 */
	bremfree(bp);
	bp->b_flags |= B_BUSY | B_ASYNC;
	(void) VOP_BWRITE(bp);
	splx(s);
}
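/*
 * Illustrative example of the cluster scan above, assuming MAXPHYS is
 * 64K and an 8K-block filesystem (so maxcl == 8): a run of delayed
 * write buffers at lblkno 10..13 whose b_blkno values step by
 * 8192 / DEV_BSIZE == 16 device blocks is both logically and
 * physically contiguous, so ncl == 4 and cluster_wbuild() issues one
 * 32K transfer in place of four 8K writes.  The numbers are made up
 * for illustration.
 */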
/*
 * Find a buffer header which is available for use.
 */
static struct buf *
getnewbuf(int slpflag, int slptimeo, int doingvmio)
{
	struct buf *bp;
	int s;

	s = splbio();
start:
	if (bufspace >= maxbufspace)
		goto trytofreespace;

	/* can we constitute a new buffer? */
	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
		if (bp->b_qindex != QUEUE_EMPTY)
			panic("getnewbuf: inconsistent EMPTY queue");
		bremfree(bp);
		goto fillbuf;
	}
trytofreespace:
	/*
	 * We keep the file I/O from hogging metadata I/O.
	 * This is desirable because file data is cached in the
	 * VM/Buffer cache even if a buffer is freed.
	 */
	if ((bp = bufqueues[QUEUE_AGE].tqh_first)) {
		if (bp->b_qindex != QUEUE_AGE)
			panic("getnewbuf: inconsistent AGE queue");
	} else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
		if (bp->b_qindex != QUEUE_LRU)
			panic("getnewbuf: inconsistent LRU queue");
	}
	if (!bp) {
		/* wait for a free buffer of any kind */
		needsbuffer = 1;
		tsleep(&needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
		splx(s);
		return (0);
	}

	/* if we are a delayed write, convert to an async write */
	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
		vfs_bio_awrite(bp);
		if (!slpflag && !slptimeo) {
			splx(s);
			return (0);
		}
		goto start;
	}

	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}
	bremfree(bp);

	if (bp->b_flags & B_VMIO) {
		bp->b_flags |= B_RELBUF | B_BUSY | B_DONE;
		brelse(bp);
		bremfree(bp);
	}

	if (bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
fillbuf:
	bp->b_flags |= B_BUSY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	splx(s);
	if (bp->b_bufsize) {
		allocbuf(bp, 0);
	}
	bp->b_flags = B_BUSY;
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_bcount = 0;
	bp->b_npages = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	if (bufspace >= maxbufspace) {
		s = splbio();
		bp->b_flags |= B_INVAL;
		brelse(bp);
		goto trytofreespace;
	}
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode * vp, daddr_t blkno)
{
	struct buf *bp;
	struct bufhashhdr *bh;

	int s = splbio();

	bh = BUFHASH(vp, blkno);
	bp = bh->lh_first;

	/* Search hash chain */
	while (bp != NULL) {
		/* hit */
		if (bp->b_vp == vp && bp->b_lblkno == blkno &&
		    (bp->b_flags & B_INVAL) == 0) {
			splx(s);
			return (bp);
		}
		bp = bp->b_hash.le_next;
	}
	splx(s);

	return (NULL);
}

/*
 * Returns true if no I/O is needed to access the
 * associated VM object.  This is like incore except
 * it also hunts around in the VM system for the data.
 */

int
inmem(struct vnode * vp, daddr_t blkno)
{
	vm_object_t obj;
	vm_offset_t off, toff, tinc;
	vm_page_t m;

	if (incore(vp, blkno))
		return 1;
	if (vp->v_mount == NULL)
		return 0;
	if ((vp->v_object == NULL) || (vp->v_flag & VVMIO) == 0)
		return 0;

	obj = vp->v_object;
	tinc = PAGE_SIZE;
	if (tinc > vp->v_mount->mnt_stat.f_iosize)
		tinc = vp->v_mount->mnt_stat.f_iosize;
	off = blkno * vp->v_mount->mnt_stat.f_iosize;

	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
		m = vm_page_lookup(obj, trunc_page(toff + off));
		if (!m)
			return 0;
		if (vm_page_is_valid(m, toff + off, tinc) == 0)
			return 0;
	}
	return 1;
}
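/*
 * Illustrative note on incore() vs. inmem(), as used by breadn()
 * above:
 *
 *	incore(vp, blkno)	true only if a buffer header currently
 *				maps the block
 *	inmem(vp, blkno)	also true if, for a VMIO vnode, every
 *				page backing the block is resident and
 *				valid in vp->v_object
 *
 * So inmem() can return true even when getblk() would have to
 * constitute a fresh buffer around the already-valid pages.
 */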
/*
 * Now we set the dirty range for the buffer --
 * for NFS -- if the file is mapped and pages have
 * been written to, let it know.  We want the
 * entire range of the buffer to be marked dirty if
 * any of the pages have been written to for consistency
 * with the b_validoff, b_validend set in the nfs write
 * code, and used by the nfs read code.
 */
static void
vfs_setdirty(struct buf *bp)
{
	int i;
	vm_object_t object;
	vm_offset_t boffset, offset;

	/*
	 * We qualify the scan for modified pages on whether the
	 * object has been flushed yet.  The OBJ_WRITEABLE flag
	 * is not cleared simply by protecting pages off.
	 */
	if ((bp->b_flags & B_VMIO) &&
	    ((object = bp->b_pages[0]->object)->flags & OBJ_WRITEABLE)) {
		/*
		 * test the pages to see if they have been modified directly
		 * by users through the VM system.
		 */
		for (i = 0; i < bp->b_npages; i++)
			vm_page_test_dirty(bp->b_pages[i]);

		/*
		 * scan forwards for the first page modified
		 */
		for (i = 0; i < bp->b_npages; i++) {
			if (bp->b_pages[i]->dirty) {
				break;
			}
		}
		boffset = i * PAGE_SIZE;
		if (boffset < bp->b_dirtyoff) {
			bp->b_dirtyoff = boffset;
		}

		/*
		 * scan backwards for the last page modified
		 */
		for (i = bp->b_npages - 1; i >= 0; --i) {
			if (bp->b_pages[i]->dirty) {
				break;
			}
		}
		boffset = (i + 1) * PAGE_SIZE;
		offset = boffset + bp->b_pages[0]->offset;
		if (offset >= object->size) {
			boffset = object->size - bp->b_pages[0]->offset;
		}
		if (bp->b_dirtyend < boffset) {
			bp->b_dirtyend = boffset;
		}
	}
}
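/*
 * Illustrative note: vfs_setdirty() only ever *widens* the recorded
 * dirty range, and only at page granularity -- b_dirtyoff can move
 * down and b_dirtyend up, never the reverse.  Assuming 4K pages and
 * an 8K buffer whose second page was dirtied through a mapping, the
 * backward scan raises b_dirtyend to 8192, while b_dirtyoff is
 * lowered only if it previously lay beyond the first dirty page
 * (e.g. a value set earlier by the NFS write code).
 */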
/*
 * Get a block given a specified block and offset into a file/device.
 */
struct buf *
getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
{
	struct buf *bp;
	int s;
	struct bufhashhdr *bh;

	s = splbio();
loop:
	if ((bp = incore(vp, blkno))) {
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			if (!tsleep(bp, PRIBIO | slpflag, "getblk", slptimeo))
				goto loop;

			splx(s);
			return ((struct buf *) NULL);
		}
		bp->b_flags |= B_BUSY | B_CACHE;
		bremfree(bp);
		/*
		 * check for size inconsistencies
		 */
		if (bp->b_bcount != size) {
			allocbuf(bp, size);
		}
		splx(s);
		return (bp);
	} else {
		vm_object_t obj;
		int doingvmio;

		if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) {
			doingvmio = 1;
		} else {
			doingvmio = 0;
		}
		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
			if (slpflag || slptimeo) {
				splx(s);
				return NULL;
			}
			goto loop;
		}

		/*
		 * This code is used to make sure that a buffer is not
		 * created while the getnewbuf routine is blocked.
		 * Normally the vnode is locked so this isn't a problem.
		 * VBLK type I/O requests, however, don't lock the vnode.
		 */
		if (!VOP_ISLOCKED(vp) && incore(vp, blkno)) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			goto loop;
		}

		/*
		 * Insert the buffer into the hash, so that it can
		 * be found by incore.
		 */
		bp->b_blkno = bp->b_lblkno = blkno;
		bgetvp(vp, bp);
		LIST_REMOVE(bp, b_hash);
		bh = BUFHASH(vp, blkno);
		LIST_INSERT_HEAD(bh, bp, b_hash);

		if (doingvmio) {
			bp->b_flags |= (B_VMIO | B_CACHE);
#if defined(VFS_BIO_DEBUG)
			if (vp->v_type != VREG)
				printf("getblk: vmioing file type %d???\n", vp->v_type);
#endif
		} else {
			bp->b_flags &= ~B_VMIO;
		}
		splx(s);

		allocbuf(bp, size);
		return (bp);
	}
}

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;

	while ((bp = getnewbuf(0, 0, 0)) == 0)
		;
	allocbuf(bp, size);
	bp->b_flags |= B_INVAL;
	return (bp);
}
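/*
 * Illustrative note: the contract between getblk() and its callers is
 * carried by B_CACHE.  A sketch of the pattern bread() above uses:
 *
 *	bp = getblk(vp, blkno, size, 0, 0);
 *	if ((bp->b_flags & B_CACHE) == 0) {
 *		bp->b_flags |= B_READ;
 *		vfs_busy_pages(bp, 0);
 *		VOP_STRATEGY(bp);
 *		error = biowait(bp);
 *	}
 *
 * For VMIO buffers, allocbuf() below may clear B_CACHE again if any
 * backing page turns out to be invalid, which is what forces the
 * read.
 */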
/*
 * This code constitutes the buffer memory from either anonymous system
 * memory (in the case of non-VMIO operations) or from an associated
 * VM object (in the case of VMIO operations).
 *
 * Note that this code is tricky, and has many complications to resolve
 * deadlock or inconsistent data situations.  Tread lightly!!!
 *
 * Modify the length of a buffer's underlying buffer storage without
 * destroying information (unless, of course, the buffer is shrinking).
 */
int
allocbuf(struct buf * bp, int size)
{
	int s;
	int newbsize;
	int i;

	if (!(bp->b_flags & B_BUSY))
		panic("allocbuf: buffer not busy");

	if ((bp->b_flags & B_VMIO) == 0) {
		/*
		 * Just get anonymous memory from the kernel
		 */
		newbsize = round_page(size);

		if (newbsize < bp->b_bufsize) {
			vm_hold_free_pages(
			    bp,
			    (vm_offset_t) bp->b_data + newbsize,
			    (vm_offset_t) bp->b_data + bp->b_bufsize);
		} else if (newbsize > bp->b_bufsize) {
			vm_hold_load_pages(
			    bp,
			    (vm_offset_t) bp->b_data + bp->b_bufsize,
			    (vm_offset_t) bp->b_data + newbsize);
		}
	} else {
		vm_page_t m;
		int desiredpages;

		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
		desiredpages = round_page(newbsize) / PAGE_SIZE;

		if (newbsize < bp->b_bufsize) {
			if (desiredpages < bp->b_npages) {
				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
				for (i = desiredpages; i < bp->b_npages; i++) {
					m = bp->b_pages[i];
					s = splhigh();
					while ((m->flags & PG_BUSY) || (m->busy != 0)) {
						m->flags |= PG_WANTED;
						tsleep(m, PVM, "biodep", 0);
					}
					splx(s);

					if (m->bmapped == 0) {
						printf("allocbuf: bmapped is zero for page %d\n", i);
						panic("allocbuf: error");
					}
					--m->bmapped;
					if (m->bmapped == 0) {
						vm_page_protect(m, VM_PROT_NONE);
						vm_page_free(m);
					}
					bp->b_pages[i] = NULL;
				}
				bp->b_npages = desiredpages;
			}
		} else if (newbsize > bp->b_bufsize) {
			vm_object_t obj;
			vm_offset_t tinc, off, toff, objoff;
			int pageindex, curbpnpages;
			struct vnode *vp;
			int bsize;

			vp = bp->b_vp;
			bsize = vp->v_mount->mnt_stat.f_iosize;

			if (bp->b_npages < desiredpages) {
				obj = vp->v_object;
				tinc = PAGE_SIZE;
				if (tinc > bsize)
					tinc = bsize;
				off = bp->b_lblkno * bsize;
doretry:
				curbpnpages = bp->b_npages;
				bp->b_flags |= B_CACHE;
				for (toff = 0; toff < newbsize; toff += tinc) {
					int bytesinpage;

					pageindex = toff / PAGE_SIZE;
					objoff = trunc_page(toff + off);
					if (pageindex < curbpnpages) {

						m = bp->b_pages[pageindex];
						if (m->offset != objoff)
							panic("allocbuf: page changed offset??!!!?");
						bytesinpage = tinc;
						if (tinc > (newbsize - toff))
							bytesinpage = newbsize - toff;
						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
							bp->b_flags &= ~B_CACHE;
						}
						if ((m->flags & PG_ACTIVE) == 0) {
							vm_page_activate(m);
							m->act_count = 0;
						}
						continue;
					}
					m = vm_page_lookup(obj, objoff);
					if (!m) {
						m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
						if (!m) {
							int j;

							for (j = bp->b_npages; j < pageindex; j++) {
								PAGE_WAKEUP(bp->b_pages[j]);
							}
							VM_WAIT;
							goto doretry;
						}
						vm_page_activate(m);
						m->act_count = 0;
						m->valid = 0;
						bp->b_flags &= ~B_CACHE;
					} else if (m->flags & PG_BUSY) {
						int j;

						for (j = bp->b_npages; j < pageindex; j++) {
							PAGE_WAKEUP(bp->b_pages[j]);
						}

						s = splbio();
						m->flags |= PG_WANTED;
						tsleep(m, PRIBIO, "pgtblk", 0);
						splx(s);

						goto doretry;
					} else {
						if ((curproc != pageproc) &&
						    (m->flags & PG_CACHE) &&
						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
							pagedaemon_wakeup();
						}
						bytesinpage = tinc;
						if (tinc > (newbsize - toff))
							bytesinpage = newbsize - toff;
						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
							bp->b_flags &= ~B_CACHE;
						}
						if ((m->flags & PG_ACTIVE) == 0) {
							vm_page_activate(m);
							m->act_count = 0;
						}
						m->flags |= PG_BUSY;
					}
					bp->b_pages[pageindex] = m;
					curbpnpages = pageindex + 1;
				}
				for (i = bp->b_npages; i < curbpnpages; i++) {
					m = bp->b_pages[i];
					m->bmapped++;
					PAGE_WAKEUP(m);
				}
				bp->b_npages = curbpnpages;
				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
				bp->b_data += off % PAGE_SIZE;
			}
		}
	}
	bufspace += (newbsize - bp->b_bufsize);
	bp->b_bufsize = newbsize;
	bp->b_bcount = size;
	return 1;
}
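/*
 * Illustrative example of the two sizing rules in allocbuf() above,
 * assuming 4K pages and a requested size of 3000 bytes:
 *
 *	non-VMIO:  newbsize = round_page(3000)             == 4096
 *	VMIO:      newbsize = 3000 rounded up to DEV_BSIZE == 3072
 *	           desiredpages = round_page(3072) / 4096  == 1
 *
 * VMIO buffers track their size in 512-byte device blocks while the
 * backing page count is still rounded to whole pages; in both cases
 * b_bcount records the caller's 3000 and b_bufsize the rounded value.
 */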
/*
 * Wait for buffer I/O completion, returning error status.
 */
int
biowait(register struct buf * bp)
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		tsleep(bp, PRIBIO, "biowait", 0);
	splx(s);
	if (bp->b_flags & B_EINTR) {
		bp->b_flags &= ~B_EINTR;
		return (EINTR);
	}
	if (bp->b_flags & B_ERROR) {
		return (bp->b_error ? bp->b_error : EIO);
	} else {
		return (0);
	}
}

/*
 * Finish I/O on a buffer, calling an optional function.
 * This is usually called from interrupt level, so process blocking
 * is not *a good idea*.
 */
void
biodone(register struct buf * bp)
{
	int s;

	s = splbio();
	if (!(bp->b_flags & B_BUSY))
		panic("biodone: buffer not busy");

	if (bp->b_flags & B_DONE) {
		splx(s);
		printf("biodone: buffer already done\n");
		return;
	}
	bp->b_flags |= B_DONE;

	if ((bp->b_flags & B_READ) == 0) {
		vwakeup(bp);
	}
#ifdef BOUNCE_BUFFERS
	if (bp->b_flags & B_BOUNCE)
		vm_bounce_free(bp);
#endif

	/* call optional completion function if requested */
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone) (bp);
		splx(s);
		return;
	}
	if (bp->b_flags & B_VMIO) {
		int i, resid;
		vm_offset_t foff;
		vm_page_t m;
		vm_object_t obj;
		int iosize;
		struct vnode *vp = bp->b_vp;

		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		obj = vp->v_object;
		if (!obj) {
			panic("biodone: no object");
		}
#if defined(VFS_BIO_DEBUG)
		if (obj->paging_in_progress < bp->b_npages) {
			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
			    obj->paging_in_progress, bp->b_npages);
		}
#endif
		iosize = bp->b_bufsize;
		for (i = 0; i < bp->b_npages; i++) {
			int bogusflag = 0;

			m = bp->b_pages[i];
			if (m == bogus_page) {
				bogusflag = 1;
				m = vm_page_lookup(obj, foff);
				if (!m) {
#if defined(VFS_BIO_DEBUG)
					printf("biodone: page disappeared\n");
#endif
					--obj->paging_in_progress;
					continue;
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
#if defined(VFS_BIO_DEBUG)
			if (trunc_page(foff) != m->offset) {
				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
			}
#endif
			resid = (m->offset + PAGE_SIZE) - foff;
			if (resid > iosize)
				resid = iosize;
			/*
			 * In the write case, the valid and clean bits are
			 * already changed correctly, so we only need to do
			 * this here in the read case.
			 */
			if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) {
				vm_page_set_valid(m, foff & (PAGE_SIZE-1), resid);
				vm_page_set_clean(m, foff & (PAGE_SIZE-1), resid);
			}

			/*
			 * when debugging new filesystems or buffer I/O
			 * methods, this is the most common error that pops
			 * up.  if you see this, you have not set the page
			 * busy flag correctly!!!
			 */
			if (m->busy == 0) {
				printf("biodone: page busy < 0, "
				    "off: %ld, foff: %ld, "
				    "resid: %d, index: %d\n",
				    m->offset, foff, resid, i);
				printf(" iosize: %ld, lblkno: %ld, flags: 0x%x, npages: %d\n",
				    bp->b_vp->v_mount->mnt_stat.f_iosize,
				    bp->b_lblkno, bp->b_flags, bp->b_npages);
				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
				    m->valid, m->dirty, m->bmapped);
				panic("biodone: page busy < 0");
			}
			--m->busy;
			if ((m->busy == 0) && (m->flags & PG_WANTED)) {
				m->flags &= ~PG_WANTED;
				wakeup(m);
			}
			--obj->paging_in_progress;
			foff += resid;
			iosize -= resid;
		}
		if (obj && obj->paging_in_progress == 0 &&
		    (obj->flags & OBJ_PIPWNT)) {
			obj->flags &= ~OBJ_PIPWNT;
			wakeup(obj);
		}
	}
	/*
	 * For asynchronous completions, release the buffer now.  The brelse
	 * checks for B_WANTED and will do the wakeup there if necessary --
	 * so no need to do a wakeup here in the async case.
	 */
	if (bp->b_flags & B_ASYNC) {
		brelse(bp);
	} else {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}
	splx(s);
}
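/*
 * Illustrative example of the B_CALL hook handled in biodone() above:
 * it lets a subsystem take its completion at interrupt time instead
 * of sleeping in biowait().  With a hypothetical handler
 * "void mysub_iodone(struct buf *)":
 *
 *	bp->b_flags |= B_CALL | B_ASYNC;
 *	bp->b_iodone = mysub_iodone;
 *	VOP_STRATEGY(bp);
 *
 * Since biodone() returns immediately after the callback, the handler
 * is left responsible for the remaining completion work, typically
 * ending in brelse().
 */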
int
count_lock_queue()
{
	int count;
	struct buf *bp;

	count = 0;
	for (bp = bufqueues[QUEUE_LOCKED].tqh_first;
	    bp != NULL;
	    bp = bp->b_freelist.tqe_next)
		count++;
	return (count);
}

int vfs_update_interval = 30;

static void
vfs_update()
{
	(void) spl0();		/* XXX redundant?  wrong place? */
	while (1) {
		tsleep(&vfs_update_wakeup, PRIBIO, "update",
		    hz * vfs_update_interval);
		vfs_update_wakeup = 0;
		sync(curproc, NULL, NULL);
	}
}

/*
 * This routine is called in lieu of iodone in the case of
 * incomplete I/O.  This keeps the busy status for pages
 * consistent.
 */
void
vfs_unbusy_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
		vm_object_t obj = vp->v_object;
		vm_offset_t foff;

		foff = trunc_page(vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno);

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];

			if (m == bogus_page) {
				m = vm_page_lookup(obj, foff + i * PAGE_SIZE);
				if (!m) {
					panic("vfs_unbusy_pages: page missing");
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
			--obj->paging_in_progress;
			--m->busy;
			if ((m->busy == 0) && (m->flags & PG_WANTED)) {
				m->flags &= ~PG_WANTED;
				wakeup(m);
			}
		}
		if (obj->paging_in_progress == 0 &&
		    (obj->flags & OBJ_PIPWNT)) {
			obj->flags &= ~OBJ_PIPWNT;
			wakeup(obj);
		}
	}
}

/*
 * This routine is called before a device strategy routine.
 * It is used to tell the VM system that paging I/O is in
 * progress, and treat the pages associated with the buffer
 * almost as being PG_BUSY.  Also the object paging_in_progress
 * flag is handled to make sure that the object doesn't become
 * inconsistent.
 */
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_object_t obj = bp->b_vp->v_object;
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		vfs_setdirty(bp);
		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			obj->paging_in_progress++;
			m->busy++;
			if (clear_modify) {
				vm_page_protect(m, VM_PROT_READ);
				vm_page_set_valid(m,
				    foff & (PAGE_SIZE-1), resid);
				vm_page_set_clean(m,
				    foff & (PAGE_SIZE-1), resid);
			} else if (bp->b_bcount >= PAGE_SIZE) {
				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
					bp->b_pages[i] = bogus_page;
					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
				}
			}
			foff += resid;
			iocount -= resid;
		}
	}
}
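/*
 * Illustrative summary of the protocol the two routines above
 * implement around device I/O:
 *
 *	vfs_busy_pages(bp, 0);		before a read strategy call
 *	vfs_busy_pages(bp, 1);		before a write; pages are marked
 *					valid/clean and write-protected
 *					up front
 *	VOP_STRATEGY(bp);
 *	...
 *	biodone(bp);			normal completion, or
 *	vfs_unbusy_pages(bp);		back out when the transfer is
 *					aborted before it completes
 *
 * Both completion paths drop the m->busy and obj->paging_in_progress
 * references that vfs_busy_pages() took, and deliver the
 * PG_WANTED/OBJ_PIPWNT wakeups.
 */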
/*
 * Tell the VM system that the pages associated with this buffer
 * are clean.  This is used for delayed writes where the data is
 * going to go to disk eventually without additional VM intervention.
 */
void
vfs_clean_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_offset_t foff =
		    bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			if (resid > 0) {
				vm_page_set_valid(m,
				    foff & (PAGE_SIZE-1), resid);
				vm_page_set_clean(m,
				    foff & (PAGE_SIZE-1), resid);
			}
			foff += resid;
			iocount -= resid;
		}
	}
}

void
vfs_bio_clrbuf(struct buf *bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) {
			int j;

			if (bp->b_pages[0]->valid != VM_PAGE_BITS_ALL) {
				for (j = 0; j < bp->b_bufsize / DEV_BSIZE; j++) {
					bzero(bp->b_data + j * DEV_BSIZE, DEV_BSIZE);
				}
			}
			bp->b_resid = 0;
			return;
		}
		for (i = 0; i < bp->b_npages; i++) {
			if (bp->b_pages[i]->valid == VM_PAGE_BITS_ALL)
				continue;
			if (bp->b_pages[i]->valid == 0) {
				bzero(bp->b_data + i * PAGE_SIZE, PAGE_SIZE);
			} else {
				int j;

				for (j = 0; j < PAGE_SIZE / DEV_BSIZE; j++) {
					if ((bp->b_pages[i]->valid & (1 << j)) == 0)
						bzero(bp->b_data + i * PAGE_SIZE + j * DEV_BSIZE, DEV_BSIZE);
				}
			}
			bp->b_pages[i]->valid = VM_PAGE_BITS_ALL;
		}
		bp->b_resid = 0;
	} else {
		clrbuf(bp);
	}
}

/*
 * vm_hold_load_pages and vm_hold_free_pages get pages into
 * a buffer's address space.  The pages are anonymous and are
 * not associated with a file object.
 */
void
vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

	for (pg = from; pg < to; pg += PAGE_SIZE) {

tryagain:

		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS,
		    VM_ALLOC_NORMAL);
		if (!p) {
			VM_WAIT;
			goto tryagain;
		}
		vm_page_wire(p);
		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
		PAGE_WAKEUP(p);
		bp->b_npages++;
	}
}

void
vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

	for (pg = from; pg < to; pg += PAGE_SIZE) {
		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
		pmap_kremove(pg);
		vm_page_free(p);
		--bp->b_npages;
	}
}