/*-
 * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
2739229Sgibbs */ 2839229Sgibbs 29116182Sobrien#include <sys/cdefs.h> 30116182Sobrien__FBSDID("$FreeBSD$"); 31116182Sobrien 32238366Sgnn#include "opt_kdtrace.h" 33238366Sgnn 3439229Sgibbs#include <sys/param.h> 3539229Sgibbs#include <sys/kernel.h> 3639229Sgibbs#include <sys/systm.h> 3760041Sphk#include <sys/bio.h> 38112368Sphk#include <sys/devicestat.h> 39260813Savg#include <sys/sdt.h> 4039229Sgibbs#include <sys/sysctl.h> 41112001Sphk#include <sys/malloc.h> 42112368Sphk#include <sys/lock.h> 43112368Sphk#include <sys/mutex.h> 44112001Sphk#include <sys/conf.h> 45112001Sphk#include <vm/vm.h> 46112001Sphk#include <vm/pmap.h> 4739229Sgibbs 48112368Sphk#include <machine/atomic.h> 4939229Sgibbs 50260813SavgSDT_PROVIDER_DEFINE(io); 51238366Sgnn 52260817SavgSDT_PROBE_DEFINE2(io, , , start, "struct bio *", "struct devstat *"); 53260817SavgSDT_PROBE_DEFINE2(io, , , done, "struct bio *", "struct devstat *"); 54260817SavgSDT_PROBE_DEFINE2(io, , , wait__start, "struct bio *", 55260813Savg "struct devstat *"); 56260817SavgSDT_PROBE_DEFINE2(io, , , wait__done, "struct bio *", 57260813Savg "struct devstat *"); 58238366Sgnn 59260813Savg#define DTRACE_DEVSTAT_START() SDT_PROBE2(io, , , start, NULL, ds) 60260813Savg#define DTRACE_DEVSTAT_BIO_START() SDT_PROBE2(io, , , start, bp, ds) 61260813Savg#define DTRACE_DEVSTAT_DONE() SDT_PROBE2(io, , , done, NULL, ds) 62260813Savg#define DTRACE_DEVSTAT_BIO_DONE() SDT_PROBE2(io, , , done, bp, ds) 63260817Savg#define DTRACE_DEVSTAT_WAIT_START() SDT_PROBE2(io, , , wait__start, NULL, ds) 64260817Savg#define DTRACE_DEVSTAT_WAIT_DONE() SDT_PROBE2(io, , , wait__done, NULL, ds) 65238366Sgnn 6639229Sgibbsstatic int devstat_num_devs; 67119028Sphkstatic long devstat_generation = 1; 6839229Sgibbsstatic int devstat_version = DEVSTAT_VERSION; 6939229Sgibbsstatic int devstat_current_devnumber; 70112368Sphkstatic struct mtx devstat_mutex; 71223061SgibbsMTX_SYSINIT(devstat_mutex, &devstat_mutex, "devstat", MTX_DEF); 7239229Sgibbs 73223061Sgibbsstatic struct 
devstatlist device_statq = STAILQ_HEAD_INITIALIZER(device_statq); 74112001Sphkstatic struct devstat *devstat_alloc(void); 75112001Sphkstatic void devstat_free(struct devstat *); 76112365Sphkstatic void devstat_add_entry(struct devstat *ds, const void *dev_name, 77209390Sed int unit_number, uint32_t block_size, 78112007Sphk devstat_support_flags flags, 79112007Sphk devstat_type_flags device_type, 80112007Sphk devstat_priority priority); 8139229Sgibbs 8239229Sgibbs/* 83112001Sphk * Allocate a devstat and initialize it 84112001Sphk */ 85112001Sphkstruct devstat * 86112365Sphkdevstat_new_entry(const void *dev_name, 87209390Sed int unit_number, uint32_t block_size, 88112001Sphk devstat_support_flags flags, 89112001Sphk devstat_type_flags device_type, 90112001Sphk devstat_priority priority) 91112001Sphk{ 92112001Sphk struct devstat *ds; 93112001Sphk 94112368Sphk mtx_assert(&devstat_mutex, MA_NOTOWNED); 95112368Sphk 96112001Sphk ds = devstat_alloc(); 97112368Sphk mtx_lock(&devstat_mutex); 98112369Sphk if (unit_number == -1) { 99260385Sscottl ds->unit_number = unit_number; 100112369Sphk ds->id = dev_name; 101112369Sphk binuptime(&ds->creation_time); 102112369Sphk devstat_generation++; 103112369Sphk } else { 104112369Sphk devstat_add_entry(ds, dev_name, unit_number, block_size, 105112369Sphk flags, device_type, priority); 106112369Sphk } 107112368Sphk mtx_unlock(&devstat_mutex); 108112001Sphk return (ds); 109112001Sphk} 110112001Sphk 111112001Sphk/* 11239229Sgibbs * Take a malloced and zeroed devstat structure given to us, fill it in 11339229Sgibbs * and add it to the queue of devices. 
11439229Sgibbs */ 115112007Sphkstatic void 116112365Sphkdevstat_add_entry(struct devstat *ds, const void *dev_name, 117209390Sed int unit_number, uint32_t block_size, 11839229Sgibbs devstat_support_flags flags, 11943819Sken devstat_type_flags device_type, 12043819Sken devstat_priority priority) 12139229Sgibbs{ 12239229Sgibbs struct devstatlist *devstat_head; 12343819Sken struct devstat *ds_tmp; 12439229Sgibbs 125112368Sphk mtx_assert(&devstat_mutex, MA_OWNED); 12639229Sgibbs devstat_num_devs++; 12739229Sgibbs 12839229Sgibbs devstat_head = &device_statq; 12939229Sgibbs 13043819Sken /* 13143819Sken * Priority sort. Each driver passes in its priority when it adds 13243819Sken * its devstat entry. Drivers are sorted first by priority, and 13343819Sken * then by probe order. 13443819Sken * 13543819Sken * For the first device, we just insert it, since the priority 13643819Sken * doesn't really matter yet. Subsequent devices are inserted into 13743819Sken * the list using the order outlined above. 13843819Sken */ 13943819Sken if (devstat_num_devs == 1) 14043819Sken STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); 14143819Sken else { 14272012Sphk STAILQ_FOREACH(ds_tmp, devstat_head, dev_links) { 14343819Sken struct devstat *ds_next; 14439229Sgibbs 14543819Sken ds_next = STAILQ_NEXT(ds_tmp, dev_links); 14643819Sken 14743819Sken /* 14843819Sken * If we find a break between higher and lower 14943819Sken * priority items, and if this item fits in the 15043819Sken * break, insert it. This also applies if the 15143819Sken * "lower priority item" is the end of the list. 
15243819Sken */ 15343819Sken if ((priority <= ds_tmp->priority) 15443819Sken && ((ds_next == NULL) 15543819Sken || (priority > ds_next->priority))) { 15643819Sken STAILQ_INSERT_AFTER(devstat_head, ds_tmp, ds, 15743819Sken dev_links); 15843819Sken break; 15943819Sken } else if (priority > ds_tmp->priority) { 16043819Sken /* 16143819Sken * If this is the case, we should be able 16243819Sken * to insert ourselves at the head of the 16343819Sken * list. If we can't, something is wrong. 16443819Sken */ 16543819Sken if (ds_tmp == STAILQ_FIRST(devstat_head)) { 16643819Sken STAILQ_INSERT_HEAD(devstat_head, 16743819Sken ds, dev_links); 16843819Sken break; 16943819Sken } else { 17043819Sken STAILQ_INSERT_TAIL(devstat_head, 17143819Sken ds, dev_links); 17243819Sken printf("devstat_add_entry: HELP! " 17343819Sken "sorting problem detected " 174112365Sphk "for name %p unit %d\n", 175112365Sphk dev_name, unit_number); 17643819Sken break; 17743819Sken } 17843819Sken } 17943819Sken } 18043819Sken } 18143819Sken 18239229Sgibbs ds->device_number = devstat_current_devnumber++; 18339229Sgibbs ds->unit_number = unit_number; 184105354Srobert strlcpy(ds->device_name, dev_name, DEVSTAT_NAME_LEN); 18539229Sgibbs ds->block_size = block_size; 18639229Sgibbs ds->flags = flags; 18739229Sgibbs ds->device_type = device_type; 18843819Sken ds->priority = priority; 189112288Sphk binuptime(&ds->creation_time); 190112368Sphk devstat_generation++; 19139229Sgibbs} 19239229Sgibbs 19339229Sgibbs/* 19439229Sgibbs * Remove a devstat structure from the list of devices. 
19539229Sgibbs */ 19639229Sgibbsvoid 19739229Sgibbsdevstat_remove_entry(struct devstat *ds) 19839229Sgibbs{ 19939229Sgibbs struct devstatlist *devstat_head; 20039229Sgibbs 201112368Sphk mtx_assert(&devstat_mutex, MA_NOTOWNED); 20239229Sgibbs if (ds == NULL) 20339229Sgibbs return; 20439229Sgibbs 205112368Sphk mtx_lock(&devstat_mutex); 20639229Sgibbs 20739229Sgibbs devstat_head = &device_statq; 20839229Sgibbs 20939229Sgibbs /* Remove this entry from the devstat queue */ 210112368Sphk atomic_add_acq_int(&ds->sequence1, 1); 211260385Sscottl if (ds->unit_number != -1) { 212112369Sphk devstat_num_devs--; 213112369Sphk STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); 214112369Sphk } 215112368Sphk devstat_free(ds); 216112368Sphk devstat_generation++; 217112368Sphk mtx_unlock(&devstat_mutex); 21839229Sgibbs} 21939229Sgibbs 22039229Sgibbs/* 22139229Sgibbs * Record a transaction start. 222112288Sphk * 223112288Sphk * See comments for devstat_end_transaction(). Ordering is very important 224112288Sphk * here. 22539229Sgibbs */ 22639229Sgibbsvoid 227112288Sphkdevstat_start_transaction(struct devstat *ds, struct bintime *now) 22839229Sgibbs{ 229112368Sphk 230112368Sphk mtx_assert(&devstat_mutex, MA_NOTOWNED); 231112368Sphk 23239229Sgibbs /* sanity check */ 23339229Sgibbs if (ds == NULL) 23439229Sgibbs return; 23539229Sgibbs 236112368Sphk atomic_add_acq_int(&ds->sequence1, 1); 23739229Sgibbs /* 23839229Sgibbs * We only want to set the start time when we are going from idle 23939229Sgibbs * to busy. The start time is really the start of the latest busy 24039229Sgibbs * period. 
24139229Sgibbs */ 242112288Sphk if (ds->start_count == ds->end_count) { 243112288Sphk if (now != NULL) 244112288Sphk ds->busy_from = *now; 245112288Sphk else 246112288Sphk binuptime(&ds->busy_from); 247112288Sphk } 248112288Sphk ds->start_count++; 249112368Sphk atomic_add_rel_int(&ds->sequence0, 1); 250238366Sgnn DTRACE_DEVSTAT_START(); 25139229Sgibbs} 25239229Sgibbs 253112258Sphkvoid 254112258Sphkdevstat_start_transaction_bio(struct devstat *ds, struct bio *bp) 255112258Sphk{ 256112258Sphk 257112368Sphk mtx_assert(&devstat_mutex, MA_NOTOWNED); 258112368Sphk 259112368Sphk /* sanity check */ 260112368Sphk if (ds == NULL) 261112368Sphk return; 262112368Sphk 263112288Sphk binuptime(&bp->bio_t0); 264112288Sphk devstat_start_transaction(ds, &bp->bio_t0); 265238366Sgnn DTRACE_DEVSTAT_BIO_START(); 266112288Sphk} 267112288Sphk 26839229Sgibbs/* 26939229Sgibbs * Record the ending of a transaction, and incrment the various counters. 270112288Sphk * 271112288Sphk * Ordering in this function, and in devstat_start_transaction() is VERY 272112288Sphk * important. The idea here is to run without locks, so we are very 273112288Sphk * careful to only modify some fields on the way "down" (i.e. at 274112288Sphk * transaction start) and some fields on the way "up" (i.e. at transaction 275112288Sphk * completion). One exception is busy_from, which we only modify in 276112288Sphk * devstat_start_transaction() when there are no outstanding transactions, 277112288Sphk * and thus it can't be modified in devstat_end_transaction() 278112288Sphk * simultaneously. 279112368Sphk * 280112368Sphk * The sequence0 and sequence1 fields are provided to enable an application 281112368Sphk * spying on the structures with mmap(2) to tell when a structure is in a 282112368Sphk * consistent state or not. 
283112368Sphk * 284112368Sphk * For this to work 100% reliably, it is important that the two fields 285112368Sphk * are at opposite ends of the structure and that they are incremented 286112368Sphk * in the opposite order of how a memcpy(3) in userland would copy them. 287112368Sphk * We assume that the copying happens front to back, but there is actually 288112368Sphk * no way short of writing your own memcpy(3) replacement to guarantee 289112368Sphk * this will be the case. 290112368Sphk * 291112368Sphk * In addition to this, being a kind of locks, they must be updated with 292112368Sphk * atomic instructions using appropriate memory barriers. 29339229Sgibbs */ 29439229Sgibbsvoid 295209390Seddevstat_end_transaction(struct devstat *ds, uint32_t bytes, 296112288Sphk devstat_tag_type tag_type, devstat_trans_flags flags, 297112288Sphk struct bintime *now, struct bintime *then) 29839229Sgibbs{ 299112288Sphk struct bintime dt, lnow; 30039229Sgibbs 30139229Sgibbs /* sanity check */ 30239229Sgibbs if (ds == NULL) 30339229Sgibbs return; 30439229Sgibbs 305112288Sphk if (now == NULL) { 306112288Sphk now = &lnow; 307112288Sphk binuptime(now); 308112288Sphk } 30939229Sgibbs 310112368Sphk atomic_add_acq_int(&ds->sequence1, 1); 311112288Sphk /* Update byte and operations counts */ 312112288Sphk ds->bytes[flags] += bytes; 313112288Sphk ds->operations[flags]++; 31439229Sgibbs 31539229Sgibbs /* 31639229Sgibbs * Keep a count of the various tag types sent. 
31739229Sgibbs */ 31851397Sphk if ((ds->flags & DEVSTAT_NO_ORDERED_TAGS) == 0 && 31951375Sphk tag_type != DEVSTAT_TAG_NONE) 32039229Sgibbs ds->tag_types[tag_type]++; 32139229Sgibbs 322112288Sphk if (then != NULL) { 323112288Sphk /* Update duration of operations */ 324112288Sphk dt = *now; 325112288Sphk bintime_sub(&dt, then); 326112288Sphk bintime_add(&ds->duration[flags], &dt); 327112288Sphk } 32839229Sgibbs 329112288Sphk /* Accumulate busy time */ 330112288Sphk dt = *now; 331112288Sphk bintime_sub(&dt, &ds->busy_from); 332112288Sphk bintime_add(&ds->busy_time, &dt); 333112288Sphk ds->busy_from = *now; 334112288Sphk 335112288Sphk ds->end_count++; 336112368Sphk atomic_add_rel_int(&ds->sequence0, 1); 337238366Sgnn DTRACE_DEVSTAT_DONE(); 33839229Sgibbs} 33939229Sgibbs 34051375Sphkvoid 34158942Sphkdevstat_end_transaction_bio(struct devstat *ds, struct bio *bp) 34258942Sphk{ 343260385Sscottl 344260385Sscottl devstat_end_transaction_bio_bt(ds, bp, NULL); 345260385Sscottl} 346260385Sscottl 347260385Sscottlvoid 348260385Sscottldevstat_end_transaction_bio_bt(struct devstat *ds, struct bio *bp, 349260385Sscottl struct bintime *now) 350260385Sscottl{ 35158942Sphk devstat_trans_flags flg; 35258942Sphk 353112368Sphk /* sanity check */ 354112368Sphk if (ds == NULL) 355112368Sphk return; 356112368Sphk 35758942Sphk if (bp->bio_cmd == BIO_DELETE) 35858942Sphk flg = DEVSTAT_FREE; 35958942Sphk else if (bp->bio_cmd == BIO_READ) 36058942Sphk flg = DEVSTAT_READ; 361112368Sphk else if (bp->bio_cmd == BIO_WRITE) 36258942Sphk flg = DEVSTAT_WRITE; 363112368Sphk else 364112368Sphk flg = DEVSTAT_NO_DATA; 36558942Sphk 36658942Sphk devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, 367260385Sscottl DEVSTAT_TAG_SIMPLE, flg, now, &bp->bio_t0); 368238366Sgnn DTRACE_DEVSTAT_BIO_DONE(); 36958942Sphk} 37058942Sphk 37139229Sgibbs/* 37239229Sgibbs * This is the sysctl handler for the devstat package. 
The data pushed out 37339229Sgibbs * on the kern.devstat.all sysctl variable consists of the current devstat 37439229Sgibbs * generation number, and then an array of devstat structures, one for each 37539229Sgibbs * device in the system. 37639229Sgibbs * 377112368Sphk * This is more cryptic that obvious, but basically we neither can nor 378112368Sphk * want to hold the devstat_mutex for any amount of time, so we grab it 379112368Sphk * only when we need to and keep an eye on devstat_generation all the time. 38039229Sgibbs */ 38139229Sgibbsstatic int 38262573Sphksysctl_devstat(SYSCTL_HANDLER_ARGS) 38339229Sgibbs{ 384112368Sphk int error; 385113599Sharti long mygen; 38639229Sgibbs struct devstat *nds; 38739229Sgibbs 388112368Sphk mtx_assert(&devstat_mutex, MA_NOTOWNED); 389112368Sphk 39039229Sgibbs /* 391112368Sphk * XXX devstat_generation should really be "volatile" but that 392112368Sphk * XXX freaks out the sysctl macro below. The places where we 393112368Sphk * XXX change it and inspect it are bracketed in the mutex which 394112368Sphk * XXX guarantees us proper write barriers. I don't belive the 395112368Sphk * XXX compiler is allowed to optimize mygen away across calls 396112368Sphk * XXX to other functions, so the following is belived to be safe. 
39739229Sgibbs */ 398112368Sphk mygen = devstat_generation; 39939229Sgibbs 400112368Sphk error = SYSCTL_OUT(req, &mygen, sizeof(mygen)); 401112368Sphk 402119028Sphk if (devstat_num_devs == 0) 403119028Sphk return(0); 404119028Sphk 405112368Sphk if (error != 0) 406112368Sphk return (error); 407112368Sphk 408112368Sphk mtx_lock(&devstat_mutex); 409112368Sphk nds = STAILQ_FIRST(&device_statq); 410112368Sphk if (mygen != devstat_generation) 411112368Sphk error = EBUSY; 412112368Sphk mtx_unlock(&devstat_mutex); 413112368Sphk 414112368Sphk if (error != 0) 415112368Sphk return (error); 416112368Sphk 417112368Sphk for (;nds != NULL;) { 41839229Sgibbs error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); 419112368Sphk if (error != 0) 420112368Sphk return (error); 421112368Sphk mtx_lock(&devstat_mutex); 422112368Sphk if (mygen != devstat_generation) 423112368Sphk error = EBUSY; 424112368Sphk else 425112368Sphk nds = STAILQ_NEXT(nds, dev_links); 426112368Sphk mtx_unlock(&devstat_mutex); 427112368Sphk if (error != 0) 428112368Sphk return (error); 429112368Sphk } 43039229Sgibbs return(error); 43139229Sgibbs} 43239229Sgibbs 43339229Sgibbs/* 43439229Sgibbs * Sysctl entries for devstat. The first one is a node that all the rest 43539229Sgibbs * hang off of. 43639229Sgibbs */ 437227309Sedstatic SYSCTL_NODE(_kern, OID_AUTO, devstat, CTLFLAG_RD, NULL, 438227309Sed "Device Statistics"); 43939229Sgibbs 44039229SgibbsSYSCTL_PROC(_kern_devstat, OID_AUTO, all, CTLFLAG_RD|CTLTYPE_OPAQUE, 441188063Simp NULL, 0, sysctl_devstat, "S,devstat", "All devices in the devstat list"); 44239229Sgibbs/* 44339229Sgibbs * Export the number of devices in the system so that userland utilities 44439229Sgibbs * can determine how much memory to allocate to hold all the devices. 
44539229Sgibbs */ 44646381SbillfSYSCTL_INT(_kern_devstat, OID_AUTO, numdevs, CTLFLAG_RD, 44746381Sbillf &devstat_num_devs, 0, "Number of devices in the devstat list"); 448113599ShartiSYSCTL_LONG(_kern_devstat, OID_AUTO, generation, CTLFLAG_RD, 44962622Sjhb &devstat_generation, 0, "Devstat list generation"); 45046381SbillfSYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD, 45146381Sbillf &devstat_version, 0, "Devstat list version number"); 452112001Sphk 453112368Sphk/* 454112368Sphk * Allocator for struct devstat structures. We sub-allocate these from pages 455112368Sphk * which we get from malloc. These pages are exported for mmap(2)'ing through 456112368Sphk * a miniature device driver 457112368Sphk */ 458112368Sphk 459112001Sphk#define statsperpage (PAGE_SIZE / sizeof(struct devstat)) 460112001Sphk 461112001Sphkstatic d_mmap_t devstat_mmap; 462112001Sphk 463112001Sphkstatic struct cdevsw devstat_cdevsw = { 464126080Sphk .d_version = D_VERSION, 465112001Sphk .d_mmap = devstat_mmap, 466112001Sphk .d_name = "devstat", 467112001Sphk}; 468112001Sphk 469112001Sphkstruct statspage { 470112001Sphk TAILQ_ENTRY(statspage) list; 471112001Sphk struct devstat *stat; 472112001Sphk u_int nfree; 473112001Sphk}; 474112001Sphk 475112001Sphkstatic TAILQ_HEAD(, statspage) pagelist = TAILQ_HEAD_INITIALIZER(pagelist); 476112001Sphkstatic MALLOC_DEFINE(M_DEVSTAT, "devstat", "Device statistics"); 477112001Sphk 478112001Sphkstatic int 479201223Srnolanddevstat_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 480201223Srnoland int nprot, vm_memattr_t *memattr) 481112001Sphk{ 482112001Sphk struct statspage *spp; 483112001Sphk 484112001Sphk if (nprot != VM_PROT_READ) 485112001Sphk return (-1); 486264546Semax mtx_lock(&devstat_mutex); 487112001Sphk TAILQ_FOREACH(spp, &pagelist, list) { 488112001Sphk if (offset == 0) { 489112001Sphk *paddr = vtophys(spp->stat); 490264546Semax mtx_unlock(&devstat_mutex); 491112001Sphk return (0); 492112001Sphk } 493112001Sphk offset -= 
PAGE_SIZE; 494112001Sphk } 495264546Semax mtx_unlock(&devstat_mutex); 496112001Sphk return (-1); 497112001Sphk} 498112001Sphk 499112001Sphkstatic struct devstat * 500112001Sphkdevstat_alloc(void) 501112001Sphk{ 502112001Sphk struct devstat *dsp; 503197309Sattilio struct statspage *spp, *spp2; 504112001Sphk u_int u; 505112001Sphk static int once; 506112001Sphk 507112368Sphk mtx_assert(&devstat_mutex, MA_NOTOWNED); 508223062Sken if (!once) { 509223062Sken make_dev_credf(MAKEDEV_ETERNAL | MAKEDEV_CHECKNAME, 510264546Semax &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0444, 511223062Sken DEVSTAT_DEVICE_NAME); 512223062Sken once = 1; 513112001Sphk } 514197309Sattilio spp2 = NULL; 515112368Sphk mtx_lock(&devstat_mutex); 516112368Sphk for (;;) { 517112368Sphk TAILQ_FOREACH(spp, &pagelist, list) { 518112368Sphk if (spp->nfree > 0) 519112368Sphk break; 520112368Sphk } 521112368Sphk if (spp != NULL) 522112001Sphk break; 523197309Sattilio mtx_unlock(&devstat_mutex); 524197309Sattilio spp2 = malloc(sizeof *spp, M_DEVSTAT, M_ZERO | M_WAITOK); 525197309Sattilio spp2->stat = malloc(PAGE_SIZE, M_DEVSTAT, M_ZERO | M_WAITOK); 526197309Sattilio spp2->nfree = statsperpage; 527197309Sattilio 528112368Sphk /* 529197309Sattilio * If free statspages were added while the lock was released 530197309Sattilio * just reuse them. 531112368Sphk */ 532112368Sphk mtx_lock(&devstat_mutex); 533197309Sattilio TAILQ_FOREACH(spp, &pagelist, list) 534197309Sattilio if (spp->nfree > 0) 535197309Sattilio break; 536197309Sattilio if (spp == NULL) { 537197309Sattilio spp = spp2; 538197309Sattilio 539197309Sattilio /* 540197309Sattilio * It would make more sense to add the new page at the 541197309Sattilio * head but the order on the list determine the 542197309Sattilio * sequence of the mapping so we can't do that. 
543197309Sattilio */ 544197309Sattilio TAILQ_INSERT_TAIL(&pagelist, spp, list); 545197309Sattilio } else 546197309Sattilio break; 547112001Sphk } 548112001Sphk dsp = spp->stat; 549112001Sphk for (u = 0; u < statsperpage; u++) { 550112001Sphk if (dsp->allocated == 0) 551112001Sphk break; 552112001Sphk dsp++; 553112001Sphk } 554112001Sphk spp->nfree--; 555112001Sphk dsp->allocated = 1; 556112368Sphk mtx_unlock(&devstat_mutex); 557197309Sattilio if (spp2 != NULL && spp2 != spp) { 558197309Sattilio free(spp2->stat, M_DEVSTAT); 559197309Sattilio free(spp2, M_DEVSTAT); 560197309Sattilio } 561112001Sphk return (dsp); 562112001Sphk} 563112001Sphk 564112001Sphkstatic void 565112001Sphkdevstat_free(struct devstat *dsp) 566112001Sphk{ 567112001Sphk struct statspage *spp; 568112001Sphk 569112368Sphk mtx_assert(&devstat_mutex, MA_OWNED); 570112001Sphk bzero(dsp, sizeof *dsp); 571112001Sphk TAILQ_FOREACH(spp, &pagelist, list) { 572112001Sphk if (dsp >= spp->stat && dsp < (spp->stat + statsperpage)) { 573112001Sphk spp->nfree++; 574112001Sphk return; 575112001Sphk } 576112001Sphk } 577112001Sphk} 578112288Sphk 579112288SphkSYSCTL_INT(_debug_sizeof, OID_AUTO, devstat, CTLFLAG_RD, 580188063Simp NULL, sizeof(struct devstat), "sizeof(struct devstat)"); 581