1298010Simp/*- 2298010Simp * CAM IO Scheduler Interface 3298010Simp * 4298010Simp * Copyright (c) 2015 Netflix, Inc. 5298010Simp * All rights reserved. 6298010Simp * 7298010Simp * Redistribution and use in source and binary forms, with or without 8298010Simp * modification, are permitted provided that the following conditions 9298010Simp * are met: 10298010Simp * 1. Redistributions of source code must retain the above copyright 11298010Simp * notice, this list of conditions, and the following disclaimer, 12298010Simp * without modification, immediately at the beginning of the file. 13298010Simp * 2. The name of the author may not be used to endorse or promote products 14298010Simp * derived from this software without specific prior written permission. 15298010Simp * 16298010Simp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17298010Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18298010Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19298010Simp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20298010Simp * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21298010Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22298010Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23298010Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24298010Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25298010Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26298010Simp * SUCH DAMAGE. 
27298010Simp * 28298010Simp * $FreeBSD: stable/11/sys/cam/cam_iosched.c 334263 2018-05-27 23:52:41Z sbruno $ 29298010Simp */ 30298010Simp 31298010Simp#include "opt_cam.h" 32298010Simp#include "opt_ddb.h" 33298010Simp 34298010Simp#include <sys/cdefs.h> 35298010Simp__FBSDID("$FreeBSD: stable/11/sys/cam/cam_iosched.c 334263 2018-05-27 23:52:41Z sbruno $"); 36298010Simp 37298010Simp#include <sys/param.h> 38298010Simp 39298010Simp#include <sys/systm.h> 40298010Simp#include <sys/kernel.h> 41298010Simp#include <sys/bio.h> 42298010Simp#include <sys/lock.h> 43298010Simp#include <sys/malloc.h> 44298010Simp#include <sys/mutex.h> 45298010Simp#include <sys/sysctl.h> 46298010Simp 47298010Simp#include <cam/cam.h> 48298010Simp#include <cam/cam_ccb.h> 49298010Simp#include <cam/cam_periph.h> 50298010Simp#include <cam/cam_xpt_periph.h> 51298010Simp#include <cam/cam_iosched.h> 52298010Simp 53298010Simp#include <ddb/ddb.h> 54298010Simp 55298010Simpstatic MALLOC_DEFINE(M_CAMSCHED, "CAM I/O Scheduler", 56298010Simp "CAM I/O Scheduler buffers"); 57298010Simp 58298010Simp/* 59298010Simp * Default I/O scheduler for FreeBSD. This implementation is just a thin-vineer 60298010Simp * over the bioq_* interface, with notions of separate calls for normal I/O and 61298010Simp * for trims. 62302396Simp * 63302396Simp * When CAM_IOSCHED_DYNAMIC is defined, the scheduler is enhanced to dynamically 64302396Simp * steer the rate of one type of traffic to help other types of traffic (eg 65302396Simp * limit writes when read latency deteriorates on SSDs). 
66298010Simp */ 67298010Simp 68302163Simp#ifdef CAM_IOSCHED_DYNAMIC 69298010Simp 70302396Simpstatic int do_dynamic_iosched = 1; 71302396SimpTUNABLE_INT("kern.cam.do_dynamic_iosched", &do_dynamic_iosched); 72302396SimpSYSCTL_INT(_kern_cam, OID_AUTO, do_dynamic_iosched, CTLFLAG_RD, 73302396Simp &do_dynamic_iosched, 1, 74302396Simp "Enable Dynamic I/O scheduler optimizations."); 75298010Simp 76298010Simpstatic int alpha_bits = 9; 77298010SimpTUNABLE_INT("kern.cam.iosched_alpha_bits", &alpha_bits); 78298010SimpSYSCTL_INT(_kern_cam, OID_AUTO, iosched_alpha_bits, CTLFLAG_RW, 79298010Simp &alpha_bits, 1, 80298010Simp "Bits in EMA's alpha."); 81298010Simp 82298010Simp 83298010Simp 84298010Simpstruct iop_stats; 85298010Simpstruct cam_iosched_softc; 86298010Simp 87298010Simpint iosched_debug = 0; 88298010Simp 89298010Simptypedef enum { 90298010Simp none = 0, /* No limits */ 91298010Simp queue_depth, /* Limit how many ops we queue to SIM */ 92298010Simp iops, /* Limit # of IOPS to the drive */ 93298010Simp bandwidth, /* Limit bandwidth to the drive */ 94298010Simp limiter_max 95298010Simp} io_limiter; 96298010Simp 97298010Simpstatic const char *cam_iosched_limiter_names[] = 98298010Simp { "none", "queue_depth", "iops", "bandwidth" }; 99298010Simp 100298010Simp/* 101298010Simp * Called to initialize the bits of the iop_stats structure relevant to the 102298010Simp * limiter. Called just after the limiter is set. 103298010Simp */ 104298010Simptypedef int l_init_t(struct iop_stats *); 105298010Simp 106298010Simp/* 107298010Simp * Called every tick. 108298010Simp */ 109298010Simptypedef int l_tick_t(struct iop_stats *); 110298010Simp 111298010Simp/* 112298010Simp * Called to see if the limiter thinks this IOP can be allowed to 113298010Simp * proceed. If so, the limiter assumes that the while IOP proceeded 114298010Simp * and makes any accounting of it that's needed. 
115298010Simp */ 116298010Simptypedef int l_iop_t(struct iop_stats *, struct bio *); 117298010Simp 118298010Simp/* 119298010Simp * Called when an I/O completes so the limiter can updates its 120298010Simp * accounting. Pending I/Os may complete in any order (even when 121298010Simp * sent to the hardware at the same time), so the limiter may not 122298010Simp * make any assumptions other than this I/O has completed. If it 123298010Simp * returns 1, then xpt_schedule() needs to be called again. 124298010Simp */ 125298010Simptypedef int l_iodone_t(struct iop_stats *, struct bio *); 126298010Simp 127298010Simpstatic l_iop_t cam_iosched_qd_iop; 128298010Simpstatic l_iop_t cam_iosched_qd_caniop; 129298010Simpstatic l_iodone_t cam_iosched_qd_iodone; 130298010Simp 131298010Simpstatic l_init_t cam_iosched_iops_init; 132298010Simpstatic l_tick_t cam_iosched_iops_tick; 133298010Simpstatic l_iop_t cam_iosched_iops_caniop; 134298010Simpstatic l_iop_t cam_iosched_iops_iop; 135298010Simp 136298010Simpstatic l_init_t cam_iosched_bw_init; 137298010Simpstatic l_tick_t cam_iosched_bw_tick; 138298010Simpstatic l_iop_t cam_iosched_bw_caniop; 139298010Simpstatic l_iop_t cam_iosched_bw_iop; 140298010Simp 141298010Simpstruct limswitch 142298010Simp{ 143298010Simp l_init_t *l_init; 144298010Simp l_tick_t *l_tick; 145298010Simp l_iop_t *l_iop; 146298010Simp l_iop_t *l_caniop; 147298010Simp l_iodone_t *l_iodone; 148298010Simp} limsw[] = 149298010Simp{ 150298010Simp { /* none */ 151298010Simp .l_init = NULL, 152298010Simp .l_tick = NULL, 153298010Simp .l_iop = NULL, 154298010Simp .l_iodone= NULL, 155298010Simp }, 156298010Simp { /* queue_depth */ 157298010Simp .l_init = NULL, 158298010Simp .l_tick = NULL, 159298010Simp .l_caniop = cam_iosched_qd_caniop, 160298010Simp .l_iop = cam_iosched_qd_iop, 161298010Simp .l_iodone= cam_iosched_qd_iodone, 162298010Simp }, 163298010Simp { /* iops */ 164298010Simp .l_init = cam_iosched_iops_init, 165298010Simp .l_tick = cam_iosched_iops_tick, 166298010Simp 
.l_caniop = cam_iosched_iops_caniop, 167298010Simp .l_iop = cam_iosched_iops_iop, 168298010Simp .l_iodone= NULL, 169298010Simp }, 170298010Simp { /* bandwidth */ 171298010Simp .l_init = cam_iosched_bw_init, 172298010Simp .l_tick = cam_iosched_bw_tick, 173298010Simp .l_caniop = cam_iosched_bw_caniop, 174298010Simp .l_iop = cam_iosched_bw_iop, 175298010Simp .l_iodone= NULL, 176298010Simp }, 177298010Simp}; 178298010Simp 179298010Simpstruct iop_stats 180298010Simp{ 181298010Simp /* 182298010Simp * sysctl state for this subnode. 183298010Simp */ 184298010Simp struct sysctl_ctx_list sysctl_ctx; 185298010Simp struct sysctl_oid *sysctl_tree; 186298010Simp 187298010Simp /* 188298010Simp * Information about the current rate limiters, if any 189298010Simp */ 190298010Simp io_limiter limiter; /* How are I/Os being limited */ 191298010Simp int min; /* Low range of limit */ 192298010Simp int max; /* High range of limit */ 193298010Simp int current; /* Current rate limiter */ 194298010Simp int l_value1; /* per-limiter scratch value 1. */ 195298010Simp int l_value2; /* per-limiter scratch value 2. */ 196298010Simp 197298010Simp 198298010Simp /* 199298010Simp * Debug information about counts of I/Os that have gone through the 200298010Simp * scheduler. 201298010Simp */ 202298010Simp int pending; /* I/Os pending in the hardware */ 203298010Simp int queued; /* number currently in the queue */ 204298010Simp int total; /* Total for all time -- wraps */ 205298010Simp int in; /* number queued all time -- wraps */ 206298010Simp int out; /* number completed all time -- wraps */ 207298010Simp 208298010Simp /* 209298010Simp * Statistics on different bits of the process. 
210298010Simp */ 211298010Simp /* Exp Moving Average, alpha = 1 / (1 << alpha_bits) */ 212298010Simp sbintime_t ema; 213298010Simp sbintime_t emss; /* Exp Moving sum of the squares */ 214298010Simp sbintime_t sd; /* Last computed sd */ 215298010Simp 216298010Simp struct cam_iosched_softc *softc; 217298010Simp}; 218298010Simp 219298010Simp 220298010Simptypedef enum { 221298010Simp set_max = 0, /* current = max */ 222298010Simp read_latency, /* Steer read latency by throttling writes */ 223298010Simp cl_max /* Keep last */ 224298010Simp} control_type; 225298010Simp 226298010Simpstatic const char *cam_iosched_control_type_names[] = 227298010Simp { "set_max", "read_latency" }; 228298010Simp 229298010Simpstruct control_loop 230298010Simp{ 231298010Simp /* 232298010Simp * sysctl state for this subnode. 233298010Simp */ 234298010Simp struct sysctl_ctx_list sysctl_ctx; 235298010Simp struct sysctl_oid *sysctl_tree; 236298010Simp 237298010Simp sbintime_t next_steer; /* Time of next steer */ 238298010Simp sbintime_t steer_interval; /* How often do we steer? */ 239298010Simp sbintime_t lolat; 240298010Simp sbintime_t hilat; 241298010Simp int alpha; 242298010Simp control_type type; /* What type of control? 
*/ 243298010Simp int last_count; /* Last I/O count */ 244298010Simp 245298010Simp struct cam_iosched_softc *softc; 246298010Simp}; 247298010Simp 248298010Simp#endif 249298010Simp 250298010Simpstruct cam_iosched_softc 251298010Simp{ 252298010Simp struct bio_queue_head bio_queue; 253298010Simp struct bio_queue_head trim_queue; 254298010Simp /* scheduler flags < 16, user flags >= 16 */ 255298010Simp uint32_t flags; 256298010Simp int sort_io_queue; 257302163Simp#ifdef CAM_IOSCHED_DYNAMIC 258298010Simp int read_bias; /* Read bias setting */ 259298010Simp int current_read_bias; /* Current read bias state */ 260298010Simp int total_ticks; 261298010Simp 262298010Simp struct bio_queue_head write_queue; 263298010Simp struct iop_stats read_stats, write_stats, trim_stats; 264298010Simp struct sysctl_ctx_list sysctl_ctx; 265298010Simp struct sysctl_oid *sysctl_tree; 266298010Simp 267298010Simp int quanta; /* Number of quanta per second */ 268298010Simp struct callout ticker; /* Callout for our quota system */ 269298010Simp struct cam_periph *periph; /* cam periph associated with this device */ 270298010Simp uint32_t this_frac; /* Fraction of a second (1024ths) for this tick */ 271298010Simp sbintime_t last_time; /* Last time we ticked */ 272298010Simp struct control_loop cl; 273298010Simp#endif 274298010Simp}; 275298010Simp 276302163Simp#ifdef CAM_IOSCHED_DYNAMIC 277298010Simp/* 278298010Simp * helper functions to call the limsw functions. 
279298010Simp */ 280298010Simpstatic int 281298010Simpcam_iosched_limiter_init(struct iop_stats *ios) 282298010Simp{ 283298010Simp int lim = ios->limiter; 284298010Simp 285298010Simp /* maybe this should be a kassert */ 286298010Simp if (lim < none || lim >= limiter_max) 287298010Simp return EINVAL; 288298010Simp 289298010Simp if (limsw[lim].l_init) 290298010Simp return limsw[lim].l_init(ios); 291298010Simp 292298010Simp return 0; 293298010Simp} 294298010Simp 295298010Simpstatic int 296298010Simpcam_iosched_limiter_tick(struct iop_stats *ios) 297298010Simp{ 298298010Simp int lim = ios->limiter; 299298010Simp 300298010Simp /* maybe this should be a kassert */ 301298010Simp if (lim < none || lim >= limiter_max) 302298010Simp return EINVAL; 303298010Simp 304298010Simp if (limsw[lim].l_tick) 305298010Simp return limsw[lim].l_tick(ios); 306298010Simp 307298010Simp return 0; 308298010Simp} 309298010Simp 310298010Simpstatic int 311298010Simpcam_iosched_limiter_iop(struct iop_stats *ios, struct bio *bp) 312298010Simp{ 313298010Simp int lim = ios->limiter; 314298010Simp 315298010Simp /* maybe this should be a kassert */ 316298010Simp if (lim < none || lim >= limiter_max) 317298010Simp return EINVAL; 318298010Simp 319298010Simp if (limsw[lim].l_iop) 320298010Simp return limsw[lim].l_iop(ios, bp); 321298010Simp 322298010Simp return 0; 323298010Simp} 324298010Simp 325298010Simpstatic int 326298010Simpcam_iosched_limiter_caniop(struct iop_stats *ios, struct bio *bp) 327298010Simp{ 328298010Simp int lim = ios->limiter; 329298010Simp 330298010Simp /* maybe this should be a kassert */ 331298010Simp if (lim < none || lim >= limiter_max) 332298010Simp return EINVAL; 333298010Simp 334298010Simp if (limsw[lim].l_caniop) 335298010Simp return limsw[lim].l_caniop(ios, bp); 336298010Simp 337298010Simp return 0; 338298010Simp} 339298010Simp 340298010Simpstatic int 341298010Simpcam_iosched_limiter_iodone(struct iop_stats *ios, struct bio *bp) 342298010Simp{ 343298010Simp int lim = 
ios->limiter; 344298010Simp 345298010Simp /* maybe this should be a kassert */ 346298010Simp if (lim < none || lim >= limiter_max) 347298010Simp return 0; 348298010Simp 349298010Simp if (limsw[lim].l_iodone) 350298010Simp return limsw[lim].l_iodone(ios, bp); 351298010Simp 352298010Simp return 0; 353298010Simp} 354298010Simp 355298010Simp/* 356298010Simp * Functions to implement the different kinds of limiters 357298010Simp */ 358298010Simp 359298010Simpstatic int 360298010Simpcam_iosched_qd_iop(struct iop_stats *ios, struct bio *bp) 361298010Simp{ 362298010Simp 363298010Simp if (ios->current <= 0 || ios->pending < ios->current) 364298010Simp return 0; 365298010Simp 366298010Simp return EAGAIN; 367298010Simp} 368298010Simp 369298010Simpstatic int 370298010Simpcam_iosched_qd_caniop(struct iop_stats *ios, struct bio *bp) 371298010Simp{ 372298010Simp 373298010Simp if (ios->current <= 0 || ios->pending < ios->current) 374298010Simp return 0; 375298010Simp 376298010Simp return EAGAIN; 377298010Simp} 378298010Simp 379298010Simpstatic int 380298010Simpcam_iosched_qd_iodone(struct iop_stats *ios, struct bio *bp) 381298010Simp{ 382298010Simp 383298010Simp if (ios->current <= 0 || ios->pending != ios->current) 384298010Simp return 0; 385298010Simp 386298010Simp return 1; 387298010Simp} 388298010Simp 389298010Simpstatic int 390298010Simpcam_iosched_iops_init(struct iop_stats *ios) 391298010Simp{ 392298010Simp 393298010Simp ios->l_value1 = ios->current / ios->softc->quanta; 394298010Simp if (ios->l_value1 <= 0) 395298010Simp ios->l_value1 = 1; 396298010Simp 397298010Simp return 0; 398298010Simp} 399298010Simp 400298010Simpstatic int 401298010Simpcam_iosched_iops_tick(struct iop_stats *ios) 402298010Simp{ 403298010Simp 404298010Simp ios->l_value1 = (int)((ios->current * (uint64_t)ios->softc->this_frac) >> 16); 405298010Simp if (ios->l_value1 <= 0) 406298010Simp ios->l_value1 = 1; 407298010Simp 408298010Simp return 0; 409298010Simp} 410298010Simp 411298010Simpstatic int 
412298010Simpcam_iosched_iops_caniop(struct iop_stats *ios, struct bio *bp) 413298010Simp{ 414298010Simp 415298010Simp /* 416298010Simp * So if we have any more IOPs left, allow it, 417298010Simp * otherwise wait. 418298010Simp */ 419298010Simp if (ios->l_value1 <= 0) 420298010Simp return EAGAIN; 421298010Simp return 0; 422298010Simp} 423298010Simp 424298010Simpstatic int 425298010Simpcam_iosched_iops_iop(struct iop_stats *ios, struct bio *bp) 426298010Simp{ 427298010Simp int rv; 428298010Simp 429298010Simp rv = cam_iosched_limiter_caniop(ios, bp); 430298010Simp if (rv == 0) 431298010Simp ios->l_value1--; 432298010Simp 433298010Simp return rv; 434298010Simp} 435298010Simp 436298010Simpstatic int 437298010Simpcam_iosched_bw_init(struct iop_stats *ios) 438298010Simp{ 439298010Simp 440298010Simp /* ios->current is in kB/s, so scale to bytes */ 441298010Simp ios->l_value1 = ios->current * 1000 / ios->softc->quanta; 442298010Simp 443298010Simp return 0; 444298010Simp} 445298010Simp 446298010Simpstatic int 447298010Simpcam_iosched_bw_tick(struct iop_stats *ios) 448298010Simp{ 449298010Simp int bw; 450298010Simp 451298010Simp /* 452298010Simp * If we're in the hole for available quota from 453298010Simp * the last time, then add the quantum for this. 454298010Simp * If we have any left over from last quantum, 455298010Simp * then too bad, that's lost. Also, ios->current 456298010Simp * is in kB/s, so scale. 457298010Simp * 458298010Simp * We also allow up to 4 quanta of credits to 459298010Simp * accumulate to deal with burstiness. 4 is extremely 460298010Simp * arbitrary. 
461298010Simp */ 462298010Simp bw = (int)((ios->current * 1000ull * (uint64_t)ios->softc->this_frac) >> 16); 463298010Simp if (ios->l_value1 < bw * 4) 464298010Simp ios->l_value1 += bw; 465298010Simp 466298010Simp return 0; 467298010Simp} 468298010Simp 469298010Simpstatic int 470298010Simpcam_iosched_bw_caniop(struct iop_stats *ios, struct bio *bp) 471298010Simp{ 472298010Simp /* 473298010Simp * So if we have any more bw quota left, allow it, 474298010Simp * otherwise wait. Not, we'll go negative and that's 475298010Simp * OK. We'll just get a lettle less next quota. 476298010Simp * 477298010Simp * Note on going negative: that allows us to process 478298010Simp * requests in order better, since we won't allow 479298010Simp * shorter reads to get around the long one that we 480298010Simp * don't have the quota to do just yet. It also prevents 481298010Simp * starvation by being a little more permissive about 482298010Simp * what we let through this quantum (to prevent the 483298010Simp * starvation), at the cost of getting a little less 484298010Simp * next quantum. 
485298010Simp */ 486298010Simp if (ios->l_value1 <= 0) 487298010Simp return EAGAIN; 488298010Simp 489298010Simp 490298010Simp return 0; 491298010Simp} 492298010Simp 493298010Simpstatic int 494298010Simpcam_iosched_bw_iop(struct iop_stats *ios, struct bio *bp) 495298010Simp{ 496298010Simp int rv; 497298010Simp 498298010Simp rv = cam_iosched_limiter_caniop(ios, bp); 499298010Simp if (rv == 0) 500298010Simp ios->l_value1 -= bp->bio_length; 501298010Simp 502298010Simp return rv; 503298010Simp} 504298010Simp 505298010Simpstatic void cam_iosched_cl_maybe_steer(struct control_loop *clp); 506298010Simp 507298010Simpstatic void 508298010Simpcam_iosched_ticker(void *arg) 509298010Simp{ 510298010Simp struct cam_iosched_softc *isc = arg; 511298010Simp sbintime_t now, delta; 512298010Simp 513334263Ssbruno callout_reset(&isc->ticker, hz / isc->quanta, cam_iosched_ticker, isc); 514298010Simp 515298010Simp now = sbinuptime(); 516298010Simp delta = now - isc->last_time; 517298010Simp isc->this_frac = (uint32_t)delta >> 16; /* Note: discards seconds -- should be 0 harmless if not */ 518298010Simp isc->last_time = now; 519298010Simp 520298010Simp cam_iosched_cl_maybe_steer(&isc->cl); 521298010Simp 522298010Simp cam_iosched_limiter_tick(&isc->read_stats); 523298010Simp cam_iosched_limiter_tick(&isc->write_stats); 524298010Simp cam_iosched_limiter_tick(&isc->trim_stats); 525298010Simp 526298010Simp cam_iosched_schedule(isc, isc->periph); 527298010Simp 528298010Simp isc->total_ticks++; 529298010Simp} 530298010Simp 531298010Simp 532298010Simpstatic void 533298010Simpcam_iosched_cl_init(struct control_loop *clp, struct cam_iosched_softc *isc) 534298010Simp{ 535298010Simp 536298010Simp clp->next_steer = sbinuptime(); 537298010Simp clp->softc = isc; 538298010Simp clp->steer_interval = SBT_1S * 5; /* Let's start out steering every 5s */ 539298010Simp clp->lolat = 5 * SBT_1MS; 540298010Simp clp->hilat = 15 * SBT_1MS; 541298010Simp clp->alpha = 20; /* Alpha == gain. 
20 = .2 */ 542298010Simp clp->type = set_max; 543298010Simp} 544298010Simp 545298010Simpstatic void 546298010Simpcam_iosched_cl_maybe_steer(struct control_loop *clp) 547298010Simp{ 548298010Simp struct cam_iosched_softc *isc; 549298010Simp sbintime_t now, lat; 550298010Simp int old; 551298010Simp 552298010Simp isc = clp->softc; 553298010Simp now = isc->last_time; 554298010Simp if (now < clp->next_steer) 555298010Simp return; 556298010Simp 557298010Simp clp->next_steer = now + clp->steer_interval; 558298010Simp switch (clp->type) { 559298010Simp case set_max: 560298010Simp if (isc->write_stats.current != isc->write_stats.max) 561298010Simp printf("Steering write from %d kBps to %d kBps\n", 562298010Simp isc->write_stats.current, isc->write_stats.max); 563298010Simp isc->read_stats.current = isc->read_stats.max; 564298010Simp isc->write_stats.current = isc->write_stats.max; 565298010Simp isc->trim_stats.current = isc->trim_stats.max; 566298010Simp break; 567298010Simp case read_latency: 568298010Simp old = isc->write_stats.current; 569298010Simp lat = isc->read_stats.ema; 570298010Simp /* 571298010Simp * Simple PLL-like engine. Since we're steering to a range for 572298010Simp * the SP (set point) that makes things a little more 573298010Simp * complicated. In addition, we're not directly controlling our 574298010Simp * PV (process variable), the read latency, but instead are 575298010Simp * manipulating the write bandwidth limit for our MV 576298010Simp * (manipulation variable), analysis of this code gets a bit 577298010Simp * messy. Also, the MV is a very noisy control surface for read 578298010Simp * latency since it is affected by many hidden processes inside 579298010Simp * the device which change how responsive read latency will be 580298010Simp * in reaction to changes in write bandwidth. Unlike the classic 581298010Simp * boiler control PLL. this may result in over-steering while 582298010Simp * the SSD takes its time to react to the new, lower load. 
This 583298010Simp * is why we use a relatively low alpha of between .1 and .25 to 584298010Simp * compensate for this effect. At .1, it takes ~22 steering 585298010Simp * intervals to back off by a factor of 10. At .2 it only takes 586298010Simp * ~10. At .25 it only takes ~8. However some preliminary data 587298010Simp * from the SSD drives suggests a reasponse time in 10's of 588298010Simp * seconds before latency drops regardless of the new write 589298010Simp * rate. Careful observation will be reqiured to tune this 590298010Simp * effectively. 591298010Simp * 592298010Simp * Also, when there's no read traffic, we jack up the write 593298010Simp * limit too regardless of the last read latency. 10 is 594298010Simp * somewhat arbitrary. 595298010Simp */ 596298010Simp if (lat < clp->lolat || isc->read_stats.total - clp->last_count < 10) 597298010Simp isc->write_stats.current = isc->write_stats.current * 598298010Simp (100 + clp->alpha) / 100; /* Scale up */ 599298010Simp else if (lat > clp->hilat) 600298010Simp isc->write_stats.current = isc->write_stats.current * 601298010Simp (100 - clp->alpha) / 100; /* Scale down */ 602298010Simp clp->last_count = isc->read_stats.total; 603298010Simp 604298010Simp /* 605298010Simp * Even if we don't steer, per se, enforce the min/max limits as 606298010Simp * those may have changed. 
607298010Simp */ 608298010Simp if (isc->write_stats.current < isc->write_stats.min) 609298010Simp isc->write_stats.current = isc->write_stats.min; 610298010Simp if (isc->write_stats.current > isc->write_stats.max) 611298010Simp isc->write_stats.current = isc->write_stats.max; 612298164Simp if (old != isc->write_stats.current && iosched_debug) 613298164Simp printf("Steering write from %d kBps to %d kBps due to latency of %jdms\n", 614298010Simp old, isc->write_stats.current, 615298164Simp (uintmax_t)((uint64_t)1000000 * (uint32_t)lat) >> 32); 616298010Simp break; 617298010Simp case cl_max: 618298010Simp break; 619298010Simp } 620298010Simp} 621298010Simp#endif 622298010Simp 623298010Simp /* Trim or similar currently pending completion */ 624298010Simp#define CAM_IOSCHED_FLAG_TRIM_ACTIVE (1ul << 0) 625298010Simp /* Callout active, and needs to be torn down */ 626298010Simp#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE (1ul << 1) 627298010Simp 628298010Simp /* Periph drivers set these flags to indicate work */ 629298010Simp#define CAM_IOSCHED_FLAG_WORK_FLAGS ((0xffffu) << 16) 630298010Simp 631302163Simp#ifdef CAM_IOSCHED_DYNAMIC 632298010Simpstatic void 633298010Simpcam_iosched_io_metric_update(struct cam_iosched_softc *isc, 634298010Simp sbintime_t sim_latency, int cmd, size_t size); 635298036Simp#endif 636298010Simp 637298010Simpstatic inline int 638298010Simpcam_iosched_has_flagged_work(struct cam_iosched_softc *isc) 639298010Simp{ 640298010Simp return !!(isc->flags & CAM_IOSCHED_FLAG_WORK_FLAGS); 641298010Simp} 642298010Simp 643298010Simpstatic inline int 644298010Simpcam_iosched_has_io(struct cam_iosched_softc *isc) 645298010Simp{ 646302163Simp#ifdef CAM_IOSCHED_DYNAMIC 647302396Simp if (do_dynamic_iosched) { 648298010Simp struct bio *rbp = bioq_first(&isc->bio_queue); 649298010Simp struct bio *wbp = bioq_first(&isc->write_queue); 650298010Simp int can_write = wbp != NULL && 651298010Simp cam_iosched_limiter_caniop(&isc->write_stats, wbp) == 0; 652298010Simp int 
can_read = rbp != NULL && 653298010Simp cam_iosched_limiter_caniop(&isc->read_stats, rbp) == 0; 654298010Simp if (iosched_debug > 2) { 655298010Simp printf("can write %d: pending_writes %d max_writes %d\n", can_write, isc->write_stats.pending, isc->write_stats.max); 656298010Simp printf("can read %d: read_stats.pending %d max_reads %d\n", can_read, isc->read_stats.pending, isc->read_stats.max); 657298010Simp printf("Queued reads %d writes %d\n", isc->read_stats.queued, isc->write_stats.queued); 658298010Simp } 659298010Simp return can_read || can_write; 660298010Simp } 661298010Simp#endif 662298010Simp return bioq_first(&isc->bio_queue) != NULL; 663298010Simp} 664298010Simp 665298010Simpstatic inline int 666298010Simpcam_iosched_has_more_trim(struct cam_iosched_softc *isc) 667298010Simp{ 668298010Simp return !(isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) && 669298010Simp bioq_first(&isc->trim_queue); 670298010Simp} 671298010Simp 672298010Simp#define cam_iosched_sort_queue(isc) ((isc)->sort_io_queue >= 0 ? 
\ 673298010Simp (isc)->sort_io_queue : cam_sort_io_queues) 674298010Simp 675298010Simp 676298010Simpstatic inline int 677298010Simpcam_iosched_has_work(struct cam_iosched_softc *isc) 678298010Simp{ 679302163Simp#ifdef CAM_IOSCHED_DYNAMIC 680298010Simp if (iosched_debug > 2) 681298010Simp printf("has work: %d %d %d\n", cam_iosched_has_io(isc), 682298010Simp cam_iosched_has_more_trim(isc), 683298010Simp cam_iosched_has_flagged_work(isc)); 684298010Simp#endif 685298010Simp 686298010Simp return cam_iosched_has_io(isc) || 687298010Simp cam_iosched_has_more_trim(isc) || 688298010Simp cam_iosched_has_flagged_work(isc); 689298010Simp} 690298010Simp 691302163Simp#ifdef CAM_IOSCHED_DYNAMIC 692298010Simpstatic void 693298010Simpcam_iosched_iop_stats_init(struct cam_iosched_softc *isc, struct iop_stats *ios) 694298010Simp{ 695298010Simp 696298010Simp ios->limiter = none; 697298010Simp cam_iosched_limiter_init(ios); 698298010Simp ios->in = 0; 699298010Simp ios->max = 300000; 700298010Simp ios->min = 1; 701298010Simp ios->out = 0; 702298010Simp ios->pending = 0; 703298010Simp ios->queued = 0; 704298010Simp ios->total = 0; 705298010Simp ios->ema = 0; 706298010Simp ios->emss = 0; 707298010Simp ios->sd = 0; 708298010Simp ios->softc = isc; 709298010Simp} 710298010Simp 711298010Simpstatic int 712298010Simpcam_iosched_limiter_sysctl(SYSCTL_HANDLER_ARGS) 713298010Simp{ 714298010Simp char buf[16]; 715298010Simp struct iop_stats *ios; 716298010Simp struct cam_iosched_softc *isc; 717298010Simp int value, i, error, cantick; 718298010Simp const char *p; 719298010Simp 720298010Simp ios = arg1; 721298010Simp isc = ios->softc; 722298010Simp value = ios->limiter; 723298010Simp if (value < none || value >= limiter_max) 724298010Simp p = "UNKNOWN"; 725298010Simp else 726298010Simp p = cam_iosched_limiter_names[value]; 727298010Simp 728298010Simp strlcpy(buf, p, sizeof(buf)); 729298010Simp error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 730298010Simp if (error != 0 || req->newptr == 
NULL) 731298010Simp return error; 732298010Simp 733298010Simp cam_periph_lock(isc->periph); 734298010Simp 735298010Simp for (i = none; i < limiter_max; i++) { 736298010Simp if (strcmp(buf, cam_iosched_limiter_names[i]) != 0) 737298010Simp continue; 738298010Simp ios->limiter = i; 739298010Simp error = cam_iosched_limiter_init(ios); 740298010Simp if (error != 0) { 741298010Simp ios->limiter = value; 742298010Simp cam_periph_unlock(isc->periph); 743298010Simp return error; 744298010Simp } 745298010Simp cantick = !!limsw[isc->read_stats.limiter].l_tick + 746298010Simp !!limsw[isc->write_stats.limiter].l_tick + 747298010Simp !!limsw[isc->trim_stats.limiter].l_tick + 748298010Simp 1; /* Control loop requires it */ 749298010Simp if (isc->flags & CAM_IOSCHED_FLAG_CALLOUT_ACTIVE) { 750298010Simp if (cantick == 0) { 751298010Simp callout_stop(&isc->ticker); 752298010Simp isc->flags &= ~CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; 753298010Simp } 754298010Simp } else { 755298010Simp if (cantick != 0) { 756334229Ssbruno callout_reset(&isc->ticker, hz / isc->quanta, cam_iosched_ticker, isc); 757298010Simp isc->flags |= CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; 758298010Simp } 759298010Simp } 760298010Simp 761298010Simp cam_periph_unlock(isc->periph); 762298010Simp return 0; 763298010Simp } 764298010Simp 765298010Simp cam_periph_unlock(isc->periph); 766298010Simp return EINVAL; 767298010Simp} 768298010Simp 769298010Simpstatic int 770298010Simpcam_iosched_control_type_sysctl(SYSCTL_HANDLER_ARGS) 771298010Simp{ 772298010Simp char buf[16]; 773298010Simp struct control_loop *clp; 774298010Simp struct cam_iosched_softc *isc; 775298010Simp int value, i, error; 776298010Simp const char *p; 777298010Simp 778298010Simp clp = arg1; 779298010Simp isc = clp->softc; 780298010Simp value = clp->type; 781298010Simp if (value < none || value >= cl_max) 782298010Simp p = "UNKNOWN"; 783298010Simp else 784298010Simp p = cam_iosched_control_type_names[value]; 785298010Simp 786298010Simp strlcpy(buf, p, sizeof(buf)); 
787298010Simp error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 788298010Simp if (error != 0 || req->newptr == NULL) 789298010Simp return error; 790298010Simp 791298010Simp for (i = set_max; i < cl_max; i++) { 792298010Simp if (strcmp(buf, cam_iosched_control_type_names[i]) != 0) 793298010Simp continue; 794298010Simp cam_periph_lock(isc->periph); 795298010Simp clp->type = i; 796298010Simp cam_periph_unlock(isc->periph); 797298010Simp return 0; 798298010Simp } 799298010Simp 800298010Simp return EINVAL; 801298010Simp} 802298010Simp 803298010Simpstatic int 804298010Simpcam_iosched_sbintime_sysctl(SYSCTL_HANDLER_ARGS) 805298010Simp{ 806298010Simp char buf[16]; 807298010Simp sbintime_t value; 808298010Simp int error; 809298010Simp uint64_t us; 810298010Simp 811298010Simp value = *(sbintime_t *)arg1; 812298010Simp us = (uint64_t)value / SBT_1US; 813298010Simp snprintf(buf, sizeof(buf), "%ju", (intmax_t)us); 814298010Simp error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 815298010Simp if (error != 0 || req->newptr == NULL) 816298010Simp return error; 817298010Simp us = strtoul(buf, NULL, 10); 818298010Simp if (us == 0) 819298010Simp return EINVAL; 820298010Simp *(sbintime_t *)arg1 = us * SBT_1US; 821298010Simp return 0; 822298010Simp} 823298010Simp 824334229Ssbrunostatic int 825334229Ssbrunocam_iosched_quanta_sysctl(SYSCTL_HANDLER_ARGS) 826334229Ssbruno{ 827334229Ssbruno int *quanta; 828334229Ssbruno int error, value; 829334229Ssbruno 830334229Ssbruno quanta = (unsigned *)arg1; 831334229Ssbruno value = *quanta; 832334229Ssbruno 833334229Ssbruno error = sysctl_handle_int(oidp, (int *)&value, 0, req); 834334229Ssbruno if ((error != 0) || (req->newptr == NULL)) 835334229Ssbruno return (error); 836334229Ssbruno 837334229Ssbruno if (value < 1 || value > hz) 838334229Ssbruno return (EINVAL); 839334229Ssbruno 840334229Ssbruno *quanta = value; 841334229Ssbruno 842334229Ssbruno return (0); 843334229Ssbruno} 844334229Ssbruno 845298010Simpstatic void 
cam_iosched_iop_stats_sysctl_init(struct cam_iosched_softc *isc, struct iop_stats *ios, char *name)
{
	struct sysctl_oid_list *n;
	struct sysctl_ctx_list *ctx;

	/*
	 * Create a sysctl subtree named after this I/O type ("read",
	 * "write" or "trim") under the scheduler's tree and expose this
	 * iop_stats' counters and limiter knobs in it.
	 */
	ios->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx,
	    SYSCTL_CHILDREN(isc->sysctl_tree), OID_AUTO, name,
	    CTLFLAG_RD, 0, name);
	n = SYSCTL_CHILDREN(ios->sysctl_tree);
	ctx = &ios->sysctl_ctx;

	/* Latency statistics (read-only). */
	SYSCTL_ADD_UQUAD(ctx, n,
	    OID_AUTO, "ema", CTLFLAG_RD,
	    &ios->ema,
	    "Fast Exponentially Weighted Moving Average");
	SYSCTL_ADD_UQUAD(ctx, n,
	    OID_AUTO, "emss", CTLFLAG_RD,
	    &ios->emss,
	    "Fast Exponentially Weighted Moving Sum of Squares (maybe wrong)");
	SYSCTL_ADD_UQUAD(ctx, n,
	    OID_AUTO, "sd", CTLFLAG_RD,
	    &ios->sd,
	    "Estimated SD for fast ema (may be wrong)");

	/* Transaction counters (read-only). */
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "pending", CTLFLAG_RD,
	    &ios->pending, 0,
	    "Instantaneous # of pending transactions");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "count", CTLFLAG_RD,
	    &ios->total, 0,
	    "# of transactions submitted to hardware");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "queued", CTLFLAG_RD,
	    &ios->queued, 0,
	    "# of transactions in the queue");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "in", CTLFLAG_RD,
	    &ios->in, 0,
	    "# of transactions queued to driver");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "out", CTLFLAG_RD,
	    &ios->out, 0,
	    "# of transactions completed");

	/* Limiter selection and its tunables (read-write). */
	SYSCTL_ADD_PROC(ctx, n,
	    OID_AUTO, "limiter", CTLTYPE_STRING | CTLFLAG_RW,
	    ios, 0, cam_iosched_limiter_sysctl, "A",
	    "Current limiting type.");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "min", CTLFLAG_RW,
	    &ios->min, 0,
	    "min resource");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "max", CTLFLAG_RW,
	    &ios->max, 0,
	    "max resource");
	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "current", CTLFLAG_RW,
	    &ios->current, 0,
	    "current resource");

}

/*
 * Tear down the per-iop_stats sysctl subtree created by
 * cam_iosched_iop_stats_sysctl_init(), if it was ever created.
 */
static void
cam_iosched_iop_stats_fini(struct iop_stats *ios)
{
	if (ios->sysctl_tree)
		if (sysctl_ctx_free(&ios->sysctl_ctx) != 0)
			printf("can't remove iosched sysctl stats context\n");
}

/*
 * Register the control loop's sysctl subtree ("control"): algorithm
 * type, steering interval, latency water marks and PLL gain.
 */
static void
cam_iosched_cl_sysctl_init(struct cam_iosched_softc *isc)
{
	struct sysctl_oid_list *n;
	struct sysctl_ctx_list *ctx;
	struct control_loop *clp;

	clp = &isc->cl;
	clp->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx,
	    SYSCTL_CHILDREN(isc->sysctl_tree), OID_AUTO, "control",
	    CTLFLAG_RD, 0, "Control loop info");
	n = SYSCTL_CHILDREN(clp->sysctl_tree);
	ctx = &clp->sysctl_ctx;

	SYSCTL_ADD_PROC(ctx, n,
	    OID_AUTO, "type", CTLTYPE_STRING | CTLFLAG_RW,
	    clp, 0, cam_iosched_control_type_sysctl, "A",
	    "Control loop algorithm");
	SYSCTL_ADD_PROC(ctx, n,
	    OID_AUTO, "steer_interval", CTLTYPE_STRING | CTLFLAG_RW,
	    &clp->steer_interval, 0, cam_iosched_sbintime_sysctl, "A",
	    "How often to steer (in us)");
	SYSCTL_ADD_PROC(ctx, n,
	    OID_AUTO, "lolat", CTLTYPE_STRING | CTLFLAG_RW,
	    &clp->lolat, 0, cam_iosched_sbintime_sysctl,
"A", 943298010Simp "Low water mark for Latency (in us)"); 944298010Simp SYSCTL_ADD_PROC(ctx, n, 945298010Simp OID_AUTO, "hilat", CTLTYPE_STRING | CTLFLAG_RW, 946298010Simp &clp->hilat, 0, cam_iosched_sbintime_sysctl, "A", 947298010Simp "Hi water mark for Latency (in us)"); 948298010Simp SYSCTL_ADD_INT(ctx, n, 949298010Simp OID_AUTO, "alpha", CTLFLAG_RW, 950298010Simp &clp->alpha, 0, 951298010Simp "Alpha for PLL (x100) aka gain"); 952298010Simp} 953298010Simp 954298010Simpstatic void 955298010Simpcam_iosched_cl_sysctl_fini(struct control_loop *clp) 956298010Simp{ 957298010Simp if (clp->sysctl_tree) 958298010Simp if (sysctl_ctx_free(&clp->sysctl_ctx) != 0) 959298010Simp printf("can't remove iosched sysctl control loop context\n"); 960298010Simp} 961298010Simp#endif 962298010Simp 963298010Simp/* 964298010Simp * Allocate the iosched structure. This also insulates callers from knowing 965298010Simp * sizeof struct cam_iosched_softc. 966298010Simp */ 967298010Simpint 968298010Simpcam_iosched_init(struct cam_iosched_softc **iscp, struct cam_periph *periph) 969298010Simp{ 970298010Simp 971298010Simp *iscp = malloc(sizeof(**iscp), M_CAMSCHED, M_NOWAIT | M_ZERO); 972298010Simp if (*iscp == NULL) 973298010Simp return ENOMEM; 974302163Simp#ifdef CAM_IOSCHED_DYNAMIC 975298010Simp if (iosched_debug) 976298010Simp printf("CAM IOSCHEDULER Allocating entry at %p\n", *iscp); 977298010Simp#endif 978298010Simp (*iscp)->sort_io_queue = -1; 979298010Simp bioq_init(&(*iscp)->bio_queue); 980298010Simp bioq_init(&(*iscp)->trim_queue); 981302163Simp#ifdef CAM_IOSCHED_DYNAMIC 982302396Simp if (do_dynamic_iosched) { 983298010Simp bioq_init(&(*iscp)->write_queue); 984298010Simp (*iscp)->read_bias = 100; 985298010Simp (*iscp)->current_read_bias = 100; 986298010Simp (*iscp)->quanta = 200; 987298010Simp cam_iosched_iop_stats_init(*iscp, &(*iscp)->read_stats); 988298010Simp cam_iosched_iop_stats_init(*iscp, &(*iscp)->write_stats); 989298010Simp cam_iosched_iop_stats_init(*iscp, 
&(*iscp)->trim_stats); 990298010Simp (*iscp)->trim_stats.max = 1; /* Trims are special: one at a time for now */ 991298010Simp (*iscp)->last_time = sbinuptime(); 992298010Simp callout_init_mtx(&(*iscp)->ticker, cam_periph_mtx(periph), 0); 993298010Simp (*iscp)->periph = periph; 994298010Simp cam_iosched_cl_init(&(*iscp)->cl, *iscp); 995334229Ssbruno callout_reset(&(*iscp)->ticker, hz / (*iscp)->quanta, cam_iosched_ticker, *iscp); 996298010Simp (*iscp)->flags |= CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; 997298010Simp } 998298010Simp#endif 999298010Simp 1000298010Simp return 0; 1001298010Simp} 1002298010Simp 1003298010Simp/* 1004298010Simp * Reclaim all used resources. This assumes that other folks have 1005298010Simp * drained the requests in the hardware. Maybe an unwise assumption. 1006298010Simp */ 1007298010Simpvoid 1008298010Simpcam_iosched_fini(struct cam_iosched_softc *isc) 1009298010Simp{ 1010298010Simp if (isc) { 1011298010Simp cam_iosched_flush(isc, NULL, ENXIO); 1012302163Simp#ifdef CAM_IOSCHED_DYNAMIC 1013298010Simp cam_iosched_iop_stats_fini(&isc->read_stats); 1014298010Simp cam_iosched_iop_stats_fini(&isc->write_stats); 1015298010Simp cam_iosched_iop_stats_fini(&isc->trim_stats); 1016298010Simp cam_iosched_cl_sysctl_fini(&isc->cl); 1017298010Simp if (isc->sysctl_tree) 1018298010Simp if (sysctl_ctx_free(&isc->sysctl_ctx) != 0) 1019298010Simp printf("can't remove iosched sysctl stats context\n"); 1020298010Simp if (isc->flags & CAM_IOSCHED_FLAG_CALLOUT_ACTIVE) { 1021298010Simp callout_drain(&isc->ticker); 1022298010Simp isc->flags &= ~ CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; 1023298010Simp } 1024298010Simp 1025298010Simp#endif 1026298010Simp free(isc, M_CAMSCHED); 1027298010Simp } 1028298010Simp} 1029298010Simp 1030298010Simp/* 1031298010Simp * After we're sure we're attaching a device, go ahead and add 1032298010Simp * hooks for any sysctl we may wish to honor. 
 */
void cam_iosched_sysctl_init(struct cam_iosched_softc *isc,
    struct sysctl_ctx_list *ctx, struct sysctl_oid *node)
{
#ifdef CAM_IOSCHED_DYNAMIC
	struct sysctl_oid_list *n;
#endif

	/* Always present, regardless of which scheduler is in use. */
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node),
	    OID_AUTO, "sort_io_queue", CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &isc->sort_io_queue, 0,
	    "Sort IO queue to try and optimise disk access patterns");

#ifdef CAM_IOSCHED_DYNAMIC
	if (!do_dynamic_iosched)
		return;

	/* The dynamic scheduler gets its own "iosched" subtree of stats/knobs. */
	isc->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx,
	    SYSCTL_CHILDREN(node), OID_AUTO, "iosched",
	    CTLFLAG_RD, 0, "I/O scheduler statistics");
	n = SYSCTL_CHILDREN(isc->sysctl_tree);
	ctx = &isc->sysctl_ctx;

	cam_iosched_iop_stats_sysctl_init(isc, &isc->read_stats, "read");
	cam_iosched_iop_stats_sysctl_init(isc, &isc->write_stats, "write");
	cam_iosched_iop_stats_sysctl_init(isc, &isc->trim_stats, "trim");
	cam_iosched_cl_sysctl_init(isc);

	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "read_bias", CTLFLAG_RW,
	    &isc->read_bias, 100,
	    "How biased towards read should we be independent of limits");

	/* New values are validated by cam_iosched_quanta_sysctl (1..hz). */
	SYSCTL_ADD_PROC(ctx, n,
	    OID_AUTO, "quanta", CTLTYPE_UINT | CTLFLAG_RW,
	    &isc->quanta, 0, cam_iosched_quanta_sysctl, "I",
	    "How many quanta per second do we slice the I/O up into");

	SYSCTL_ADD_INT(ctx, n,
	    OID_AUTO, "total_ticks", CTLFLAG_RD,
	    &isc->total_ticks, 0,
	    "Total number of ticks we've done");
#endif
}

/*
 * Flush outstanding I/O. Consumers of this library don't know all the
 * queues we may keep, so this allows all I/O to be flushed in one
 * convenient call.
 */
void
cam_iosched_flush(struct cam_iosched_softc *isc, struct devstat *stp, int err)
{
	bioq_flush(&isc->bio_queue, stp, err);
	bioq_flush(&isc->trim_queue, stp, err);
#ifdef CAM_IOSCHED_DYNAMIC
	if (do_dynamic_iosched)
		bioq_flush(&isc->write_queue, stp, err);
#endif
}

#ifdef CAM_IOSCHED_DYNAMIC
/*
 * Return the next write to push into the SIM, or NULL if writes should
 * be held back for now (nothing queued, read bias defers us, or the
 * write limiter says no).
 */
static struct bio *
cam_iosched_get_write(struct cam_iosched_softc *isc)
{
	struct bio *bp;

	/*
	 * We control the write rate by controlling how many requests we send
	 * down to the drive at any one time. Fewer requests limits the
	 * effects of both starvation when the requests take a while and write
	 * amplification when each request is causing more than one write to
	 * the NAND media. Limiting the queue depth like this will also limit
	 * the write throughput, which gives reads that want to compete a
	 * chance to compete fairly.
	 */
	bp = bioq_first(&isc->write_queue);
	if (bp == NULL) {
		if (iosched_debug > 3)
			printf("No writes present in write_queue\n");
		return NULL;
	}

	/*
	 * If pending read, prefer that based on current read bias
	 * setting; each deferral consumes one unit of the bias budget.
	 */
	if (bioq_first(&isc->bio_queue) && isc->current_read_bias) {
		if (iosched_debug)
			printf("Reads present and current_read_bias is %d queued writes %d queued reads %d\n", isc->current_read_bias, isc->write_stats.queued, isc->read_stats.queued);
		isc->current_read_bias--;
		return NULL;
	}

	/*
	 * See if our current limiter allows this I/O.
	 */
	if (cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) {
		if (iosched_debug)
			printf("Can't write because limiter says no.\n");
		return NULL;
	}

	/*
	 * Let's do this: We've passed all the gates and we're a go
	 * to schedule the I/O in the SIM. Recharge the read bias budget.
	 */
	isc->current_read_bias = isc->read_bias;
	bioq_remove(&isc->write_queue, bp);
	/* BIO_FLUSH also travels this queue but isn't counted in write stats. */
	if (bp->bio_cmd == BIO_WRITE) {
		isc->write_stats.queued--;
		isc->write_stats.total++;
		isc->write_stats.pending++;
	}
	if (iosched_debug > 9)
		printf("HWQ : %p %#x\n", bp, bp->bio_cmd);
	return bp;
}
#endif

/*
 * Put back a trim that you weren't able to actually schedule this time.
 */
void
cam_iosched_put_back_trim(struct cam_iosched_softc *isc, struct bio *bp)
{
	/* Head insertion keeps the returned trim first in line. */
	bioq_insert_head(&isc->trim_queue, bp);
#ifdef CAM_IOSCHED_DYNAMIC
	isc->trim_stats.queued++;
	isc->trim_stats.total--;	/* since we put it back, don't double count */
	isc->trim_stats.pending--;
#endif
}

/*
 * gets the next trim from the trim queue.
 *
 * Assumes we're called with the periph lock held. It removes this
 * trim from the queue and the device must explicitly reinsert it
 * should the need arise.
 */
struct bio *
cam_iosched_next_trim(struct cam_iosched_softc *isc)
{
	struct bio *bp;

	bp = bioq_first(&isc->trim_queue);
	if (bp == NULL)
		return NULL;
	bioq_remove(&isc->trim_queue, bp);
#ifdef CAM_IOSCHED_DYNAMIC
	isc->trim_stats.queued--;
	isc->trim_stats.total++;
	isc->trim_stats.pending++;
#endif
	return bp;
}

/*
 * gets an available trim from the trim queue, if there's no trim
 * already pending. It removes this trim from the queue and the device
 * must explicitly reinsert it should the need arise.
 *
 * Assumes we're called with the periph lock held.
 */
struct bio *
cam_iosched_get_trim(struct cam_iosched_softc *isc)
{

	if (!cam_iosched_has_more_trim(isc))
		return NULL;

	return cam_iosched_next_trim(isc);
}

/*
 * Determine what the next bit of work to do is for the periph. The
 * default implementation looks to see if we have trims to do, but no
 * trims outstanding. If so, we do that. Otherwise we see if we have
 * other work. If we do, then we do that. Otherwise why were we called?
 */
struct bio *
cam_iosched_next_bio(struct cam_iosched_softc *isc)
{
	struct bio *bp;

	/*
	 * See if we have a trim that can be scheduled. We can only send one
	 * at a time down, so this takes that into account.
	 *
	 * XXX newer TRIM commands are queueable. Revisit this when we
	 * implement them.
	 */
	if ((bp = cam_iosched_get_trim(isc)) != NULL)
		return bp;

#ifdef CAM_IOSCHED_DYNAMIC
	/*
	 * See if we have any pending writes, and room in the queue for them,
	 * and if so, those are next.
	 */
	if (do_dynamic_iosched) {
		if ((bp = cam_iosched_get_write(isc)) != NULL)
			return bp;
	}
#endif

	/*
	 * next, see if there's other, normal I/O waiting. If so return that.
 */
	if ((bp = bioq_first(&isc->bio_queue)) == NULL)
		return NULL;

#ifdef CAM_IOSCHED_DYNAMIC
	/*
	 * For the netflix scheduler, bio_queue is only for reads, so enforce
	 * the limits here. Enforce only for reads.
	 */
	if (do_dynamic_iosched) {
		if (bp->bio_cmd == BIO_READ &&
		    cam_iosched_limiter_iop(&isc->read_stats, bp) != 0)
			return NULL;
	}
#endif
	bioq_remove(&isc->bio_queue, bp);
#ifdef CAM_IOSCHED_DYNAMIC
	if (do_dynamic_iosched) {
		if (bp->bio_cmd == BIO_READ) {
			isc->read_stats.queued--;
			isc->read_stats.total++;
			isc->read_stats.pending++;
		} else
			/* Non-reads on bio_queue are unexpected under the dynamic scheduler. */
			printf("Found bio_cmd = %#x\n", bp->bio_cmd);
	}
	if (iosched_debug > 9)
		printf("HWQ : %p %#x\n", bp, bp->bio_cmd);
#endif
	return bp;
}

/*
 * Driver has been given some work to do by the block layer. Tell the
 * scheduler about it and have it queue the work up. The scheduler module
 * will then return the currently most useful bit of work later, possibly
 * deferring work for various reasons.
 */
void
cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp)
{

	/*
	 * Put all trims on the trim queue sorted, since we know
	 * that the collapsing code requires this. Otherwise put
	 * the work on the bio queue.
	 */
	if (bp->bio_cmd == BIO_DELETE) {
		bioq_disksort(&isc->trim_queue, bp);
#ifdef CAM_IOSCHED_DYNAMIC
		isc->trim_stats.in++;
		isc->trim_stats.queued++;
#endif
	}
#ifdef CAM_IOSCHED_DYNAMIC
	/* Dynamic scheduler: writes and flushes go to the separate write queue. */
	else if (do_dynamic_iosched &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		if (cam_iosched_sort_queue(isc))
			bioq_disksort(&isc->write_queue, bp);
		else
			bioq_insert_tail(&isc->write_queue, bp);
		if (iosched_debug > 9)
			printf("Qw : %p %#x\n", bp, bp->bio_cmd);
		if (bp->bio_cmd == BIO_WRITE) {
			isc->write_stats.in++;
			isc->write_stats.queued++;
		}
	}
#endif
	else {
		if (cam_iosched_sort_queue(isc))
			bioq_disksort(&isc->bio_queue, bp);
		else
			bioq_insert_tail(&isc->bio_queue, bp);
#ifdef CAM_IOSCHED_DYNAMIC
		if (iosched_debug > 9)
			printf("Qr : %p %#x\n", bp, bp->bio_cmd);
		if (bp->bio_cmd == BIO_READ) {
			isc->read_stats.in++;
			isc->read_stats.queued++;
		} else if (bp->bio_cmd == BIO_WRITE) {
			isc->write_stats.in++;
			isc->write_stats.queued++;
		}
#endif
	}
}

/*
 * If we have work, get it scheduled. Called with the periph lock held.
 */
void
cam_iosched_schedule(struct cam_iosched_softc *isc, struct cam_periph *periph)
{

	if (cam_iosched_has_work(isc))
		xpt_schedule(periph, CAM_PRIORITY_NORMAL);
}

/*
 * Complete a trim request
 */
void
cam_iosched_trim_done(struct cam_iosched_softc *isc)
{

	isc->flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE;
}

/*
 * Complete a bio. Called before we release the ccb with xpt_release_ccb so we
 * might use notes in the ccb for statistics.
 */
int
cam_iosched_bio_complete(struct cam_iosched_softc *isc, struct bio *bp,
    union ccb *done_ccb)
{
	int retval = 0;
#ifdef CAM_IOSCHED_DYNAMIC
	if (!do_dynamic_iosched)
		return retval;

	if (iosched_debug > 10)
		printf("done: %p %#x\n", bp, bp->bio_cmd);
	if (bp->bio_cmd == BIO_WRITE) {
		retval = cam_iosched_limiter_iodone(&isc->write_stats, bp);
		isc->write_stats.out++;
		isc->write_stats.pending--;
	} else if (bp->bio_cmd == BIO_READ) {
		retval = cam_iosched_limiter_iodone(&isc->read_stats, bp);
		isc->read_stats.out++;
		isc->read_stats.pending--;
	} else if (bp->bio_cmd == BIO_DELETE) {
		isc->trim_stats.out++;
		isc->trim_stats.pending--;
	} else if (bp->bio_cmd != BIO_FLUSH) {
		if (iosched_debug)
			printf("Completing command with bio_cmd == %#x\n", bp->bio_cmd);
	}

	/* Only fold successful I/O into the latency statistics. */
	if (!(bp->bio_flags & BIO_ERROR))
		cam_iosched_io_metric_update(isc, done_ccb->ccb_h.qos.sim_data,
		    bp->bio_cmd, bp->bio_bcount);
#endif
	return retval;
}

/*
 * Tell the io scheduler that you've pushed a trim down into the sim.
 * xxx better place for this?
 */
void
cam_iosched_submit_trim(struct cam_iosched_softc *isc)
{

	isc->flags |= CAM_IOSCHED_FLAG_TRIM_ACTIVE;
}

/*
 * Change the sorting policy hint for I/O transactions for this device.
 */
void
cam_iosched_set_sort_queue(struct cam_iosched_softc *isc, int val)
{

	isc->sort_io_queue = val;
}

/* Test whether any of the given flags are set on the scheduler. */
int
cam_iosched_has_work_flags(struct cam_iosched_softc *isc, uint32_t flags)
{
	return isc->flags & flags;
}

/* Set the given scheduler flags. */
void
cam_iosched_set_work_flags(struct cam_iosched_softc *isc, uint32_t flags)
{
	isc->flags |= flags;
}

/* Clear the given scheduler flags. */
void
cam_iosched_clr_work_flags(struct cam_iosched_softc *isc, uint32_t flags)
{
	isc->flags &= ~flags;
}

#ifdef CAM_IOSCHED_DYNAMIC
/*
 * After the method presented in Jack Crenshaw's 1998 article "Integer
 * Square Roots," reprinted at
 * http://www.embedded.com/electronics-blogs/programmer-s-toolbox/4219659/Integer-Square-Roots
 * and well worth the read. Briefly, we find the power of 4 that's the
 * largest smaller than val. We then check each smaller power of 4 to
 * see if val is still bigger.
The right shifts at each step divide 1435298010Simp * the result by 2 which after successive application winds up 1436298010Simp * accumulating the right answer. It could also have been accumulated 1437298010Simp * using a separate root counter, but this code is smaller and faster 1438298010Simp * than that method. This method is also integer size invariant. 1439298010Simp * It returns floor(sqrt((float)val)), or the larget integer less than 1440298010Simp * or equal to the square root. 1441298010Simp */ 1442298010Simpstatic uint64_t 1443298010Simpisqrt64(uint64_t val) 1444298010Simp{ 1445298010Simp uint64_t res = 0; 1446298010Simp uint64_t bit = 1ULL << (sizeof(uint64_t) * NBBY - 2); 1447298010Simp 1448298010Simp /* 1449298010Simp * Find the largest power of 4 smaller than val. 1450298010Simp */ 1451298010Simp while (bit > val) 1452298010Simp bit >>= 2; 1453298010Simp 1454298010Simp /* 1455298010Simp * Accumulate the answer, one bit at a time (we keep moving 1456298010Simp * them over since 2 is the square root of 4 and we test 1457298010Simp * powers of 4). We accumulate where we find the bit, but 1458298010Simp * the successive shifts land the bit in the right place 1459298010Simp * by the end. 1460298010Simp */ 1461298010Simp while (bit != 0) { 1462298010Simp if (val >= res + bit) { 1463298010Simp val -= res + bit; 1464298010Simp res = (res >> 1) + bit; 1465298010Simp } else 1466298010Simp res >>= 1; 1467298010Simp bit >>= 2; 1468298010Simp } 1469298010Simp 1470298010Simp return res; 1471298010Simp} 1472298010Simp 1473298010Simp/* 1474298010Simp * a and b are 32.32 fixed point stored in a 64-bit word. 1475298010Simp * Let al and bl be the .32 part of a and b. 1476298010Simp * Let ah and bh be the 32 part of a and b. 
1477298010Simp * R is the radix and is 1 << 32 1478298010Simp * 1479298010Simp * a * b 1480298010Simp * (ah + al / R) * (bh + bl / R) 1481298010Simp * ah * bh + (al * bh + ah * bl) / R + al * bl / R^2 1482298010Simp * 1483298010Simp * After multiplicaiton, we have to renormalize by multiply by 1484298010Simp * R, so we wind up with 1485298010Simp * ah * bh * R + al * bh + ah * bl + al * bl / R 1486298010Simp * which turns out to be a very nice way to compute this value 1487298010Simp * so long as ah and bh are < 65536 there's no loss of high bits 1488298010Simp * and the low order bits are below the threshold of caring for 1489298010Simp * this application. 1490298010Simp */ 1491298010Simpstatic uint64_t 1492298010Simpmul(uint64_t a, uint64_t b) 1493298010Simp{ 1494298010Simp uint64_t al, ah, bl, bh; 1495298010Simp al = a & 0xffffffff; 1496298010Simp ah = a >> 32; 1497298010Simp bl = b & 0xffffffff; 1498298010Simp bh = b >> 32; 1499298010Simp return ((ah * bh) << 32) + al * bh + ah * bl + ((al * bl) >> 32); 1500298010Simp} 1501298010Simp 1502298010Simpstatic void 1503298010Simpcam_iosched_update(struct iop_stats *iop, sbintime_t sim_latency) 1504298010Simp{ 1505298010Simp sbintime_t y, yy; 1506298010Simp uint64_t var; 1507298010Simp 1508298010Simp /* 1509298010Simp * Classic expoentially decaying average with a tiny alpha 1510298010Simp * (2 ^ -alpha_bits). For more info see the NIST statistical 1511298010Simp * handbook. 1512298010Simp * 1513298010Simp * ema_t = y_t * alpha + ema_t-1 * (1 - alpha) 1514298010Simp * alpha = 1 / (1 << alpha_bits) 1515298010Simp * 1516298010Simp * Since alpha is a power of two, we can compute this w/o any mult or 1517298010Simp * division. 
1518298010Simp */ 1519298010Simp y = sim_latency; 1520298010Simp iop->ema = (y + (iop->ema << alpha_bits) - iop->ema) >> alpha_bits; 1521298010Simp 1522298010Simp yy = mul(y, y); 1523298010Simp iop->emss = (yy + (iop->emss << alpha_bits) - iop->emss) >> alpha_bits; 1524298010Simp 1525298010Simp /* 1526298010Simp * s_1 = sum of data 1527298010Simp * s_2 = sum of data * data 1528298010Simp * ema ~ mean (or s_1 / N) 1529298010Simp * emss ~ s_2 / N 1530298010Simp * 1531298010Simp * sd = sqrt((N * s_2 - s_1 ^ 2) / (N * (N - 1))) 1532298010Simp * sd = sqrt((N * s_2 / N * (N - 1)) - (s_1 ^ 2 / (N * (N - 1)))) 1533298010Simp * 1534298010Simp * N ~ 2 / alpha - 1 1535298010Simp * alpha < 1 / 16 (typically much less) 1536298010Simp * N > 31 --> N large so N * (N - 1) is approx N * N 1537298010Simp * 1538298010Simp * substituting and rearranging: 1539298010Simp * sd ~ sqrt(s_2 / N - (s_1 / N) ^ 2) 1540298010Simp * ~ sqrt(emss - ema ^ 2); 1541298010Simp * which is the formula used here to get a decent estimate of sd which 1542298010Simp * we use to detect outliers. Note that when first starting up, it 1543298010Simp * takes a while for emss sum of squares estimator to converge on a 1544298010Simp * good value. during this time, it can be less than ema^2. We 1545298010Simp * compute a sd of 0 in that case, and ignore outliers. 1546298010Simp */ 1547298010Simp var = iop->emss - mul(iop->ema, iop->ema); 1548298010Simp iop->sd = (int64_t)var < 0 ? 0 : isqrt64(var); 1549298010Simp} 1550298010Simp 1551302163Simp#ifdef CAM_IOSCHED_DYNAMIC 1552298010Simpstatic void 1553298010Simpcam_iosched_io_metric_update(struct cam_iosched_softc *isc, 1554298010Simp sbintime_t sim_latency, int cmd, size_t size) 1555298010Simp{ 1556298010Simp /* xxx Do we need to scale based on the size of the I/O ? 
 */
	switch (cmd) {
	case BIO_READ:
		cam_iosched_update(&isc->read_stats, sim_latency);
		break;
	case BIO_WRITE:
		cam_iosched_update(&isc->write_stats, sim_latency);
		break;
	case BIO_DELETE:
		cam_iosched_update(&isc->trim_stats, sim_latency);
		break;
	default:
		/* Other commands (e.g. BIO_FLUSH) don't feed the latency stats. */
		break;
	}
}
#endif

#ifdef DDB
/*
 * Count the bios on a bio queue. Debugger helper; O(n) walk, no locking.
 */
static int biolen(struct bio_queue_head *bq)
{
	int i = 0;
	struct bio *bp;

	TAILQ_FOREACH(bp, &bq->queue, bio_queue) {
		i++;
	}
	return i;
}

/*
 * Show the internal state of the I/O scheduler.
 */
DB_SHOW_COMMAND(iosched, cam_iosched_db_show)
{
	struct cam_iosched_softc *isc;

	if (!have_addr) {
		db_printf("Need addr\n");
		return;
	}
	isc = (struct cam_iosched_softc *)addr;
	db_printf("pending_reads: %d\n", isc->read_stats.pending);
	db_printf("min_reads: %d\n", isc->read_stats.min);
	db_printf("max_reads: %d\n", isc->read_stats.max);
	db_printf("reads: %d\n", isc->read_stats.total);
	db_printf("in_reads: %d\n", isc->read_stats.in);
	db_printf("out_reads: %d\n", isc->read_stats.out);
	db_printf("queued_reads: %d\n", isc->read_stats.queued);
	db_printf("Current Q len %d\n", biolen(&isc->bio_queue));
	db_printf("pending_writes: %d\n", isc->write_stats.pending);
	db_printf("min_writes: %d\n", isc->write_stats.min);
	db_printf("max_writes: %d\n", isc->write_stats.max);
	db_printf("writes: %d\n", isc->write_stats.total);
	db_printf("in_writes: %d\n", isc->write_stats.in);
	db_printf("out_writes: %d\n", isc->write_stats.out);
	db_printf("queued_writes: %d\n", isc->write_stats.queued);
	db_printf("Current Q len %d\n", biolen(&isc->write_queue));
	db_printf("pending_trims: %d\n", isc->trim_stats.pending);
	db_printf("min_trims: %d\n", isc->trim_stats.min);
	db_printf("max_trims: %d\n", isc->trim_stats.max);
	db_printf("trims: %d\n", isc->trim_stats.total);
	db_printf("in_trims: %d\n", isc->trim_stats.in);
	db_printf("out_trims: %d\n", isc->trim_stats.out);
	db_printf("queued_trims: %d\n", isc->trim_stats.queued);
	db_printf("Current Q len %d\n", biolen(&isc->trim_queue));
	db_printf("read_bias: %d\n", isc->read_bias);
	db_printf("current_read_bias: %d\n", isc->current_read_bias);
	db_printf("Trim active? %s\n",
	    (isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) ? "yes" : "no");
}
#endif
#endif
