1206497Sluigi/*- 2206552Sluigi * Copyright (c) 2009-2010 Fabio Checconi 3206552Sluigi * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa 4206497Sluigi * All rights reserved. 5206497Sluigi * 6206497Sluigi * Redistribution and use in source and binary forms, with or without 7206497Sluigi * modification, are permitted provided that the following conditions 8206497Sluigi * are met: 9206497Sluigi * 1. Redistributions of source code must retain the above copyright 10206497Sluigi * notice, this list of conditions and the following disclaimer. 11206497Sluigi * 2. Redistributions in binary form must reproduce the above copyright 12206497Sluigi * notice, this list of conditions and the following disclaimer in the 13206497Sluigi * documentation and/or other materials provided with the distribution. 14206497Sluigi * 15206497Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16206497Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17206497Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18206497Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19206497Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20206497Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21206497Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22206497Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23206497Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24206497Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25206497Sluigi * SUCH DAMAGE. 26206497Sluigi */ 27206497Sluigi 28206497Sluigi/* 29206497Sluigi * $Id$ 30206497Sluigi * $FreeBSD$ 31206497Sluigi * 32206497Sluigi * Main control module for geom-based disk schedulers ('sched'). 
 *
 * USER VIEW
 * A 'sched' node is typically inserted transparently between
 * an existing provider pp and its original geom gp
 *
 *	[pp --> gp ..]
 *
 * using the command "geom sched insert <provider>" and
 * resulting in the following topology
 *
 *	[pp --> sched_gp --> cp]   [new_pp --> gp ... ]
 *
 * Deletion "geom sched destroy <provider>.sched." restores the
 * original chain. The normal "geom sched create <provider>"
 * is also supported.
 *
 * INTERNALS
 * Internally, the 'sched' uses the following data structures
 *
 *   geom{}         g_sched_softc{}      g_gsched{}
 * +----------+    +---------------+   +-------------+
 * |  softc *-|--->| sc_gsched   *-|-->|  gs_init    |
 * |  ...     |    |               |   |  gs_fini    |
 * |          |    | [ hash table] |   |  gs_start   |
 * +----------+    |               |   |  ...        |
 *                 |               |   +-------------+
 *                 |               |
 *                 |               |     g_*_softc{}
 *                 |               |   +-------------+
 *                 | sc_data     *-|-->|             |
 *                 +---------------+   |  algorithm- |
 *                                     |  specific   |
 *                                     +-------------+
 *
 * A g_sched_softc{} is created with a "geom sched insert" call.
 * In turn this instantiates a specific scheduling algorithm,
 * which sets sc_gsched to point to the algorithm callbacks,
 * and calls gs_init() to create the g_*_softc{}.
 * The other callbacks (gs_start, gs_next, ...) are invoked
 * as needed.
 *
 * g_sched_softc{} is defined in g_sched.h and mostly used here;
 * g_gsched{}, and the gs_callbacks, are documented in gs_scheduler.h;
 * g_*_softc{} is defined/implemented by each algorithm (gs_*.c)
 *
 * DATA MOVING
 * When a bio is received on the provider, it goes to
 * g_sched_start() which calls gs_start() to initially queue it;
 * then we call g_sched_dispatch() that loops around gs_next()
 * to select zero or more bio's to be sent downstream.
 *
 * g_sched_dispatch() can also be called as a result of a timeout,
 * e.g. when doing anticipation or pacing requests.
 *
 * When a bio comes back, it goes to g_sched_done() which in turn
 * calls gs_done(). The latter does any necessary housekeeping in
 * the scheduling algorithm, and may decide to call g_sched_dispatch()
 * to send more bio's downstream.
 *
 * If an algorithm needs per-flow queues, these are created
 * calling gs_init_class() and destroyed with gs_fini_class(),
 * and they are also inserted in the hash table implemented in
 * the g_sched_softc{}
 *
 * If an algorithm is replaced, or a transparently-inserted node is
 * removed with "geom sched destroy", we need to remove all references
 * to the g_*_softc{} and g_sched_softc from the bio's still in
 * the scheduler. g_sched_forced_dispatch() helps doing this.
 * XXX need to explain better.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/limits.h>
#include <sys/hash.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/proc.h>		/* we access curthread */
#include <geom/geom.h>
#include "gs_scheduler.h"
#include "g_sched.h"		/* geom hooks */

/*
 * Size of the per-geom hash table storing traffic classes.
 * We may decide to change it at a later time, it has no ABI
 * implications as it is only used for run-time allocations.
 */
#define	G_SCHED_HASH_SIZE	32

/* Forward declarations for the g_class method table below. */
static int g_sched_destroy(struct g_geom *gp, boolean_t force);
static int g_sched_destroy_geom(struct gctl_req *req,
    struct g_class *mp, struct g_geom *gp);
static void g_sched_config(struct gctl_req *req, struct g_class *mp,
    const char *verb);
static struct g_geom *g_sched_taste(struct g_class *mp,
    struct g_provider *pp, int flags __unused);
static void g_sched_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_sched_init(struct g_class *mp);
static void g_sched_fini(struct g_class *mp);
static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data,
    int fflag, struct thread *td);

/*
 * The GEOM class descriptor registered with the geom framework;
 * its callbacks implement the control path of the module.
 */
struct g_class g_sched_class = {
	.name = G_SCHED_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_sched_config,
	.taste = g_sched_taste,
	.destroy_geom = g_sched_destroy_geom,
	.init = g_sched_init,
	.ioctl = g_sched_ioctl,
	.fini = g_sched_fini
};

MALLOC_DEFINE(M_GEOM_SCHED, "GEOM_SCHED", "Geom schedulers data structures");

/*
 * Global variables describing the state of the geom_sched module.
 * There is only one static instance of this structure.
 */
LIST_HEAD(gs_list, g_gsched);	/* type, link field */
struct geom_sched_vars {
	struct mtx	gs_mtx;		/* protects the module state below */
	struct gs_list	gs_scheds;	/* list of algorithms */
	u_int		gs_debug;	/* debug level, exported via sysctl */
	u_int		gs_sched_count;	/* how many algorithms ? */
	u_int		gs_patched;	/* g_io_request was patched */

	u_int		gs_initialized;	/* set once by g_gsched_global_init() */
	u_int		gs_expire_secs;	/* expiration of hash entries */

	/* NOTE(review): gs_pending/gs_npending look like a queue of held
	 * bios plus its length -- consumers not visible here, confirm. */
	struct bio_queue_head gs_pending;
	u_int		gs_npending;

	/* The following are for stats, usually protected by gs_mtx. */
	u_long		gs_requests;	/* total requests */
	u_long		gs_done;	/* total done */
	u_int 		gs_in_flight;	/* requests in flight */
	u_int 		gs_writes_in_flight;
	u_int 		gs_bytes_in_flight;
	u_int 		gs_write_bytes_in_flight;

	char		gs_names[256];	/* names of schedulers */
};

/* The single module-wide instance; only gs_expire_secs needs a default. */
static struct geom_sched_vars me = {
	.gs_expire_secs = 10,
};

SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0,
    "GEOM_SCHED stuff");

/* Read-only sysctls exporting the statistics kept in 'me'. */
SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_wb, CTLFLAG_RD,
    &me.gs_write_bytes_in_flight, 0, "Write bytes in flight");

SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_b, CTLFLAG_RD,
    &me.gs_bytes_in_flight, 0, "Bytes in flight");

SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_w, CTLFLAG_RD,
    &me.gs_writes_in_flight, 0, "Write Requests in flight");

SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight, CTLFLAG_RD,
    &me.gs_in_flight, 0, "Requests in flight");

SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, done, CTLFLAG_RD,
    &me.gs_done, 0, "Total done");

SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, requests, CTLFLAG_RD,
    &me.gs_requests, 0, "Total requests");

SYSCTL_STRING(_kern_geom_sched, OID_AUTO, algorithms, CTLFLAG_RD,
    &me.gs_names, 0, "Algorithm names");

SYSCTL_UINT(_kern_geom_sched, OID_AUTO, alg_count, CTLFLAG_RD,
    &me.gs_sched_count, 0, "Number of algorithms");

SYSCTL_UINT(_kern_geom_sched,
    OID_AUTO, debug, CTLFLAG_RW,
    &me.gs_debug, 0, "Debug level");

SYSCTL_UINT(_kern_geom_sched, OID_AUTO, expire_secs, CTLFLAG_RW,
    &me.gs_expire_secs, 0, "Expire time in seconds");

/*
 * g_sched calls the scheduler algorithms with this lock held.
 * The locking functions are exposed so the scheduler algorithms can also
 * protect themselves e.g. when running a callout handler.
 */

/* Acquire the per-geom scheduler mutex (sc_mtx). */
void
g_sched_lock(struct g_geom *gp)
{
	struct g_sched_softc *sc = gp->softc;

	mtx_lock(&sc->sc_mtx);
}

/* Release the per-geom scheduler mutex (sc_mtx). */
void
g_sched_unlock(struct g_geom *gp)
{
	struct g_sched_softc *sc = gp->softc;

	mtx_unlock(&sc->sc_mtx);
}

/*
 * Support functions to handle references to the module,
 * which are coming from devices using this scheduler.
 * The counters are updated atomically since callers may not
 * hold any common lock.
 */
static inline void
g_gsched_ref(struct g_gsched *gsp)
{

	atomic_add_int(&gsp->gs_refs, 1);
}

static inline void
g_gsched_unref(struct g_gsched *gsp)
{

	atomic_add_int(&gsp->gs_refs, -1);
}

/*
 * Update the stats when this request is done.
 */
static void
g_sched_update_stats(struct bio *bio)
{

	/*
	 * Counters in 'me' are plain (non-atomic) updates; per the
	 * struct comment they are "usually protected by gs_mtx" --
	 * here the caller holds the per-geom sc_mtx instead, so the
	 * totals are best-effort statistics.
	 */
	me.gs_done++;
	me.gs_in_flight--;
	me.gs_bytes_in_flight -= bio->bio_length;
	if (bio->bio_cmd & BIO_WRITE) {
		me.gs_writes_in_flight--;
		me.gs_write_bytes_in_flight -= bio->bio_length;
	}
}

/*
 * Dispatch any pending request.  Unlike g_sched_dispatch() below,
 * this runs even while the geom is being flushed (gs_next() is
 * called with force == 1), so it can drain the scheduler.
 */
static void
g_sched_forced_dispatch(struct g_geom *gp)
{
	struct g_sched_softc *sc = gp->softc;
	struct g_gsched *gsp = sc->sc_gsched;
	struct bio *bp;

	KASSERT(mtx_owned(&sc->sc_mtx),
	    ("sc_mtx not owned during forced dispatch"));

	/* Pull every bio the algorithm will give up and push it down. */
	while ((bp = gsp->gs_next(sc->sc_data, 1)) != NULL)
		g_io_request(bp, LIST_FIRST(&gp->consumer));
}

/*
 * The main dispatch loop, called either here after the start
 * routine, or by scheduling algorithms when they receive a timeout
 * or a 'done' notification. Does not share code with the forced
 * dispatch path, since the gs_done() callback can call us.
300206497Sluigi */ 301206497Sluigivoid 302206497Sluigig_sched_dispatch(struct g_geom *gp) 303206497Sluigi{ 304206497Sluigi struct g_sched_softc *sc = gp->softc; 305206497Sluigi struct g_gsched *gsp = sc->sc_gsched; 306206497Sluigi struct bio *bp; 307206497Sluigi 308206497Sluigi KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during dispatch")); 309206497Sluigi 310206497Sluigi if ((sc->sc_flags & G_SCHED_FLUSHING)) 311206497Sluigi return; 312206497Sluigi 313206497Sluigi while ((bp = gsp->gs_next(sc->sc_data, 0)) != NULL) 314206497Sluigi g_io_request(bp, LIST_FIRST(&gp->consumer)); 315206497Sluigi} 316206497Sluigi 317206497Sluigi/* 318206497Sluigi * Recent (8.0 and above) versions of FreeBSD have support to 319206497Sluigi * register classifiers of disk requests. The classifier is 320206497Sluigi * invoked by g_io_request(), and stores the information into 321206497Sluigi * bp->bio_classifier1. 322206497Sluigi * 323206497Sluigi * Support for older versions, which is left here only for 324206497Sluigi * documentation purposes, relies on two hacks: 325206497Sluigi * 1. classification info is written into the bio_caller1 326206497Sluigi * field of the topmost node in the bio chain. This field 327206497Sluigi * is rarely used, but this module is incompatible with 328206497Sluigi * those that use bio_caller1 for other purposes, 329206497Sluigi * such as ZFS and gjournal; 330206497Sluigi * 2. g_io_request() is patched in-memory when the module is 331206497Sluigi * loaded, so that the function calls a classifier as its 332206497Sluigi * first thing. g_io_request() is restored when the module 333206497Sluigi * is unloaded. This functionality is only supported for 334206497Sluigi * x86 and amd64, other architectures need source code changes. 335206497Sluigi */ 336206497Sluigi 337206497Sluigi/* 338206497Sluigi * Lookup the identity of the issuer of the original request. 
 * In the current implementation we use the curthread of the
 * issuer, but different mechanisms may be implemented later
 * so we do not make assumptions on the return value which for
 * us is just an opaque identifier.
 */

static inline u_long
g_sched_classify(struct bio *bp)
{

#if __FreeBSD_version > 800098
	/* we have classifier fields in the struct bio */
#define HAVE_BIO_CLASSIFIER
	return ((u_long)bp->bio_classifier1);
#else
#warning old version!!!
	/* Walk up to the topmost bio; classification was stashed there. */
	while (bp->bio_parent != NULL)
		bp = bp->bio_parent;

	return ((u_long)bp->bio_caller1);
#endif
}

/* Return the hash chain for the given key. */
static inline struct g_hash *
g_sched_hash(struct g_sched_softc *sc, u_long key)
{

	/* sc_mask is the hashinit()-provided bucket mask. */
	return (&sc->sc_hash[key & sc->sc_mask]);
}

/*
 * Helper function for the children classes, which takes
 * a geom and a bio and returns the private descriptor
 * associated to the request. This involves fetching
 * the classification field and [al]locating the
 * corresponding entry in the hash table.
 */
void *
g_sched_get_class(struct g_geom *gp, struct bio *bp)
{
	struct g_sched_softc *sc;
	struct g_sched_class *gsc;
	struct g_gsched *gsp;
	struct g_hash *bucket;
	u_long key;

	sc = gp->softc;
	key = g_sched_classify(bp);
	bucket = g_sched_hash(sc, key);
	/* Fast path: the class already exists, just bump its refcount. */
	LIST_FOREACH(gsc, bucket, gsc_clist) {
		if (key == gsc->gsc_key) {
			gsc->gsc_refs++;
			return (gsc->gsc_priv);
		}
	}

	/*
	 * Slow path: allocate descriptor plus the algorithm's private
	 * area in one chunk.  M_NOWAIT: we may be in the I/O path.
	 */
	gsp = sc->sc_gsched;
	gsc = malloc(sizeof(*gsc) + gsp->gs_priv_size,
	    M_GEOM_SCHED, M_NOWAIT | M_ZERO);
	if (!gsc)
		return (NULL);

	/* Let the algorithm initialize its per-class state. */
	if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) {
		free(gsc, M_GEOM_SCHED);
		return (NULL);
	}

	gsc->gsc_refs = 2;	/* 1 for the hash table, 1 for the caller.
 */
	gsc->gsc_key = key;
	LIST_INSERT_HEAD(bucket, gsc, gsc_clist);

	/* Arm the idle-expiration deadline used by g_sched_flush_classes(). */
	gsc->gsc_expire = ticks + me.gs_expire_secs * hz;

	return (gsc->gsc_priv);
}

/*
 * Release a reference to the per-client descriptor.
 * On the last reference the algorithm's per-class state is torn
 * down and the descriptor is unhashed and freed.
 */
void
g_sched_put_class(struct g_geom *gp, void *priv)
{
	struct g_sched_class *gsc;
	struct g_sched_softc *sc;

	gsc = g_sched_priv2class(priv);
	/* Refresh the expire time on every release. */
	gsc->gsc_expire = ticks + me.gs_expire_secs * hz;

	if (--gsc->gsc_refs > 0)
		return;

	sc = gp->softc;
	sc->sc_gsched->gs_fini_class(sc->sc_data, priv);

	LIST_REMOVE(gsc, gsc_clist);
	free(gsc, M_GEOM_SCHED);
}

/*
 * Destroy the hash table: drop the table's reference on every class
 * (freeing those with no other users) and release the buckets.
 * 'data' is the algorithm's softc, passed to the optional
 * gs_hash_unref callback.
 */
static void
g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask,
    struct g_gsched *gsp, void *data)
{
	struct g_sched_class *cp, *cp2;
	int i;

	if (!hp)
		return;

	if (data && gsp->gs_hash_unref)
		gsp->gs_hash_unref(data);

	for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
		LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2)
			g_sched_put_class(gp, cp->gsc_priv);
	}

	hashdestroy(hp, M_GEOM_SCHED, mask);
}

/*
 * Create the per-geom class hash table.  Returns NULL when the
 * algorithm keeps no per-class state (gs_priv_size == 0).
 */
static struct g_hash *
g_sched_hash_init(struct g_gsched *gsp, u_long *mask, int flags)
{
	struct g_hash *hash;

	if (gsp->gs_priv_size == 0)
		return (NULL);

	hash = hashinit_flags(G_SCHED_HASH_SIZE, M_GEOM_SCHED, mask, flags);

	return (hash);
}

/*
 * Periodically reclaim idle classes: any entry whose only reference
 * is the hash table's own and whose expire time has passed is freed.
 * Rate-limited via sc_flush_ticks.
 */
static void
g_sched_flush_classes(struct g_geom *gp)
{
	struct g_sched_softc *sc;
	struct g_sched_class *cp, *cp2;
	int i;

	sc = gp->softc;

	/* Nothing to do without a hash table or before the next deadline. */
	if (!sc->sc_hash || ticks - sc->sc_flush_ticks <= 0)
		return;

	for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
		LIST_FOREACH_SAFE(cp, &sc->sc_hash[i], gsc_clist, cp2) {
			if (cp->gsc_refs == 1 && ticks - cp->gsc_expire > 0)
				g_sched_put_class(gp, cp->gsc_priv);
		}
	}

	sc->sc_flush_ticks = ticks + me.gs_expire_secs * hz;
}

/*
 * Wait for the completion of any outstanding request.  To ensure
 * that this does not take forever the caller has to make sure that
 * no new request enter the scheduler before calling us.
 *
 * Must be called with the gp mutex held and topology locked.
 */
static int
g_sched_wait_pending(struct g_geom *gp)
{
	struct g_sched_softc *sc = gp->softc;
	int endticks = ticks + hz;	/* wait at most ~1 second */

	g_topology_assert();

	/* Poll every hz/4 ticks; g_sched_done() wakes us up earlier. */
	while (sc->sc_pending && endticks - ticks >= 0)
		msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4);

	return (sc->sc_pending ?
	    ETIMEDOUT : 0);
}

/*
 * Detach the scheduling algorithm 'gsp' from the geom: drain all
 * requests, destroy the class hash and the algorithm's softc, then
 * drop the module reference.  Returns 0 or an errno (ETIMEDOUT if
 * outstanding requests did not complete in time, in which case the
 * algorithm is left attached).  Called with sc_mtx held.
 */
static int
g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp)
{
	struct g_sched_softc *sc = gp->softc;
	int error;

	/* Set the flushing flag: new bios will not enter the scheduler. */
	sc->sc_flags |= G_SCHED_FLUSHING;

	g_sched_forced_dispatch(gp);
	error = g_sched_wait_pending(gp);
	if (error)
		goto failed;

	/* No more requests pending or in flight from the old gsp. */

	g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data);
	sc->sc_hash = NULL;

	/*
	 * Avoid deadlock here by releasing the gp mutex and reacquiring
	 * it once done. It should be safe, since no reconfiguration or
	 * destruction can take place due to the geom topology lock; no
	 * new request can use the current sc_data since we flagged the
	 * geom as being flushed.
	 */
	g_sched_unlock(gp);
	gsp->gs_fini(sc->sc_data);
	g_sched_lock(gp);

	sc->sc_gsched = NULL;
	sc->sc_data = NULL;
	g_gsched_unref(gsp);

failed:
	/* Either way, resume accepting bios into the scheduler. */
	sc->sc_flags &= ~G_SCHED_FLUSHING;

	return (error);
}

/* Locked wrapper around g_sched_remove_locked(). */
static int
g_sched_remove(struct g_geom *gp, struct g_gsched *gsp)
{
	int error;

	g_sched_lock(gp);
	error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */
	g_sched_unlock(gp);

	return (error);
}

/*
 * Support function for create/taste -- locate the desired
 * algorithm and grab a reference to it.
 * Returns NULL if no algorithm with that name is registered.
 */
static struct g_gsched *
g_gsched_find(const char *name)
{
	struct g_gsched *gsp = NULL;

	mtx_lock(&me.gs_mtx);
	LIST_FOREACH(gsp, &me.gs_scheds, glist) {
		if (strcmp(name, gsp->gs_name) == 0) {
			g_gsched_ref(gsp);
			break;
		}
	}
	mtx_unlock(&me.gs_mtx);

	return (gsp);
}

/*
 * Rebuild the list of scheduler names.
 * To be called with me.gs_mtx lock held.
591206497Sluigi */ 592206497Sluigistatic void 593206497Sluigig_gsched_build_names(struct g_gsched *gsp) 594206497Sluigi{ 595206497Sluigi int pos, l; 596206497Sluigi struct g_gsched *cur; 597206497Sluigi 598206497Sluigi pos = 0; 599206497Sluigi LIST_FOREACH(cur, &me.gs_scheds, glist) { 600206497Sluigi l = strlen(cur->gs_name); 601206497Sluigi if (l + pos + 1 + 1 < sizeof(me.gs_names)) { 602206497Sluigi if (pos != 0) 603206497Sluigi me.gs_names[pos++] = ' '; 604206497Sluigi strcpy(me.gs_names + pos, cur->gs_name); 605206497Sluigi pos += l; 606206497Sluigi } 607206497Sluigi } 608206497Sluigi me.gs_names[pos] = '\0'; 609206497Sluigi} 610206497Sluigi 611206497Sluigi/* 612206497Sluigi * Register or unregister individual scheduling algorithms. 613206497Sluigi */ 614206497Sluigistatic int 615206497Sluigig_gsched_register(struct g_gsched *gsp) 616206497Sluigi{ 617206497Sluigi struct g_gsched *cur; 618206497Sluigi int error = 0; 619206497Sluigi 620206497Sluigi mtx_lock(&me.gs_mtx); 621206497Sluigi LIST_FOREACH(cur, &me.gs_scheds, glist) { 622206497Sluigi if (strcmp(gsp->gs_name, cur->gs_name) == 0) 623206497Sluigi break; 624206497Sluigi } 625206497Sluigi if (cur != NULL) { 626206497Sluigi G_SCHED_DEBUG(0, "A scheduler named %s already" 627206497Sluigi "exists.", gsp->gs_name); 628206497Sluigi error = EEXIST; 629206497Sluigi } else { 630206497Sluigi LIST_INSERT_HEAD(&me.gs_scheds, gsp, glist); 631206497Sluigi gsp->gs_refs = 1; 632206497Sluigi me.gs_sched_count++; 633206497Sluigi g_gsched_build_names(gsp); 634206497Sluigi } 635206497Sluigi mtx_unlock(&me.gs_mtx); 636206497Sluigi 637206497Sluigi return (error); 638206497Sluigi} 639206497Sluigi 640206497Sluigistruct g_gsched_unregparm { 641206497Sluigi struct g_gsched *gup_gsp; 642206497Sluigi int gup_error; 643206497Sluigi}; 644206497Sluigi 645206497Sluigistatic void 646206497Sluigig_gsched_unregister(void *arg, int flag) 647206497Sluigi{ 648206497Sluigi struct g_gsched_unregparm *parm = arg; 649206497Sluigi struct g_gsched 
*gsp = parm->gup_gsp, *cur, *tmp; 650206497Sluigi struct g_sched_softc *sc; 651206497Sluigi struct g_geom *gp, *gp_tmp; 652206497Sluigi int error; 653206497Sluigi 654206497Sluigi parm->gup_error = 0; 655206497Sluigi 656206497Sluigi g_topology_assert(); 657206497Sluigi 658206497Sluigi if (flag == EV_CANCEL) 659206497Sluigi return; 660206497Sluigi 661206497Sluigi mtx_lock(&me.gs_mtx); 662206497Sluigi 663206497Sluigi LIST_FOREACH_SAFE(gp, &g_sched_class.geom, geom, gp_tmp) { 664206497Sluigi if (gp->class != &g_sched_class) 665206497Sluigi continue; /* Should not happen. */ 666206497Sluigi 667206497Sluigi sc = gp->softc; 668206497Sluigi if (sc->sc_gsched == gsp) { 669206497Sluigi error = g_sched_remove(gp, gsp); 670206497Sluigi if (error) 671206497Sluigi goto failed; 672206497Sluigi } 673206497Sluigi } 674206497Sluigi 675206497Sluigi LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) { 676206497Sluigi if (cur != gsp) 677206497Sluigi continue; 678206497Sluigi 679206497Sluigi if (gsp->gs_refs != 1) { 680206497Sluigi G_SCHED_DEBUG(0, "%s still in use.", 681206497Sluigi gsp->gs_name); 682206497Sluigi parm->gup_error = EBUSY; 683206497Sluigi } else { 684206497Sluigi LIST_REMOVE(gsp, glist); 685206497Sluigi me.gs_sched_count--; 686206497Sluigi g_gsched_build_names(gsp); 687206497Sluigi } 688206497Sluigi break; 689206497Sluigi } 690206497Sluigi 691206497Sluigi if (cur == NULL) { 692206497Sluigi G_SCHED_DEBUG(0, "%s not registered.", gsp->gs_name); 693206497Sluigi parm->gup_error = ENOENT; 694206497Sluigi } 695206497Sluigi 696206497Sluigifailed: 697206497Sluigi mtx_unlock(&me.gs_mtx); 698206497Sluigi} 699206497Sluigi 700206497Sluigistatic inline void 701206497Sluigig_gsched_global_init(void) 702206497Sluigi{ 703206497Sluigi 704206497Sluigi if (!me.gs_initialized) { 705206497Sluigi G_SCHED_DEBUG(0, "Initializing global data."); 706206497Sluigi mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF); 707206497Sluigi LIST_INIT(&me.gs_scheds); 708206497Sluigi gs_bioq_init(&me.gs_pending); 
		me.gs_initialized = 1;
	}
}

/*
 * Module event called when a scheduling algorithm module is loaded or
 * unloaded.  Returns 0 on success, an errno otherwise (EOPNOTSUPP
 * for unsupported commands).
 */
int
g_gsched_modevent(module_t mod, int cmd, void *arg)
{
	struct g_gsched *gsp = arg;
	struct g_gsched_unregparm parm;
	int error;

	G_SCHED_DEBUG(0, "Modevent %d.", cmd);

	/*
	 * If the module is loaded at boot, the geom thread that calls
	 * g_sched_init() might actually run after g_gsched_modevent(),
	 * so make sure that the module is properly initialized.
	 */
	g_gsched_global_init();

	error = EOPNOTSUPP;
	switch (cmd) {
	case MOD_LOAD:
		error = g_gsched_register(gsp);
		G_SCHED_DEBUG(0, "Loaded module %s error %d.",
		    gsp->gs_name, error);
		if (error == 0)
			g_retaste(&g_sched_class);
		break;

	case MOD_UNLOAD:
		parm.gup_gsp = gsp;
		parm.gup_error = 0;

		/* Run the unregister synchronously as a geom event. */
		error = g_waitfor_event(g_gsched_unregister,
		    &parm, M_WAITOK, NULL);
		if (error == 0)
			error = parm.gup_error;
		G_SCHED_DEBUG(0, "Unloaded module %s error %d.",
		    gsp->gs_name, error);
		break;
	};	/* NOTE(review): stray ';' after switch, harmless */

	return (error);
}

#ifdef KTR
/* KTR tracing of bio events: expands to the START/DONE helpers below. */
#define	TRC_BIO_EVENT(e, bp)	g_sched_trace_bio_ ## e (bp)

/* One-letter tag for the bio command: Read, Write, or Unknown. */
static inline char
g_sched_type(struct bio *bp)
{

	if
	(0 != (bp->bio_cmd & BIO_READ))
		return ('R');
	else if (0 != (bp->bio_cmd & BIO_WRITE))
		return ('W');
	return ('U');
}

/* Trace a request entering the scheduler. */
static inline void
g_sched_trace_bio_START(struct bio *bp)
{

	/*
	 * NOTE(review): bio_offset / ULONG_MAX is almost always 0 for
	 * realistic offsets; possibly a divisor such as ULONG_MAX + 1
	 * (i.e. a high/low split) was intended -- confirm before use.
	 */
	CTR5(KTR_GSCHED, "S %lu %c %lu/%lu %lu", g_sched_classify(bp),
	    g_sched_type(bp), bp->bio_offset / ULONG_MAX,
	    bp->bio_offset, bp->bio_length);
}

/* Trace a request completing. */
static inline void
g_sched_trace_bio_DONE(struct bio *bp)
{

	CTR5(KTR_GSCHED, "D %lu %c %lu/%lu %lu", g_sched_classify(bp),
	    g_sched_type(bp), bp->bio_offset / ULONG_MAX,
	    bp->bio_offset, bp->bio_length);
}
#else	/* !KTR */
#define	TRC_BIO_EVENT(e, bp)
#endif	/* !KTR */

/*
 * g_sched_done() and g_sched_start() dispatch the geom requests to
 * the scheduling algorithm in use.
 */
static void
g_sched_done(struct bio *bio)
{
	struct g_geom *gp = bio->bio_caller2;	/* stashed by g_sched_start() */
	struct g_sched_softc *sc = gp->softc;

	TRC_BIO_EVENT(DONE, bio);

	KASSERT(bio->bio_caller1, ("null bio_caller1 in g_sched_done"));

	g_sched_lock(gp);

	g_sched_update_stats(bio);
	sc->sc_gsched->gs_done(sc->sc_data, bio);	/* notify the algorithm */
	if (!--sc->sc_pending)
		wakeup(gp);	/* unblock g_sched_wait_pending() */

	g_sched_flush_classes(gp);
	g_sched_unlock(gp);

	g_std_done(bio);	/* complete the cloned bio upstream */
}

static void
g_sched_start(struct bio *bp)
{
	struct g_geom *gp = bp->bio_to->geom;
	struct g_sched_softc *sc = gp->softc;
	struct bio *cbp;

	TRC_BIO_EVENT(START, bp);
	G_SCHED_LOGREQ(bp, "Request received.");

	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_sched_done;
	cbp->bio_to = LIST_FIRST(&gp->provider);
	KASSERT(cbp->bio_to != NULL, ("NULL provider"));

	/* We only schedule reads and writes. */
	if (0 == (bp->bio_cmd & (BIO_READ | BIO_WRITE)))
		goto bypass;

	G_SCHED_LOGREQ(cbp, "Sending request.");

	g_sched_lock(gp);
	/*
	 * Call the algorithm's gs_start to queue the request in the
	 * scheduler. If gs_start fails then pass the request down,
	 * otherwise call g_sched_dispatch() which tries to push
	 * one or more requests down.
	 */
	if (!sc->sc_gsched || (sc->sc_flags & G_SCHED_FLUSHING) ||
	    sc->sc_gsched->gs_start(sc->sc_data, cbp)) {
		g_sched_unlock(gp);
		goto bypass;
	}
	/*
	 * We use bio_caller1 to mark requests that are scheduled
	 * so make sure it is not NULL.
	 */
	if (cbp->bio_caller1 == NULL)
		cbp->bio_caller1 = &me;	/* anything not NULL */

	cbp->bio_caller2 = gp;	/* g_sched_done() needs the geom back */
	sc->sc_pending++;

	/* Update general stats. */
	me.gs_in_flight++;
	me.gs_requests++;
	me.gs_bytes_in_flight += bp->bio_length;
	if (bp->bio_cmd & BIO_WRITE) {
		me.gs_writes_in_flight++;
		me.gs_write_bytes_in_flight += bp->bio_length;
	}
	g_sched_dispatch(gp);
	g_sched_unlock(gp);
	return;

bypass:
	/* Not scheduled: push straight down to our consumer. */
	cbp->bio_done = g_std_done;
	cbp->bio_caller1 = NULL;	/* not scheduled */
	g_io_request(cbp, LIST_FIRST(&gp->consumer));
}

/*
 * The next few functions are the geom glue.
 */
static void
g_sched_orphan(struct g_consumer *cp)
{

	g_topology_assert();
	g_sched_destroy(cp->geom, 1);	/* provider went away: forced destroy */
}

static int
g_sched_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	/* Simply forward the access deltas to the consumer below us. */
	gp = pp->geom;
	cp = LIST_FIRST(&gp->consumer);
	error = g_access(cp, dr, dw, de);

	return (error);
}

/*
 * Temporary start routine used while a proxy is being inserted or
 * removed: park incoming bios on the global me.gs_pending queue.
 */
static void
g_sched_temporary_start(struct bio *bio)
{

	mtx_lock(&me.gs_mtx);
	me.gs_npending++;
	gs_bioq_disksort(&me.gs_pending, bio);
	mtx_unlock(&me.gs_mtx);
}

/* Drain me.gs_pending, issuing each parked bio through "start". */
static void
g_sched_flush_pending(g_start_t *start)
{
	struct bio *bp;

	while ((bp = gs_bioq_takefirst(&me.gs_pending)))
		start(bp);
}

/*
 * Transparent insertion: take over provider pp so that it is served
 * by gp, with newpp replacing it in the original geom. Returns 0 on
 * success, ETIMEDOUT if in-flight requests fail to drain in time.
 */
static int
g_insert_proxy(struct g_geom *gp, struct g_provider *newpp,
    struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp)
{
	struct g_sched_softc *sc = gp->softc;
	g_start_t *saved_start, *flush = g_sched_start;
	int error = 0, endticks = ticks + hz;	/* ~1 second drain timeout */

	g_cancel_event(newpp);	/* prevent taste() */
	/* copy private fields */
	newpp->private = pp->private;
	newpp->index = pp->index;

	/* Queue all the early requests coming for us. */
	me.gs_npending = 0;
	saved_start = pp->geom->start;	/* NOTE(review): assumes pp->geom == dstgp here -- confirm */
	dstgp->start = g_sched_temporary_start;

	/* Wait, with a timeout, for the in-flight requests to drain. */
	while (pp->nstart - pp->nend != me.gs_npending &&
	    endticks - ticks >= 0)
		tsleep(pp, PRIBIO, "-", hz/10);

	if (pp->nstart - pp->nend != me.gs_npending) {
		flush = saved_start;	/* hand queued bios back to the old geom */
		error = ETIMEDOUT;
		goto fail;
	}

	/* link pp to this geom */
	LIST_REMOVE(pp, provider);
	pp->geom = gp;
	LIST_INSERT_HEAD(&gp->provider, pp, provider);

	/*
	 * replicate the counts from the parent in the
	 * new provider and consumer nodes
	 */
	cp->acr = newpp->acr = pp->acr;
	cp->acw = newpp->acw = pp->acw;
	cp->ace = newpp->ace = pp->ace;
	sc->sc_flags |= G_SCHED_PROXYING;

fail:
	dstgp->start = saved_start;

	g_sched_flush_pending(flush);

	return (error);
}

/*
 * Create a geom node for the device passed as *pp.
 * If successful, add a reference to this gsp.
983206497Sluigi */ 984206497Sluigistatic int 985206497Sluigig_sched_create(struct gctl_req *req, struct g_class *mp, 986206497Sluigi struct g_provider *pp, struct g_gsched *gsp, int proxy) 987206497Sluigi{ 988206497Sluigi struct g_sched_softc *sc = NULL; 989206497Sluigi struct g_geom *gp, *dstgp; 990206497Sluigi struct g_provider *newpp = NULL; 991206497Sluigi struct g_consumer *cp = NULL; 992206497Sluigi char name[64]; 993206497Sluigi int error; 994206497Sluigi 995206497Sluigi g_topology_assert(); 996206497Sluigi 997206497Sluigi snprintf(name, sizeof(name), "%s%s", pp->name, G_SCHED_SUFFIX); 998206497Sluigi LIST_FOREACH(gp, &mp->geom, geom) { 999206497Sluigi if (strcmp(gp->name, name) == 0) { 1000206497Sluigi gctl_error(req, "Geom %s already exists.", 1001206497Sluigi name); 1002206497Sluigi return (EEXIST); 1003206497Sluigi } 1004206497Sluigi } 1005206497Sluigi 1006243333Sjh gp = g_new_geomf(mp, "%s", name); 1007206497Sluigi dstgp = proxy ? pp->geom : gp; /* where do we link the provider */ 1008206497Sluigi 1009206497Sluigi sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); 1010206497Sluigi sc->sc_gsched = gsp; 1011206497Sluigi sc->sc_data = gsp->gs_init(gp); 1012206497Sluigi if (sc->sc_data == NULL) { 1013206497Sluigi error = ENOMEM; 1014206497Sluigi goto fail; 1015206497Sluigi } 1016206497Sluigi 1017206497Sluigi sc->sc_hash = g_sched_hash_init(gsp, &sc->sc_mask, HASH_WAITOK); 1018206497Sluigi 1019206497Sluigi /* 1020206497Sluigi * Do not initialize the flush mechanism, will be initialized 1021206497Sluigi * on the first insertion on the hash table. 
1022206497Sluigi */ 1023206497Sluigi 1024206497Sluigi mtx_init(&sc->sc_mtx, "g_sched_mtx", NULL, MTX_DEF); 1025206497Sluigi 1026206497Sluigi gp->softc = sc; 1027206497Sluigi gp->start = g_sched_start; 1028206497Sluigi gp->orphan = g_sched_orphan; 1029206497Sluigi gp->access = g_sched_access; 1030206497Sluigi gp->dumpconf = g_sched_dumpconf; 1031206497Sluigi 1032243333Sjh newpp = g_new_providerf(dstgp, "%s", gp->name); 1033206497Sluigi newpp->mediasize = pp->mediasize; 1034206497Sluigi newpp->sectorsize = pp->sectorsize; 1035206497Sluigi 1036206497Sluigi cp = g_new_consumer(gp); 1037206497Sluigi error = g_attach(cp, proxy ? newpp : pp); 1038206497Sluigi if (error != 0) { 1039206497Sluigi gctl_error(req, "Cannot attach to provider %s.", 1040206497Sluigi pp->name); 1041206497Sluigi goto fail; 1042206497Sluigi } 1043206497Sluigi 1044206497Sluigi g_error_provider(newpp, 0); 1045206497Sluigi if (proxy) { 1046206497Sluigi error = g_insert_proxy(gp, newpp, dstgp, pp, cp); 1047206497Sluigi if (error) 1048206497Sluigi goto fail; 1049206497Sluigi } 1050206497Sluigi G_SCHED_DEBUG(0, "Device %s created.", gp->name); 1051206497Sluigi 1052206497Sluigi g_gsched_ref(gsp); 1053206497Sluigi 1054206497Sluigi return (0); 1055206497Sluigi 1056206497Sluigifail: 1057206497Sluigi if (cp != NULL) { 1058206497Sluigi if (cp->provider != NULL) 1059206497Sluigi g_detach(cp); 1060206497Sluigi g_destroy_consumer(cp); 1061206497Sluigi } 1062206497Sluigi if (newpp != NULL) 1063206497Sluigi g_destroy_provider(newpp); 1064221453Sae if (sc->sc_hash) 1065206497Sluigi g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, 1066206497Sluigi gsp, sc->sc_data); 1067221453Sae if (sc->sc_data) 1068206497Sluigi gsp->gs_fini(sc->sc_data); 1069221453Sae g_free(gp->softc); 1070221453Sae g_destroy_geom(gp); 1071206497Sluigi 1072206497Sluigi return (error); 1073206497Sluigi} 1074206497Sluigi 1075206497Sluigi/* 1076206497Sluigi * Support for dynamic switching of scheduling algorithms. 
1077206497Sluigi * First initialize the data structures for the new algorithm, 1078206497Sluigi * then call g_sched_remove_locked() to flush all references 1079206497Sluigi * to the old one, finally link the new algorithm. 1080206497Sluigi */ 1081206497Sluigistatic int 1082206497Sluigig_sched_change_algo(struct gctl_req *req, struct g_class *mp, 1083206497Sluigi struct g_provider *pp, struct g_gsched *gsp) 1084206497Sluigi{ 1085206497Sluigi struct g_sched_softc *sc; 1086206497Sluigi struct g_geom *gp; 1087206497Sluigi struct g_hash *newh; 1088206497Sluigi void *data; 1089206497Sluigi u_long mask; 1090206497Sluigi int error = 0; 1091206497Sluigi 1092206497Sluigi gp = pp->geom; 1093206497Sluigi sc = gp->softc; 1094206497Sluigi 1095206497Sluigi data = gsp->gs_init(gp); 1096206497Sluigi if (data == NULL) 1097206497Sluigi return (ENOMEM); 1098206497Sluigi 1099206497Sluigi newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK); 1100206497Sluigi if (gsp->gs_priv_size && !newh) { 1101206497Sluigi error = ENOMEM; 1102206497Sluigi goto fail; 1103206497Sluigi } 1104206497Sluigi 1105206497Sluigi g_sched_lock(gp); 1106206497Sluigi if (sc->sc_gsched) { /* can be NULL in some cases */ 1107206497Sluigi error = g_sched_remove_locked(gp, sc->sc_gsched); 1108206497Sluigi if (error) 1109206497Sluigi goto fail; 1110206497Sluigi } 1111206497Sluigi 1112206497Sluigi g_gsched_ref(gsp); 1113206497Sluigi sc->sc_gsched = gsp; 1114206497Sluigi sc->sc_data = data; 1115206497Sluigi sc->sc_hash = newh; 1116206497Sluigi sc->sc_mask = mask; 1117206497Sluigi 1118206497Sluigi g_sched_unlock(gp); 1119206497Sluigi 1120206497Sluigi return (0); 1121206497Sluigi 1122206497Sluigifail: 1123206497Sluigi if (newh) 1124206497Sluigi g_sched_hash_fini(gp, newh, mask, gsp, data); 1125206497Sluigi 1126206497Sluigi if (data) 1127206497Sluigi gsp->gs_fini(data); 1128206497Sluigi 1129206497Sluigi g_sched_unlock(gp); 1130206497Sluigi 1131206497Sluigi return (error); 1132206497Sluigi} 1133206497Sluigi 1134206497Sluigi/* 
 * Stop the request flow directed to the proxy, redirecting the new
 * requests to the me.gs_pending queue.
 */
static struct g_provider *
g_detach_proxy(struct g_geom *gp)
{
	struct g_consumer *cp;
	struct g_provider *pp, *newpp;

	do {
		pp = LIST_FIRST(&gp->provider);
		if (pp == NULL)
			break;
		cp = LIST_FIRST(&gp->consumer);
		if (cp == NULL)
			break;
		newpp = cp->provider;
		if (newpp == NULL)
			break;

		/* Divert new requests to the holding queue. */
		me.gs_npending = 0;
		pp->geom->start = g_sched_temporary_start;

		return (pp);
	} while (0);
	printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name);

	return (NULL);
}

/* Last-resort start routine: fail every bio with ENXIO. */
static void
g_sched_blackhole(struct bio *bp)
{

	g_io_deliver(bp, ENXIO);
}

/*
 * Move provider pp under geom gp, optionally copying the private
 * fields over from newpp.
 */
static inline void
g_reparent_provider(struct g_provider *pp, struct g_geom *gp,
    struct g_provider *newpp)
{

	LIST_REMOVE(pp, provider);
	if (newpp) {
		pp->private = newpp->private;
		pp->index = newpp->index;
	}
	pp->geom = gp;
	LIST_INSERT_HEAD(&gp->provider, pp, provider);
}

/* Swap oldpp back into newpp's geom, parking newpp under ours. */
static inline void
g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp)
{
	struct g_geom *gp = oldpp->geom;

	g_reparent_provider(oldpp, newpp->geom, newpp);

	/*
	 * Hackish: let the system destroy the old provider for us, just
	 * in case someone attached a consumer to it, in which case a
	 * direct call to g_destroy_provider() would not work.
	 */
	g_reparent_provider(newpp, gp, NULL);
}

/*
 * Complete the proxy destruction, linking the old provider to its
 * original geom, and destroying the proxy provider. Also take care
 * of issuing the pending requests collected in me.gs_pending (if any).
 */
static int
g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp)
{
	struct g_consumer *cp;
	struct g_provider *newpp;

	do {
		cp = LIST_FIRST(&gp->consumer);
		if (cp == NULL)
			break;
		newpp = cp->provider;
		if (newpp == NULL)
			break;

		/* Relink the provider to its original geom. */
		g_unproxy_provider(oldpp, newpp);

		/* Detach consumer from provider, and destroy provider. */
		cp->acr = newpp->acr = 0;
		cp->acw = newpp->acw = 0;
		cp->ace = newpp->ace = 0;
		g_detach(cp);

		/* Send the pending bios through the right start function. */
		g_sched_flush_pending(oldpp->geom->start);

		return (0);
	} while (0);
	printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name);

	/* We cannot send the pending bios anywhere...
 */
	g_sched_flush_pending(g_sched_blackhole);

	return (EINVAL);
}

/*
 * Tear down a gsched node, either a regular one or a transparent
 * proxy. With force != 0 we proceed even if requests are still
 * pending, possibly leaking memory.
 */
static int
g_sched_destroy(struct g_geom *gp, boolean_t force)
{
	struct g_provider *pp, *oldpp = NULL;
	struct g_sched_softc *sc;
	struct g_gsched *gsp;
	int error;
	/*
	 * NOTE(review): "error" is only assigned on the gsp/proxy
	 * branches below; confirm the final return cannot be reached
	 * with it uninitialized (i.e. sc_gsched == NULL and not
	 * proxying).
	 */

	g_topology_assert();
	sc = gp->softc;
	if (sc == NULL)
		return (ENXIO);
	if (!(sc->sc_flags & G_SCHED_PROXYING)) {
		pp = LIST_FIRST(&gp->provider);
		if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
			const char *msg = force ?
			    "but we force removal" : "cannot remove";

			G_SCHED_DEBUG(!force,
			    "Device %s is still open (r%dw%de%d), %s.",
			    pp->name, pp->acr, pp->acw, pp->ace, msg);
			if (!force)
				return (EBUSY);
		} else {
			G_SCHED_DEBUG(0, "Device %s removed.", gp->name);
		}
	} else
		oldpp = g_detach_proxy(gp);	/* start parking new bios */

	gsp = sc->sc_gsched;
	if (gsp) {
		/*
		 * XXX bad hack here: force a dispatch to release
		 * any reference to the hash table still held by
		 * the scheduler.
		 */
		g_sched_lock(gp);
		/*
		 * We are dying here, no new requests should enter
		 * the scheduler. This is granted by the topology,
		 * either in case we were proxying (new bios are
		 * being redirected) or not (see the access check
		 * above).
		 */
		g_sched_forced_dispatch(gp);
		error = g_sched_wait_pending(gp);

		if (error) {
			/*
			 * Not all the requests came home: this might happen
			 * under heavy load, or if we were waiting for any
			 * bio which is served in the event path (see
			 * geom_slice.c for an example of how this can
			 * happen). Try to restore a working configuration
			 * if we can fail.
			 */
			if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) {
				g_sched_flush_pending(force ?
				    g_sched_blackhole : g_sched_start);
			}

			/*
			 * In the forced destroy case there is not so much
			 * we can do, we have pending bios that will call
			 * g_sched_done() somehow, and we don't want them
			 * to crash the system using freed memory. We tell
			 * the user that something went wrong, and leak some
			 * memory here.
			 * Note: the callers using force = 1 ignore the
			 * return value.
			 */
			if (force) {
				G_SCHED_DEBUG(0, "Pending requests while "
				    " destroying geom, some memory leaked.");
			}

			/*
			 * NOTE(review): we return with the scheduler lock
			 * still held on this path (no g_sched_unlock());
			 * confirm this is intentional given the node is
			 * being abandoned.
			 */
			return (error);
		}

		g_sched_unlock(gp);
		g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask,
		    gsp, sc->sc_data);
		sc->sc_hash = NULL;
		gsp->gs_fini(sc->sc_data);
		g_gsched_unref(gsp);
		sc->sc_gsched = NULL;
	}

	if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) {
		error = g_destroy_proxy(gp, oldpp);

		if (error) {
			if (force) {
				G_SCHED_DEBUG(0, "Unrecoverable error while "
				    "destroying a proxy geom, leaking some "
				    " memory.");
			}

			return (error);
		}
	}

	mtx_destroy(&sc->sc_mtx);

	g_free(gp->softc);
	gp->softc = NULL;
	g_wither_geom(gp, ENXIO);

	return (error);
}

static int
g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{

	/* Non-forced destroy, used by the "geom sched destroy" verb. */
	return (g_sched_destroy(gp, 0));
}

/*
 * Functions related to the classification of requests.
 *
 * On recent FreeBSD versions (8.0 and above), we store a reference
 * to the issuer of a request in bp->bio_classifier1 as soon
 * as the bio is posted to the geom queue (and not later, because
 * requests are managed by the g_down thread afterwards).
 *
 * On older versions of the system (but this code is not used
 * in any existing release), we [ab]use the caller1 field in the
 * root element of the bio tree to store the classification info.
 * The marking is done at the beginning of g_io_request()
 * and only if we find that the field is NULL.
 *
 * To avoid rebuilding the kernel, this module will patch the
 * initial part of g_io_request() so it jumps to some hand-coded
 * assembly that does the marking and then executes the original
 * body of g_io_request().
 *
 * fake_ioreq[] is architecture-specific machine code
 * that implements the above. CODE_SIZE, STORE_SIZE etc.
 * are constants used in the patching routine. Look at the
 * code in g_ioreq_patch() for the details.
 */

#ifndef HAVE_BIO_CLASSIFIER
/*
 * Support for old FreeBSD versions
 */
#if defined(__i386__)
/*
 * Trampoline layout: CODE_SIZE bytes of classifier code, then the
 * STORE_SIZE bytes saved from the original prologue, then a 5-byte
 * jump (EPILOGUE) back into g_io_request().
 */
#define	CODE_SIZE	29
#define	STORE_SIZE	5
#define	EPILOGUE	5
#define	SIZE		(CODE_SIZE + STORE_SIZE + EPILOGUE)

static u_char fake_ioreq[SIZE] = {
	0x8b, 0x44, 0x24, 0x04,		/* mov bp, %eax */
	/* 1: */
	0x89, 0xc2,			/* mov %eax, %edx # edx = bp */
	0x8b, 0x40, 0x64,		/* mov bp->bio_parent, %eax */
	0x85, 0xc0,			/* test %eax, %eax */
	0x75, 0xf7,			/* jne 1b */
	0x8b, 0x42, 0x30,		/* mov bp->bp_caller1, %eax */
	0x85, 0xc0,			/* test %eax, %eax */
	0x75, 0x09,			/* jne 2f */
	0x64, 0xa1, 0x00, 0x00,		/* mov %fs:0, %eax */
	0x00, 0x00,
	0x89, 0x42, 0x30,		/* mov %eax, bp->bio_caller1 */
	/* 2: */
	0x55, 0x89, 0xe5, 0x57, 0x56,
	0xe9, 0x00, 0x00, 0x00, 0x00,	/* jmp back... */
};
#elif defined(__amd64)
#define	CODE_SIZE	38
#define	STORE_SIZE	6
#define	EPILOGUE	5
#define	SIZE		(CODE_SIZE + STORE_SIZE + EPILOGUE)

static u_char fake_ioreq[SIZE] = {
	0x48, 0x89, 0xf8,		/* mov bp, %rax */
	/* 1: */
	0x48, 0x89, 0xc2,		/* mov %rax, %rdx # rdx = bp */
	0x48, 0x8b, 0x82, 0xa8,		/* mov bp->bio_parent, %rax */
	0x00, 0x00, 0x00,
	0x48, 0x85, 0xc0,		/* test %rax, %rax */
	0x75, 0xf1,			/* jne 1b */
	0x48, 0x83, 0x7a, 0x58,		/* cmp $0, bp->bp_caller1 */
	0x00,
	0x75, 0x0d,			/* jne 2f */
	0x65, 0x48, 0x8b, 0x04,		/* mov %gs:0, %rax */
	0x25, 0x00, 0x00, 0x00,
	0x00,
	0x48, 0x89, 0x42, 0x58,		/* mov %rax, bp->bio_caller1 */
	/* 2: */
	0x55, 0x48, 0x89, 0xe5, 0x41, 0x56,
	0xe9, 0x00, 0x00, 0x00, 0x00,	/* jmp back... */
};
#else /* neither x86 nor amd64 */
/*
 * Reference C implementation of the classifier, to be inlined at
 * the beginning of g_io_request() on platforms without a
 * hand-coded trampoline (see the #error below).
 */
static void
g_new_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct bio *top = bp;

	/*
	 * bio classification: if bio_caller1 is available in the
	 * root of the 'struct bio' tree, store there the thread id
	 * of the thread that originated the request.
	 * More sophisticated classification schemes can be used.
	 */
	while (top->bio_parent)
		top = top->bio_parent;

	if (top->bio_caller1 == NULL)
		top->bio_caller1 = curthread;
}

#error please add the code above in g_new_io_request() to the beginning of \
	/sys/geom/geom_io.c::g_io_request(), and remove this line.
#endif /* end of arch-specific code */

/*
 * Overwrite the prologue of g_io_request() with a jump into the
 * fake_ioreq[] trampoline. Returns 0 on success, -1 if already
 * patched or if the prologue is not the one we expect.
 */
static int
g_ioreq_patch(void)
{
	u_char *original;
	u_long ofs;
	int found;

	if (me.gs_patched)
		return (-1);

	original = (u_char *)g_io_request;

	/* Only patch a prologue we recognize. */
	found = !bcmp(original, fake_ioreq + CODE_SIZE, STORE_SIZE);
	if (!found)
		return (-1);

	/* Jump back to the original + STORE_SIZE. */
	ofs = (original + STORE_SIZE) - (fake_ioreq + SIZE);
	bcopy(&ofs, fake_ioreq + CODE_SIZE + STORE_SIZE + 1, 4);

	/* Patch the original address with a jump to the trampoline. */
	*original = 0xe9;	/* jump opcode */
	ofs = fake_ioreq - (original + 5);
	bcopy(&ofs, original + 1, 4);

	me.gs_patched = 1;

	return (0);
}

/*
 * Restore the original code, this is easy.
 */
static void
g_ioreq_restore(void)
{
	u_char *original;

	if (me.gs_patched) {
		original = (u_char *)g_io_request;
		/* Put the saved prologue bytes back in place. */
		bcopy(fake_ioreq + CODE_SIZE, original, STORE_SIZE);
		me.gs_patched = 0;
	}
}

static inline void
g_classifier_ini(void)
{

	g_ioreq_patch();
}

static inline void
g_classifier_fini(void)
{

	g_ioreq_restore();
}

/*--- end of support code for older FreeBSD versions */

#else /* HAVE_BIO_CLASSIFIER */

/*
 * Classifier support for recent FreeBSD versions: we use
 * a very simple classifier, only use curthread to tag a request.
 * The classifier is registered at module load, and unregistered
 * at module unload.
 */
static int
g_sched_tag(void *arg, struct bio *bp)
{

	bp->bio_classifier1 = curthread;	/* tag with the issuing thread */
	return (1);
}

static struct g_classifier_hook g_sched_classifier = {
	.func =	g_sched_tag,
};

static inline void
g_classifier_ini(void)
{

	g_register_classifier(&g_sched_classifier);
}

static inline void
g_classifier_fini(void)
{

	g_unregister_classifier(&g_sched_classifier);
}
#endif /* HAVE_BIO_CLASSIFIER */

/* Class initialization, called when the geom class is registered. */
static void
g_sched_init(struct g_class *mp)
{

	g_gsched_global_init();

	G_SCHED_DEBUG(0, "Loading: mp = %p, g_sched_class = %p.",
	    mp, &g_sched_class);

	/* Patch g_io_request to store classification info in the bio. */
	g_classifier_ini();
}

/* Class teardown: undo g_sched_init(). */
static void
g_sched_fini(struct g_class *mp)
{

	g_classifier_fini();

	G_SCHED_DEBUG(0, "Unloading...");

	KASSERT(LIST_EMPTY(&me.gs_scheds), ("still registered schedulers"));
	mtx_destroy(&me.gs_mtx);
}

/* Forward ioctls to the geom below us, if it handles them. */
static int
g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag,
    struct thread *td)
{
	struct g_consumer *cp;
	struct g_geom *gp;

	cp = LIST_FIRST(&pp->geom->consumer);
	if (cp == NULL)
		return (ENOIOCTL);
	gp = cp->provider->geom;
	if (gp->ioctl == NULL)
		return (ENOIOCTL);
	return (gp->ioctl(cp->provider, cmd, data, fflag, td));
}

/*
 * Read the i-th argument for a request, skipping the /dev/
 * prefix if present.
 */
static const char *
g_sched_argi(struct gctl_req *req, int i)
{
	static const char *dev_prefix = "/dev/";
	const char *name;
	char param[16];
	int l = strlen(dev_prefix);

	snprintf(param, sizeof(param), "arg%d", i);
	name = gctl_get_asciiparam(req, param);
	if (name == NULL)
		gctl_error(req, "No 'arg%d' argument", i);
	else if (strncmp(name, dev_prefix, l) == 0)
		name += l;	/* skip the "/dev/" prefix */
	return (name);
}

/*
 * Fetch nargs and do appropriate checks.
 */
static int
g_sched_get_nargs(struct gctl_req *req)
{
	int *nargs;

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No 'nargs' argument");
		return (0);	/* 0 means "no valid arguments" */
	}
	if (*nargs <= 0)
		gctl_error(req, "Missing device(s).");
	return (*nargs);
}

/*
 * Check whether we should add the class on certain volumes when
 * this geom is created. Right now this is under control of a kenv
 * variable containing the names of all devices that we care about.
 * Probably we should only support transparent insertion as the
 * preferred mode of operation.
 */
static struct g_geom *
g_sched_taste(struct g_class *mp, struct g_provider *pp,
    int flags __unused)
{
	struct g_gsched *gsp = NULL;	/* the algorithm we want */
	const char *s;			/* generic string pointer */
	const char *taste_names;	/* devices we like */
	int l;

	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__,
	    mp->name, pp->name);
	g_topology_assert();

	G_SCHED_DEBUG(2, "Tasting %s.", pp->name);

	do {
		/* do not taste on ourselves */
		if (pp->geom->class == mp)
			break;

		taste_names = getenv("geom.sched.taste");
		if (taste_names == NULL)
			break;

		/*
		 * Look for pp->name as a space-separated token in
		 * taste_names.
		 * NOTE(review): if the name only ever occurs as a
		 * substring, the loop can terminate with s non-NULL at
		 * the end of the string and be treated as a match --
		 * confirm this cannot happen in practice.
		 */
		l = strlen(pp->name);
		for (s = taste_names; *s &&
		    (s = strstr(s, pp->name)); s++) {
			/* further checks for an exact match */
			if ( (s == taste_names || s[-1] == ' ') &&
			     (s[l] == '\0' || s[l] == ' ') )
				break;
		}
		if (s == NULL)
			break;
		G_SCHED_DEBUG(0, "Attach device %s match [%s]\n",
		    pp->name, s);

		/* look up the provider name in the list */
		s = getenv("geom.sched.algo");
		if (s == NULL)
			s = "rr";	/* default algorithm */

		gsp = g_gsched_find(s);	/* also get a reference */
		if (gsp == NULL) {
			G_SCHED_DEBUG(0, "Bad '%s' algorithm.", s);
			break;
		}

		/* XXX create with 1 as last argument ?
*/ 1693206497Sluigi g_sched_create(NULL, mp, pp, gsp, 0); 1694206497Sluigi g_gsched_unref(gsp); 1695206497Sluigi } while (0); 1696206497Sluigi return NULL; 1697206497Sluigi} 1698206497Sluigi 1699206497Sluigistatic void 1700206497Sluigig_sched_ctl_create(struct gctl_req *req, struct g_class *mp, int proxy) 1701206497Sluigi{ 1702206497Sluigi struct g_provider *pp; 1703206497Sluigi struct g_gsched *gsp; 1704206497Sluigi const char *name; 1705206497Sluigi int i, nargs; 1706206497Sluigi 1707206497Sluigi g_topology_assert(); 1708206497Sluigi 1709206497Sluigi name = gctl_get_asciiparam(req, "algo"); 1710206497Sluigi if (name == NULL) { 1711206497Sluigi gctl_error(req, "No '%s' argument", "algo"); 1712206497Sluigi return; 1713206497Sluigi } 1714206497Sluigi 1715206497Sluigi gsp = g_gsched_find(name); /* also get a reference */ 1716206497Sluigi if (gsp == NULL) { 1717206497Sluigi gctl_error(req, "Bad algorithm '%s'", name); 1718206497Sluigi return; 1719206497Sluigi } 1720206497Sluigi 1721206497Sluigi nargs = g_sched_get_nargs(req); 1722206497Sluigi 1723206497Sluigi /* 1724206497Sluigi * Run on the arguments, and break on any error. 1725206497Sluigi * We look for a device name, but skip the /dev/ prefix if any. 
1726206497Sluigi */ 1727206497Sluigi for (i = 0; i < nargs; i++) { 1728206497Sluigi name = g_sched_argi(req, i); 1729206497Sluigi if (name == NULL) 1730206497Sluigi break; 1731206497Sluigi pp = g_provider_by_name(name); 1732206497Sluigi if (pp == NULL) { 1733206497Sluigi G_SCHED_DEBUG(1, "Provider %s is invalid.", name); 1734206497Sluigi gctl_error(req, "Provider %s is invalid.", name); 1735206497Sluigi break; 1736206497Sluigi } 1737206497Sluigi if (g_sched_create(req, mp, pp, gsp, proxy) != 0) 1738206497Sluigi break; 1739206497Sluigi } 1740206497Sluigi 1741206497Sluigi g_gsched_unref(gsp); 1742206497Sluigi} 1743206497Sluigi 1744206497Sluigistatic void 1745206497Sluigig_sched_ctl_configure(struct gctl_req *req, struct g_class *mp) 1746206497Sluigi{ 1747206497Sluigi struct g_provider *pp; 1748206497Sluigi struct g_gsched *gsp; 1749206497Sluigi const char *name; 1750206497Sluigi int i, nargs; 1751206497Sluigi 1752206497Sluigi g_topology_assert(); 1753206497Sluigi 1754206497Sluigi name = gctl_get_asciiparam(req, "algo"); 1755206497Sluigi if (name == NULL) { 1756206497Sluigi gctl_error(req, "No '%s' argument", "algo"); 1757206497Sluigi return; 1758206497Sluigi } 1759206497Sluigi 1760206497Sluigi gsp = g_gsched_find(name); /* also get a reference */ 1761206497Sluigi if (gsp == NULL) { 1762206497Sluigi gctl_error(req, "Bad algorithm '%s'", name); 1763206497Sluigi return; 1764206497Sluigi } 1765206497Sluigi 1766206497Sluigi nargs = g_sched_get_nargs(req); 1767206497Sluigi 1768206497Sluigi /* 1769206497Sluigi * Run on the arguments, and break on any error. 1770206497Sluigi * We look for a device name, but skip the /dev/ prefix if any. 
1771206497Sluigi */ 1772206497Sluigi for (i = 0; i < nargs; i++) { 1773206497Sluigi name = g_sched_argi(req, i); 1774206497Sluigi if (name == NULL) 1775206497Sluigi break; 1776206497Sluigi pp = g_provider_by_name(name); 1777206497Sluigi if (pp == NULL || pp->geom->class != mp) { 1778206497Sluigi G_SCHED_DEBUG(1, "Provider %s is invalid.", name); 1779206497Sluigi gctl_error(req, "Provider %s is invalid.", name); 1780206497Sluigi break; 1781206497Sluigi } 1782206497Sluigi if (g_sched_change_algo(req, mp, pp, gsp) != 0) 1783206497Sluigi break; 1784206497Sluigi } 1785206497Sluigi 1786206497Sluigi g_gsched_unref(gsp); 1787206497Sluigi} 1788206497Sluigi 1789206497Sluigistatic struct g_geom * 1790206497Sluigig_sched_find_geom(struct g_class *mp, const char *name) 1791206497Sluigi{ 1792206497Sluigi struct g_geom *gp; 1793206497Sluigi 1794206497Sluigi LIST_FOREACH(gp, &mp->geom, geom) { 1795206497Sluigi if (strcmp(gp->name, name) == 0) 1796206497Sluigi return (gp); 1797206497Sluigi } 1798206497Sluigi return (NULL); 1799206497Sluigi} 1800206497Sluigi 1801206497Sluigistatic void 1802206497Sluigig_sched_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1803206497Sluigi{ 1804206497Sluigi int nargs, *force, error, i; 1805206497Sluigi struct g_geom *gp; 1806206497Sluigi const char *name; 1807206497Sluigi 1808206497Sluigi g_topology_assert(); 1809206497Sluigi 1810206497Sluigi nargs = g_sched_get_nargs(req); 1811206497Sluigi 1812206497Sluigi force = gctl_get_paraml(req, "force", sizeof(*force)); 1813206497Sluigi if (force == NULL) { 1814206497Sluigi gctl_error(req, "No 'force' argument"); 1815206497Sluigi return; 1816206497Sluigi } 1817206497Sluigi 1818206497Sluigi for (i = 0; i < nargs; i++) { 1819206497Sluigi name = g_sched_argi(req, i); 1820206497Sluigi if (name == NULL) 1821206497Sluigi break; 1822206497Sluigi 1823206497Sluigi gp = g_sched_find_geom(mp, name); 1824206497Sluigi if (gp == NULL) { 1825206497Sluigi G_SCHED_DEBUG(1, "Device %s is invalid.", name); 
1826206497Sluigi gctl_error(req, "Device %s is invalid.", name); 1827206497Sluigi break; 1828206497Sluigi } 1829206497Sluigi 1830206497Sluigi error = g_sched_destroy(gp, *force); 1831206497Sluigi if (error != 0) { 1832206497Sluigi gctl_error(req, "Cannot destroy device %s (error=%d).", 1833206497Sluigi gp->name, error); 1834206497Sluigi break; 1835206497Sluigi } 1836206497Sluigi } 1837206497Sluigi} 1838206497Sluigi 1839206497Sluigistatic void 1840206497Sluigig_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1841206497Sluigi{ 1842206497Sluigi uint32_t *version; 1843206497Sluigi 1844206497Sluigi g_topology_assert(); 1845206497Sluigi 1846206497Sluigi version = gctl_get_paraml(req, "version", sizeof(*version)); 1847206497Sluigi if (version == NULL) { 1848206497Sluigi gctl_error(req, "No '%s' argument.", "version"); 1849206497Sluigi return; 1850206497Sluigi } 1851206497Sluigi 1852206497Sluigi if (*version != G_SCHED_VERSION) { 1853206497Sluigi gctl_error(req, "Userland and kernel parts are " 1854206497Sluigi "out of sync."); 1855206497Sluigi return; 1856206497Sluigi } 1857206497Sluigi 1858206497Sluigi if (strcmp(verb, "create") == 0) { 1859206497Sluigi g_sched_ctl_create(req, mp, 0); 1860206497Sluigi return; 1861206497Sluigi } else if (strcmp(verb, "insert") == 0) { 1862206497Sluigi g_sched_ctl_create(req, mp, 1); 1863206497Sluigi return; 1864206497Sluigi } else if (strcmp(verb, "configure") == 0) { 1865206497Sluigi g_sched_ctl_configure(req, mp); 1866206497Sluigi return; 1867206497Sluigi } else if (strcmp(verb, "destroy") == 0) { 1868206497Sluigi g_sched_ctl_destroy(req, mp); 1869206497Sluigi return; 1870206497Sluigi } 1871206497Sluigi 1872206497Sluigi gctl_error(req, "Unknown verb."); 1873206497Sluigi} 1874206497Sluigi 1875206497Sluigistatic void 1876206497Sluigig_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1877206497Sluigi struct g_consumer *cp, struct g_provider *pp) 1878206497Sluigi{ 1879206497Sluigi struct 
g_sched_softc *sc = gp->softc; 1880206497Sluigi struct g_gsched *gsp = sc->sc_gsched; 1881206497Sluigi if (indent == NULL) { /* plaintext */ 1882206497Sluigi sbuf_printf(sb, " algo %s", gsp ? gsp->gs_name : "--"); 1883206497Sluigi } 1884210795Sae if (gsp != NULL && gsp->gs_dumpconf) 1885206497Sluigi gsp->gs_dumpconf(sb, indent, gp, cp, pp); 1886206497Sluigi} 1887206497Sluigi 1888206497SluigiDECLARE_GEOM_CLASS(g_sched_class, g_sched); 1889206497SluigiMODULE_VERSION(geom_sched, 0); 1890