/*	$OpenBSD: mpath.c,v 1.58 2024/05/13 01:15:53 jsg Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);

TAILQ_HEAD(mpath_paths, mpath_path);

struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;
	struct mpath_dev	*g_dev;
	u_int			 g_id;
};
TAILQ_HEAD(mpath_groups, mpath_group);

struct mpath_dev {
	struct mutex		 d_mtx;

	struct scsi_xfer_list	 d_xfers;
	struct mpath_path	*d_next_path;

	struct mpath_groups	 d_groups;

	struct mpath_group	*d_failover_iter;
	struct timeout		 d_failover_tmo;
	u_int			 d_failover;

	const struct mpath_ops	*d_ops;
	struct devid		*d_id;
};

struct mpath_softc {
	struct device		 sc_dev;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

struct mpath_softc	*mpath;

const struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

const struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);

int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc		*sc = (struct mpath_softc *)self;
	struct scsibus_attach_args	saa;

	mpath = sc;

	printf("\n");

	saa.saa_adapter = &mpath_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
	saa.saa_luns = 1;
	saa.saa_openings = 1024; /* XXX magical */
	saa.saa_pool = NULL;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}
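/*
 * Command flow overview: mpath_cmd() queues an incoming scsi_xfer on the
 * mpath_dev and selects a path with mpath_next_path(), which walks the
 * paths of the head (active) group round-robin.  The chosen path's xfer
 * handler is scheduled with scsi_xsh_add(); when it runs, the per-device
 * glue driver obtains an xfer on the physical scsi_link and hands it to
 * mpath_start(), which copies the queued command onto it and issues it.
 * SCSI_POLL commands bypass the queue and run synchronously on the
 * chosen path.
 */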
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif /* DIAGNOSTIC */

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
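/*
 * Completion handling: a selection timeout or reset is treated as loss
 * of the physical path, so the original xfer goes back to the head of
 * the queue and is rescheduled on the next path.  Sense data is offered
 * to the device-specific op_checksense hook, which may request a
 * failover to another path group.  Everything else is copied back into
 * the original xfer and completed.
 */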
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->bus->sb_adapter_softc)
		return (ENXIO);

	return (0);
}
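/*
 * mpath_path_attach()/mpath_path_detach() are the interface used by the
 * per-device glue drivers (e.g. emc(4), hds(4), rdac(4), sym(4)) to
 * register and remove physical paths.  As a rough, illustrative sketch
 * (the names sc, link, group_id, mydrv_mpath_start and mydrv_mpath_ops
 * are hypothetical), a glue driver typically wires a path in like this:
 *
 *	sc->sc_path.p_link = link;
 *	scsi_xsh_set(&sc->sc_path.p_xsh, link, mydrv_mpath_start);
 *	if (mpath_path_attach(&sc->sc_path, group_id,
 *	    &mydrv_mpath_ops) != 0)
 *		printf("%s: unable to attach path\n", sc->sc_dev.dv_xname);
 */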
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif /* DIAGNOSTIC */

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				free(d, M_DEVBUF, sizeof(*d));
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
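/*
 * mpath_bootdv() is intended for boot device resolution: given the
 * device attached over one physical path, it returns the device
 * attached to the mpath bus for the same target, so the root device
 * refers to the multipathed disk rather than a single path.
 */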
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}