/* geom_vinum_drive.c revision 152971 */
1/*- 2 * Copyright (c) 2004, 2005 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 152971 2005-11-30 22:15:00Z sobomax $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/errno.h> 33#include <sys/conf.h> 34#include <sys/kernel.h> 35#include <sys/kthread.h> 36#include <sys/libkern.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/module.h> 40#include <sys/mutex.h> 41#include <sys/sbuf.h> 42#include <sys/systm.h> 43#include <sys/time.h> 44 45#include <geom/geom.h> 46#include <geom/vinum/geom_vinum_var.h> 47#include <geom/vinum/geom_vinum.h> 48#include <geom/vinum/geom_vinum_share.h> 49 50static void gv_drive_dead(void *, int); 51static void gv_drive_worker(void *); 52 53void 54gv_config_new_drive(struct gv_drive *d) 55{ 56 struct gv_hdr *vhdr; 57 struct gv_freelist *fl; 58 59 KASSERT(d != NULL, ("config_new_drive: NULL d")); 60 61 vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); 62 vhdr->magic = GV_MAGIC; 63 vhdr->config_length = GV_CFG_LEN; 64 65 bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); 66 strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); 67 microtime(&vhdr->label.date_of_birth); 68 69 d->hdr = vhdr; 70 71 LIST_INIT(&d->subdisks); 72 LIST_INIT(&d->freelist); 73 74 fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); 75 fl->offset = GV_DATA_START; 76 fl->size = d->avail; 77 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 78 d->freelist_entries = 1; 79 80 TAILQ_INIT(&d->bqueue); 81 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 82 kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); 83 d->flags |= GV_DRIVE_THREAD_ACTIVE; 84} 85 86void 87gv_save_config_all(struct gv_softc *sc) 88{ 89 struct gv_drive *d; 90 91 g_topology_assert(); 92 93 LIST_FOREACH(d, &sc->drives, drive) { 94 if (d->geom == NULL) 95 continue; 96 gv_save_config(NULL, d, sc); 97 } 98} 99 100/* Save the vinum configuration back to disk. 
*/ 101void 102gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) 103{ 104 struct g_geom *gp; 105 struct g_consumer *cp2; 106 struct gv_hdr *vhdr, *hdr; 107 struct sbuf *sb; 108 int error; 109 110 g_topology_assert(); 111 112 KASSERT(d != NULL, ("gv_save_config: null d")); 113 KASSERT(sc != NULL, ("gv_save_config: null sc")); 114 115 /* 116 * We can't save the config on a drive that isn't up, but drives that 117 * were just created aren't officially up yet, so we check a special 118 * flag. 119 */ 120 if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN)) 121 return; 122 123 if (cp == NULL) { 124 gp = d->geom; 125 KASSERT(gp != NULL, ("gv_save_config: null gp")); 126 cp2 = LIST_FIRST(&gp->consumer); 127 KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); 128 } else 129 cp2 = cp; 130 131 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 132 vhdr->magic = GV_MAGIC; 133 vhdr->config_length = GV_CFG_LEN; 134 135 hdr = d->hdr; 136 if (hdr == NULL) { 137 printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name); 138 g_free(vhdr); 139 return; 140 } 141 microtime(&hdr->label.last_update); 142 bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); 143 144 sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 145 gv_format_config(sc, sb, 1, NULL); 146 sbuf_finish(sb); 147 148 error = g_access(cp2, 0, 1, 0); 149 if (error) { 150 printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n", 151 d->name, error); 152 sbuf_delete(sb); 153 g_free(vhdr); 154 return; 155 } 156 g_topology_unlock(); 157 158 do { 159 error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN); 160 if (error) { 161 printf("GEOM_VINUM: writing vhdr failed on drive %s, " 162 "errno %d", d->name, error); 163 break; 164 } 165 166 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), 167 GV_CFG_LEN); 168 if (error) { 169 printf("GEOM_VINUM: writing first config copy failed " 170 "on drive %s, errno %d", d->name, error); 171 break; 172 } 173 174 error = g_write_data(cp2, 
GV_CFG_OFFSET + GV_CFG_LEN, 175 sbuf_data(sb), GV_CFG_LEN); 176 if (error) 177 printf("GEOM_VINUM: writing second config copy failed " 178 "on drive %s, errno %d", d->name, error); 179 } while (0); 180 181 g_topology_lock(); 182 g_access(cp2, 0, -1, 0); 183 sbuf_delete(sb); 184 g_free(vhdr); 185 186 if (d->geom != NULL) 187 gv_drive_modify(d); 188} 189 190/* This resembles g_slice_access(). */ 191static int 192gv_drive_access(struct g_provider *pp, int dr, int dw, int de) 193{ 194 struct g_geom *gp; 195 struct g_consumer *cp; 196 struct g_provider *pp2; 197 struct gv_drive *d; 198 struct gv_sd *s, *s2; 199 int error; 200 201 gp = pp->geom; 202 cp = LIST_FIRST(&gp->consumer); 203 if (cp == NULL) 204 return (0); 205 206 d = gp->softc; 207 if (d == NULL) 208 return (0); 209 210 s = pp->private; 211 KASSERT(s != NULL, ("gv_drive_access: NULL s")); 212 213 LIST_FOREACH(s2, &d->subdisks, from_drive) { 214 if (s == s2) 215 continue; 216 if (s->drive_offset + s->size <= s2->drive_offset) 217 continue; 218 if (s2->drive_offset + s2->size <= s->drive_offset) 219 continue; 220 221 /* Overlap. */ 222 pp2 = s2->provider; 223 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); 224 if ((pp->acw + dw) > 0 && pp2->ace > 0) 225 return (EPERM); 226 if ((pp->ace + de) > 0 && pp2->acw > 0) 227 return (EPERM); 228 } 229 230 error = g_access(cp, dr, dw, de); 231 return (error); 232} 233 234static void 235gv_drive_done(struct bio *bp) 236{ 237 struct gv_drive *d; 238 struct gv_bioq *bq; 239 240 /* Put the BIO on the worker queue again. 
*/ 241 d = bp->bio_from->geom->softc; 242 bp->bio_cflags |= GV_BIO_DONE; 243 bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO); 244 bq->bp = bp; 245 mtx_lock(&d->bqueue_mtx); 246 TAILQ_INSERT_TAIL(&d->bqueue, bq, queue); 247 wakeup(d); 248 mtx_unlock(&d->bqueue_mtx); 249} 250 251 252static void 253gv_drive_start(struct bio *bp) 254{ 255 struct gv_drive *d; 256 struct gv_sd *s; 257 struct gv_bioq *bq; 258 259 switch (bp->bio_cmd) { 260 case BIO_READ: 261 case BIO_WRITE: 262 case BIO_DELETE: 263 break; 264 case BIO_GETATTR: 265 default: 266 g_io_deliver(bp, EOPNOTSUPP); 267 return; 268 } 269 270 s = bp->bio_to->private; 271 if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { 272 g_io_deliver(bp, ENXIO); 273 return; 274 } 275 276 d = bp->bio_to->geom->softc; 277 278 /* 279 * Put the BIO on the worker queue, where the worker thread will pick 280 * it up. 281 */ 282 bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO); 283 bq->bp = bp; 284 mtx_lock(&d->bqueue_mtx); 285 TAILQ_INSERT_TAIL(&d->bqueue, bq, queue); 286 wakeup(d); 287 mtx_unlock(&d->bqueue_mtx); 288 289} 290 291static void 292gv_drive_worker(void *arg) 293{ 294 struct bio *bp, *cbp; 295 struct g_geom *gp; 296 struct g_provider *pp; 297 struct gv_drive *d; 298 struct gv_sd *s; 299 struct gv_bioq *bq, *bq2; 300 int error; 301 302 d = arg; 303 304 mtx_lock(&d->bqueue_mtx); 305 for (;;) { 306 /* We were signaled to exit. */ 307 if (d->flags & GV_DRIVE_THREAD_DIE) 308 break; 309 310 /* Take the first BIO from out queue. */ 311 bq = TAILQ_FIRST(&d->bqueue); 312 if (bq == NULL) { 313 msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10); 314 continue; 315 } 316 TAILQ_REMOVE(&d->bqueue, bq, queue); 317 mtx_unlock(&d->bqueue_mtx); 318 319 bp = bq->bp; 320 g_free(bq); 321 pp = bp->bio_to; 322 gp = pp->geom; 323 324 /* Completed request. */ 325 if (bp->bio_cflags & GV_BIO_DONE) { 326 error = bp->bio_error; 327 328 /* Deliver the original request. */ 329 g_std_done(bp); 330 331 /* The request had an error, we need to clean up. 
*/ 332 if (error != 0) { 333 g_topology_lock(); 334 gv_set_drive_state(d, GV_DRIVE_DOWN, 335 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 336 g_topology_unlock(); 337 g_post_event(gv_drive_dead, d, M_WAITOK, d, 338 NULL); 339 } 340 341 /* New request, needs to be sent downwards. */ 342 } else { 343 s = pp->private; 344 345 if ((s->state == GV_SD_DOWN) || 346 (s->state == GV_SD_STALE)) { 347 g_io_deliver(bp, ENXIO); 348 mtx_lock(&d->bqueue_mtx); 349 continue; 350 } 351 if (bp->bio_offset > s->size) { 352 g_io_deliver(bp, EINVAL); 353 mtx_lock(&d->bqueue_mtx); 354 continue; 355 } 356 357 cbp = g_clone_bio(bp); 358 if (cbp == NULL) { 359 g_io_deliver(bp, ENOMEM); 360 mtx_lock(&d->bqueue_mtx); 361 continue; 362 } 363 if (cbp->bio_offset + cbp->bio_length > s->size) 364 cbp->bio_length = s->size - 365 cbp->bio_offset; 366 cbp->bio_done = gv_drive_done; 367 cbp->bio_offset += s->drive_offset; 368 g_io_request(cbp, LIST_FIRST(&gp->consumer)); 369 } 370 371 mtx_lock(&d->bqueue_mtx); 372 } 373 374 TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) { 375 TAILQ_REMOVE(&d->bqueue, bq, queue); 376 mtx_unlock(&d->bqueue_mtx); 377 bp = bq->bp; 378 g_free(bq); 379 if (bp->bio_cflags & GV_BIO_DONE) 380 g_std_done(bp); 381 else 382 g_io_deliver(bp, ENXIO); 383 mtx_lock(&d->bqueue_mtx); 384 } 385 mtx_unlock(&d->bqueue_mtx); 386 d->flags |= GV_DRIVE_THREAD_DEAD; 387 388 kthread_exit(ENXIO); 389} 390 391 392static void 393gv_drive_orphan(struct g_consumer *cp) 394{ 395 struct g_geom *gp; 396 struct gv_drive *d; 397 398 g_topology_assert(); 399 gp = cp->geom; 400 g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name); 401 d = gp->softc; 402 if (d != NULL) { 403 gv_set_drive_state(d, GV_DRIVE_DOWN, 404 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 405 g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); 406 } else 407 g_wither_geom(gp, ENXIO); 408} 409 410static struct g_geom * 411gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 412{ 413 struct g_geom *gp, *gp2; 414 struct 
g_consumer *cp; 415 struct gv_drive *d; 416 struct gv_sd *s; 417 struct gv_softc *sc; 418 struct gv_freelist *fl; 419 struct gv_hdr *vhdr; 420 int error; 421 char *buf, errstr[ERRBUFSIZ]; 422 423 vhdr = NULL; 424 d = NULL; 425 426 g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name); 427 g_topology_assert(); 428 429 /* Find the VINUM class and its associated geom. */ 430 gp2 = find_vinum_geom(); 431 if (gp2 == NULL) 432 return (NULL); 433 sc = gp2->softc; 434 435 gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); 436 gp->start = gv_drive_start; 437 gp->orphan = gv_drive_orphan; 438 gp->access = gv_drive_access; 439 gp->start = gv_drive_start; 440 441 cp = g_new_consumer(gp); 442 g_attach(cp, pp); 443 error = g_access(cp, 1, 0, 0); 444 if (error) { 445 g_detach(cp); 446 g_destroy_consumer(cp); 447 g_destroy_geom(gp); 448 return (NULL); 449 } 450 451 g_topology_unlock(); 452 453 /* Now check if the provided slice is a valid vinum drive. */ 454 do { 455 vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); 456 if (vhdr == NULL) 457 break; 458 if (vhdr->magic != GV_MAGIC) { 459 g_free(vhdr); 460 break; 461 } 462 463 /* A valid vinum drive, let's parse the on-disk information. */ 464 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); 465 if (buf == NULL) { 466 g_free(vhdr); 467 break; 468 } 469 g_topology_lock(); 470 gv_parse_config(sc, buf, 1); 471 g_free(buf); 472 473 /* 474 * Let's see if this drive is already known in the 475 * configuration. 476 */ 477 d = gv_find_drive(sc, vhdr->label.name); 478 479 /* We already know about this drive. */ 480 if (d != NULL) { 481 /* Check if this drive already has a geom. */ 482 if (d->geom != NULL) { 483 g_topology_unlock(); 484 break; 485 } 486 bcopy(vhdr, d->hdr, sizeof(*vhdr)); 487 488 /* This is a new drive. */ 489 } else { 490 d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 491 492 /* Initialize all needed variables. 
*/ 493 d->size = pp->mediasize - GV_DATA_START; 494 d->avail = d->size; 495 d->hdr = vhdr; 496 strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); 497 LIST_INIT(&d->subdisks); 498 LIST_INIT(&d->freelist); 499 500 /* We also need a freelist entry. */ 501 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 502 fl->offset = GV_DATA_START; 503 fl->size = d->avail; 504 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 505 d->freelist_entries = 1; 506 507 TAILQ_INIT(&d->bqueue); 508 509 /* Save it into the main configuration. */ 510 LIST_INSERT_HEAD(&sc->drives, d, drive); 511 } 512 513 /* 514 * Create a bio queue mutex and a worker thread, if necessary. 515 */ 516 if (mtx_initialized(&d->bqueue_mtx) == 0) 517 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 518 519 if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) { 520 kthread_create(gv_drive_worker, d, NULL, 0, 0, 521 "gv_d %s", d->name); 522 d->flags |= GV_DRIVE_THREAD_ACTIVE; 523 } 524 525 g_access(cp, -1, 0, 0); 526 527 gp->softc = d; 528 d->geom = gp; 529 d->vinumconf = sc; 530 strncpy(d->device, pp->name, GV_MAXDRIVENAME); 531 532 /* 533 * Find out which subdisks belong to this drive and crosslink 534 * them. 535 */ 536 LIST_FOREACH(s, &sc->subdisks, sd) { 537 if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME)) 538 /* XXX: errors ignored */ 539 gv_sd_to_drive(sc, d, s, errstr, 540 sizeof(errstr)); 541 } 542 543 /* This drive is now up for sure. */ 544 gv_set_drive_state(d, GV_DRIVE_UP, 0); 545 546 /* 547 * If there are subdisks on this drive, we need to create 548 * providers for them. 549 */ 550 if (d->sdcount) 551 gv_drive_modify(d); 552 553 return (gp); 554 555 } while (0); 556 557 g_topology_lock(); 558 g_access(cp, -1, 0, 0); 559 560 g_detach(cp); 561 g_destroy_consumer(cp); 562 g_destroy_geom(gp); 563 return (NULL); 564} 565 566/* 567 * Modify the providers for the given drive 'd'. It is assumed that the 568 * subdisk list of 'd' is already correctly set up. 
569 */ 570void 571gv_drive_modify(struct gv_drive *d) 572{ 573 struct g_geom *gp; 574 struct g_consumer *cp; 575 struct g_provider *pp, *pp2; 576 struct gv_sd *s; 577 578 KASSERT(d != NULL, ("gv_drive_modify: null d")); 579 gp = d->geom; 580 KASSERT(gp != NULL, ("gv_drive_modify: null gp")); 581 cp = LIST_FIRST(&gp->consumer); 582 KASSERT(cp != NULL, ("gv_drive_modify: null cp")); 583 pp = cp->provider; 584 KASSERT(pp != NULL, ("gv_drive_modify: null pp")); 585 586 g_topology_assert(); 587 588 LIST_FOREACH(s, &d->subdisks, from_drive) { 589 /* This subdisk already has a provider. */ 590 if (s->provider != NULL) 591 continue; 592 pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name); 593 pp2->mediasize = s->size; 594 pp2->sectorsize = pp->sectorsize; 595 g_error_provider(pp2, 0); 596 s->provider = pp2; 597 pp2->private = s; 598 } 599} 600 601static void 602gv_drive_dead(void *arg, int flag) 603{ 604 struct g_geom *gp; 605 struct g_consumer *cp; 606 struct gv_drive *d; 607 struct gv_sd *s; 608 609 g_topology_assert(); 610 KASSERT(arg != NULL, ("gv_drive_dead: NULL arg")); 611 612 if (flag == EV_CANCEL) 613 return; 614 615 d = arg; 616 if (d->state != GV_DRIVE_DOWN) 617 return; 618 619 g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name); 620 621 gp = d->geom; 622 if (gp == NULL) 623 return; 624 625 LIST_FOREACH(cp, &gp->consumer, consumer) { 626 if (cp->nstart != cp->nend) { 627 printf("GEOM_VINUM: dead drive '%s' has still " 628 "active requests, can't detach consumer\n", 629 d->name); 630 g_post_event(gv_drive_dead, d, M_WAITOK, d, 631 NULL); 632 return; 633 } 634 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 635 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 636 } 637 638 printf("GEOM_VINUM: lost drive '%s'\n", d->name); 639 d->geom = NULL; 640 LIST_FOREACH(s, &d->subdisks, from_drive) { 641 s->provider = NULL; 642 s->consumer = NULL; 643 } 644 gv_kill_drive_thread(d); 645 gp->softc = NULL; 646 g_wither_geom(gp, ENXIO); 647} 648 649static int 
650gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, 651 struct g_geom *gp) 652{ 653 struct gv_drive *d; 654 655 g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); 656 g_topology_assert(); 657 658 d = gp->softc; 659 gv_kill_drive_thread(d); 660 661 g_wither_geom(gp, ENXIO); 662 return (0); 663} 664 665#define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" 666 667static struct g_class g_vinum_drive_class = { 668 .name = VINUMDRIVE_CLASS_NAME, 669 .version = G_VERSION, 670 .taste = gv_drive_taste, 671 .destroy_geom = gv_drive_destroy_geom 672}; 673 674DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); 675