/* vdev_geom.c — FreeBSD SVN revision 330524 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

/*
 * One element per vdev sharing a GEOM consumer.  Several vdevs (e.g. a
 * spare that is also active in a pool) may reference the same consumer,
 * so the consumer's private pointer holds an SLIST of these.
 */
struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem)	elems;
	vdev_t	*vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
/*
 * The SLIST head is stored directly in g_consumer.private (see the
 * casts of &cp->private below), so the two must have identical size.
 */
_Static_assert(sizeof(((struct g_consumer*)NULL)->private)
    == sizeof(struct consumer_priv_t*),
    "consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids. If NULL, this thread is not tasting geoms. If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

/*
 * Query the provider's reported rotation rate and cache it on the vdev;
 * falls back to VDEV_RATE_UNKNOWN when the attribute is unavailable.
 */
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

/*
 * Refresh vd->vdev_physpath from the provider's GEOM::physpath attribute.
 * If the path changed — or was previously unset and do_null_update is
 * true — request an async config update so the new path is persisted.
 * Called with the GEOM topology lock held.
 */
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
		       boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
			    vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
	if (needs_update)
		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);

}

/*
 * GEOM attrchanged callback: propagate rotation-rate and physical-path
 * attribute changes to the vdev(s) sharing this consumer.
 *
 * NOTE(review): old_physpath and error are declared but never used in
 * this function — candidates for removal.  Also, the SLIST_FOREACH
 * returns after updating only the first element; presumably acceptable
 * because the interesting attributes are per-consumer — confirm.
 */
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	char *old_physpath;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;
	int error;

	priv = (struct consumer_priv_t*)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;
		if (strcmp(attr, "GEOM::rotation_rate") == 0) {
			vdev_geom_set_rotation_rate(vd, cp);
			return;
		}
		if (strcmp(attr, "GEOM::physpath") == 0) {
			vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE);
			return;
		}
	}
}

/*
 * GEOM orphan callback: the provider behind this consumer has gone away.
 * Mark every associated vdev for asynchronous removal rather than
 * detaching here (see the lock-order explanation below).
 */
static void
vdev_geom_orphan(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;

	g_topology_assert();

	priv = (struct consumer_priv_t*)&cp->private;
	if (SLIST_EMPTY(priv))
		/* Vdev close in progress.  Ignore the event. */
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;

		vd->vdev_remove_wanted = B_TRUE;
		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
	}
}

/*
 * Create (or reuse) a consumer for the given provider and open it for
 * reading (g_access(cp, 1, 0, 1)).  With sanity set, reject providers
 * whose sector size or media size cannot back a vdev.  Returns NULL on
 * failure.  If vd is non-NULL, its vdev_tsd is pointed at the consumer.
 * Called with the GEOM topology lock held.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (sanity) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible sectorsize %d\n",
			    pp->name, pp->sectorsize);
			return (NULL);
		} else if (pp->mediasize < SPA_MINDEVSIZE) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible mediasize %ju\n",
			    pp->name, pp->mediasize);
			return (NULL);
		}
	}

	/* Do we have geom already?  No?  Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			/* Existing consumer: just add another read/excl ref. */
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

/*
 * Drop one read/exclusive reference on the consumer (when open_for_read)
 * and, on last close, release any write refs, detach from the provider
 * and destroy the consumer.  The containing geom is withered once its
 * consumer list is empty.  Called with the GEOM topology lock held.
 */
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

/*
 * Close the vdev's consumer: remove this vdev from the consumer's
 * private list and drop our reference via vdev_geom_detach().
 * Called with the GEOM topology lock held.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t*)&cp->private;
	vd->vdev_tsd = NULL;
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}

/*
 * Issue one or more bios to the vdev in parallel
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
 * operation is described by parallel entries from each array.  There may be
 * more bios actually issued than entries in the array
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	/* Largest transfer per bio, rounded down to a sector multiple. */
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands ? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof(struct bio*);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT((off % cp->provider->sectorsize) == 0);
		ASSERT((s % cp->provider->sectorsize) == 0);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT(j == n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		for (; off < end; off += maxio, s -= maxio, j++) {
			/* First error per command wins; later bios still
			 * must be waited on and freed. */
			errors[i] = biowait(bios[j], "vdev_geom_io") ||
			    errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}

/*
 * Read the vdev config from a device.  Return the number of valid labels that
 * were found.  The vdev config will be returned in config if and only if at
 * least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* sizeof(vdev_phys_t) rounded up to a sector-size multiple. */
	size = sizeof(*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof(vdev_lists[0]->vp_nvlist);

	*config = NULL;
	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT(offsets[l] % pp->sectorsize == 0);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		/* Reject labels without a plausible pool state. */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		/* Non-spare/non-cache labels must carry a non-zero txg. */
		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		nlabels++;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (nlabels);
}

/*
 * Grow the configs array (indexed by top-level vdev id) so that index
 * id is valid, preserving existing entries.  No-op if already big enough.
 */
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

/*
 * Merge one tasted label config into the per-top-level-vdev configs
 * array for the named pool.  Keeps only configs matching the pool name
 * and (once established) pool guid, and for each vdev id keeps the
 * config with the highest txg.  Consumes cfg (frees it when ignored,
 * stores it otherwise).
 */
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	/* Prefer the config with the newest txg for this vdev id. */
	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

/*
 * Taste every non-ZFS GEOM provider in the system for labels of the
 * named pool and collect the best config per top-level vdev.  Returns 0
 * if at least one config was found, ENOENT otherwise.  The caller owns
 * the returned configs array and its nvlists.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error, nlabels;

	DROP_GIANT();
	g_topology_lock();

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		/* Skip our own class: never taste our own consumers. */
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
				if (zcp == NULL)
					continue;
				/* Label I/O must not hold the topology lock. */
				g_topology_unlock();
				nlabels = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach(zcp, B_TRUE);
				if (nlabels == 0)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

enum match {
	NO_MATCH = 0,		/* No matching labels found */
	TOPGUID_MATCH = 1,	/* Labels match top guid, not vdev guid*/
	ZERO_MATCH = 1,		/* Should never be returned */
	ONE_MATCH = 2,		/* 1 label matching the vdev_guid */
	TWO_MATCH = 3,		/* 2 label matching the vdev_guid */
	THREE_MATCH = 4,	/* 3 label matching the vdev_guid */
	FULL_MATCH = 5		/* all labels match the vdev_guid */
};

/*
 * Taste the given provider and grade how well its labels match the
 * desired vdev: pool guid must match (when present), then vdev guid
 * match quality is scaled by the number of readable labels.  Called
 * with the topology lock held; temporarily drops it for label I/O.
 */
static enum match
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	nvlist_t *config;
	uint64_t pool_guid, top_guid, vdev_guid;
	struct g_consumer *cp;
	int nlabels;

	cp = vdev_geom_attach(pp, NULL, B_TRUE);
	if (cp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (NO_MATCH);
	}
	g_topology_unlock();
	nlabels = vdev_geom_read_config(cp, &config);
	g_topology_lock();
	vdev_geom_detach(cp, B_TRUE);
	if (nlabels == 0) {
		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
		return (NO_MATCH);
	}

	pool_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
	top_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
	vdev_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	nvlist_free(config);

	/*
	 * Check that the label's pool guid matches the desired guid.
	 * Inactive spares and L2ARCs do not have any pool guid in the label.
	 */
	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
		    pp->name,
		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
		return (NO_MATCH);
	}

	/*
	 * Check that the label's vdev guid matches the desired guid.
	 * The second condition handles possible race on vdev detach, when
	 * remaining vdev receives GUID of destroyed top level mirror vdev.
	 */
	if (vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", pp->name);
		return (ZERO_MATCH + nlabels);
	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
		return (TOPGUID_MATCH);
	}
	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
	return (NO_MATCH);
}

/*
 * Scan all non-ZFS providers and attach to the one whose labels best
 * match the vdev's guids (stopping early on a FULL_MATCH).  Returns the
 * opened consumer, or NULL when no provider matched or attach failed.
 */
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp, *best_pp;
	struct g_consumer *cp;
	enum match match, best_match;

	g_topology_assert();

	cp = NULL;
	best_pp = NULL;
	best_match = NO_MATCH;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				match = vdev_attach_ok(vd, pp);
				if (match > best_match) {
					best_match = match;
					best_pp = pp;
				}
				if (match == FULL_MATCH)
					goto out;
			}
		}
	}

out:
	if (best_pp) {
		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
		if (cp == NULL) {
			printf("ZFS WARNING: Unable to attach to %s.\n",
			    best_pp->name);
		}
	}
	return (cp);
}

/*
 * Open a vdev by searching all providers for matching label guids,
 * then rewrite vd->vdev_path to the /dev path of whatever provider was
 * actually found.  Returns the consumer or NULL.
 */
static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, cp->provider->name);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

/*
 * Open a vdev by its recorded /dev path.  With check_guid set, the
 * provider is only accepted when all labels match the vdev guid.
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	/* Strip the "/dev/" prefix from the recorded path. */
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH)
			cp = vdev_geom_attach(pp, vd, B_FALSE);
	}

	return (cp);
}

/*
 * vdev_ops open entry point: locate and open the GEOM provider backing
 * this vdev (by path, falling back to guid search), acquire write
 * access if the pool is writable, and report size/ashift parameters.
 * Returns 0 on success or an errno value.
 */
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/* Set the TLS to indicate downstack that we should not access zvols*/
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/*
	 * Reopen the device if it's not currently open.  Otherwise,
	 * just update the physical size of the device.
	 */
	if ((cp = vd->vdev_tsd) != NULL) {
		ASSERT(vd->vdev_reopening);
		goto skip_open;
	}

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	/*
	 * NOTE(review): && binds tighter than ||, so this reads as
	 * splitting || (prevstate UNKNOWN && load NONE) || load CREATE.
	 * Presumably the intent is UNKNOWN && (NONE || CREATE) — confirm
	 * against upstream before changing.
	 */
	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	     vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs.  The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

	if (cp == NULL) {
		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
		error = ENOENT;
	} else {
		struct consumer_priv_t *priv;
		struct consumer_vdev_elem *elem;
		int spamode;

		/* Register this vdev on the consumer's private list. */
		priv = (struct consumer_priv_t*)&cp->private;
		if (cp->private == NULL)
			SLIST_INIT(priv);
		elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO);
		elem->vd = vd;
		SLIST_INSERT_HEAD(priv, elem, elems);

		spamode = spa_mode(vd->vdev_spa);
		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
		    !ISP2(cp->provider->sectorsize)) {
			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
			    cp->provider->name);

			vdev_geom_close_locked(vd);
			error = EINVAL;
			cp = NULL;
		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
			int i;

			/*
			 * Retry acquiring write access a few times; the
			 * provider may still be transiently held open
			 * for writing by someone else.
			 */
			for (i = 0; i < 5; i++) {
				error = g_access(cp, 0, 1, 0);
				if (error == 0)
					break;
				g_topology_unlock();
				tsleep(vd, 0, "vdev", hz / 2);
				g_topology_lock();
			}
			if (error != 0) {
				printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
				    cp->provider->name, error);
				vdev_geom_close_locked(vd);
				cp = NULL;
			}
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL) {
		vdev_geom_attrchanged(cp, "GEOM::physpath");

		/* Set other GEOM characteristics */
		vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE);
		vdev_geom_set_rotation_rate(vd, cp);
	}

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
skip_open:
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}

/*
 * vdev_ops close entry point.  Skips the close on a reopen unless the
 * consumer has been orphaned or its provider has an error, in which
 * case a real close/reattach is required.
 */
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;

	DROP_GIANT();
	g_topology_lock();

	if (!vd->vdev_reopening ||
	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
	    (cp->provider != NULL && cp->provider->error != 0))))
		vdev_geom_close_locked(vd);

	g_topology_unlock();
	PICKUP_GIANT();
}

/*
 * bio completion callback: translate the bio status onto the zio,
 * latch "never again" flags on ENOTSUP for flush/trim, and kick off
 * async removal or delayed close on ENXIO.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	/* A short transfer with no error code is still a failure. */
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch(zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch(bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}

/*
 * vdev_ops I/O start entry point: translate a zio into a GEOM bio
 * (READ/WRITE/DELETE/FLUSH) and submit it.  IOCTL and FREE requests
 * that cannot or should not be issued are completed inline.
 */
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
	/* READ and WRITE fall through the switch to here. */
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ?
		    BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

/* vdev_ops I/O done entry point — nothing to do for GEOM vdevs. */
static void
vdev_geom_io_done(zio_t *zio)
{
}

/* vdev_ops hold entry point — no-op for GEOM vdevs. */
static void
vdev_geom_hold(vdev_t *vd)
{
}

/* vdev_ops rele entry point — no-op for GEOM vdevs. */
static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,			/* presumably vdev_op_state_change — confirm */
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};