/* vdev_geom.c — FreeBSD revision 339034 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

/*
 * List element linking one vdev to a shared GEOM consumer.  Several vdevs
 * may share a single consumer; the list head lives in g_consumer.private.
 */
struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem) elems;
	vdev_t *vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
/* The list head is stored directly in the g_consumer.private pointer slot. */
_Static_assert(sizeof(((struct g_consumer*)NULL)->private)
    == sizeof(struct consumer_priv_t*),
    "consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

/*
 * Cache the provider's GEOM::rotation_rate attribute in the vdev, falling
 * back to VDEV_RATE_UNKNOWN when the provider does not report one.
 */
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

/*
 * Refresh vd->vdev_physpath from the provider's GEOM::physpath attribute.
 * If the path changed (or was previously unset and do_null_update is set),
 * request an asynchronous pool config update.
 */
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
    boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
			    vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
127 * Only request an update for previously unset physpaths if 128 * requested by the caller. 129 */ 130 if (needs_update) 131 spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); 132 133} 134 135static void 136vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 137{ 138 char *old_physpath; 139 struct consumer_priv_t *priv; 140 struct consumer_vdev_elem *elem; 141 int error; 142 143 priv = (struct consumer_priv_t*)&cp->private; 144 if (SLIST_EMPTY(priv)) 145 return; 146 147 SLIST_FOREACH(elem, priv, elems) { 148 vdev_t *vd = elem->vd; 149 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 150 vdev_geom_set_rotation_rate(vd, cp); 151 return; 152 } 153 if (strcmp(attr, "GEOM::physpath") == 0) { 154 vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE); 155 return; 156 } 157 } 158} 159 160static void 161vdev_geom_orphan(struct g_consumer *cp) 162{ 163 struct consumer_priv_t *priv; 164 struct consumer_vdev_elem *elem; 165 166 g_topology_assert(); 167 168 priv = (struct consumer_priv_t*)&cp->private; 169 if (SLIST_EMPTY(priv)) 170 /* Vdev close in progress. Ignore the event. */ 171 return; 172 173 /* 174 * Orphan callbacks occur from the GEOM event thread. 175 * Concurrent with this call, new I/O requests may be 176 * working their way through GEOM about to find out 177 * (only once executed by the g_down thread) that we've 178 * been orphaned from our disk provider. These I/Os 179 * must be retired before we can detach our consumer. 180 * This is most easily achieved by acquiring the 181 * SPA ZIO configuration lock as a writer, but doing 182 * so with the GEOM topology lock held would cause 183 * a lock order reversal. Instead, rely on the SPA's 184 * async removal support to invoke a close on this 185 * vdev once it is safe to do so. 
	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;

		vd->vdev_remove_wanted = B_TRUE;
		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
	}
}

/*
 * Attach a new consumer to provider 'pp', reusing the shared "zfs::vdev"
 * geom (creating it on first use) and reusing an existing consumer when one
 * is already connected to 'pp'.  When 'sanity' is set, reject providers
 * with incompatible sector or media sizes.  Returns NULL on failure.
 * Caller must hold the GEOM topology lock.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (sanity) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible sectorsize %d\n",
			    pp->name, pp->sectorsize);
			return (NULL);
		} else if (pp->mediasize < SPA_MINDEVSIZE) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible mediasize %ju\n",
			    pp->name, pp->mediasize);
			return (NULL);
		}
	}

	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		/* Open for read and exclusive (r1w0e1). */
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			/* Take an extra read+exclusive reference. */
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

/*
 * Drop one read+exclusive reference on 'cp' (when open_for_read) and, on
 * the last close, detach and destroy the consumer — and its geom when no
 * consumers remain.  Caller must hold the GEOM topology lock.
 */
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

/*
 * Close the consumer backing 'vd': unlink this vdev from the consumer's
 * private vdev list and drop our reference.  Caller must hold the GEOM
 * topology lock.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t*)&cp->private;
	vd->vdev_tsd = NULL;
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}

/*
 * Issue one or more bios to the vdev in parallel
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
 * operation is described by parallel entries from each array.  There may be
 * more bios actually issued than entries in the array
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	/* Largest transfer per bio: MAXPHYS rounded down to a sector. */
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands ? */
	/* Ceiling division: each command needs sizes[i]/maxio bios. */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof(struct bio*);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT((off % cp->provider->sectorsize) == 0);
		ASSERT((s % cp->provider->sectorsize) == 0);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT(j == n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		/* The first bio error (if any) is sticky for this command. */
		for (; off < end; off += maxio, s -= maxio, j++) {
			errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}

/*
 * Read the vdev config from a device.  Return the number of valid labels that
 * were found.  The vdev config will be returned in config if and only if at
 * least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
{
	struct g_provider *pp;
	nvlist_t *config;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* Round the vdev_phys_t size up to a whole number of sectors. */
	size = sizeof(*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof(vdev_lists[0]->vp_nvlist);

	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT(offsets[l] % pp->sectorsize == 0);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	config = *configp = NULL;
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(config);
			continue;
		}

		/* Active labels (not spare/L2ARC) must carry a nonzero txg. */
		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(config);
			continue;
		}

		/* Keep the last valid label's config. */
		if (*configp != NULL)
			nvlist_free(*configp);
		*configp = config;

		nlabels++;
	}

	/* Free the label storage */
for (l = 0; l < VDEV_LABELS; l++) 495 kmem_free(vdev_lists[l], size); 496 497 return (nlabels); 498} 499 500static void 501resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 502{ 503 nvlist_t **new_configs; 504 uint64_t i; 505 506 if (id < *count) 507 return; 508 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 509 KM_SLEEP); 510 for (i = 0; i < *count; i++) 511 new_configs[i] = (*configs)[i]; 512 if (*configs != NULL) 513 kmem_free(*configs, *count * sizeof(void *)); 514 *configs = new_configs; 515 *count = id + 1; 516} 517 518static void 519process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 520 const char *name, uint64_t* known_pool_guid) 521{ 522 nvlist_t *vdev_tree; 523 uint64_t pool_guid; 524 uint64_t vdev_guid, known_guid; 525 uint64_t id, txg, known_txg; 526 char *pname; 527 int i; 528 529 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 530 strcmp(pname, name) != 0) 531 goto ignore; 532 533 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 534 goto ignore; 535 536 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 537 goto ignore; 538 539 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 540 goto ignore; 541 542 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 543 goto ignore; 544 545 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 546 547 if (*known_pool_guid != 0) { 548 if (pool_guid != *known_pool_guid) 549 goto ignore; 550 } else 551 *known_pool_guid = pool_guid; 552 553 resize_configs(configs, count, id); 554 555 if ((*configs)[id] != NULL) { 556 VERIFY(nvlist_lookup_uint64((*configs)[id], 557 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 558 if (txg <= known_txg) 559 goto ignore; 560 nvlist_free((*configs)[id]); 561 } 562 563 (*configs)[id] = cfg; 564 return; 565 566ignore: 567 nvlist_free(cfg); 568} 569 570int 571vdev_geom_read_pool_label(const char *name, 572 nvlist_t ***configs, uint64_t 
*count) 573{ 574 struct g_class *mp; 575 struct g_geom *gp; 576 struct g_provider *pp; 577 struct g_consumer *zcp; 578 nvlist_t *vdev_cfg; 579 uint64_t pool_guid; 580 int error, nlabels; 581 582 DROP_GIANT(); 583 g_topology_lock(); 584 585 *configs = NULL; 586 *count = 0; 587 pool_guid = 0; 588 LIST_FOREACH(mp, &g_classes, class) { 589 if (mp == &zfs_vdev_class) 590 continue; 591 LIST_FOREACH(gp, &mp->geom, geom) { 592 if (gp->flags & G_GEOM_WITHER) 593 continue; 594 LIST_FOREACH(pp, &gp->provider, provider) { 595 if (pp->flags & G_PF_WITHER) 596 continue; 597 zcp = vdev_geom_attach(pp, NULL, B_TRUE); 598 if (zcp == NULL) 599 continue; 600 g_topology_unlock(); 601 nlabels = vdev_geom_read_config(zcp, &vdev_cfg); 602 g_topology_lock(); 603 vdev_geom_detach(zcp, B_TRUE); 604 if (nlabels == 0) 605 continue; 606 ZFS_LOG(1, "successfully read vdev config"); 607 608 process_vdev_config(configs, count, 609 vdev_cfg, name, &pool_guid); 610 } 611 } 612 } 613 g_topology_unlock(); 614 PICKUP_GIANT(); 615 616 return (*count > 0 ? 
0 : ENOENT); 617} 618 619enum match { 620 NO_MATCH = 0, /* No matching labels found */ 621 TOPGUID_MATCH = 1, /* Labels match top guid, not vdev guid*/ 622 ZERO_MATCH = 1, /* Should never be returned */ 623 ONE_MATCH = 2, /* 1 label matching the vdev_guid */ 624 TWO_MATCH = 3, /* 2 label matching the vdev_guid */ 625 THREE_MATCH = 4, /* 3 label matching the vdev_guid */ 626 FULL_MATCH = 5 /* all labels match the vdev_guid */ 627}; 628 629static enum match 630vdev_attach_ok(vdev_t *vd, struct g_provider *pp) 631{ 632 nvlist_t *config; 633 uint64_t pool_guid, top_guid, vdev_guid; 634 struct g_consumer *cp; 635 int nlabels; 636 637 cp = vdev_geom_attach(pp, NULL, B_TRUE); 638 if (cp == NULL) { 639 ZFS_LOG(1, "Unable to attach tasting instance to %s.", 640 pp->name); 641 return (NO_MATCH); 642 } 643 g_topology_unlock(); 644 nlabels = vdev_geom_read_config(cp, &config); 645 g_topology_lock(); 646 vdev_geom_detach(cp, B_TRUE); 647 if (nlabels == 0) { 648 ZFS_LOG(1, "Unable to read config from %s.", pp->name); 649 return (NO_MATCH); 650 } 651 652 pool_guid = 0; 653 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); 654 top_guid = 0; 655 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); 656 vdev_guid = 0; 657 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 658 nvlist_free(config); 659 660 /* 661 * Check that the label's pool guid matches the desired guid. 662 * Inactive spares and L2ARCs do not have any pool guid in the label. 663 */ 664 if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { 665 ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", 666 pp->name, 667 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); 668 return (NO_MATCH); 669 } 670 671 /* 672 * Check that the label's vdev guid matches the desired guid. 673 * The second condition handles possible race on vdev detach, when 674 * remaining vdev receives GUID of destroyed top level mirror vdev. 
675 */ 676 if (vdev_guid == vd->vdev_guid) { 677 ZFS_LOG(1, "guids match for provider %s.", pp->name); 678 return (ZERO_MATCH + nlabels); 679 } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { 680 ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); 681 return (TOPGUID_MATCH); 682 } 683 ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", 684 pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); 685 return (NO_MATCH); 686} 687 688static struct g_consumer * 689vdev_geom_attach_by_guids(vdev_t *vd) 690{ 691 struct g_class *mp; 692 struct g_geom *gp; 693 struct g_provider *pp, *best_pp; 694 struct g_consumer *cp; 695 enum match match, best_match; 696 697 g_topology_assert(); 698 699 cp = NULL; 700 best_pp = NULL; 701 best_match = NO_MATCH; 702 LIST_FOREACH(mp, &g_classes, class) { 703 if (mp == &zfs_vdev_class) 704 continue; 705 LIST_FOREACH(gp, &mp->geom, geom) { 706 if (gp->flags & G_GEOM_WITHER) 707 continue; 708 LIST_FOREACH(pp, &gp->provider, provider) { 709 match = vdev_attach_ok(vd, pp); 710 if (match > best_match) { 711 best_match = match; 712 best_pp = pp; 713 } 714 if (match == FULL_MATCH) 715 goto out; 716 } 717 } 718 } 719 720out: 721 if (best_pp) { 722 cp = vdev_geom_attach(best_pp, vd, B_TRUE); 723 if (cp == NULL) { 724 printf("ZFS WARNING: Unable to attach to %s.\n", 725 best_pp->name); 726 } 727 } 728 return (cp); 729} 730 731static struct g_consumer * 732vdev_geom_open_by_guids(vdev_t *vd) 733{ 734 struct g_consumer *cp; 735 char *buf; 736 size_t len; 737 738 g_topology_assert(); 739 740 ZFS_LOG(1, "Searching by guids [%ju:%ju].", 741 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 742 cp = vdev_geom_attach_by_guids(vd); 743 if (cp != NULL) { 744 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 745 buf = kmem_alloc(len, KM_SLEEP); 746 747 snprintf(buf, len, "/dev/%s", cp->provider->name); 748 spa_strfree(vd->vdev_path); 749 vd->vdev_path = buf; 750 751 ZFS_LOG(1, "Attach by guid [%ju:%ju] 
succeeded, provider %s.", 752 (uintmax_t)spa_guid(vd->vdev_spa), 753 (uintmax_t)vd->vdev_guid, cp->provider->name); 754 } else { 755 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 756 (uintmax_t)spa_guid(vd->vdev_spa), 757 (uintmax_t)vd->vdev_guid); 758 } 759 760 return (cp); 761} 762 763static struct g_consumer * 764vdev_geom_open_by_path(vdev_t *vd, int check_guid) 765{ 766 struct g_provider *pp; 767 struct g_consumer *cp; 768 769 g_topology_assert(); 770 771 cp = NULL; 772 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 773 if (pp != NULL) { 774 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 775 if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) 776 cp = vdev_geom_attach(pp, vd, B_FALSE); 777 } 778 779 return (cp); 780} 781 782static int 783vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 784 uint64_t *logical_ashift, uint64_t *physical_ashift) 785{ 786 struct g_provider *pp; 787 struct g_consumer *cp; 788 size_t bufsize; 789 int error; 790 791 /* Set the TLS to indicate downstack that we should not access zvols*/ 792 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 793 794 /* 795 * We must have a pathname, and it must be absolute. 796 */ 797 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 798 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 799 return (EINVAL); 800 } 801 802 /* 803 * Reopen the device if it's not currently open. Otherwise, 804 * just update the physical size of the device. 805 */ 806 if ((cp = vd->vdev_tsd) != NULL) { 807 ASSERT(vd->vdev_reopening); 808 goto skip_open; 809 } 810 811 DROP_GIANT(); 812 g_topology_lock(); 813 error = 0; 814 815 if (vd->vdev_spa->spa_splitting_newspa || 816 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 817 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 818 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 819 /* 820 * We are dealing with a vdev that hasn't been previously 821 * opened (since boot), and we are not loading an 822 * existing pool configuration. 
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs.  The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

	if (cp == NULL) {
		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
		error = ENOENT;
	} else {
		struct consumer_priv_t *priv;
		struct consumer_vdev_elem *elem;
		int spamode;

		/* Link this vdev onto the consumer's private vdev list. */
		priv = (struct consumer_priv_t*)&cp->private;
		if (cp->private == NULL)
			SLIST_INIT(priv);
		elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO);
		elem->vd = vd;
		SLIST_INSERT_HEAD(priv, elem, elems);

		spamode = spa_mode(vd->vdev_spa);
		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
		    !ISP2(cp->provider->sectorsize)) {
			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
			    cp->provider->name);

			vdev_geom_close_locked(vd);
			error = EINVAL;
			cp = NULL;
		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
			int i;

			/*
			 * Retry the write-open a few times; another ZFS
			 * consumer may still be releasing the device.
			 */
			for (i = 0; i < 5; i++) {
				error = g_access(cp, 0, 1, 0);
				if (error == 0)
					break;
				g_topology_unlock();
				tsleep(vd, 0, "vdev", hz / 2);
				g_topology_lock();
			}
			if (error != 0) {
				printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
				    cp->provider->name, error);
				vdev_geom_close_locked(vd);
				cp = NULL;
			}
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL) {
		vdev_geom_attrchanged(cp, "GEOM::physpath");

		/* Set other GEOM characteristics */
		vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE);
		vdev_geom_set_rotation_rate(vd, cp);
	}

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		vdev_dbgmsg(vd, "vdev_geom_open: failed to open [error=%d]",
		    error);
		return (error);
	}
skip_open:
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}

/*
 * vdev_ops_t close routine.  During a reopen the consumer is kept unless
 * it has been orphaned or its provider has errored out.
 */
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;

	DROP_GIANT();
	g_topology_lock();

	if (!vd->vdev_reopening ||
	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
	    (cp->provider != NULL && cp->provider->error != 0))))
		vdev_geom_close_locked(vd);

	g_topology_unlock();
	PICKUP_GIANT();
}

/*
 * bio completion callback: translate the bio's status into the owning
 * zio and schedule its interrupt-stage processing.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	/* A short transfer with no explicit error is still a failure. */
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch(zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch(bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}

	/*
	 * We have to split bio freeing into two parts, because the ABD code
	 * cannot be called in this context and vdev_op_io_done is not called
	 * for ZIO_TYPE_IOCTL zio-s.
	 */
1011 */ 1012 if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) { 1013 g_destroy_bio(bp); 1014 zio->io_bio = NULL; 1015 } 1016 zio_delay_interrupt(zio); 1017} 1018 1019static void 1020vdev_geom_io_start(zio_t *zio) 1021{ 1022 vdev_t *vd; 1023 struct g_consumer *cp; 1024 struct bio *bp; 1025 int error; 1026 1027 vd = zio->io_vd; 1028 1029 switch (zio->io_type) { 1030 case ZIO_TYPE_IOCTL: 1031 /* XXPOLICY */ 1032 if (!vdev_readable(vd)) { 1033 zio->io_error = SET_ERROR(ENXIO); 1034 zio_interrupt(zio); 1035 return; 1036 } else { 1037 switch (zio->io_cmd) { 1038 case DKIOCFLUSHWRITECACHE: 1039 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 1040 break; 1041 if (vd->vdev_nowritecache) { 1042 zio->io_error = SET_ERROR(ENOTSUP); 1043 break; 1044 } 1045 goto sendreq; 1046 default: 1047 zio->io_error = SET_ERROR(ENOTSUP); 1048 } 1049 } 1050 1051 zio_execute(zio); 1052 return; 1053 case ZIO_TYPE_FREE: 1054 if (vd->vdev_notrim) { 1055 zio->io_error = SET_ERROR(ENOTSUP); 1056 } else if (!vdev_geom_bio_delete_disable) { 1057 goto sendreq; 1058 } 1059 zio_execute(zio); 1060 return; 1061 } 1062sendreq: 1063 ASSERT(zio->io_type == ZIO_TYPE_READ || 1064 zio->io_type == ZIO_TYPE_WRITE || 1065 zio->io_type == ZIO_TYPE_FREE || 1066 zio->io_type == ZIO_TYPE_IOCTL); 1067 1068 cp = vd->vdev_tsd; 1069 if (cp == NULL) { 1070 zio->io_error = SET_ERROR(ENXIO); 1071 zio_interrupt(zio); 1072 return; 1073 } 1074 bp = g_alloc_bio(); 1075 bp->bio_caller1 = zio; 1076 switch (zio->io_type) { 1077 case ZIO_TYPE_READ: 1078 case ZIO_TYPE_WRITE: 1079 zio->io_target_timestamp = zio_handle_io_delay(zio); 1080 bp->bio_offset = zio->io_offset; 1081 bp->bio_length = zio->io_size; 1082 if (zio->io_type == ZIO_TYPE_READ) { 1083 bp->bio_cmd = BIO_READ; 1084 bp->bio_data = 1085 abd_borrow_buf(zio->io_abd, zio->io_size); 1086 } else { 1087 bp->bio_cmd = BIO_WRITE; 1088 bp->bio_data = 1089 abd_borrow_buf_copy(zio->io_abd, zio->io_size); 1090 } 1091 break; 1092 case ZIO_TYPE_FREE: 1093 
bp->bio_cmd = BIO_DELETE; 1094 bp->bio_data = NULL; 1095 bp->bio_offset = zio->io_offset; 1096 bp->bio_length = zio->io_size; 1097 break; 1098 case ZIO_TYPE_IOCTL: 1099 bp->bio_cmd = BIO_FLUSH; 1100 bp->bio_flags |= BIO_ORDERED; 1101 bp->bio_data = NULL; 1102 bp->bio_offset = cp->provider->mediasize; 1103 bp->bio_length = 0; 1104 break; 1105 } 1106 bp->bio_done = vdev_geom_io_intr; 1107 zio->io_bio = bp; 1108 1109 g_io_request(bp, cp); 1110} 1111 1112static void 1113vdev_geom_io_done(zio_t *zio) 1114{ 1115 struct bio *bp = zio->io_bio; 1116 1117 if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) { 1118 ASSERT(bp == NULL); 1119 return; 1120 } 1121 1122 if (bp == NULL) { 1123 ASSERT3S(zio->io_error, ==, ENXIO); 1124 return; 1125 } 1126 1127 if (zio->io_type == ZIO_TYPE_READ) 1128 abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size); 1129 else 1130 abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size); 1131 1132 g_destroy_bio(bp); 1133 zio->io_bio = NULL; 1134} 1135 1136static void 1137vdev_geom_hold(vdev_t *vd) 1138{ 1139} 1140 1141static void 1142vdev_geom_rele(vdev_t *vd) 1143{ 1144} 1145 1146vdev_ops_t vdev_geom_ops = { 1147 vdev_geom_open, 1148 vdev_geom_close, 1149 vdev_default_asize, 1150 vdev_geom_io_start, 1151 vdev_geom_io_done, 1152 NULL, 1153 NULL, 1154 vdev_geom_hold, 1155 vdev_geom_rele, 1156 NULL, 1157 VDEV_TYPE_DISK, /* name of this vdev type */ 1158 B_TRUE /* leaf vdev */ 1159}; 1160