vdev_geom.c revision 338905
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

/*
 * One entry per vdev sharing a single g_consumer.  The list head is
 * stored directly in g_consumer.private (see the _Static_assert below).
 */
struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem)	elems;
	vdev_t	*vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
_Static_assert(sizeof(((struct g_consumer*)NULL)->private)
    == sizeof(struct consumer_priv_t*),
    "consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids. If NULL, this thread is not tasting geoms. If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

/*
 * Query the GEOM::rotation_rate attribute of the consumer's provider and
 * cache the result in vd->vdev_rotation_rate (VDEV_RATE_UNKNOWN if the
 * provider does not report one).
 */
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

/*
 * Refresh vd->vdev_physpath from the provider's GEOM::physpath attribute.
 * If the path changed (or was previously unset and do_null_update is set),
 * request an async config update so the new path is persisted.
 */
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
    boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
			    vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
	if (needs_update)
		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);

}

/*
 * GEOM attribute-change callback.  Propagates rotation-rate and physpath
 * changes to every vdev attached to this consumer.
 *
 * NOTE(review): locals old_physpath and error appear unused here — the
 * physpath work is delegated to vdev_geom_set_physpath(); confirm and
 * consider removing them.
 */
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	char *old_physpath;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;
	int error;

	priv = (struct consumer_priv_t*)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;
		if (strcmp(attr, "GEOM::rotation_rate") == 0) {
			vdev_geom_set_rotation_rate(vd, cp);
			return;
		}
		if (strcmp(attr, "GEOM::physpath") == 0) {
			vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE);
			return;
		}
	}
}

/*
 * GEOM orphan callback: the provider backing this consumer went away.
 * Flag every attached vdev for asynchronous removal rather than closing
 * here (see the lock-order comment below).
 */
static void
vdev_geom_orphan(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;

	g_topology_assert();

	priv = (struct consumer_priv_t*)&cp->private;
	if (SLIST_EMPTY(priv))
		/* Vdev close in progress.  Ignore the event. */
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;

		vd->vdev_remove_wanted = B_TRUE;
		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
	}
}

/*
 * Attach a consumer to the given provider, creating the shared
 * "zfs::vdev" geom on first use and reusing an existing consumer when we
 * are already connected to this provider.  With sanity set, reject
 * providers whose sectorsize or mediasize cannot back a vdev.  Returns
 * the consumer (with read+exclusive access granted) or NULL on failure.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (sanity) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible sectorsize %d\n",
			    pp->name, pp->sectorsize);
			return (NULL);
		} else if (pp->mediasize < SPA_MINDEVSIZE) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible mediasize %ju\n",
			    pp->name, pp->mediasize);
			return (NULL);
		}
	}

	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

/*
 * Drop our access to the consumer (when open_for_read) and destroy the
 * consumer — and its geom, when it was the last consumer — once all
 * access references have been released.
 */
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

/*
 * Close the vdev's consumer: remove this vdev from the consumer's
 * per-vdev list and detach.  Caller must hold the GEOM topology lock.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t*)&cp->private;
	vd->vdev_tsd = NULL;
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}

/*
 * Issue one or more bios to the vdev in parallel
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO
 * operation is described by parallel entries from each array. There may be
 * more bios actually issued than entries in the array
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	/* Largest transfer per bio: MAXPHYS rounded down to a sector multiple */
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands ? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof(struct bio*);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT((off % cp->provider->sectorsize) == 0);
		ASSERT((s % cp->provider->sectorsize) == 0);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT(j == n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		for (; off < end; off += maxio, s -= maxio, j++) {
			errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}

/*
 * Read the vdev config from a device. Return the number of valid labels that
 * were found. The vdev config will be returned in config if and only if at
 * least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
{
	struct g_provider *pp;
	nvlist_t *config;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* Round the label read size up to a whole number of sectors */
	size = sizeof(*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof(vdev_lists[0]->vp_nvlist);

	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT(offsets[l] % pp->sectorsize == 0);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	config = *configp = NULL;
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
			continue;

		/* Discard labels without a sane pool state */
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(config);
			continue;
		}

		/* Non-spare/cache labels must carry a nonzero txg */
		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(config);
			continue;
		}

		if (*configp != NULL)
			nvlist_free(*configp);
		*configp = config;

		nlabels++;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (nlabels);
}

/*
 * Grow the configs array so that index id is valid, preserving existing
 * entries.  No-op when id already fits.
 */
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

/*
 * Consider one label config (cfg) for inclusion in the per-top-level-vdev
 * configs array.  The config is kept (ownership transferred to the array)
 * only if it names the requested pool, matches the pool guid of the first
 * accepted label, and carries a newer txg than any config already stored
 * at its vdev id; otherwise it is freed.
 */
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	/* Keep only the newest config per vdev id */
	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

/*
 * Taste every GEOM provider in the system (excluding our own class) for
 * vdev labels belonging to the named pool, collecting the best config per
 * top-level vdev into *configs/*count.  Returns 0 if anything was found,
 * ENOENT otherwise.
 *
 * NOTE(review): local 'error' appears unused — confirm and consider
 * removing it.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error, nlabels;

	DROP_GIANT();
	g_topology_lock();

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
				if (zcp == NULL)
					continue;
				g_topology_unlock();
				nlabels = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach(zcp, B_TRUE);
				if (nlabels == 0)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

enum match {
	NO_MATCH = 0,		/* No matching labels found */
	TOPGUID_MATCH = 1,	/* Labels match top guid, not vdev guid*/
	ZERO_MATCH = 1,		/* Should never be returned */
	ONE_MATCH = 2,		/* 1 label matching the vdev_guid */
	TWO_MATCH = 3,		/* 2 label matching the vdev_guid */
	THREE_MATCH = 4,	/* 3 label matching the vdev_guid */
	FULL_MATCH = 5		/* all labels match the vdev_guid */
};

/*
 * Taste the given provider and grade how well its labels match the
 * vdev's pool/vdev/top guids (see enum match above).
 */
static enum match
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	nvlist_t *config;
	uint64_t pool_guid, top_guid, vdev_guid;
	struct g_consumer *cp;
	int nlabels;

	cp = vdev_geom_attach(pp, NULL, B_TRUE);
	if (cp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (NO_MATCH);
	}
	g_topology_unlock();
	nlabels = vdev_geom_read_config(cp, &config);
	g_topology_lock();
	vdev_geom_detach(cp, B_TRUE);
	if (nlabels == 0) {
		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
		return (NO_MATCH);
	}

	pool_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
	top_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
	vdev_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	nvlist_free(config);

	/*
	 * Check that the label's pool guid matches the desired guid.
	 * Inactive spares and L2ARCs do not have any pool guid in the label.
	 */
	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
		    pp->name,
		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
		return (NO_MATCH);
	}

	/*
	 * Check that the label's vdev guid matches the desired guid.
	 * The second condition handles possible race on vdev detach, when
	 * remaining vdev receives GUID of destroyed top level mirror vdev.
	 */
	if (vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", pp->name);
		return (ZERO_MATCH + nlabels);
	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
		return (TOPGUID_MATCH);
	}
	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
	return (NO_MATCH);
}

/*
 * Scan all providers outside our own class and attach to the one whose
 * labels best match this vdev's guids (stopping early on a FULL_MATCH).
 * Returns the attached consumer or NULL.
 */
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp, *best_pp;
	struct g_consumer *cp;
	enum match match, best_match;

	g_topology_assert();

	cp = NULL;
	best_pp = NULL;
	best_match = NO_MATCH;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				match = vdev_attach_ok(vd, pp);
				if (match > best_match) {
					best_match = match;
					best_pp = pp;
				}
				if (match == FULL_MATCH)
					goto out;
			}
		}
	}

out:
	if (best_pp) {
		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
		if (cp == NULL) {
			printf("ZFS WARNING: Unable to attach to %s.\n",
			    best_pp->name);
		}
	}
	return (cp);
}

/*
 * Open the vdev by searching all providers for matching guids, and on
 * success rewrite vd->vdev_path to the /dev path of the provider found.
 */
static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, cp->provider->name);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

/*
 * Open the vdev by its recorded /dev path, optionally verifying that the
 * provider's labels fully match this vdev's guids first.
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	/* Strip the leading "/dev/" from the stored path */
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH)
			cp = vdev_geom_attach(pp, vd, B_FALSE);
	}

	return (cp);
}

/*
 * vdev_ops open entry point: locate and open the GEOM provider backing
 * this vdev (by path, falling back to a guid search), acquire write
 * access when the pool is writable, and report size/ashift back to the
 * caller.
 *
 * NOTE(review): local 'bufsize' appears unused in this revision —
 * confirm and consider removing it.
 */
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/* Set the TLS to indicate downstack that we should not access zvols*/
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if ((cp = vd->vdev_tsd) != NULL) {
		ASSERT(vd->vdev_reopening);
		goto skip_open;
	}

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	    vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs. The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

	if (cp == NULL) {
		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
		error = ENOENT;
	} else {
		struct consumer_priv_t *priv;
		struct consumer_vdev_elem *elem;
		int spamode;

		priv = (struct consumer_priv_t*)&cp->private;
		if (cp->private == NULL)
			SLIST_INIT(priv);
		elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO);
		elem->vd = vd;
		SLIST_INSERT_HEAD(priv, elem, elems);

		spamode = spa_mode(vd->vdev_spa);
		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
		    !ISP2(cp->provider->sectorsize)) {
			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
			    cp->provider->name);

			vdev_geom_close_locked(vd);
			error = EINVAL;
			cp = NULL;
		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
			int i;

			/*
			 * Retry the write-access grant a few times; the
			 * provider may still be transitioning.
			 */
			for (i = 0; i < 5; i++) {
				error = g_access(cp, 0, 1, 0);
				if (error == 0)
					break;
				g_topology_unlock();
				tsleep(vd, 0, "vdev", hz / 2);
				g_topology_lock();
			}
			if (error != 0) {
				printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
				    cp->provider->name, error);
				vdev_geom_close_locked(vd);
				cp = NULL;
			}
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL) {
		vdev_geom_attrchanged(cp, "GEOM::physpath");

		/* Set other GEOM characteristics */
		vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE);
		vdev_geom_set_rotation_rate(vd, cp);
	}

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
skip_open:
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}

/*
 * vdev_ops close entry point.  Skips the close during a reopen unless
 * the consumer has been orphaned or its provider is in error.
 */
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;

	DROP_GIANT();
	g_topology_lock();

	if (!vd->vdev_reopening ||
	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
	    (cp->provider != NULL && cp->provider->error != 0))))
		vdev_geom_close_locked(vd);

	g_topology_unlock();
	PICKUP_GIANT();
}

/*
 * bio completion callback: translate the bio status into zio status,
 * latch "not supported" results for flush/trim, and kick off async
 * removal when the provider reports an error.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch(zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed. In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch(bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}

/*
 * vdev_ops io_start entry point: map the zio to a bio (read/write,
 * BIO_DELETE for frees, BIO_FLUSH for cache-flush ioctls) and submit it
 * to the consumer; honors the flush/delete disable tunables and the
 * latched nowritecache/notrim flags.
 */
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
	/* READ and WRITE fall through the switch above to here */
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

/* Completion hook: nothing to do; vdev_geom_io_intr handles all status. */
static void
vdev_geom_io_done(zio_t *zio)
{
}

/* GEOM vdevs need no hold/rele bookkeeping. */
static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};