/* vdev_geom.c revision 325913 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52struct consumer_vdev_elem { 53 SLIST_ENTRY(consumer_vdev_elem) elems; 54 vdev_t *vd; 55}; 56 57SLIST_HEAD(consumer_priv_t, consumer_vdev_elem); 58_Static_assert(sizeof(((struct g_consumer*)NULL)->private) 59 == sizeof(struct consumer_priv_t*), 60 "consumer_priv_t* can't be stored in g_consumer.private"); 61 62DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 63 64SYSCTL_DECL(_vfs_zfs_vdev); 65/* Don't send BIO_FLUSH. 
*/ 66static int vdev_geom_bio_flush_disable = 0; 67TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 68SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 69 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 70/* Don't send BIO_DELETE. */ 71static int vdev_geom_bio_delete_disable = 0; 72TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 73SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 74 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 75 76/* Declare local functions */ 77static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); 78 79/* 80 * Thread local storage used to indicate when a thread is probing geoms 81 * for their guids. If NULL, this thread is not tasting geoms. If non NULL, 82 * it is looking for a replacement for the vdev_t* that is its value. 83 */ 84uint_t zfs_geom_probe_vdev_key; 85 86static void 87vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 88{ 89 int error; 90 uint16_t rate; 91 92 error = g_getattr("GEOM::rotation_rate", cp, &rate); 93 if (error == 0) 94 vd->vdev_rotation_rate = rate; 95 else 96 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 97} 98 99static void 100vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp, 101 boolean_t do_null_update) 102{ 103 boolean_t needs_update = B_FALSE; 104 char *physpath; 105 int error, physpath_len; 106 107 physpath_len = MAXPATHLEN; 108 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 109 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 110 if (error == 0) { 111 char *old_physpath; 112 113 /* g_topology lock ensures that vdev has not been closed */ 114 g_topology_assert(); 115 old_physpath = vd->vdev_physpath; 116 vd->vdev_physpath = spa_strdup(physpath); 117 118 if (old_physpath != NULL) { 119 needs_update = (strcmp(old_physpath, 120 vd->vdev_physpath) != 0); 121 spa_strfree(old_physpath); 122 } else 123 needs_update = do_null_update; 124 } 
125 g_free(physpath); 126 127 /* 128 * If the physical path changed, update the config. 129 * Only request an update for previously unset physpaths if 130 * requested by the caller. 131 */ 132 if (needs_update) 133 spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); 134 135} 136 137static void 138vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 139{ 140 char *old_physpath; 141 struct consumer_priv_t *priv; 142 struct consumer_vdev_elem *elem; 143 int error; 144 145 priv = (struct consumer_priv_t*)&cp->private; 146 if (SLIST_EMPTY(priv)) 147 return; 148 149 SLIST_FOREACH(elem, priv, elems) { 150 vdev_t *vd = elem->vd; 151 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 152 vdev_geom_set_rotation_rate(vd, cp); 153 return; 154 } 155 if (strcmp(attr, "GEOM::physpath") == 0) { 156 vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE); 157 return; 158 } 159 } 160} 161 162static void 163vdev_geom_orphan(struct g_consumer *cp) 164{ 165 struct consumer_priv_t *priv; 166 struct consumer_vdev_elem *elem; 167 168 g_topology_assert(); 169 170 priv = (struct consumer_priv_t*)&cp->private; 171 if (SLIST_EMPTY(priv)) 172 /* Vdev close in progress. Ignore the event. */ 173 return; 174 175 /* 176 * Orphan callbacks occur from the GEOM event thread. 177 * Concurrent with this call, new I/O requests may be 178 * working their way through GEOM about to find out 179 * (only once executed by the g_down thread) that we've 180 * been orphaned from our disk provider. These I/Os 181 * must be retired before we can detach our consumer. 182 * This is most easily achieved by acquiring the 183 * SPA ZIO configuration lock as a writer, but doing 184 * so with the GEOM topology lock held would cause 185 * a lock order reversal. Instead, rely on the SPA's 186 * async removal support to invoke a close on this 187 * vdev once it is safe to do so. 
188 */ 189 SLIST_FOREACH(elem, priv, elems) { 190 vdev_t *vd = elem->vd; 191 192 vd->vdev_remove_wanted = B_TRUE; 193 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 194 } 195} 196 197static struct g_consumer * 198vdev_geom_attach(struct g_provider *pp, vdev_t *vd) 199{ 200 struct g_geom *gp; 201 struct g_consumer *cp; 202 int error; 203 204 g_topology_assert(); 205 206 ZFS_LOG(1, "Attaching to %s.", pp->name); 207 208 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { 209 ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n", 210 pp->name, pp->sectorsize); 211 return (NULL); 212 } else if (pp->mediasize < SPA_MINDEVSIZE) { 213 ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n", 214 pp->name, pp->mediasize); 215 return (NULL); 216 } 217 218 /* Do we have geom already? No? Create one. */ 219 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 220 if (gp->flags & G_GEOM_WITHER) 221 continue; 222 if (strcmp(gp->name, "zfs::vdev") != 0) 223 continue; 224 break; 225 } 226 if (gp == NULL) { 227 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 228 gp->orphan = vdev_geom_orphan; 229 gp->attrchanged = vdev_geom_attrchanged; 230 cp = g_new_consumer(gp); 231 error = g_attach(cp, pp); 232 if (error != 0) { 233 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, 234 __LINE__, error); 235 vdev_geom_detach(cp, B_FALSE); 236 return (NULL); 237 } 238 error = g_access(cp, 1, 0, 1); 239 if (error != 0) { 240 ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__, 241 __LINE__, error); 242 vdev_geom_detach(cp, B_FALSE); 243 return (NULL); 244 } 245 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 246 } else { 247 /* Check if we are already connected to this provider. 
*/ 248 LIST_FOREACH(cp, &gp->consumer, consumer) { 249 if (cp->provider == pp) { 250 ZFS_LOG(1, "Found consumer for %s.", pp->name); 251 break; 252 } 253 } 254 if (cp == NULL) { 255 cp = g_new_consumer(gp); 256 error = g_attach(cp, pp); 257 if (error != 0) { 258 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", 259 __func__, __LINE__, error); 260 vdev_geom_detach(cp, B_FALSE); 261 return (NULL); 262 } 263 error = g_access(cp, 1, 0, 1); 264 if (error != 0) { 265 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 266 __func__, __LINE__, error); 267 vdev_geom_detach(cp, B_FALSE); 268 return (NULL); 269 } 270 ZFS_LOG(1, "Created consumer for %s.", pp->name); 271 } else { 272 error = g_access(cp, 1, 0, 1); 273 if (error != 0) { 274 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 275 __func__, __LINE__, error); 276 return (NULL); 277 } 278 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 279 } 280 } 281 282 if (vd != NULL) 283 vd->vdev_tsd = cp; 284 285 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 286 return (cp); 287} 288 289static void 290vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) 291{ 292 struct g_geom *gp; 293 294 g_topology_assert(); 295 296 ZFS_LOG(1, "Detaching from %s.", 297 cp->provider && cp->provider->name ? cp->provider->name : "NULL"); 298 299 gp = cp->geom; 300 if (open_for_read) 301 g_access(cp, -1, 0, -1); 302 /* Destroy consumer on last close. */ 303 if (cp->acr == 0 && cp->ace == 0) { 304 if (cp->acw > 0) 305 g_access(cp, 0, -cp->acw, 0); 306 if (cp->provider != NULL) { 307 ZFS_LOG(1, "Destroying consumer for %s.", 308 cp->provider->name ? cp->provider->name : "NULL"); 309 g_detach(cp); 310 } 311 g_destroy_consumer(cp); 312 } 313 /* Destroy geom if there are no consumers left. 
*/ 314 if (LIST_EMPTY(&gp->consumer)) { 315 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 316 g_wither_geom(gp, ENXIO); 317 } 318} 319 320static void 321vdev_geom_close_locked(vdev_t *vd) 322{ 323 struct g_consumer *cp; 324 struct consumer_priv_t *priv; 325 struct consumer_vdev_elem *elem, *elem_temp; 326 327 g_topology_assert(); 328 329 cp = vd->vdev_tsd; 330 vd->vdev_delayed_close = B_FALSE; 331 if (cp == NULL) 332 return; 333 334 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 335 KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__)); 336 priv = (struct consumer_priv_t*)&cp->private; 337 vd->vdev_tsd = NULL; 338 SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) { 339 if (elem->vd == vd) { 340 SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems); 341 g_free(elem); 342 } 343 } 344 345 vdev_geom_detach(cp, B_TRUE); 346} 347 348/* 349 * Issue one or more bios to the vdev in parallel 350 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO 351 * operation is described by parallel entries from each array. There may be 352 * more bios actually issued than entries in the array 353 */ 354static void 355vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, 356 off_t *sizes, int *errors, int ncmds) 357{ 358 struct bio **bios; 359 u_char *p; 360 off_t off, maxio, s, end; 361 int i, n_bios, j; 362 size_t bios_size; 363 364 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 365 n_bios = 0; 366 367 /* How many bios are required for all commands ? 
*/ 368 for (i = 0; i < ncmds; i++) 369 n_bios += (sizes[i] + maxio - 1) / maxio; 370 371 /* Allocate memory for the bios */ 372 bios_size = n_bios * sizeof(struct bio*); 373 bios = kmem_zalloc(bios_size, KM_SLEEP); 374 375 /* Prepare and issue all of the bios */ 376 for (i = j = 0; i < ncmds; i++) { 377 off = offsets[i]; 378 p = datas[i]; 379 s = sizes[i]; 380 end = off + s; 381 ASSERT((off % cp->provider->sectorsize) == 0); 382 ASSERT((s % cp->provider->sectorsize) == 0); 383 384 for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { 385 bios[j] = g_alloc_bio(); 386 bios[j]->bio_cmd = cmds[i]; 387 bios[j]->bio_done = NULL; 388 bios[j]->bio_offset = off; 389 bios[j]->bio_length = MIN(s, maxio); 390 bios[j]->bio_data = p; 391 g_io_request(bios[j], cp); 392 } 393 } 394 ASSERT(j == n_bios); 395 396 /* Wait for all of the bios to complete, and clean them up */ 397 for (i = j = 0; i < ncmds; i++) { 398 off = offsets[i]; 399 s = sizes[i]; 400 end = off + s; 401 402 for (; off < end; off += maxio, s -= maxio, j++) { 403 errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i]; 404 g_destroy_bio(bios[j]); 405 } 406 } 407 kmem_free(bios, bios_size); 408} 409 410/* 411 * Read the vdev config from a device. Return the number of valid labels that 412 * were found. The vdev config will be returned in config if and only if at 413 * least one valid label was found. 
414 */ 415static int 416vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 417{ 418 struct g_provider *pp; 419 vdev_phys_t *vdev_lists[VDEV_LABELS]; 420 char *buf; 421 size_t buflen; 422 uint64_t psize, state, txg; 423 off_t offsets[VDEV_LABELS]; 424 off_t size; 425 off_t sizes[VDEV_LABELS]; 426 int cmds[VDEV_LABELS]; 427 int errors[VDEV_LABELS]; 428 int l, nlabels; 429 430 g_topology_assert_not(); 431 432 pp = cp->provider; 433 ZFS_LOG(1, "Reading config from %s...", pp->name); 434 435 psize = pp->mediasize; 436 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 437 438 size = sizeof(*vdev_lists[0]) + pp->sectorsize - 439 ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1; 440 441 buflen = sizeof(vdev_lists[0]->vp_nvlist); 442 443 *config = NULL; 444 /* Create all of the IO requests */ 445 for (l = 0; l < VDEV_LABELS; l++) { 446 cmds[l] = BIO_READ; 447 vdev_lists[l] = kmem_alloc(size, KM_SLEEP); 448 offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; 449 sizes[l] = size; 450 errors[l] = 0; 451 ASSERT(offsets[l] % pp->sectorsize == 0); 452 } 453 454 /* Issue the IO requests */ 455 vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors, 456 VDEV_LABELS); 457 458 /* Parse the labels */ 459 nlabels = 0; 460 for (l = 0; l < VDEV_LABELS; l++) { 461 if (errors[l] != 0) 462 continue; 463 464 buf = vdev_lists[l]->vp_nvlist; 465 466 if (nvlist_unpack(buf, buflen, config, 0) != 0) 467 continue; 468 469 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 470 &state) != 0 || state > POOL_STATE_L2CACHE) { 471 nvlist_free(*config); 472 *config = NULL; 473 continue; 474 } 475 476 if (state != POOL_STATE_SPARE && 477 state != POOL_STATE_L2CACHE && 478 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 479 &txg) != 0 || txg == 0)) { 480 nvlist_free(*config); 481 *config = NULL; 482 continue; 483 } 484 485 nlabels++; 486 } 487 488 /* Free the label storage */ 489 for (l = 0; l < VDEV_LABELS; l++) 490 kmem_free(vdev_lists[l], size); 
491 492 return (nlabels); 493} 494 495static void 496resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 497{ 498 nvlist_t **new_configs; 499 uint64_t i; 500 501 if (id < *count) 502 return; 503 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 504 KM_SLEEP); 505 for (i = 0; i < *count; i++) 506 new_configs[i] = (*configs)[i]; 507 if (*configs != NULL) 508 kmem_free(*configs, *count * sizeof(void *)); 509 *configs = new_configs; 510 *count = id + 1; 511} 512 513static void 514process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 515 const char *name, uint64_t* known_pool_guid) 516{ 517 nvlist_t *vdev_tree; 518 uint64_t pool_guid; 519 uint64_t vdev_guid, known_guid; 520 uint64_t id, txg, known_txg; 521 char *pname; 522 int i; 523 524 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 525 strcmp(pname, name) != 0) 526 goto ignore; 527 528 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 529 goto ignore; 530 531 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 532 goto ignore; 533 534 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 535 goto ignore; 536 537 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 538 goto ignore; 539 540 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 541 542 if (*known_pool_guid != 0) { 543 if (pool_guid != *known_pool_guid) 544 goto ignore; 545 } else 546 *known_pool_guid = pool_guid; 547 548 resize_configs(configs, count, id); 549 550 if ((*configs)[id] != NULL) { 551 VERIFY(nvlist_lookup_uint64((*configs)[id], 552 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 553 if (txg <= known_txg) 554 goto ignore; 555 nvlist_free((*configs)[id]); 556 } 557 558 (*configs)[id] = cfg; 559 return; 560 561ignore: 562 nvlist_free(cfg); 563} 564 565int 566vdev_geom_read_pool_label(const char *name, 567 nvlist_t ***configs, uint64_t *count) 568{ 569 struct g_class *mp; 570 struct g_geom *gp; 571 struct 
g_provider *pp; 572 struct g_consumer *zcp; 573 nvlist_t *vdev_cfg; 574 uint64_t pool_guid; 575 int error, nlabels; 576 577 DROP_GIANT(); 578 g_topology_lock(); 579 580 *configs = NULL; 581 *count = 0; 582 pool_guid = 0; 583 LIST_FOREACH(mp, &g_classes, class) { 584 if (mp == &zfs_vdev_class) 585 continue; 586 LIST_FOREACH(gp, &mp->geom, geom) { 587 if (gp->flags & G_GEOM_WITHER) 588 continue; 589 LIST_FOREACH(pp, &gp->provider, provider) { 590 if (pp->flags & G_PF_WITHER) 591 continue; 592 zcp = vdev_geom_attach(pp, NULL); 593 if (zcp == NULL) 594 continue; 595 g_topology_unlock(); 596 nlabels = vdev_geom_read_config(zcp, &vdev_cfg); 597 g_topology_lock(); 598 vdev_geom_detach(zcp, B_TRUE); 599 if (nlabels == 0) 600 continue; 601 ZFS_LOG(1, "successfully read vdev config"); 602 603 process_vdev_config(configs, count, 604 vdev_cfg, name, &pool_guid); 605 } 606 } 607 } 608 g_topology_unlock(); 609 PICKUP_GIANT(); 610 611 return (*count > 0 ? 0 : ENOENT); 612} 613 614enum match { 615 NO_MATCH = 0, /* No matching labels found */ 616 TOPGUID_MATCH = 1, /* Labels match top guid, not vdev guid*/ 617 ZERO_MATCH = 1, /* Should never be returned */ 618 ONE_MATCH = 2, /* 1 label matching the vdev_guid */ 619 TWO_MATCH = 3, /* 2 label matching the vdev_guid */ 620 THREE_MATCH = 4, /* 3 label matching the vdev_guid */ 621 FULL_MATCH = 5 /* all labels match the vdev_guid */ 622}; 623 624static enum match 625vdev_attach_ok(vdev_t *vd, struct g_provider *pp) 626{ 627 nvlist_t *config; 628 uint64_t pool_guid, top_guid, vdev_guid; 629 struct g_consumer *cp; 630 int nlabels; 631 632 cp = vdev_geom_attach(pp, NULL); 633 if (cp == NULL) { 634 ZFS_LOG(1, "Unable to attach tasting instance to %s.", 635 pp->name); 636 return (NO_MATCH); 637 } 638 g_topology_unlock(); 639 nlabels = vdev_geom_read_config(cp, &config); 640 if (nlabels == 0) { 641 g_topology_lock(); 642 vdev_geom_detach(cp, B_TRUE); 643 ZFS_LOG(1, "Unable to read config from %s.", pp->name); 644 return (NO_MATCH); 645 } 646 
g_topology_lock(); 647 vdev_geom_detach(cp, B_TRUE); 648 649 pool_guid = 0; 650 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); 651 top_guid = 0; 652 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); 653 vdev_guid = 0; 654 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 655 nvlist_free(config); 656 657 /* 658 * Check that the label's pool guid matches the desired guid. 659 * Inactive spares and L2ARCs do not have any pool guid in the label. 660 */ 661 if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { 662 ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", 663 pp->name, 664 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); 665 return (NO_MATCH); 666 } 667 668 /* 669 * Check that the label's vdev guid matches the desired guid. 670 * The second condition handles possible race on vdev detach, when 671 * remaining vdev receives GUID of destroyed top level mirror vdev. 672 */ 673 if (vdev_guid == vd->vdev_guid) { 674 ZFS_LOG(1, "guids match for provider %s.", pp->name); 675 return (ZERO_MATCH + nlabels); 676 } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { 677 ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); 678 return (TOPGUID_MATCH); 679 } 680 ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", 681 pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); 682 return (NO_MATCH); 683} 684 685static struct g_consumer * 686vdev_geom_attach_by_guids(vdev_t *vd) 687{ 688 struct g_class *mp; 689 struct g_geom *gp; 690 struct g_provider *pp, *best_pp; 691 struct g_consumer *cp; 692 enum match match, best_match; 693 694 g_topology_assert(); 695 696 cp = NULL; 697 best_pp = NULL; 698 best_match = NO_MATCH; 699 LIST_FOREACH(mp, &g_classes, class) { 700 if (mp == &zfs_vdev_class) 701 continue; 702 LIST_FOREACH(gp, &mp->geom, geom) { 703 if (gp->flags & G_GEOM_WITHER) 704 continue; 705 LIST_FOREACH(pp, &gp->provider, provider) { 706 match = 
vdev_attach_ok(vd, pp); 707 if (match > best_match) { 708 best_match = match; 709 best_pp = pp; 710 } 711 if (match == FULL_MATCH) 712 goto out; 713 } 714 } 715 } 716 717out: 718 if (best_pp) { 719 cp = vdev_geom_attach(best_pp, vd); 720 if (cp == NULL) { 721 printf("ZFS WARNING: Unable to attach to %s.\n", 722 best_pp->name); 723 } 724 } 725 return (cp); 726} 727 728static struct g_consumer * 729vdev_geom_open_by_guids(vdev_t *vd) 730{ 731 struct g_consumer *cp; 732 char *buf; 733 size_t len; 734 735 g_topology_assert(); 736 737 ZFS_LOG(1, "Searching by guids [%ju:%ju].", 738 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 739 cp = vdev_geom_attach_by_guids(vd); 740 if (cp != NULL) { 741 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 742 buf = kmem_alloc(len, KM_SLEEP); 743 744 snprintf(buf, len, "/dev/%s", cp->provider->name); 745 spa_strfree(vd->vdev_path); 746 vd->vdev_path = buf; 747 748 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 749 (uintmax_t)spa_guid(vd->vdev_spa), 750 (uintmax_t)vd->vdev_guid, vd->vdev_path); 751 } else { 752 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 753 (uintmax_t)spa_guid(vd->vdev_spa), 754 (uintmax_t)vd->vdev_guid); 755 } 756 757 return (cp); 758} 759 760static struct g_consumer * 761vdev_geom_open_by_path(vdev_t *vd, int check_guid) 762{ 763 struct g_provider *pp; 764 struct g_consumer *cp; 765 766 g_topology_assert(); 767 768 cp = NULL; 769 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 770 if (pp != NULL) { 771 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 772 if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) 773 cp = vdev_geom_attach(pp, vd); 774 } 775 776 return (cp); 777} 778 779static int 780vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 781 uint64_t *logical_ashift, uint64_t *physical_ashift) 782{ 783 struct g_provider *pp; 784 struct g_consumer *cp; 785 size_t bufsize; 786 int error; 787 788 /* Set the TLS to indicate downstack 
that we should not access zvols*/ 789 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 790 791 /* 792 * We must have a pathname, and it must be absolute. 793 */ 794 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 795 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 796 return (EINVAL); 797 } 798 799 /* 800 * Reopen the device if it's not currently open. Otherwise, 801 * just update the physical size of the device. 802 */ 803 if ((cp = vd->vdev_tsd) != NULL) { 804 ASSERT(vd->vdev_reopening); 805 goto skip_open; 806 } 807 808 DROP_GIANT(); 809 g_topology_lock(); 810 error = 0; 811 812 if (vd->vdev_spa->spa_splitting_newspa || 813 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 814 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 815 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 816 /* 817 * We are dealing with a vdev that hasn't been previously 818 * opened (since boot), and we are not loading an 819 * existing pool configuration. This looks like a 820 * vdev add operation to a new or existing pool. 821 * Assume the user knows what he/she is doing and find 822 * GEOM provider by its name, ignoring GUID mismatches. 823 * 824 * XXPOLICY: It would be safer to only allow a device 825 * that is unlabeled or labeled but missing 826 * GUID information to be opened in this fashion, 827 * unless we are doing a split, in which case we 828 * should allow any guid. 829 */ 830 cp = vdev_geom_open_by_path(vd, 0); 831 } else { 832 /* 833 * Try using the recorded path for this device, but only 834 * accept it if its label data contains the expected GUIDs. 835 */ 836 cp = vdev_geom_open_by_path(vd, 1); 837 if (cp == NULL) { 838 /* 839 * The device at vd->vdev_path doesn't have the 840 * expected GUIDs. The disks might have merely 841 * moved around so try all other GEOM providers 842 * to find one with the right GUIDs. 
843 */ 844 cp = vdev_geom_open_by_guids(vd); 845 } 846 } 847 848 /* Clear the TLS now that tasting is done */ 849 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); 850 851 if (cp == NULL) { 852 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 853 error = ENOENT; 854 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 855 !ISP2(cp->provider->sectorsize)) { 856 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 857 vd->vdev_path); 858 859 vdev_geom_close_locked(vd); 860 error = EINVAL; 861 cp = NULL; 862 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 863 int i; 864 865 for (i = 0; i < 5; i++) { 866 error = g_access(cp, 0, 1, 0); 867 if (error == 0) 868 break; 869 g_topology_unlock(); 870 tsleep(vd, 0, "vdev", hz / 2); 871 g_topology_lock(); 872 } 873 if (error != 0) { 874 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 875 vd->vdev_path, error); 876 vdev_geom_close_locked(vd); 877 cp = NULL; 878 } 879 } 880 if (cp != NULL) { 881 struct consumer_priv_t *priv; 882 struct consumer_vdev_elem *elem; 883 884 priv = (struct consumer_priv_t*)&cp->private; 885 if (cp->private == NULL) 886 SLIST_INIT(priv); 887 elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO); 888 elem->vd = vd; 889 SLIST_INSERT_HEAD(priv, elem, elems); 890 } 891 892 /* Fetch initial physical path information for this device. */ 893 if (cp != NULL) { 894 vdev_geom_attrchanged(cp, "GEOM::physpath"); 895 896 /* Set other GEOM characteristics */ 897 vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE); 898 vdev_geom_set_rotation_rate(vd, cp); 899 } 900 901 g_topology_unlock(); 902 PICKUP_GIANT(); 903 if (cp == NULL) { 904 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 905 return (error); 906 } 907skip_open: 908 pp = cp->provider; 909 910 /* 911 * Determine the actual size of the device. 912 */ 913 *max_psize = *psize = pp->mediasize; 914 915 /* 916 * Determine the device's minimum transfer size and preferred 917 * transfer size. 
918 */ 919 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 920 *physical_ashift = 0; 921 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 922 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 923 *physical_ashift = highbit(pp->stripesize) - 1; 924 925 /* 926 * Clear the nowritecache settings, so that on a vdev_reopen() 927 * we will try again. 928 */ 929 vd->vdev_nowritecache = B_FALSE; 930 931 return (0); 932} 933 934static void 935vdev_geom_close(vdev_t *vd) 936{ 937 struct g_consumer *cp; 938 939 cp = vd->vdev_tsd; 940 941 DROP_GIANT(); 942 g_topology_lock(); 943 944 if (!vd->vdev_reopening || 945 (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 || 946 (cp->provider != NULL && cp->provider->error != 0)))) 947 vdev_geom_close_locked(vd); 948 949 g_topology_unlock(); 950 PICKUP_GIANT(); 951} 952 953static void 954vdev_geom_io_intr(struct bio *bp) 955{ 956 vdev_t *vd; 957 zio_t *zio; 958 959 zio = bp->bio_caller1; 960 vd = zio->io_vd; 961 zio->io_error = bp->bio_error; 962 if (zio->io_error == 0 && bp->bio_resid != 0) 963 zio->io_error = SET_ERROR(EIO); 964 965 switch(zio->io_error) { 966 case ENOTSUP: 967 /* 968 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 969 * that future attempts will never succeed. In this case 970 * we set a persistent flag so that we don't bother with 971 * requests in the future. 972 */ 973 switch(bp->bio_cmd) { 974 case BIO_FLUSH: 975 vd->vdev_nowritecache = B_TRUE; 976 break; 977 case BIO_DELETE: 978 vd->vdev_notrim = B_TRUE; 979 break; 980 } 981 break; 982 case ENXIO: 983 if (!vd->vdev_remove_wanted) { 984 /* 985 * If provider's error is set we assume it is being 986 * removed. 
987 */ 988 if (bp->bio_to->error != 0) { 989 vd->vdev_remove_wanted = B_TRUE; 990 spa_async_request(zio->io_spa, 991 SPA_ASYNC_REMOVE); 992 } else if (!vd->vdev_delayed_close) { 993 vd->vdev_delayed_close = B_TRUE; 994 } 995 } 996 break; 997 } 998 g_destroy_bio(bp); 999 zio_delay_interrupt(zio); 1000} 1001 1002static void 1003vdev_geom_io_start(zio_t *zio) 1004{ 1005 vdev_t *vd; 1006 struct g_consumer *cp; 1007 struct bio *bp; 1008 int error; 1009 1010 vd = zio->io_vd; 1011 1012 switch (zio->io_type) { 1013 case ZIO_TYPE_IOCTL: 1014 /* XXPOLICY */ 1015 if (!vdev_readable(vd)) { 1016 zio->io_error = SET_ERROR(ENXIO); 1017 zio_interrupt(zio); 1018 return; 1019 } else { 1020 switch (zio->io_cmd) { 1021 case DKIOCFLUSHWRITECACHE: 1022 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 1023 break; 1024 if (vd->vdev_nowritecache) { 1025 zio->io_error = SET_ERROR(ENOTSUP); 1026 break; 1027 } 1028 goto sendreq; 1029 default: 1030 zio->io_error = SET_ERROR(ENOTSUP); 1031 } 1032 } 1033 1034 zio_execute(zio); 1035 return; 1036 case ZIO_TYPE_FREE: 1037 if (vd->vdev_notrim) { 1038 zio->io_error = SET_ERROR(ENOTSUP); 1039 } else if (!vdev_geom_bio_delete_disable) { 1040 goto sendreq; 1041 } 1042 zio_execute(zio); 1043 return; 1044 } 1045sendreq: 1046 ASSERT(zio->io_type == ZIO_TYPE_READ || 1047 zio->io_type == ZIO_TYPE_WRITE || 1048 zio->io_type == ZIO_TYPE_FREE || 1049 zio->io_type == ZIO_TYPE_IOCTL); 1050 1051 cp = vd->vdev_tsd; 1052 if (cp == NULL) { 1053 zio->io_error = SET_ERROR(ENXIO); 1054 zio_interrupt(zio); 1055 return; 1056 } 1057 bp = g_alloc_bio(); 1058 bp->bio_caller1 = zio; 1059 switch (zio->io_type) { 1060 case ZIO_TYPE_READ: 1061 case ZIO_TYPE_WRITE: 1062 zio->io_target_timestamp = zio_handle_io_delay(zio); 1063 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? 
BIO_READ : BIO_WRITE; 1064 bp->bio_data = zio->io_data; 1065 bp->bio_offset = zio->io_offset; 1066 bp->bio_length = zio->io_size; 1067 break; 1068 case ZIO_TYPE_FREE: 1069 bp->bio_cmd = BIO_DELETE; 1070 bp->bio_data = NULL; 1071 bp->bio_offset = zio->io_offset; 1072 bp->bio_length = zio->io_size; 1073 break; 1074 case ZIO_TYPE_IOCTL: 1075 bp->bio_cmd = BIO_FLUSH; 1076 bp->bio_flags |= BIO_ORDERED; 1077 bp->bio_data = NULL; 1078 bp->bio_offset = cp->provider->mediasize; 1079 bp->bio_length = 0; 1080 break; 1081 } 1082 bp->bio_done = vdev_geom_io_intr; 1083 1084 g_io_request(bp, cp); 1085} 1086 1087static void 1088vdev_geom_io_done(zio_t *zio) 1089{ 1090} 1091 1092static void 1093vdev_geom_hold(vdev_t *vd) 1094{ 1095} 1096 1097static void 1098vdev_geom_rele(vdev_t *vd) 1099{ 1100} 1101 1102vdev_ops_t vdev_geom_ops = { 1103 vdev_geom_open, 1104 vdev_geom_close, 1105 vdev_default_asize, 1106 vdev_geom_io_start, 1107 vdev_geom_io_done, 1108 NULL, 1109 vdev_geom_hold, 1110 vdev_geom_rele, 1111 VDEV_TYPE_DISK, /* name of this vdev type */ 1112 B_TRUE /* leaf vdev */ 1113}; 1114