vdev_geom.c revision 308590
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable = 0; 57TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 58SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 59 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 60/* Don't send BIO_DELETE. */ 61static int vdev_geom_bio_delete_disable = 0; 62TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 63SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 64 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 65 66/* Declare local functions */ 67static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); 68 69/* 70 * Thread local storage used to indicate when a thread is probing geoms 71 * for their guids. If NULL, this thread is not tasting geoms. If non NULL, 72 * it is looking for a replacement for the vdev_t* that is its value. 73 */ 74uint_t zfs_geom_probe_vdev_key; 75 76static void 77vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 78{ 79 int error; 80 uint16_t rate; 81 82 error = g_getattr("GEOM::rotation_rate", cp, &rate); 83 if (error == 0) 84 vd->vdev_rotation_rate = rate; 85 else 86 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 87} 88 89static void 90vdev_geom_set_physpath(struct g_consumer *cp, boolean_t do_null_update) 91{ 92 boolean_t needs_update = B_FALSE; 93 vdev_t *vd; 94 char *physpath; 95 int error, physpath_len; 96 97 if (g_access(cp, 1, 0, 0) != 0) 98 return; 99 100 vd = cp->private; 101 physpath_len = MAXPATHLEN; 102 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 103 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 104 g_access(cp, -1, 0, 0); 105 if (error == 0) { 106 char *old_physpath; 107 108 /* g_topology lock ensures that vdev has not been closed */ 109 g_topology_assert(); 110 old_physpath = vd->vdev_physpath; 111 vd->vdev_physpath = spa_strdup(physpath); 112 113 if (old_physpath != NULL) { 114 needs_update = (strcmp(old_physpath, 115 vd->vdev_physpath) != 0); 116 spa_strfree(old_physpath); 117 } else 118 needs_update = do_null_update; 119 } 120 g_free(physpath); 121 122 /* 123 * If the physical path changed, update the config. 124 * Only request an update for previously unset physpaths if 125 * requested by the caller. 126 */ 127 if (needs_update) 128 spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); 129 130} 131 132static void 133vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 134{ 135 vdev_t *vd; 136 char *old_physpath; 137 int error; 138 139 vd = cp->private; 140 if (vd == NULL) 141 return; 142 143 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 144 vdev_geom_set_rotation_rate(vd, cp); 145 return; 146 } 147 148 if (strcmp(attr, "GEOM::physpath") == 0) { 149 vdev_geom_set_physpath(cp, /*do_null_update*/B_TRUE); 150 return; 151 } 152} 153 154static void 155vdev_geom_orphan(struct g_consumer *cp) 156{ 157 vdev_t *vd; 158 159 g_topology_assert(); 160 161 vd = cp->private; 162 if (vd == NULL) { 163 /* Vdev close in progress. Ignore the event. */ 164 return; 165 } 166 167 /* 168 * Orphan callbacks occur from the GEOM event thread. 169 * Concurrent with this call, new I/O requests may be 170 * working their way through GEOM about to find out 171 * (only once executed by the g_down thread) that we've 172 * been orphaned from our disk provider. These I/Os 173 * must be retired before we can detach our consumer. 174 * This is most easily achieved by acquiring the 175 * SPA ZIO configuration lock as a writer, but doing 176 * so with the GEOM topology lock held would cause 177 * a lock order reversal. Instead, rely on the SPA's 178 * async removal support to invoke a close on this 179 * vdev once it is safe to do so. 180 */ 181 vd->vdev_remove_wanted = B_TRUE; 182 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 183} 184 185static struct g_consumer * 186vdev_geom_attach(struct g_provider *pp, vdev_t *vd) 187{ 188 struct g_geom *gp; 189 struct g_consumer *cp; 190 int error; 191 192 g_topology_assert(); 193 194 ZFS_LOG(1, "Attaching to %s.", pp->name); 195 196 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { 197 ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n", 198 pp->name, pp->sectorsize); 199 return (NULL); 200 } else if (pp->mediasize < SPA_MINDEVSIZE) { 201 ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n", 202 pp->name, pp->mediasize); 203 return (NULL); 204 } 205 206 /* Do we have geom already? No? Create one. */ 207 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 208 if (gp->flags & G_GEOM_WITHER) 209 continue; 210 if (strcmp(gp->name, "zfs::vdev") != 0) 211 continue; 212 break; 213 } 214 if (gp == NULL) { 215 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 216 gp->orphan = vdev_geom_orphan; 217 gp->attrchanged = vdev_geom_attrchanged; 218 cp = g_new_consumer(gp); 219 error = g_attach(cp, pp); 220 if (error != 0) { 221 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, 222 __LINE__, error); 223 vdev_geom_detach(cp, B_FALSE); 224 return (NULL); 225 } 226 error = g_access(cp, 1, 0, 1); 227 if (error != 0) { 228 ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__, 229 __LINE__, error); 230 vdev_geom_detach(cp, B_FALSE); 231 return (NULL); 232 } 233 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 234 } else { 235 /* Check if we are already connected to this provider. */ 236 LIST_FOREACH(cp, &gp->consumer, consumer) { 237 if (cp->provider == pp) { 238 ZFS_LOG(1, "Found consumer for %s.", pp->name); 239 break; 240 } 241 } 242 if (cp == NULL) { 243 cp = g_new_consumer(gp); 244 error = g_attach(cp, pp); 245 if (error != 0) { 246 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", 247 __func__, __LINE__, error); 248 vdev_geom_detach(cp, B_FALSE); 249 return (NULL); 250 } 251 error = g_access(cp, 1, 0, 1); 252 if (error != 0) { 253 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 254 __func__, __LINE__, error); 255 vdev_geom_detach(cp, B_FALSE); 256 return (NULL); 257 } 258 ZFS_LOG(1, "Created consumer for %s.", pp->name); 259 } else { 260 error = g_access(cp, 1, 0, 1); 261 if (error != 0) { 262 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 263 __func__, __LINE__, error); 264 return (NULL); 265 } 266 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 267 } 268 } 269 270 /* 271 * BUG: cp may already belong to a vdev. This could happen if: 272 * 1) That vdev is a shared spare, or 273 * 2) We are trying to reopen a missing vdev and we are scanning by 274 * guid. In that case, we'll ultimately fail to open this consumer, 275 * but not until after setting the private field. 276 * The solution is to: 277 * 1) Don't set the private field until after the open succeeds, and 278 * 2) Set it to a linked list of vdevs, not just a single vdev 279 */ 280 cp->private = vd; 281 if (vd != NULL) { 282 vd->vdev_tsd = cp; 283 vdev_geom_set_physpath(cp, /*do_null_update*/B_FALSE); 284 } 285 286 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 287 return (cp); 288} 289 290static void 291vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) 292{ 293 struct g_geom *gp; 294 vdev_t *vd; 295 296 g_topology_assert(); 297 298 ZFS_LOG(1, "Detaching from %s.", 299 cp->provider && cp->provider->name ? cp->provider->name : "NULL"); 300 301 vd = cp->private; 302 cp->private = NULL; 303 304 gp = cp->geom; 305 if (open_for_read) 306 g_access(cp, -1, 0, -1); 307 /* Destroy consumer on last close. */ 308 if (cp->acr == 0 && cp->ace == 0) { 309 if (cp->acw > 0) 310 g_access(cp, 0, -cp->acw, 0); 311 if (cp->provider != NULL) { 312 ZFS_LOG(1, "Destroying consumer for %s.", 313 cp->provider->name ? cp->provider->name : "NULL"); 314 g_detach(cp); 315 } 316 g_destroy_consumer(cp); 317 } 318 /* Destroy geom if there are no consumers left. */ 319 if (LIST_EMPTY(&gp->consumer)) { 320 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 321 g_wither_geom(gp, ENXIO); 322 } 323} 324 325static void 326vdev_geom_close_locked(vdev_t *vd) 327{ 328 struct g_consumer *cp; 329 330 g_topology_assert(); 331 332 cp = vd->vdev_tsd; 333 vd->vdev_tsd = NULL; 334 vd->vdev_delayed_close = B_FALSE; 335 if (cp == NULL) 336 return; 337 338 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 339 340 vdev_geom_detach(cp, B_TRUE); 341} 342 343/* 344 * Issue one or more bios to the vdev in parallel 345 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO 346 * operation is described by parallel entries from each array. There may be 347 * more bios actually issued than entries in the array 348 */ 349static void 350vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, 351 off_t *sizes, int *errors, int ncmds) 352{ 353 struct bio **bios; 354 u_char *p; 355 off_t off, maxio, s, end; 356 int i, n_bios, j; 357 size_t bios_size; 358 359 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 360 n_bios = 0; 361 362 /* How many bios are required for all commands ? */ 363 for (i = 0; i < ncmds; i++) 364 n_bios += (sizes[i] + maxio - 1) / maxio; 365 366 /* Allocate memory for the bios */ 367 bios_size = n_bios * sizeof(struct bio*); 368 bios = kmem_zalloc(bios_size, KM_SLEEP); 369 370 /* Prepare and issue all of the bios */ 371 for (i = j = 0; i < ncmds; i++) { 372 off = offsets[i]; 373 p = datas[i]; 374 s = sizes[i]; 375 end = off + s; 376 ASSERT((off % cp->provider->sectorsize) == 0); 377 ASSERT((s % cp->provider->sectorsize) == 0); 378 379 for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { 380 bios[j] = g_alloc_bio(); 381 bios[j]->bio_cmd = cmds[i]; 382 bios[j]->bio_done = NULL; 383 bios[j]->bio_offset = off; 384 bios[j]->bio_length = MIN(s, maxio); 385 bios[j]->bio_data = p; 386 g_io_request(bios[j], cp); 387 } 388 } 389 ASSERT(j == n_bios); 390 391 /* Wait for all of the bios to complete, and clean them up */ 392 for (i = j = 0; i < ncmds; i++) { 393 off = offsets[i]; 394 s = sizes[i]; 395 end = off + s; 396 397 for (; off < end; off += maxio, s -= maxio, j++) { 398 errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i]; 399 g_destroy_bio(bios[j]); 400 } 401 } 402 kmem_free(bios, bios_size); 403} 404 405static int 406vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 407{ 408 struct g_provider *pp; 409 vdev_phys_t *vdev_lists[VDEV_LABELS]; 410 char *p, *buf; 411 size_t buflen; 412 uint64_t psize, state, txg; 413 off_t offsets[VDEV_LABELS]; 414 off_t size; 415 off_t sizes[VDEV_LABELS]; 416 int cmds[VDEV_LABELS]; 417 int errors[VDEV_LABELS]; 418 int l, len; 419 420 g_topology_assert_not(); 421 422 pp = cp->provider; 423 ZFS_LOG(1, "Reading config from %s...", pp->name); 424 425 psize = pp->mediasize; 426 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 427 428 size = sizeof(*vdev_lists[0]) + pp->sectorsize - 429 ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1; 430 431 buflen = sizeof(vdev_lists[0]->vp_nvlist); 432 433 *config = NULL; 434 /* Create all of the IO requests */ 435 for (l = 0; l < VDEV_LABELS; l++) { 436 cmds[l] = BIO_READ; 437 vdev_lists[l] = kmem_alloc(size, KM_SLEEP); 438 offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; 439 sizes[l] = size; 440 errors[l] = 0; 441 ASSERT(offsets[l] % pp->sectorsize == 0); 442 } 443 444 /* Issue the IO requests */ 445 vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors, 446 VDEV_LABELS); 447 448 /* Parse the labels */ 449 for (l = 0; l < VDEV_LABELS; l++) { 450 if (errors[l] != 0) 451 continue; 452 453 buf = vdev_lists[l]->vp_nvlist; 454 455 if (nvlist_unpack(buf, buflen, config, 0) != 0) 456 continue; 457 458 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 459 &state) != 0 || state > POOL_STATE_L2CACHE) { 460 nvlist_free(*config); 461 *config = NULL; 462 continue; 463 } 464 465 if (state != POOL_STATE_SPARE && 466 state != POOL_STATE_L2CACHE && 467 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 468 &txg) != 0 || txg == 0)) { 469 nvlist_free(*config); 470 *config = NULL; 471 continue; 472 } 473 474 break; 475 } 476 477 /* Free the label storage */ 478 for (l = 0; l < VDEV_LABELS; l++) 479 kmem_free(vdev_lists[l], size); 480 481 return (*config == NULL ? ENOENT : 0); 482} 483 484static void 485resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 486{ 487 nvlist_t **new_configs; 488 uint64_t i; 489 490 if (id < *count) 491 return; 492 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 493 KM_SLEEP); 494 for (i = 0; i < *count; i++) 495 new_configs[i] = (*configs)[i]; 496 if (*configs != NULL) 497 kmem_free(*configs, *count * sizeof(void *)); 498 *configs = new_configs; 499 *count = id + 1; 500} 501 502static void 503process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 504 const char *name, uint64_t* known_pool_guid) 505{ 506 nvlist_t *vdev_tree; 507 uint64_t pool_guid; 508 uint64_t vdev_guid, known_guid; 509 uint64_t id, txg, known_txg; 510 char *pname; 511 int i; 512 513 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 514 strcmp(pname, name) != 0) 515 goto ignore; 516 517 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 518 goto ignore; 519 520 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 521 goto ignore; 522 523 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 524 goto ignore; 525 526 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 527 goto ignore; 528 529 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 530 531 if (*known_pool_guid != 0) { 532 if (pool_guid != *known_pool_guid) 533 goto ignore; 534 } else 535 *known_pool_guid = pool_guid; 536 537 resize_configs(configs, count, id); 538 539 if ((*configs)[id] != NULL) { 540 VERIFY(nvlist_lookup_uint64((*configs)[id], 541 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 542 if (txg <= known_txg) 543 goto ignore; 544 nvlist_free((*configs)[id]); 545 } 546 547 (*configs)[id] = cfg; 548 return; 549 550ignore: 551 nvlist_free(cfg); 552} 553 554int 555vdev_geom_read_pool_label(const char *name, 556 nvlist_t ***configs, uint64_t *count) 557{ 558 struct g_class *mp; 559 struct g_geom *gp; 560 struct g_provider *pp; 561 struct g_consumer *zcp; 562 nvlist_t *vdev_cfg; 563 uint64_t pool_guid; 564 int error; 565 566 DROP_GIANT(); 567 g_topology_lock(); 568 569 *configs = NULL; 570 *count = 0; 571 pool_guid = 0; 572 LIST_FOREACH(mp, &g_classes, class) { 573 if (mp == &zfs_vdev_class) 574 continue; 575 LIST_FOREACH(gp, &mp->geom, geom) { 576 if (gp->flags & G_GEOM_WITHER) 577 continue; 578 LIST_FOREACH(pp, &gp->provider, provider) { 579 if (pp->flags & G_PF_WITHER) 580 continue; 581 zcp = vdev_geom_attach(pp, NULL); 582 if (zcp == NULL) 583 continue; 584 g_topology_unlock(); 585 error = vdev_geom_read_config(zcp, &vdev_cfg); 586 g_topology_lock(); 587 vdev_geom_detach(zcp, B_TRUE); 588 if (error) 589 continue; 590 ZFS_LOG(1, "successfully read vdev config"); 591 592 process_vdev_config(configs, count, 593 vdev_cfg, name, &pool_guid); 594 } 595 } 596 } 597 g_topology_unlock(); 598 PICKUP_GIANT(); 599 600 return (*count > 0 ? 0 : ENOENT); 601} 602 603enum match { 604 NO_MATCH, 605 TOP_MATCH, 606 FULL_MATCH 607}; 608 609static enum match 610vdev_attach_ok(vdev_t *vd, struct g_provider *pp) 611{ 612 nvlist_t *config; 613 uint64_t pool_guid, top_guid, vdev_guid; 614 struct g_consumer *cp; 615 616 cp = vdev_geom_attach(pp, NULL); 617 if (cp == NULL) { 618 ZFS_LOG(1, "Unable to attach tasting instance to %s.", 619 pp->name); 620 return (NO_MATCH); 621 } 622 g_topology_unlock(); 623 if (vdev_geom_read_config(cp, &config) != 0) { 624 g_topology_lock(); 625 vdev_geom_detach(cp, B_TRUE); 626 ZFS_LOG(1, "Unable to read config from %s.", pp->name); 627 return (NO_MATCH); 628 } 629 g_topology_lock(); 630 vdev_geom_detach(cp, B_TRUE); 631 632 pool_guid = 0; 633 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); 634 top_guid = 0; 635 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); 636 vdev_guid = 0; 637 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 638 nvlist_free(config); 639 640 /* 641 * Check that the label's pool guid matches the desired guid. 642 * Inactive spares and L2ARCs do not have any pool guid in the label. 643 */ 644 if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { 645 ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", 646 pp->name, 647 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); 648 return (NO_MATCH); 649 } 650 651 /* 652 * Check that the label's vdev guid matches the desired guid. 653 * The second condition handles possible race on vdev detach, when 654 * remaining vdev receives GUID of destroyed top level mirror vdev. 655 */ 656 if (vdev_guid == vd->vdev_guid) { 657 ZFS_LOG(1, "guids match for provider %s.", pp->name); 658 return (FULL_MATCH); 659 } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { 660 ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); 661 return (TOP_MATCH); 662 } 663 ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", 664 pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); 665 return (NO_MATCH); 666} 667 668static struct g_consumer * 669vdev_geom_attach_by_guids(vdev_t *vd) 670{ 671 struct g_class *mp; 672 struct g_geom *gp; 673 struct g_provider *pp; 674 struct g_consumer *cp; 675 enum match m; 676 677 g_topology_assert(); 678 679 cp = NULL; 680 LIST_FOREACH(mp, &g_classes, class) { 681 if (mp == &zfs_vdev_class) 682 continue; 683 LIST_FOREACH(gp, &mp->geom, geom) { 684 if (gp->flags & G_GEOM_WITHER) 685 continue; 686 LIST_FOREACH(pp, &gp->provider, provider) { 687 m = vdev_attach_ok(vd, pp); 688 if (m == NO_MATCH) 689 continue; 690 if (cp != NULL) { 691 if (m == FULL_MATCH) 692 vdev_geom_detach(cp, B_TRUE); 693 else 694 continue; 695 } 696 cp = vdev_geom_attach(pp, vd); 697 if (cp == NULL) { 698 printf("ZFS WARNING: Unable to " 699 "attach to %s.\n", pp->name); 700 continue; 701 } 702 if (m == FULL_MATCH) 703 return (cp); 704 } 705 } 706 } 707 return (cp); 708} 709 710static struct g_consumer * 711vdev_geom_open_by_guids(vdev_t *vd) 712{ 713 struct g_consumer *cp; 714 char *buf; 715 size_t len; 716 717 g_topology_assert(); 718 719 ZFS_LOG(1, "Searching by guids [%ju:%ju].", 720 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 721 cp = vdev_geom_attach_by_guids(vd); 722 if (cp != NULL) { 723 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 724 buf = kmem_alloc(len, KM_SLEEP); 725 726 snprintf(buf, len, "/dev/%s", cp->provider->name); 727 spa_strfree(vd->vdev_path); 728 vd->vdev_path = buf; 729 730 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 731 (uintmax_t)spa_guid(vd->vdev_spa), 732 (uintmax_t)vd->vdev_guid, vd->vdev_path); 733 } else { 734 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 735 (uintmax_t)spa_guid(vd->vdev_spa), 736 (uintmax_t)vd->vdev_guid); 737 } 738 739 return (cp); 740} 741 742static struct g_consumer * 743vdev_geom_open_by_path(vdev_t *vd, int check_guid) 744{ 745 struct g_provider *pp; 746 struct g_consumer *cp; 747 748 g_topology_assert(); 749 750 cp = NULL; 751 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 752 if (pp != NULL) { 753 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 754 if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) 755 cp = vdev_geom_attach(pp, vd); 756 } 757 758 return (cp); 759} 760 761static int 762vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 763 uint64_t *logical_ashift, uint64_t *physical_ashift) 764{ 765 struct g_provider *pp; 766 struct g_consumer *cp; 767 size_t bufsize; 768 int error; 769 770 /* Set the TLS to indicate downstack that we should not access zvols*/ 771 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 772 773 /* 774 * We must have a pathname, and it must be absolute. 775 */ 776 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 777 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 778 return (EINVAL); 779 } 780 781 vd->vdev_tsd = NULL; 782 783 DROP_GIANT(); 784 g_topology_lock(); 785 error = 0; 786 787 if (vd->vdev_spa->spa_splitting_newspa || 788 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 789 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 790 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 791 /* 792 * We are dealing with a vdev that hasn't been previously 793 * opened (since boot), and we are not loading an 794 * existing pool configuration. This looks like a 795 * vdev add operation to a new or existing pool. 796 * Assume the user knows what he/she is doing and find 797 * GEOM provider by its name, ignoring GUID mismatches. 798 * 799 * XXPOLICY: It would be safer to only allow a device 800 * that is unlabeled or labeled but missing 801 * GUID information to be opened in this fashion, 802 * unless we are doing a split, in which case we 803 * should allow any guid. 804 */ 805 cp = vdev_geom_open_by_path(vd, 0); 806 } else { 807 /* 808 * Try using the recorded path for this device, but only 809 * accept it if its label data contains the expected GUIDs. 810 */ 811 cp = vdev_geom_open_by_path(vd, 1); 812 if (cp == NULL) { 813 /* 814 * The device at vd->vdev_path doesn't have the 815 * expected GUIDs. The disks might have merely 816 * moved around so try all other GEOM providers 817 * to find one with the right GUIDs. 818 */ 819 cp = vdev_geom_open_by_guids(vd); 820 } 821 } 822 823 /* Clear the TLS now that tasting is done */ 824 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); 825 826 if (cp == NULL) { 827 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 828 error = ENOENT; 829 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 830 !ISP2(cp->provider->sectorsize)) { 831 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 832 vd->vdev_path); 833 834 vdev_geom_close_locked(vd); 835 error = EINVAL; 836 cp = NULL; 837 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 838 int i; 839 840 for (i = 0; i < 5; i++) { 841 error = g_access(cp, 0, 1, 0); 842 if (error == 0) 843 break; 844 g_topology_unlock(); 845 tsleep(vd, 0, "vdev", hz / 2); 846 g_topology_lock(); 847 } 848 if (error != 0) { 849 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 850 vd->vdev_path, error); 851 vdev_geom_close_locked(vd); 852 cp = NULL; 853 } 854 } 855 856 /* Fetch initial physical path information for this device. */ 857 if (cp != NULL) 858 vdev_geom_attrchanged(cp, "GEOM::physpath"); 859 860 g_topology_unlock(); 861 PICKUP_GIANT(); 862 if (cp == NULL) { 863 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 864 return (error); 865 } 866 pp = cp->provider; 867 868 /* 869 * Determine the actual size of the device. 870 */ 871 *max_psize = *psize = pp->mediasize; 872 873 /* 874 * Determine the device's minimum transfer size and preferred 875 * transfer size. 876 */ 877 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 878 *physical_ashift = 0; 879 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 880 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 881 *physical_ashift = highbit(pp->stripesize) - 1; 882 883 /* 884 * Clear the nowritecache settings, so that on a vdev_reopen() 885 * we will try again. 886 */ 887 vd->vdev_nowritecache = B_FALSE; 888 889 /* 890 * Determine the device's rotation rate. 891 */ 892 vdev_geom_set_rotation_rate(vd, cp); 893 894 return (0); 895} 896 897static void 898vdev_geom_close(vdev_t *vd) 899{ 900 901 DROP_GIANT(); 902 g_topology_lock(); 903 vdev_geom_close_locked(vd); 904 g_topology_unlock(); 905 PICKUP_GIANT(); 906} 907 908static void 909vdev_geom_io_intr(struct bio *bp) 910{ 911 vdev_t *vd; 912 zio_t *zio; 913 914 zio = bp->bio_caller1; 915 vd = zio->io_vd; 916 zio->io_error = bp->bio_error; 917 if (zio->io_error == 0 && bp->bio_resid != 0) 918 zio->io_error = SET_ERROR(EIO); 919 920 switch(zio->io_error) { 921 case ENOTSUP: 922 /* 923 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 924 * that future attempts will never succeed. In this case 925 * we set a persistent flag so that we don't bother with 926 * requests in the future. 927 */ 928 switch(bp->bio_cmd) { 929 case BIO_FLUSH: 930 vd->vdev_nowritecache = B_TRUE; 931 break; 932 case BIO_DELETE: 933 vd->vdev_notrim = B_TRUE; 934 break; 935 } 936 break; 937 case ENXIO: 938 if (!vd->vdev_remove_wanted) { 939 /* 940 * If provider's error is set we assume it is being 941 * removed. 942 */ 943 if (bp->bio_to->error != 0) { 944 vd->vdev_remove_wanted = B_TRUE; 945 spa_async_request(zio->io_spa, 946 SPA_ASYNC_REMOVE); 947 } else if (!vd->vdev_delayed_close) { 948 vd->vdev_delayed_close = B_TRUE; 949 } 950 } 951 break; 952 } 953 g_destroy_bio(bp); 954 zio_delay_interrupt(zio); 955} 956 957static void 958vdev_geom_io_start(zio_t *zio) 959{ 960 vdev_t *vd; 961 struct g_consumer *cp; 962 struct bio *bp; 963 int error; 964 965 vd = zio->io_vd; 966 967 switch (zio->io_type) { 968 case ZIO_TYPE_IOCTL: 969 /* XXPOLICY */ 970 if (!vdev_readable(vd)) { 971 zio->io_error = SET_ERROR(ENXIO); 972 zio_interrupt(zio); 973 return; 974 } else { 975 switch (zio->io_cmd) { 976 case DKIOCFLUSHWRITECACHE: 977 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 978 break; 979 if (vd->vdev_nowritecache) { 980 zio->io_error = SET_ERROR(ENOTSUP); 981 break; 982 } 983 goto sendreq; 984 default: 985 zio->io_error = SET_ERROR(ENOTSUP); 986 } 987 } 988 989 zio_execute(zio); 990 return; 991 case ZIO_TYPE_FREE: 992 if (vd->vdev_notrim) { 993 zio->io_error = SET_ERROR(ENOTSUP); 994 } else if (!vdev_geom_bio_delete_disable) { 995 goto sendreq; 996 } 997 zio_execute(zio); 998 return; 999 } 1000sendreq: 1001 ASSERT(zio->io_type == ZIO_TYPE_READ || 1002 zio->io_type == ZIO_TYPE_WRITE || 1003 zio->io_type == ZIO_TYPE_FREE || 1004 zio->io_type == ZIO_TYPE_IOCTL); 1005 1006 cp = vd->vdev_tsd; 1007 if (cp == NULL) { 1008 zio->io_error = SET_ERROR(ENXIO); 1009 zio_interrupt(zio); 1010 return; 1011 } 1012 bp = g_alloc_bio(); 1013 bp->bio_caller1 = zio; 1014 switch (zio->io_type) { 1015 case ZIO_TYPE_READ: 1016 case ZIO_TYPE_WRITE: 1017 zio->io_target_timestamp = zio_handle_io_delay(zio); 1018 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 1019 bp->bio_data = zio->io_data; 1020 bp->bio_offset = zio->io_offset; 1021 bp->bio_length = zio->io_size; 1022 break; 1023 case ZIO_TYPE_FREE: 1024 bp->bio_cmd = BIO_DELETE; 1025 bp->bio_data = NULL; 1026 bp->bio_offset = zio->io_offset; 1027 bp->bio_length = zio->io_size; 1028 break; 1029 case ZIO_TYPE_IOCTL: 1030 bp->bio_cmd = BIO_FLUSH; 1031 bp->bio_flags |= BIO_ORDERED; 1032 bp->bio_data = NULL; 1033 bp->bio_offset = cp->provider->mediasize; 1034 bp->bio_length = 0; 1035 break; 1036 } 1037 bp->bio_done = vdev_geom_io_intr; 1038 1039 g_io_request(bp, cp); 1040} 1041 1042static void 1043vdev_geom_io_done(zio_t *zio) 1044{ 1045} 1046 1047static void 1048vdev_geom_hold(vdev_t *vd) 1049{ 1050} 1051 1052static void 1053vdev_geom_rele(vdev_t *vd) 1054{ 1055} 1056 1057vdev_ops_t vdev_geom_ops = { 1058 vdev_geom_open, 1059 vdev_geom_close, 1060 vdev_default_asize, 1061 vdev_geom_io_start, 1062 vdev_geom_io_done, 1063 NULL, 1064 vdev_geom_hold, 1065 vdev_geom_rele, 1066 VDEV_TYPE_DISK, /* name of this vdev type */ 1067 B_TRUE /* leaf vdev */ 1068}; 1069