/* vdev_geom.c revision 308588 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable = 0; 57TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 58SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 59 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 60/* Don't send BIO_DELETE. 
*/ 61static int vdev_geom_bio_delete_disable = 0; 62TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 63SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 64 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 65 66/* Declare local functions */ 67static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); 68 69/* 70 * Thread local storage used to indicate when a thread is probing geoms 71 * for their guids. If NULL, this thread is not tasting geoms. If non NULL, 72 * it is looking for a replacement for the vdev_t* that is its value. 73 */ 74uint_t zfs_geom_probe_vdev_key; 75 76static void 77vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 78{ 79 int error; 80 uint16_t rate; 81 82 error = g_getattr("GEOM::rotation_rate", cp, &rate); 83 if (error == 0) 84 vd->vdev_rotation_rate = rate; 85 else 86 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 87} 88 89static void 90vdev_geom_set_physpath(struct g_consumer *cp, boolean_t do_null_update) 91{ 92 boolean_t needs_update = B_FALSE; 93 vdev_t *vd; 94 char *physpath; 95 int error, physpath_len; 96 97 if (g_access(cp, 1, 0, 0) != 0) 98 return; 99 100 vd = cp->private; 101 physpath_len = MAXPATHLEN; 102 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 103 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 104 g_access(cp, -1, 0, 0); 105 if (error == 0) { 106 char *old_physpath; 107 108 /* g_topology lock ensures that vdev has not been closed */ 109 g_topology_assert(); 110 old_physpath = vd->vdev_physpath; 111 vd->vdev_physpath = spa_strdup(physpath); 112 113 if (old_physpath != NULL) { 114 needs_update = (strcmp(old_physpath, 115 vd->vdev_physpath) != 0); 116 spa_strfree(old_physpath); 117 } else 118 needs_update = do_null_update; 119 } 120 g_free(physpath); 121 122 /* 123 * If the physical path changed, update the config. 124 * Only request an update for previously unset physpaths if 125 * requested by the caller. 
126 */ 127 if (needs_update) 128 spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); 129 130} 131 132static void 133vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 134{ 135 vdev_t *vd; 136 char *old_physpath; 137 int error; 138 139 vd = cp->private; 140 if (vd == NULL) 141 return; 142 143 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 144 vdev_geom_set_rotation_rate(vd, cp); 145 return; 146 } 147 148 if (strcmp(attr, "GEOM::physpath") == 0) { 149 vdev_geom_set_physpath(cp, /*do_null_update*/B_TRUE); 150 return; 151 } 152} 153 154static void 155vdev_geom_orphan(struct g_consumer *cp) 156{ 157 vdev_t *vd; 158 159 g_topology_assert(); 160 161 vd = cp->private; 162 if (vd == NULL) { 163 /* Vdev close in progress. Ignore the event. */ 164 return; 165 } 166 167 /* 168 * Orphan callbacks occur from the GEOM event thread. 169 * Concurrent with this call, new I/O requests may be 170 * working their way through GEOM about to find out 171 * (only once executed by the g_down thread) that we've 172 * been orphaned from our disk provider. These I/Os 173 * must be retired before we can detach our consumer. 174 * This is most easily achieved by acquiring the 175 * SPA ZIO configuration lock as a writer, but doing 176 * so with the GEOM topology lock held would cause 177 * a lock order reversal. Instead, rely on the SPA's 178 * async removal support to invoke a close on this 179 * vdev once it is safe to do so. 180 */ 181 vd->vdev_remove_wanted = B_TRUE; 182 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 183} 184 185static struct g_consumer * 186vdev_geom_attach(struct g_provider *pp, vdev_t *vd) 187{ 188 struct g_geom *gp; 189 struct g_consumer *cp; 190 int error; 191 192 g_topology_assert(); 193 194 ZFS_LOG(1, "Attaching to %s.", pp->name); 195 196 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { 197 ZFS_LOG(1, "Failing attach of %s. 
Incompatible sectorsize %d\n", 198 pp->name, pp->sectorsize); 199 return (NULL); 200 } else if (pp->mediasize < SPA_MINDEVSIZE) { 201 ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n", 202 pp->name, pp->mediasize); 203 return (NULL); 204 } 205 206 /* Do we have geom already? No? Create one. */ 207 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 208 if (gp->flags & G_GEOM_WITHER) 209 continue; 210 if (strcmp(gp->name, "zfs::vdev") != 0) 211 continue; 212 break; 213 } 214 if (gp == NULL) { 215 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 216 gp->orphan = vdev_geom_orphan; 217 gp->attrchanged = vdev_geom_attrchanged; 218 cp = g_new_consumer(gp); 219 error = g_attach(cp, pp); 220 if (error != 0) { 221 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, 222 __LINE__, error); 223 vdev_geom_detach(cp, B_FALSE); 224 return (NULL); 225 } 226 error = g_access(cp, 1, 0, 1); 227 if (error != 0) { 228 ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__, 229 __LINE__, error); 230 vdev_geom_detach(cp, B_FALSE); 231 return (NULL); 232 } 233 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 234 } else { 235 /* Check if we are already connected to this provider. 
*/ 236 LIST_FOREACH(cp, &gp->consumer, consumer) { 237 if (cp->provider == pp) { 238 ZFS_LOG(1, "Found consumer for %s.", pp->name); 239 break; 240 } 241 } 242 if (cp == NULL) { 243 cp = g_new_consumer(gp); 244 error = g_attach(cp, pp); 245 if (error != 0) { 246 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", 247 __func__, __LINE__, error); 248 vdev_geom_detach(cp, B_FALSE); 249 return (NULL); 250 } 251 error = g_access(cp, 1, 0, 1); 252 if (error != 0) { 253 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 254 __func__, __LINE__, error); 255 vdev_geom_detach(cp, B_FALSE); 256 return (NULL); 257 } 258 ZFS_LOG(1, "Created consumer for %s.", pp->name); 259 } else { 260 error = g_access(cp, 1, 0, 1); 261 if (error != 0) { 262 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 263 __func__, __LINE__, error); 264 return (NULL); 265 } 266 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 267 } 268 } 269 270 /* 271 * BUG: cp may already belong to a vdev. This could happen if: 272 * 1) That vdev is a shared spare, or 273 * 2) We are trying to reopen a missing vdev and we are scanning by 274 * guid. In that case, we'll ultimately fail to open this consumer, 275 * but not until after setting the private field. 276 * The solution is to: 277 * 1) Don't set the private field until after the open succeeds, and 278 * 2) Set it to a linked list of vdevs, not just a single vdev 279 */ 280 cp->private = vd; 281 if (vd != NULL) { 282 vd->vdev_tsd = cp; 283 vdev_geom_set_physpath(cp, /*do_null_update*/B_FALSE); 284 } 285 286 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 287 return (cp); 288} 289 290static void 291vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) 292{ 293 struct g_geom *gp; 294 vdev_t *vd; 295 296 g_topology_assert(); 297 298 ZFS_LOG(1, "Detaching from %s.", 299 cp->provider && cp->provider->name ? 
cp->provider->name : "NULL"); 300 301 vd = cp->private; 302 cp->private = NULL; 303 304 gp = cp->geom; 305 if (open_for_read) 306 g_access(cp, -1, 0, -1); 307 /* Destroy consumer on last close. */ 308 if (cp->acr == 0 && cp->ace == 0) { 309 if (cp->acw > 0) 310 g_access(cp, 0, -cp->acw, 0); 311 if (cp->provider != NULL) { 312 ZFS_LOG(1, "Destroying consumer for %s.", 313 cp->provider->name ? cp->provider->name : "NULL"); 314 g_detach(cp); 315 } 316 g_destroy_consumer(cp); 317 } 318 /* Destroy geom if there are no consumers left. */ 319 if (LIST_EMPTY(&gp->consumer)) { 320 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 321 g_wither_geom(gp, ENXIO); 322 } 323} 324 325static void 326vdev_geom_close_locked(vdev_t *vd) 327{ 328 struct g_consumer *cp; 329 330 g_topology_assert(); 331 332 cp = vd->vdev_tsd; 333 vd->vdev_tsd = NULL; 334 vd->vdev_delayed_close = B_FALSE; 335 if (cp == NULL) 336 return; 337 338 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 339 340 vdev_geom_detach(cp, B_TRUE); 341} 342 343static void 344nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) 345{ 346 347 (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); 348 (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); 349} 350 351/* 352 * Issue one or more bios to the vdev in parallel 353 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO 354 * operation is described by parallel entries from each array. There may be 355 * more bios actually issued than entries in the array 356 */ 357static void 358vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, 359 off_t *sizes, int *errors, int ncmds) 360{ 361 struct bio **bios; 362 u_char *p; 363 off_t off, maxio, s, end; 364 int i, n_bios, j; 365 size_t bios_size; 366 367 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 368 n_bios = 0; 369 370 /* How many bios are required for all commands ? 
*/ 371 for (i = 0; i < ncmds; i++) 372 n_bios += (sizes[i] + maxio - 1) / maxio; 373 374 /* Allocate memory for the bios */ 375 bios_size = n_bios * sizeof(struct bio*); 376 bios = kmem_zalloc(bios_size, KM_SLEEP); 377 378 /* Prepare and issue all of the bios */ 379 for (i = j = 0; i < ncmds; i++) { 380 off = offsets[i]; 381 p = datas[i]; 382 s = sizes[i]; 383 end = off + s; 384 ASSERT((off % cp->provider->sectorsize) == 0); 385 ASSERT((s % cp->provider->sectorsize) == 0); 386 387 for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { 388 bios[j] = g_alloc_bio(); 389 bios[j]->bio_cmd = cmds[i]; 390 bios[j]->bio_done = NULL; 391 bios[j]->bio_offset = off; 392 bios[j]->bio_length = MIN(s, maxio); 393 bios[j]->bio_data = p; 394 g_io_request(bios[j], cp); 395 } 396 } 397 ASSERT(j == n_bios); 398 399 /* Wait for all of the bios to complete, and clean them up */ 400 for (i = j = 0; i < ncmds; i++) { 401 off = offsets[i]; 402 s = sizes[i]; 403 end = off + s; 404 405 for (; off < end; off += maxio, s -= maxio, j++) { 406 errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i]; 407 g_destroy_bio(bios[j]); 408 } 409 } 410 kmem_free(bios, bios_size); 411} 412 413static int 414vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 415{ 416 struct g_provider *pp; 417 vdev_phys_t *vdev_lists[VDEV_LABELS]; 418 char *p, *buf; 419 size_t buflen; 420 uint64_t psize, state, txg; 421 off_t offsets[VDEV_LABELS]; 422 off_t size; 423 off_t sizes[VDEV_LABELS]; 424 int cmds[VDEV_LABELS]; 425 int errors[VDEV_LABELS]; 426 int l, len; 427 428 g_topology_assert_not(); 429 430 pp = cp->provider; 431 ZFS_LOG(1, "Reading config from %s...", pp->name); 432 433 psize = pp->mediasize; 434 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 435 436 size = sizeof(*vdev_lists[0]) + pp->sectorsize - 437 ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1; 438 439 buflen = sizeof(vdev_lists[0]->vp_nvlist); 440 441 *config = NULL; 442 /* Create all of the IO requests */ 443 for (l = 
0; l < VDEV_LABELS; l++) { 444 cmds[l] = BIO_READ; 445 vdev_lists[l] = kmem_alloc(size, KM_SLEEP); 446 offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; 447 sizes[l] = size; 448 errors[l] = 0; 449 ASSERT(offsets[l] % pp->sectorsize == 0); 450 } 451 452 /* Issue the IO requests */ 453 vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors, 454 VDEV_LABELS); 455 456 /* Parse the labels */ 457 for (l = 0; l < VDEV_LABELS; l++) { 458 if (errors[l] != 0) 459 continue; 460 461 buf = vdev_lists[l]->vp_nvlist; 462 463 if (nvlist_unpack(buf, buflen, config, 0) != 0) 464 continue; 465 466 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 467 &state) != 0 || state > POOL_STATE_L2CACHE) { 468 nvlist_free(*config); 469 *config = NULL; 470 continue; 471 } 472 473 if (state != POOL_STATE_SPARE && 474 state != POOL_STATE_L2CACHE && 475 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 476 &txg) != 0 || txg == 0)) { 477 nvlist_free(*config); 478 *config = NULL; 479 continue; 480 } 481 482 break; 483 } 484 485 /* Free the label storage */ 486 for (l = 0; l < VDEV_LABELS; l++) 487 kmem_free(vdev_lists[l], size); 488 489 return (*config == NULL ? 
ENOENT : 0); 490} 491 492static void 493resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 494{ 495 nvlist_t **new_configs; 496 uint64_t i; 497 498 if (id < *count) 499 return; 500 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 501 KM_SLEEP); 502 for (i = 0; i < *count; i++) 503 new_configs[i] = (*configs)[i]; 504 if (*configs != NULL) 505 kmem_free(*configs, *count * sizeof(void *)); 506 *configs = new_configs; 507 *count = id + 1; 508} 509 510static void 511process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 512 const char *name, uint64_t* known_pool_guid) 513{ 514 nvlist_t *vdev_tree; 515 uint64_t pool_guid; 516 uint64_t vdev_guid, known_guid; 517 uint64_t id, txg, known_txg; 518 char *pname; 519 int i; 520 521 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 522 strcmp(pname, name) != 0) 523 goto ignore; 524 525 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 526 goto ignore; 527 528 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 529 goto ignore; 530 531 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 532 goto ignore; 533 534 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 535 goto ignore; 536 537 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 538 539 if (*known_pool_guid != 0) { 540 if (pool_guid != *known_pool_guid) 541 goto ignore; 542 } else 543 *known_pool_guid = pool_guid; 544 545 resize_configs(configs, count, id); 546 547 if ((*configs)[id] != NULL) { 548 VERIFY(nvlist_lookup_uint64((*configs)[id], 549 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 550 if (txg <= known_txg) 551 goto ignore; 552 nvlist_free((*configs)[id]); 553 } 554 555 (*configs)[id] = cfg; 556 return; 557 558ignore: 559 nvlist_free(cfg); 560} 561 562int 563vdev_geom_read_pool_label(const char *name, 564 nvlist_t ***configs, uint64_t *count) 565{ 566 struct g_class *mp; 567 struct g_geom *gp; 568 struct g_provider 
*pp; 569 struct g_consumer *zcp; 570 nvlist_t *vdev_cfg; 571 uint64_t pool_guid; 572 int error; 573 574 DROP_GIANT(); 575 g_topology_lock(); 576 577 *configs = NULL; 578 *count = 0; 579 pool_guid = 0; 580 LIST_FOREACH(mp, &g_classes, class) { 581 if (mp == &zfs_vdev_class) 582 continue; 583 LIST_FOREACH(gp, &mp->geom, geom) { 584 if (gp->flags & G_GEOM_WITHER) 585 continue; 586 LIST_FOREACH(pp, &gp->provider, provider) { 587 if (pp->flags & G_PF_WITHER) 588 continue; 589 zcp = vdev_geom_attach(pp, NULL); 590 if (zcp == NULL) 591 continue; 592 g_topology_unlock(); 593 error = vdev_geom_read_config(zcp, &vdev_cfg); 594 g_topology_lock(); 595 vdev_geom_detach(zcp, B_TRUE); 596 if (error) 597 continue; 598 ZFS_LOG(1, "successfully read vdev config"); 599 600 process_vdev_config(configs, count, 601 vdev_cfg, name, &pool_guid); 602 } 603 } 604 } 605 g_topology_unlock(); 606 PICKUP_GIANT(); 607 608 return (*count > 0 ? 0 : ENOENT); 609} 610 611static void 612vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) 613{ 614 nvlist_t *config; 615 616 g_topology_assert_not(); 617 618 *pguid = 0; 619 *vguid = 0; 620 if (vdev_geom_read_config(cp, &config) == 0) { 621 nvlist_get_guids(config, pguid, vguid); 622 nvlist_free(config); 623 } 624} 625 626static boolean_t 627vdev_attach_ok(vdev_t *vd, struct g_provider *pp) 628{ 629 uint64_t pool_guid; 630 uint64_t vdev_guid; 631 struct g_consumer *zcp; 632 boolean_t pool_ok; 633 boolean_t vdev_ok; 634 635 zcp = vdev_geom_attach(pp, NULL); 636 if (zcp == NULL) { 637 ZFS_LOG(1, "Unable to attach tasting instance to %s.", 638 pp->name); 639 return (B_FALSE); 640 } 641 g_topology_unlock(); 642 vdev_geom_read_guids(zcp, &pool_guid, &vdev_guid); 643 g_topology_lock(); 644 vdev_geom_detach(zcp, B_TRUE); 645 646 /* 647 * Check that the label's vdev guid matches the desired guid. If the 648 * label has a pool guid, check that it matches too. (Inactive spares 649 * and L2ARCs do not have any pool guid in the label.) 
650 */ 651 if ((pool_guid == 0 || pool_guid == spa_guid(vd->vdev_spa)) && 652 vdev_guid == vd->vdev_guid) { 653 ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path); 654 return (B_TRUE); 655 } else { 656 ZFS_LOG(1, "guid mismatch for provider %s: " 657 "%ju:%ju != %ju:%ju.", vd->vdev_path, 658 (uintmax_t)spa_guid(vd->vdev_spa), 659 (uintmax_t)vd->vdev_guid, 660 (uintmax_t)pool_guid, (uintmax_t)vdev_guid); 661 return (B_FALSE); 662 } 663} 664 665static struct g_consumer * 666vdev_geom_attach_by_guids(vdev_t *vd) 667{ 668 struct g_class *mp; 669 struct g_geom *gp; 670 struct g_provider *pp; 671 struct g_consumer *cp; 672 673 g_topology_assert(); 674 675 cp = NULL; 676 LIST_FOREACH(mp, &g_classes, class) { 677 if (mp == &zfs_vdev_class) 678 continue; 679 LIST_FOREACH(gp, &mp->geom, geom) { 680 if (gp->flags & G_GEOM_WITHER) 681 continue; 682 LIST_FOREACH(pp, &gp->provider, provider) { 683 if (!vdev_attach_ok(vd, pp)) 684 continue; 685 cp = vdev_geom_attach(pp, vd); 686 if (cp == NULL) { 687 printf("ZFS WARNING: Unable to " 688 "attach to %s.\n", pp->name); 689 continue; 690 } 691 break; 692 } 693 if (cp != NULL) 694 break; 695 } 696 if (cp != NULL) 697 break; 698 } 699end: 700 return (cp); 701} 702 703static struct g_consumer * 704vdev_geom_open_by_guids(vdev_t *vd) 705{ 706 struct g_consumer *cp; 707 char *buf; 708 size_t len; 709 710 g_topology_assert(); 711 712 ZFS_LOG(1, "Searching by guids [%ju:%ju].", 713 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 714 cp = vdev_geom_attach_by_guids(vd); 715 if (cp != NULL) { 716 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 717 buf = kmem_alloc(len, KM_SLEEP); 718 719 snprintf(buf, len, "/dev/%s", cp->provider->name); 720 spa_strfree(vd->vdev_path); 721 vd->vdev_path = buf; 722 723 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 724 (uintmax_t)spa_guid(vd->vdev_spa), 725 (uintmax_t)vd->vdev_guid, vd->vdev_path); 726 } else { 727 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 728 
(uintmax_t)spa_guid(vd->vdev_spa), 729 (uintmax_t)vd->vdev_guid); 730 } 731 732 return (cp); 733} 734 735static struct g_consumer * 736vdev_geom_open_by_path(vdev_t *vd, int check_guid) 737{ 738 struct g_provider *pp; 739 struct g_consumer *cp; 740 741 g_topology_assert(); 742 743 cp = NULL; 744 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 745 if (pp != NULL) { 746 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 747 if (!check_guid || vdev_attach_ok(vd, pp)) 748 cp = vdev_geom_attach(pp, vd); 749 } 750 751 return (cp); 752} 753 754static int 755vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 756 uint64_t *logical_ashift, uint64_t *physical_ashift) 757{ 758 struct g_provider *pp; 759 struct g_consumer *cp; 760 size_t bufsize; 761 int error; 762 763 /* Set the TLS to indicate downstack that we should not access zvols*/ 764 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 765 766 /* 767 * We must have a pathname, and it must be absolute. 768 */ 769 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 770 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 771 return (EINVAL); 772 } 773 774 vd->vdev_tsd = NULL; 775 776 DROP_GIANT(); 777 g_topology_lock(); 778 error = 0; 779 780 if (vd->vdev_spa->spa_splitting_newspa || 781 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 782 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 783 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 784 /* 785 * We are dealing with a vdev that hasn't been previously 786 * opened (since boot), and we are not loading an 787 * existing pool configuration. This looks like a 788 * vdev add operation to a new or existing pool. 789 * Assume the user knows what he/she is doing and find 790 * GEOM provider by its name, ignoring GUID mismatches. 
791 * 792 * XXPOLICY: It would be safer to only allow a device 793 * that is unlabeled or labeled but missing 794 * GUID information to be opened in this fashion, 795 * unless we are doing a split, in which case we 796 * should allow any guid. 797 */ 798 cp = vdev_geom_open_by_path(vd, 0); 799 } else { 800 /* 801 * Try using the recorded path for this device, but only 802 * accept it if its label data contains the expected GUIDs. 803 */ 804 cp = vdev_geom_open_by_path(vd, 1); 805 if (cp == NULL) { 806 /* 807 * The device at vd->vdev_path doesn't have the 808 * expected GUIDs. The disks might have merely 809 * moved around so try all other GEOM providers 810 * to find one with the right GUIDs. 811 */ 812 cp = vdev_geom_open_by_guids(vd); 813 } 814 } 815 816 /* Clear the TLS now that tasting is done */ 817 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); 818 819 if (cp == NULL) { 820 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 821 error = ENOENT; 822 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 823 !ISP2(cp->provider->sectorsize)) { 824 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 825 vd->vdev_path); 826 827 vdev_geom_close_locked(vd); 828 error = EINVAL; 829 cp = NULL; 830 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 831 int i; 832 833 for (i = 0; i < 5; i++) { 834 error = g_access(cp, 0, 1, 0); 835 if (error == 0) 836 break; 837 g_topology_unlock(); 838 tsleep(vd, 0, "vdev", hz / 2); 839 g_topology_lock(); 840 } 841 if (error != 0) { 842 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 843 vd->vdev_path, error); 844 vdev_geom_close_locked(vd); 845 cp = NULL; 846 } 847 } 848 849 /* Fetch initial physical path information for this device. 
*/ 850 if (cp != NULL) 851 vdev_geom_attrchanged(cp, "GEOM::physpath"); 852 853 g_topology_unlock(); 854 PICKUP_GIANT(); 855 if (cp == NULL) { 856 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 857 return (error); 858 } 859 pp = cp->provider; 860 861 /* 862 * Determine the actual size of the device. 863 */ 864 *max_psize = *psize = pp->mediasize; 865 866 /* 867 * Determine the device's minimum transfer size and preferred 868 * transfer size. 869 */ 870 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 871 *physical_ashift = 0; 872 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 873 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 874 *physical_ashift = highbit(pp->stripesize) - 1; 875 876 /* 877 * Clear the nowritecache settings, so that on a vdev_reopen() 878 * we will try again. 879 */ 880 vd->vdev_nowritecache = B_FALSE; 881 882 /* 883 * Determine the device's rotation rate. 884 */ 885 vdev_geom_set_rotation_rate(vd, cp); 886 887 return (0); 888} 889 890static void 891vdev_geom_close(vdev_t *vd) 892{ 893 894 DROP_GIANT(); 895 g_topology_lock(); 896 vdev_geom_close_locked(vd); 897 g_topology_unlock(); 898 PICKUP_GIANT(); 899} 900 901static void 902vdev_geom_io_intr(struct bio *bp) 903{ 904 vdev_t *vd; 905 zio_t *zio; 906 907 zio = bp->bio_caller1; 908 vd = zio->io_vd; 909 zio->io_error = bp->bio_error; 910 if (zio->io_error == 0 && bp->bio_resid != 0) 911 zio->io_error = SET_ERROR(EIO); 912 913 switch(zio->io_error) { 914 case ENOTSUP: 915 /* 916 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 917 * that future attempts will never succeed. In this case 918 * we set a persistent flag so that we don't bother with 919 * requests in the future. 
920 */ 921 switch(bp->bio_cmd) { 922 case BIO_FLUSH: 923 vd->vdev_nowritecache = B_TRUE; 924 break; 925 case BIO_DELETE: 926 vd->vdev_notrim = B_TRUE; 927 break; 928 } 929 break; 930 case ENXIO: 931 if (!vd->vdev_remove_wanted) { 932 /* 933 * If provider's error is set we assume it is being 934 * removed. 935 */ 936 if (bp->bio_to->error != 0) { 937 vd->vdev_remove_wanted = B_TRUE; 938 spa_async_request(zio->io_spa, 939 SPA_ASYNC_REMOVE); 940 } else if (!vd->vdev_delayed_close) { 941 vd->vdev_delayed_close = B_TRUE; 942 } 943 } 944 break; 945 } 946 g_destroy_bio(bp); 947 zio_delay_interrupt(zio); 948} 949 950static void 951vdev_geom_io_start(zio_t *zio) 952{ 953 vdev_t *vd; 954 struct g_consumer *cp; 955 struct bio *bp; 956 int error; 957 958 vd = zio->io_vd; 959 960 switch (zio->io_type) { 961 case ZIO_TYPE_IOCTL: 962 /* XXPOLICY */ 963 if (!vdev_readable(vd)) { 964 zio->io_error = SET_ERROR(ENXIO); 965 zio_interrupt(zio); 966 return; 967 } else { 968 switch (zio->io_cmd) { 969 case DKIOCFLUSHWRITECACHE: 970 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 971 break; 972 if (vd->vdev_nowritecache) { 973 zio->io_error = SET_ERROR(ENOTSUP); 974 break; 975 } 976 goto sendreq; 977 default: 978 zio->io_error = SET_ERROR(ENOTSUP); 979 } 980 } 981 982 zio_execute(zio); 983 return; 984 case ZIO_TYPE_FREE: 985 if (vd->vdev_notrim) { 986 zio->io_error = SET_ERROR(ENOTSUP); 987 } else if (!vdev_geom_bio_delete_disable) { 988 goto sendreq; 989 } 990 zio_execute(zio); 991 return; 992 } 993sendreq: 994 ASSERT(zio->io_type == ZIO_TYPE_READ || 995 zio->io_type == ZIO_TYPE_WRITE || 996 zio->io_type == ZIO_TYPE_FREE || 997 zio->io_type == ZIO_TYPE_IOCTL); 998 999 cp = vd->vdev_tsd; 1000 if (cp == NULL) { 1001 zio->io_error = SET_ERROR(ENXIO); 1002 zio_interrupt(zio); 1003 return; 1004 } 1005 bp = g_alloc_bio(); 1006 bp->bio_caller1 = zio; 1007 switch (zio->io_type) { 1008 case ZIO_TYPE_READ: 1009 case ZIO_TYPE_WRITE: 1010 zio->io_target_timestamp = zio_handle_io_delay(zio); 
1011 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 1012 bp->bio_data = zio->io_data; 1013 bp->bio_offset = zio->io_offset; 1014 bp->bio_length = zio->io_size; 1015 break; 1016 case ZIO_TYPE_FREE: 1017 bp->bio_cmd = BIO_DELETE; 1018 bp->bio_data = NULL; 1019 bp->bio_offset = zio->io_offset; 1020 bp->bio_length = zio->io_size; 1021 break; 1022 case ZIO_TYPE_IOCTL: 1023 bp->bio_cmd = BIO_FLUSH; 1024 bp->bio_flags |= BIO_ORDERED; 1025 bp->bio_data = NULL; 1026 bp->bio_offset = cp->provider->mediasize; 1027 bp->bio_length = 0; 1028 break; 1029 } 1030 bp->bio_done = vdev_geom_io_intr; 1031 1032 g_io_request(bp, cp); 1033} 1034 1035static void 1036vdev_geom_io_done(zio_t *zio) 1037{ 1038} 1039 1040static void 1041vdev_geom_hold(vdev_t *vd) 1042{ 1043} 1044 1045static void 1046vdev_geom_rele(vdev_t *vd) 1047{ 1048} 1049 1050vdev_ops_t vdev_geom_ops = { 1051 vdev_geom_open, 1052 vdev_geom_close, 1053 vdev_default_asize, 1054 vdev_geom_io_start, 1055 vdev_geom_io_done, 1056 NULL, 1057 vdev_geom_hold, 1058 vdev_geom_rele, 1059 VDEV_TYPE_DISK, /* name of this vdev type */ 1060 B_TRUE /* leaf vdev */ 1061}; 1062