vdev_geom.c revision 308060
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
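/*
 * Usage note (added for illustration): both knobs are registered as loader
 * tunables (TUNABLE_INT) and as read/write runtime sysctls, so an
 * administrator could, for example, suppress cache-flush requests at runtime
 * with "sysctl vfs.zfs.vdev.bio_flush_disable=1", or set the same name in
 * /boot/loader.conf to apply it from boot.
 */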
/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non-NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;
	spa_t *spa;
	char *physpath;
	int error, physpath_len;

	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	if (g_access(cp, 1, 0, 0) != 0)
		return;

	/*
	 * Record/Update physical path information for this device.
	 */
	spa = vd->vdev_spa;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		if (old_physpath != NULL)
			spa_strfree(old_physpath);
	}
	g_free(physpath);
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL) {
		/* Vdev close in progress.  Ignore the event. */
		return;
	}

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}
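/*
 * Note (added for illustration): in the attach path below, g_access() takes
 * delta read/write/exclusive access counts, so g_access(cp, 1, 0, 1)
 * acquires one read bit plus one exclusive bit on the consumer, and the
 * matching g_access(cp, -1, 0, -1) issued from vdev_geom_detach() releases
 * them again on close.
 */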
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
		ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n",
		    pp->name, pp->sectorsize);
		return (NULL);
	} else if (pp->mediasize < SPA_MINDEVSIZE) {
		ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n",
		    pp->name, pp->mediasize);
		return (NULL);
	}

	/* Do we have a geom already?  No?  Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	/*
	 * BUG: cp may already belong to a vdev.  This could happen if:
	 * 1) That vdev is a shared spare, or
	 * 2) We are trying to reopen a missing vdev and we are scanning by
	 *    guid.  In that case, we'll ultimately fail to open this consumer,
	 *    but not until after setting the private field.
	 * The solution is to:
	 * 1) Don't set the private field until after the open succeeds, and
	 * 2) Set it to a linked list of vdevs, not just a single vdev
	 */
	cp->private = vd;
	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;
	vdev_t *vd;

	g_topology_assert();

	ZFS_LOG(1, "Detaching consumer. Provider %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	vd = cp->private;
	cp->private = NULL;

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer to %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);

	vdev_geom_detach(cp, B_TRUE);
}

static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}
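/*
 * Worked example (added for illustration, assuming the common MAXPHYS of
 * 128 KiB): with 512-byte sectors, maxio below is 131072 bytes, so a single
 * 300 KiB command is split into three bios of 128 KiB, 128 KiB and 44 KiB,
 * all issued before the first biowait().
 */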
/*
 * Issue one or more bios to the vdev in parallel.
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.
 * Each IO operation is described by parallel entries from each array.
 * There may be more bios actually issued than entries in the array.
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof(struct bio*);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT((off % cp->provider->sectorsize) == 0);
		ASSERT((s % cp->provider->sectorsize) == 0);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT(j == n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		for (; off < end; off += maxio, s -= maxio, j++) {
			errors[i] = biowait(bios[j], "vdev_geom_io") ||
			    errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}
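/*
 * Background (added for illustration): a ZFS leaf vdev carries four 256 KiB
 * labels, two at the front of the device and two at the end, and each label
 * stores its packed nvlist config in the vdev_phys region that follows the
 * leading pad/boot areas (hence the VDEV_SKIP_SIZE offset below).
 * vdev_geom_read_config() reads that region from all VDEV_LABELS copies in
 * one vdev_geom_io() call and keeps the first copy that unpacks and looks
 * valid.
 */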
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *p, *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, len;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof(vdev_lists[0]->vp_nvlist);

	*config = NULL;
	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT(offsets[l] % pp->sectorsize == 0);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (*config == NULL ? ENOENT : 0);
}

static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}
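/*
 * Note (added for illustration): process_vdev_config() keys the configs
 * array by the top-level vdev id, so if several providers carry labels for
 * the same slot (e.g. stale copies left behind by a replaced disk), only the
 * label with the highest ZPOOL_CONFIG_POOL_TXG survives; older candidates
 * and labels from other pools are freed via the ignore path.
 */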
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				zcp = vdev_geom_attach(pp, NULL);
				if (zcp == NULL)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach(zcp, B_TRUE);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
	nvlist_t *config;

	g_topology_assert_not();

	*pguid = 0;
	*vguid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		nvlist_get_guids(config, pguid, vguid);
		nvlist_free(config);
	}
}

static boolean_t
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	uint64_t pool_guid;
	uint64_t vdev_guid;
	struct g_consumer *zcp;
	boolean_t pool_ok;
	boolean_t vdev_ok;

	zcp = vdev_geom_attach(pp, NULL);
	if (zcp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (B_FALSE);
	}
	g_topology_unlock();
	vdev_geom_read_guids(zcp, &pool_guid, &vdev_guid);
	g_topology_lock();
	vdev_geom_detach(zcp, B_TRUE);

	/*
	 * Check that the label's vdev guid matches the desired guid.  If the
	 * label has a pool guid, check that it matches too.  (Inactive spares
	 * and L2ARCs do not have any pool guid in the label.)
	 */
	if ((pool_guid == 0 || pool_guid == spa_guid(vd->vdev_spa)) &&
	    vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path);
		return (B_TRUE);
	} else {
		ZFS_LOG(1, "guid mismatch for provider %s: "
		    "%ju:%ju != %ju:%ju.", vd->vdev_path,
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid,
		    (uintmax_t)pool_guid, (uintmax_t)vdev_guid);
		return (B_FALSE);
	}
}

static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (!vdev_attach_ok(vd, pp))
					continue;
				cp = vdev_geom_attach(pp, vd);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to "
					    "attach to %s.\n", pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}
end:
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		if (!check_guid || vdev_attach_ok(vd, pp))
			cp = vdev_geom_attach(pp, vd);
	}

	return (cp);
}
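/*
 * Note (added for illustration): g_provider_by_name() expects a bare GEOM
 * provider name, so the expression vd->vdev_path + sizeof("/dev/") - 1 above
 * simply skips the "/dev/" prefix; for a vdev_path of "/dev/ada0p3" the
 * lookup is performed on "ada0p3".
 */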
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/* Set the TLS to indicate downstack that we should not access zvols */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	    vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs.  The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);

		vdev_geom_close_locked(vd);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing "
			    "(error=%d).\n", vd->vdev_path, error);
			vdev_geom_close_locked(vd);
			cp = NULL;
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL)
		vdev_geom_attrchanged(cp, "GEOM::physpath");

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/*
	 * Determine the device's rotation rate.
	 */
	vdev_geom_set_rotation_rate(vd, cp);

	return (0);
}
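/*
 * Worked example (added for illustration): for a provider reporting
 * 512-byte sectors and a 4096-byte stripesize with zero stripeoffset, the
 * code above yields logical_ashift = highbit(512) - 1 = 9 and
 * physical_ashift = highbit(4096) - 1 = 12, i.e. a 512e/4Kn-style disk.
 */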
static void
vdev_geom_close(vdev_t *vd)
{

	DROP_GIANT();
	g_topology_lock();
	vdev_geom_close_locked(vd);
	g_topology_unlock();
	PICKUP_GIANT();
}

static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}
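/*
 * Note (added for illustration): the zio_delay_interrupt() call above pairs
 * with the zio->io_target_timestamp = zio_handle_io_delay(zio) assignment in
 * vdev_geom_io_start() below; together they let ZFS's fault-injection
 * framework artificially delay I/O completion for testing, while normal
 * completions pass through unchanged.
 */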
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush ||
				    vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ?
		    BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};
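/*
 * Note (added for illustration): the initializer above is positional; in
 * this era's vdev_ops_t the slots are, in order, vdev_op_open, vdev_op_close,
 * vdev_op_asize, vdev_op_io_start, vdev_op_io_done, vdev_op_state_change
 * (NULL here, since GEOM vdevs need no state-change hook), vdev_op_hold and
 * vdev_op_rele, followed by the type name and the leaf flag.
 */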