vdev_geom.c revision 308057
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids. If NULL, this thread is not tasting geoms. If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;
	spa_t *spa;
	char *physpath;
	int error, physpath_len;

	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	if (g_access(cp, 1, 0, 0) != 0)
		return;

	/*
	 * Record/Update physical path information for this device.
	 */
	spa = vd->vdev_spa;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		if (old_physpath != NULL)
			spa_strfree(old_physpath);
	}
	g_free(physpath);
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL) {
		/* Vdev close in progress. Ignore the event. */
		return;
	}

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider. These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal. Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				g_destroy_consumer(cp);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	/*
	 * BUG: cp may already belong to a vdev. This could happen if:
	 * 1) That vdev is a shared spare, or
	 * 2) We are trying to reopen a missing vdev and we are scanning by
	 *    guid. In that case, we'll ultimately fail to open this consumer,
	 *    but not until after setting the private field.
	 * The solution is to:
	 * 1) Don't set the private field until after the open succeeds, and
	 * 2) Set it to a linked list of vdevs, not just a single vdev
	 */
	cp->private = vd;
	vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	cp->private = NULL;

	gp = cp->geom;
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroyed consumer to %s.",
			    cp->provider->name);
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}

static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;
	p = data;
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{
	ZFS_LOG(0, "WARNING: Orphan %s while tasting its VDev GUID.",
	    cp->provider->name);
}

static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *p, *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int error, l, len;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}

static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error == 0) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
			error = EINVAL;
		else if (pp->mediasize < SPA_MINDEVSIZE)
			error = EINVAL;
		if (error != 0)
			g_access(cp, -1, 0, 0);
	}
	if (error != 0)
		g_detach(cp);
	return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
	nvlist_t *config;

	g_topology_assert_not();

	*pguid = 0;
	*vguid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		nvlist_get_guids(config, pguid, vguid);
		nvlist_free(config);
	}
}

static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid, vguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				vdev_geom_read_guids(zcp, &pguid, &vguid);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				/*
				 * Check that the label's vdev guid matches the
				 * desired guid. If the label has a pool guid,
				 * check that it matches too. (Inactive spares
				 * and L2ARCs do not have any pool guid in the
				 * label.)
				 */
				if ((pguid != 0 &&
				    pguid != spa_guid(vd->vdev_spa)) ||
				    vguid != vd->vdev_guid)
					continue;
				cp = vdev_geom_attach(pp, vd);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to "
					    "attach to %s.\n", pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}
end:
	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t pguid, vguid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp, vd);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			vdev_geom_read_guids(cp, &pguid, &vguid);
			g_topology_lock();
			/*
			 * Check that the label's vdev guid matches the
			 * desired guid. If the label has a pool guid,
			 * check that it matches too. (Inactive spares
			 * and L2ARCs do not have any pool guid in the
			 * label.)
			 */
			if ((pguid != 0 &&
			    pguid != spa_guid(vd->vdev_spa)) ||
			    vguid != vd->vdev_guid) {
				vdev_geom_close_locked(vd);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
				    (uintmax_t)spa_guid(vd->vdev_spa),
				    (uintmax_t)vd->vdev_guid,
				    (uintmax_t)pguid, (uintmax_t)vguid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/* Set the TLS to indicate downstack that we should not access zvols */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	     vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration. This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs. The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);

		vdev_geom_close_locked(vd);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
			    vd->vdev_path, error);
			vdev_geom_close_locked(vd);
			cp = NULL;
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL)
		vdev_geom_attrchanged(cp, "GEOM::physpath");

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/*
	 * Determine the device's rotation rate.
	 */
	vdev_geom_set_rotation_rate(vd, cp);

	return (0);
}

static void
vdev_geom_close(vdev_t *vd)
{

	DROP_GIANT();
	g_topology_lock();
	vdev_geom_close_locked(vd);
	g_topology_unlock();
	PICKUP_GIANT();
}

static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch(zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed. In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch(bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}

static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ?
		    BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};