/* vdev_geom.c -- FreeBSD revision 307296 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 *
 * This file implements the vdev_ops_t backend that lets ZFS use GEOM
 * providers (disks, partitions, ...) as leaf vdevs on FreeBSD.
 */

/* Forward declaration: attribute-change callback registered on the class. */
static g_attrchanged_t vdev_geom_attrchanged;

/* GEOM class under which all ZFS vdev consumers live. */
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE.
 */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/*
 * Query the provider's "GEOM::rotation_rate" attribute and record it in
 * the vdev; falls back to VDEV_RATE_UNKNOWN when the attribute is not
 * available.
 */
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

/*
 * GEOM attribute-change callback.  Reacts to two attributes:
 *  - "GEOM::rotation_rate": refresh the cached rotation rate, and
 *  - "GEOM::physpath": re-read the physical path and request an async
 *    config update so the new path is written into the pool config.
 * Ignores events for consumers that are not (yet/any more) bound to a
 * vdev (cp->private == NULL).
 */
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;
	spa_t *spa;
	char *physpath;
	int error, physpath_len;

	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	/* Need a read reference on the consumer to issue g_io_getattr(). */
	if (g_access(cp, 1, 0, 0) != 0)
		return;

	/*
	 * Record/Update physical path information for this device.
	 */
	spa = vd->vdev_spa;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		if (old_physpath != NULL)
			spa_strfree(old_physpath);
	}
	g_free(physpath);
}

/*
 * Orphan callback: the underlying provider has gone away.  Does not
 * close the consumer here (see the lock-order comment below); instead
 * flags the vdev for removal and lets the SPA async thread do the close.
 */
static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL) {
		/* Vdev close in progress. Ignore the event. */
		return;
	}

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider. These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal. Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

/*
 * Attach a consumer for provider 'pp' on behalf of vdev 'vd'.  Reuses
 * the shared "zfs::vdev" geom (creating it on first use) and an existing
 * consumer for the same provider if one exists; otherwise creates a new
 * consumer.  On success the consumer holds r1/e1 access, cp->private
 * points at the vdev and vd->vdev_tsd at the consumer.  Returns NULL on
 * failure.  Caller must hold the GEOM topology lock.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			/* Withering the geom also reaps the new consumer. */
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				g_destroy_consumer(cp);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			/* Shared consumer: just add another r1/e1 reference. */
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	/*
	 * BUG: cp may already belong to a vdev. This could happen if:
	 * 1) That vdev is a shared spare, or
	 * 2) We are trying to reopen a missing vdev and we are scanning by
	 *    guid. In that case, we'll ultimately fail to open this consumer,
	 *    but not until after setting the private field.
	 * The solution is to:
	 * 1) Don't set the private field until after the open succeeds, and
	 * 2) Set it to a linked list of vdevs, not just a single vdev
	 */
	cp->private = vd;
	vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

/*
 * Drop this vdev's reference on its consumer and unbind the two.
 * Destroys the consumer when the last read/exclusive reference goes
 * away, and withers the shared geom once it has no consumers left.
 * Caller must hold the GEOM topology lock.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	/* Clearing private makes vdev_geom_orphan() ignore this consumer. */
	cp->private = NULL;

	gp = cp->geom;
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroyed consumer to %s.",
			    cp->provider->name);
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

/*
 * Extract the pool and vdev GUIDs from an unpacked label nvlist.
 * Lookups are best-effort: absent keys leave the outputs untouched,
 * so callers pre-zero *pguid/*vguid (spares/L2ARC have no pool guid).
 */
static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}

/*
 * Synchronous I/O helper: issue 'cmd' (e.g. BIO_READ) covering
 * [offset, offset + size) in MAXPHYS-sized, sector-aligned chunks and
 * wait for each chunk to complete.  Returns the first error, or 0.
 * Offset and size must be multiples of the provider's sector size.
 */
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;		/* 'offset' now marks the end of the range. */
	p = data;
	/* Largest per-bio transfer that is still sector-aligned. */
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		/* The single bio is reused for every chunk. */
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}

/*
 * Orphan handler for the short-lived tasting geom.  Tasting attaches
 * and detaches with the topology lock held, so no orphan event can be
 * delivered; reaching this function indicates a logic error.
 */
static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

/*
 * Read and unpack a valid vdev label config from the provider behind
 * 'cp'.  Tries all VDEV_LABELS copies; rejects labels whose pool state
 * is invalid, or (for non-spare/non-L2ARC labels) whose txg is missing
 * or zero.  Returns 0 with *config set, or ENOENT.
 * Must be called without the topology lock (does sleeping I/O).
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *p, *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int error, l, len;
	/*
	 * NOTE(review): locals 'p', 'error' and 'len' are declared but
	 * never used in this function.
	 */

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* Round the label size up to a whole number of sectors. */
	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}

/*
 * Grow the configs array so that index 'id' is valid, preserving the
 * existing entries (new slots are zeroed).  No-op when already large
 * enough.
 */
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

/*
 * Consider one tasted label 'cfg' for the pool named 'name': keep it
 * (indexed by top-level vdev id) when it belongs to that pool and is
 * newer (higher txg) than any previously-seen config for the same id;
 * otherwise free it.  The first matching pool guid seen is latched in
 * *known_pool_guid and later labels from other pools are ignored.
 * Ownership of 'cfg' always passes to this function.
 */
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;
	/* NOTE(review): locals 'known_guid' and 'i' appear unused. */

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

/*
 * Attach the tasting consumer 'cp' to provider 'pp' with read access,
 * rejecting withering providers and those whose geometry cannot hold a
 * ZFS label (sector size too large / not a power of 2, or media smaller
 * than SPA_MINDEVSIZE).  Returns 0 on success, EINVAL/g_access error
 * otherwise (with the consumer left detached on failure).
 */
static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error == 0) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
			error = EINVAL;
		else if (pp->mediasize < SPA_MINDEVSIZE)
			error = EINVAL;
		if (error != 0)
			g_access(cp, -1, 0, 0);
	}
	if (error != 0)
		g_detach(cp);
	return (error);
}

/* Undo vdev_geom_attach_taster(): drop read access and detach. */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

/*
 * Taste every non-ZFS GEOM provider in the system and collect the
 * newest label config of each top-level vdev of the pool named 'name'.
 * On success *configs/*count describe an array indexed by vdev id
 * (entries may be NULL for missing vdevs).  Returns 0 if anything was
 * found, ENOENT otherwise.  Caller owns the returned nvlists.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should be never called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		/* Skip our own class: never taste ZFS's own consumers. */
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				/* Label I/O sleeps: drop topology lock. */
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

/*
 * Read the pool and vdev GUIDs from the label behind 'cp'.  Both are
 * zero when no valid label (or no corresponding entry) is found.
 * Must be called without the topology lock.
 */
static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
	nvlist_t *config;

	g_topology_assert_not();

	*pguid = 0;
	*vguid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		nvlist_get_guids(config, pguid, vguid);
		nvlist_free(config);
	}
}

/*
 * Walk all GEOM providers tasting labels until one matches this vdev's
 * GUID (and, when present in the label, the pool GUID), then attach to
 * it.  Returns the attached consumer or NULL.  Caller must hold the
 * topology lock.
 */
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid, vguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should be never called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				vdev_geom_read_guids(zcp, &pguid, &vguid);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				/*
				 * Check that the label's vdev guid matches the
				 * desired guid. If the label has a pool guid,
				 * check that it matches too. (Inactive spares
				 * and L2ARCs do not have any pool guid in the
				 * label.)
				 */
				if ((pguid != 0 &&
				    pguid != spa_guid(vd->vdev_spa)) ||
				    vguid != vd->vdev_guid)
					continue;
				cp = vdev_geom_attach(pp, vd);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to "
					    "attach to %s.\n", pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}
/*
 * NOTE(review): the "end" label below has no corresponding goto in this
 * revision (the loops exit via break); it looks like a leftover from an
 * earlier version -- confirm before removing.
 */
end:
	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

/*
 * Open this vdev by searching all providers for a label matching its
 * GUIDs.  On success, also rewrites vd->vdev_path to the /dev path of
 * the provider actually found.  Caller must hold the topology lock.
 */
static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		/*
		 * len == strlen(buf) + 1, so spa_strfree() can later
		 * release this buffer like a spa_strdup() string.
		 */
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

/*
 * Open this vdev via the provider named by vd->vdev_path (with the
 * "/dev/" prefix stripped).  When check_guid is set, the on-disk label
 * GUIDs must match the vdev's, otherwise the consumer is closed again.
 * Returns the consumer or NULL.  Caller must hold the topology lock.
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t pguid, vguid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp, vd);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			vdev_geom_read_guids(cp, &pguid, &vguid);
			g_topology_lock();
			/*
			 * Check that the label's vdev guid matches the
			 * desired guid. If the label has a pool guid,
			 * check that it matches too. (Inactive spares
			 * and L2ARCs do not have any pool guid in the
			 * label.)
			 */
			if ((pguid != 0 &&
			    pguid != spa_guid(vd->vdev_spa)) ||
			    vguid != vd->vdev_guid) {
				vdev_geom_close_locked(vd);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
				    (uintmax_t)spa_guid(vd->vdev_spa),
				    (uintmax_t)vd->vdev_guid,
				    (uintmax_t)pguid, (uintmax_t)vguid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

/*
 * vdev_ops open entry point.  Locates and opens the GEOM provider for
 * this vdev (by path, falling back to a GUID search), upgrades to write
 * access when the pool is writable, and reports media size and ashift
 * values back to the caller.  Returns 0 or an errno, setting
 * vd->vdev_stat.vs_aux on failure.
 */
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;
	/* NOTE(review): local 'bufsize' is declared but never used. */

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	/*
	 * NOTE(review): with C precedence this condition parses as
	 * splitting || (prevstate-unknown && LOAD_NONE) || LOAD_CREATE,
	 * i.e. during pool creation the GUID check is skipped regardless
	 * of prevstate.  The same expression exists upstream, so it is
	 * presumably intentional -- confirm before "fixing" the
	 * parenthesization.
	 */
	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	    vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs. The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);

		vdev_geom_close_locked(vd);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		/*
		 * Retry the write-access upgrade a few times; another
		 * consumer (e.g. a departing one) may hold the provider
		 * open exclusively for a moment.
		 */
		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
			    vd->vdev_path, error);
			vdev_geom_close_locked(vd);
			cp = NULL;
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL)
		vdev_geom_attrchanged(cp, "GEOM::physpath");

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/*
	 * Determine the device's rotation rate.
	 */
	vdev_geom_set_rotation_rate(vd, cp);

	return (0);
}

/* vdev_ops close entry point: close under the topology lock. */
static void
vdev_geom_close(vdev_t *vd)
{

	DROP_GIANT();
	g_topology_lock();
	vdev_geom_close_locked(vd);
	g_topology_unlock();
	PICKUP_GIANT();
}

/*
 * bio completion callback: translate the bio status into zio status,
 * latch "never again" flags on ENOTSUP (write cache flush / TRIM), and
 * turn ENXIO into either an async removal request or a delayed close.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	/* A short transfer with no error is still a failure for ZFS. */
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch(zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed. In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch(bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}

/*
 * vdev_ops I/O start entry point.  Dispatches reads/writes as
 * BIO_READ/BIO_WRITE, frees as BIO_DELETE, and cache-flush ioctls as
 * BIO_FLUSH, honoring the nocacheflush/notrim settings and the
 * bio_flush/bio_delete sysctl kill switches.  Completion is reported
 * asynchronously via vdev_geom_io_intr().
 */
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;
	/* NOTE(review): local 'error' is declared but never used. */

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
	/* READ and WRITE fall through the switch directly to here. */
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ?
		    BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		/* BIO_FLUSH convention: offset at end-of-media, length 0. */
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

/* Completion hook: nothing to do; all work happens in the bio callback. */
static void
vdev_geom_io_done(zio_t *zio)
{
}

/* GEOM vdevs need no extra hold reference. */
static void
vdev_geom_hold(vdev_t *vd)
{
}

/* Counterpart of vdev_geom_hold(); also a no-op. */
static void
vdev_geom_rele(vdev_t *vd)
{
}

/* Leaf vdev operations vector for GEOM-backed disks. */
vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};