/* vdev_geom.c revision 299376 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable = 0; 57TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 58SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 59 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 60/* Don't send BIO_DELETE. 
*/ 61static int vdev_geom_bio_delete_disable = 0; 62TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 63SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 64 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 65 66static void 67vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 68{ 69 int error; 70 uint16_t rate; 71 72 error = g_getattr("GEOM::rotation_rate", cp, &rate); 73 if (error == 0) 74 vd->vdev_rotation_rate = rate; 75 else 76 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 77} 78 79static void 80vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 81{ 82 vdev_t *vd; 83 spa_t *spa; 84 char *physpath; 85 int error, physpath_len; 86 87 vd = cp->private; 88 if (vd == NULL) 89 return; 90 91 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 92 vdev_geom_set_rotation_rate(vd, cp); 93 return; 94 } 95 96 if (strcmp(attr, "GEOM::physpath") != 0) 97 return; 98 99 if (g_access(cp, 1, 0, 0) != 0) 100 return; 101 102 /* 103 * Record/Update physical path information for this device. 104 */ 105 spa = vd->vdev_spa; 106 physpath_len = MAXPATHLEN; 107 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 108 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 109 g_access(cp, -1, 0, 0); 110 if (error == 0) { 111 char *old_physpath; 112 113 /* g_topology lock ensures that vdev has not been closed */ 114 g_topology_assert(); 115 old_physpath = vd->vdev_physpath; 116 vd->vdev_physpath = spa_strdup(physpath); 117 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 118 119 if (old_physpath != NULL) 120 spa_strfree(old_physpath); 121 } 122 g_free(physpath); 123} 124 125static void 126vdev_geom_orphan(struct g_consumer *cp) 127{ 128 vdev_t *vd; 129 130 g_topology_assert(); 131 132 vd = cp->private; 133 if (vd == NULL) { 134 /* Vdev close in progress. Ignore the event. */ 135 return; 136 } 137 138 /* 139 * Orphan callbacks occur from the GEOM event thread. 
140 * Concurrent with this call, new I/O requests may be 141 * working their way through GEOM about to find out 142 * (only once executed by the g_down thread) that we've 143 * been orphaned from our disk provider. These I/Os 144 * must be retired before we can detach our consumer. 145 * This is most easily achieved by acquiring the 146 * SPA ZIO configuration lock as a writer, but doing 147 * so with the GEOM topology lock held would cause 148 * a lock order reversal. Instead, rely on the SPA's 149 * async removal support to invoke a close on this 150 * vdev once it is safe to do so. 151 */ 152 vd->vdev_remove_wanted = B_TRUE; 153 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 154} 155 156static struct g_consumer * 157vdev_geom_attach(struct g_provider *pp, vdev_t *vd) 158{ 159 struct g_geom *gp; 160 struct g_consumer *cp; 161 162 g_topology_assert(); 163 164 ZFS_LOG(1, "Attaching to %s.", pp->name); 165 /* Do we have geom already? No? Create one. */ 166 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 167 if (gp->flags & G_GEOM_WITHER) 168 continue; 169 if (strcmp(gp->name, "zfs::vdev") != 0) 170 continue; 171 break; 172 } 173 if (gp == NULL) { 174 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 175 gp->orphan = vdev_geom_orphan; 176 gp->attrchanged = vdev_geom_attrchanged; 177 cp = g_new_consumer(gp); 178 if (g_attach(cp, pp) != 0) { 179 g_wither_geom(gp, ENXIO); 180 return (NULL); 181 } 182 if (g_access(cp, 1, 0, 1) != 0) { 183 g_wither_geom(gp, ENXIO); 184 return (NULL); 185 } 186 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 187 } else { 188 /* Check if we are already connected to this provider. 
*/ 189 LIST_FOREACH(cp, &gp->consumer, consumer) { 190 if (cp->provider == pp) { 191 ZFS_LOG(1, "Found consumer for %s.", pp->name); 192 break; 193 } 194 } 195 if (cp == NULL) { 196 cp = g_new_consumer(gp); 197 if (g_attach(cp, pp) != 0) { 198 g_destroy_consumer(cp); 199 return (NULL); 200 } 201 if (g_access(cp, 1, 0, 1) != 0) { 202 g_detach(cp); 203 g_destroy_consumer(cp); 204 return (NULL); 205 } 206 ZFS_LOG(1, "Created consumer for %s.", pp->name); 207 } else { 208 if (g_access(cp, 1, 0, 1) != 0) 209 return (NULL); 210 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 211 } 212 } 213 214 /* 215 * BUG: cp may already belong to a vdev. This could happen if: 216 * 1) That vdev is a shared spare, or 217 * 2) We are trying to reopen a missing vdev and we are scanning by 218 * guid. In that case, we'll ultimately fail to open this consumer, 219 * but not until after setting the private field. 220 * The solution is to: 221 * 1) Don't set the private field until after the open succeeds, and 222 * 2) Set it to a linked list of vdevs, not just a single vdev 223 */ 224 cp->private = vd; 225 vd->vdev_tsd = cp; 226 227 /* Fetch initial physical path information for this device. */ 228 vdev_geom_attrchanged(cp, "GEOM::physpath"); 229 230 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 231 return (cp); 232} 233 234static void 235vdev_geom_close_locked(vdev_t *vd) 236{ 237 struct g_geom *gp; 238 struct g_consumer *cp; 239 240 g_topology_assert(); 241 242 cp = vd->vdev_tsd; 243 if (cp == NULL) 244 return; 245 246 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 247 KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__)); 248 vd->vdev_tsd = NULL; 249 vd->vdev_delayed_close = B_FALSE; 250 cp->private = NULL; 251 252 gp = cp->geom; 253 g_access(cp, -1, 0, -1); 254 /* Destroy consumer on last close. 
*/ 255 if (cp->acr == 0 && cp->ace == 0) { 256 if (cp->acw > 0) 257 g_access(cp, 0, -cp->acw, 0); 258 if (cp->provider != NULL) { 259 ZFS_LOG(1, "Destroyed consumer to %s.", 260 cp->provider->name); 261 g_detach(cp); 262 } 263 g_destroy_consumer(cp); 264 } 265 /* Destroy geom if there are no consumers left. */ 266 if (LIST_EMPTY(&gp->consumer)) { 267 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 268 g_wither_geom(gp, ENXIO); 269 } 270} 271 272static void 273nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) 274{ 275 276 (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); 277 (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); 278} 279 280static int 281vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 282{ 283 struct bio *bp; 284 u_char *p; 285 off_t off, maxio; 286 int error; 287 288 ASSERT((offset % cp->provider->sectorsize) == 0); 289 ASSERT((size % cp->provider->sectorsize) == 0); 290 291 bp = g_alloc_bio(); 292 off = offset; 293 offset += size; 294 p = data; 295 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 296 error = 0; 297 298 for (; off < offset; off += maxio, p += maxio, size -= maxio) { 299 bzero(bp, sizeof(*bp)); 300 bp->bio_cmd = cmd; 301 bp->bio_done = NULL; 302 bp->bio_offset = off; 303 bp->bio_length = MIN(size, maxio); 304 bp->bio_data = p; 305 g_io_request(bp, cp); 306 error = biowait(bp, "vdev_geom_io"); 307 if (error != 0) 308 break; 309 } 310 311 g_destroy_bio(bp); 312 return (error); 313} 314 315static void 316vdev_geom_taste_orphan(struct g_consumer *cp) 317{ 318 319 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 320 cp->provider->name)); 321} 322 323static int 324vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 325{ 326 struct g_provider *pp; 327 vdev_label_t *label; 328 char *p, *buf; 329 size_t buflen; 330 uint64_t psize; 331 off_t offset, size; 332 uint64_t state, txg; 333 int error, l, len; 334 335 g_topology_assert_not(); 336 337 pp 
= cp->provider; 338 ZFS_LOG(1, "Reading config from %s...", pp->name); 339 340 psize = pp->mediasize; 341 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 342 343 size = sizeof(*label) + pp->sectorsize - 344 ((sizeof(*label) - 1) % pp->sectorsize) - 1; 345 346 label = kmem_alloc(size, KM_SLEEP); 347 buflen = sizeof(label->vl_vdev_phys.vp_nvlist); 348 349 *config = NULL; 350 for (l = 0; l < VDEV_LABELS; l++) { 351 352 offset = vdev_label_offset(psize, l, 0); 353 if ((offset % pp->sectorsize) != 0) 354 continue; 355 356 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 357 continue; 358 buf = label->vl_vdev_phys.vp_nvlist; 359 360 if (nvlist_unpack(buf, buflen, config, 0) != 0) 361 continue; 362 363 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 364 &state) != 0 || state > POOL_STATE_L2CACHE) { 365 nvlist_free(*config); 366 *config = NULL; 367 continue; 368 } 369 370 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 371 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 372 &txg) != 0 || txg == 0)) { 373 nvlist_free(*config); 374 *config = NULL; 375 continue; 376 } 377 378 break; 379 } 380 381 kmem_free(label, size); 382 return (*config == NULL ? 
ENOENT : 0); 383} 384 385static void 386resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 387{ 388 nvlist_t **new_configs; 389 uint64_t i; 390 391 if (id < *count) 392 return; 393 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 394 KM_SLEEP); 395 for (i = 0; i < *count; i++) 396 new_configs[i] = (*configs)[i]; 397 if (*configs != NULL) 398 kmem_free(*configs, *count * sizeof(void *)); 399 *configs = new_configs; 400 *count = id + 1; 401} 402 403static void 404process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 405 const char *name, uint64_t* known_pool_guid) 406{ 407 nvlist_t *vdev_tree; 408 uint64_t pool_guid; 409 uint64_t vdev_guid, known_guid; 410 uint64_t id, txg, known_txg; 411 char *pname; 412 int i; 413 414 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 415 strcmp(pname, name) != 0) 416 goto ignore; 417 418 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 419 goto ignore; 420 421 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 422 goto ignore; 423 424 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 425 goto ignore; 426 427 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 428 goto ignore; 429 430 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 431 432 if (*known_pool_guid != 0) { 433 if (pool_guid != *known_pool_guid) 434 goto ignore; 435 } else 436 *known_pool_guid = pool_guid; 437 438 resize_configs(configs, count, id); 439 440 if ((*configs)[id] != NULL) { 441 VERIFY(nvlist_lookup_uint64((*configs)[id], 442 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 443 if (txg <= known_txg) 444 goto ignore; 445 nvlist_free((*configs)[id]); 446 } 447 448 (*configs)[id] = cfg; 449 return; 450 451ignore: 452 nvlist_free(cfg); 453} 454 455static int 456vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) 457{ 458 int error; 459 460 if (pp->flags & G_PF_WITHER) 461 return (EINVAL); 462 
g_attach(cp, pp); 463 error = g_access(cp, 1, 0, 0); 464 if (error == 0) { 465 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) 466 error = EINVAL; 467 else if (pp->mediasize < SPA_MINDEVSIZE) 468 error = EINVAL; 469 if (error != 0) 470 g_access(cp, -1, 0, 0); 471 } 472 if (error != 0) 473 g_detach(cp); 474 return (error); 475} 476 477static void 478vdev_geom_detach_taster(struct g_consumer *cp) 479{ 480 g_access(cp, -1, 0, 0); 481 g_detach(cp); 482} 483 484int 485vdev_geom_read_pool_label(const char *name, 486 nvlist_t ***configs, uint64_t *count) 487{ 488 struct g_class *mp; 489 struct g_geom *gp, *zgp; 490 struct g_provider *pp; 491 struct g_consumer *zcp; 492 nvlist_t *vdev_cfg; 493 uint64_t pool_guid; 494 int error; 495 496 DROP_GIANT(); 497 g_topology_lock(); 498 499 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 500 /* This orphan function should be never called. */ 501 zgp->orphan = vdev_geom_taste_orphan; 502 zcp = g_new_consumer(zgp); 503 504 *configs = NULL; 505 *count = 0; 506 pool_guid = 0; 507 LIST_FOREACH(mp, &g_classes, class) { 508 if (mp == &zfs_vdev_class) 509 continue; 510 LIST_FOREACH(gp, &mp->geom, geom) { 511 if (gp->flags & G_GEOM_WITHER) 512 continue; 513 LIST_FOREACH(pp, &gp->provider, provider) { 514 if (pp->flags & G_PF_WITHER) 515 continue; 516 if (vdev_geom_attach_taster(zcp, pp) != 0) 517 continue; 518 g_topology_unlock(); 519 error = vdev_geom_read_config(zcp, &vdev_cfg); 520 g_topology_lock(); 521 vdev_geom_detach_taster(zcp); 522 if (error) 523 continue; 524 ZFS_LOG(1, "successfully read vdev config"); 525 526 process_vdev_config(configs, count, 527 vdev_cfg, name, &pool_guid); 528 } 529 } 530 } 531 532 g_destroy_consumer(zcp); 533 g_destroy_geom(zgp); 534 g_topology_unlock(); 535 PICKUP_GIANT(); 536 537 return (*count > 0 ? 
0 : ENOENT); 538} 539 540static void 541vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) 542{ 543 nvlist_t *config; 544 545 g_topology_assert_not(); 546 547 *pguid = 0; 548 *vguid = 0; 549 if (vdev_geom_read_config(cp, &config) == 0) { 550 nvlist_get_guids(config, pguid, vguid); 551 nvlist_free(config); 552 } 553} 554 555static struct g_consumer * 556vdev_geom_attach_by_guids(vdev_t *vd) 557{ 558 struct g_class *mp; 559 struct g_geom *gp, *zgp; 560 struct g_provider *pp; 561 struct g_consumer *cp, *zcp; 562 uint64_t pguid, vguid; 563 564 g_topology_assert(); 565 566 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 567 /* This orphan function should be never called. */ 568 zgp->orphan = vdev_geom_taste_orphan; 569 zcp = g_new_consumer(zgp); 570 571 cp = NULL; 572 LIST_FOREACH(mp, &g_classes, class) { 573 if (mp == &zfs_vdev_class) 574 continue; 575 LIST_FOREACH(gp, &mp->geom, geom) { 576 if (gp->flags & G_GEOM_WITHER) 577 continue; 578 LIST_FOREACH(pp, &gp->provider, provider) { 579 if (vdev_geom_attach_taster(zcp, pp) != 0) 580 continue; 581 g_topology_unlock(); 582 vdev_geom_read_guids(zcp, &pguid, &vguid); 583 g_topology_lock(); 584 vdev_geom_detach_taster(zcp); 585 /* 586 * Check that the label's vdev guid matches the 587 * desired guid. If the label has a pool guid, 588 * check that it matches too. (Inactive spares 589 * and L2ARCs do not have any pool guid in the 590 * label.) 
591 */ 592 if ((pguid != 0 && 593 pguid != spa_guid(vd->vdev_spa)) || 594 vguid != vd->vdev_guid) 595 continue; 596 cp = vdev_geom_attach(pp, vd); 597 if (cp == NULL) { 598 printf("ZFS WARNING: Unable to " 599 "attach to %s.\n", pp->name); 600 continue; 601 } 602 break; 603 } 604 if (cp != NULL) 605 break; 606 } 607 if (cp != NULL) 608 break; 609 } 610end: 611 g_destroy_consumer(zcp); 612 g_destroy_geom(zgp); 613 return (cp); 614} 615 616static struct g_consumer * 617vdev_geom_open_by_guids(vdev_t *vd) 618{ 619 struct g_consumer *cp; 620 char *buf; 621 size_t len; 622 623 g_topology_assert(); 624 625 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 626 cp = vdev_geom_attach_by_guids(vd); 627 if (cp != NULL) { 628 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 629 buf = kmem_alloc(len, KM_SLEEP); 630 631 snprintf(buf, len, "/dev/%s", cp->provider->name); 632 spa_strfree(vd->vdev_path); 633 vd->vdev_path = buf; 634 635 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 636 (uintmax_t)spa_guid(vd->vdev_spa), 637 (uintmax_t)vd->vdev_guid, vd->vdev_path); 638 } else { 639 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 640 (uintmax_t)spa_guid(vd->vdev_spa), 641 (uintmax_t)vd->vdev_guid); 642 } 643 644 return (cp); 645} 646 647static struct g_consumer * 648vdev_geom_open_by_path(vdev_t *vd, int check_guid) 649{ 650 struct g_provider *pp; 651 struct g_consumer *cp; 652 uint64_t pguid, vguid; 653 654 g_topology_assert(); 655 656 cp = NULL; 657 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 658 if (pp != NULL) { 659 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 660 cp = vdev_geom_attach(pp, vd); 661 if (cp != NULL && check_guid && ISP2(pp->sectorsize) && 662 pp->sectorsize <= VDEV_PAD_SIZE) { 663 g_topology_unlock(); 664 vdev_geom_read_guids(cp, &pguid, &vguid); 665 g_topology_lock(); 666 /* 667 * Check that the label's vdev guid matches the 668 * desired guid. 
If the label has a pool guid, 669 * check that it matches too. (Inactive spares 670 * and L2ARCs do not have any pool guid in the 671 * label.) 672 */ 673 if ((pguid != 0 && 674 pguid != spa_guid(vd->vdev_spa)) || 675 vguid != vd->vdev_guid) { 676 vdev_geom_close_locked(vd); 677 cp = NULL; 678 ZFS_LOG(1, "guid mismatch for provider %s: " 679 "%ju:%ju != %ju:%ju.", vd->vdev_path, 680 (uintmax_t)spa_guid(vd->vdev_spa), 681 (uintmax_t)vd->vdev_guid, 682 (uintmax_t)pguid, (uintmax_t)vguid); 683 } else { 684 ZFS_LOG(1, "guid match for provider %s.", 685 vd->vdev_path); 686 } 687 } 688 } 689 690 return (cp); 691} 692 693static int 694vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 695 uint64_t *logical_ashift, uint64_t *physical_ashift) 696{ 697 struct g_provider *pp; 698 struct g_consumer *cp; 699 size_t bufsize; 700 int error; 701 702 /* 703 * We must have a pathname, and it must be absolute. 704 */ 705 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 706 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 707 return (EINVAL); 708 } 709 710 vd->vdev_tsd = NULL; 711 712 DROP_GIANT(); 713 g_topology_lock(); 714 error = 0; 715 716 if (vd->vdev_spa->spa_splitting_newspa || 717 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 718 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) { 719 /* 720 * We are dealing with a vdev that hasn't been previously 721 * opened (since boot), and we are not loading an 722 * existing pool configuration. This looks like a 723 * vdev add operation to a new or existing pool. 724 * Assume the user knows what he/she is doing and find 725 * GEOM provider by its name, ignoring GUID mismatches. 726 * 727 * XXPOLICY: It would be safer to only allow a device 728 * that is unlabeled or labeled but missing 729 * GUID information to be opened in this fashion, 730 * unless we are doing a split, in which case we 731 * should allow any guid. 
732 */ 733 cp = vdev_geom_open_by_path(vd, 0); 734 } else { 735 /* 736 * Try using the recorded path for this device, but only 737 * accept it if its label data contains the expected GUIDs. 738 */ 739 cp = vdev_geom_open_by_path(vd, 1); 740 if (cp == NULL) { 741 /* 742 * The device at vd->vdev_path doesn't have the 743 * expected GUIDs. The disks might have merely 744 * moved around so try all other GEOM providers 745 * to find one with the right GUIDs. 746 */ 747 cp = vdev_geom_open_by_guids(vd); 748 } 749 } 750 751 if (cp == NULL) { 752 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 753 error = ENOENT; 754 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 755 !ISP2(cp->provider->sectorsize)) { 756 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 757 vd->vdev_path); 758 759 vdev_geom_close_locked(vd); 760 error = EINVAL; 761 cp = NULL; 762 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 763 int i; 764 765 for (i = 0; i < 5; i++) { 766 error = g_access(cp, 0, 1, 0); 767 if (error == 0) 768 break; 769 g_topology_unlock(); 770 tsleep(vd, 0, "vdev", hz / 2); 771 g_topology_lock(); 772 } 773 if (error != 0) { 774 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 775 vd->vdev_path, error); 776 vdev_geom_close_locked(vd); 777 cp = NULL; 778 } 779 } 780 781 g_topology_unlock(); 782 PICKUP_GIANT(); 783 if (cp == NULL) { 784 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 785 return (error); 786 } 787 pp = cp->provider; 788 789 /* 790 * Determine the actual size of the device. 791 */ 792 *max_psize = *psize = pp->mediasize; 793 794 /* 795 * Determine the device's minimum transfer size and preferred 796 * transfer size. 
797 */ 798 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 799 *physical_ashift = 0; 800 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 801 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 802 *physical_ashift = highbit(pp->stripesize) - 1; 803 804 /* 805 * Clear the nowritecache settings, so that on a vdev_reopen() 806 * we will try again. 807 */ 808 vd->vdev_nowritecache = B_FALSE; 809 810 /* 811 * Determine the device's rotation rate. 812 */ 813 vdev_geom_set_rotation_rate(vd, cp); 814 815 return (0); 816} 817 818static void 819vdev_geom_close(vdev_t *vd) 820{ 821 822 DROP_GIANT(); 823 g_topology_lock(); 824 vdev_geom_close_locked(vd); 825 g_topology_unlock(); 826 PICKUP_GIANT(); 827} 828 829static void 830vdev_geom_io_intr(struct bio *bp) 831{ 832 vdev_t *vd; 833 zio_t *zio; 834 835 zio = bp->bio_caller1; 836 vd = zio->io_vd; 837 zio->io_error = bp->bio_error; 838 if (zio->io_error == 0 && bp->bio_resid != 0) 839 zio->io_error = SET_ERROR(EIO); 840 841 switch(zio->io_error) { 842 case ENOTSUP: 843 /* 844 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 845 * that future attempts will never succeed. In this case 846 * we set a persistent flag so that we don't bother with 847 * requests in the future. 848 */ 849 switch(bp->bio_cmd) { 850 case BIO_FLUSH: 851 vd->vdev_nowritecache = B_TRUE; 852 break; 853 case BIO_DELETE: 854 vd->vdev_notrim = B_TRUE; 855 break; 856 } 857 break; 858 case ENXIO: 859 if (!vd->vdev_remove_wanted) { 860 /* 861 * If provider's error is set we assume it is being 862 * removed. 
863 */ 864 if (bp->bio_to->error != 0) { 865 vd->vdev_remove_wanted = B_TRUE; 866 spa_async_request(zio->io_spa, 867 SPA_ASYNC_REMOVE); 868 } else if (!vd->vdev_delayed_close) { 869 vd->vdev_delayed_close = B_TRUE; 870 } 871 } 872 break; 873 } 874 g_destroy_bio(bp); 875 zio_delay_interrupt(zio); 876} 877 878static void 879vdev_geom_io_start(zio_t *zio) 880{ 881 vdev_t *vd; 882 struct g_consumer *cp; 883 struct bio *bp; 884 int error; 885 886 vd = zio->io_vd; 887 888 switch (zio->io_type) { 889 case ZIO_TYPE_IOCTL: 890 /* XXPOLICY */ 891 if (!vdev_readable(vd)) { 892 zio->io_error = SET_ERROR(ENXIO); 893 zio_interrupt(zio); 894 return; 895 } else { 896 switch (zio->io_cmd) { 897 case DKIOCFLUSHWRITECACHE: 898 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 899 break; 900 if (vd->vdev_nowritecache) { 901 zio->io_error = SET_ERROR(ENOTSUP); 902 break; 903 } 904 goto sendreq; 905 default: 906 zio->io_error = SET_ERROR(ENOTSUP); 907 } 908 } 909 910 zio_execute(zio); 911 return; 912 case ZIO_TYPE_FREE: 913 if (vd->vdev_notrim) { 914 zio->io_error = SET_ERROR(ENOTSUP); 915 } else if (!vdev_geom_bio_delete_disable) { 916 goto sendreq; 917 } 918 zio_execute(zio); 919 return; 920 } 921sendreq: 922 ASSERT(zio->io_type == ZIO_TYPE_READ || 923 zio->io_type == ZIO_TYPE_WRITE || 924 zio->io_type == ZIO_TYPE_FREE || 925 zio->io_type == ZIO_TYPE_IOCTL); 926 927 cp = vd->vdev_tsd; 928 if (cp == NULL) { 929 zio->io_error = SET_ERROR(ENXIO); 930 zio_interrupt(zio); 931 return; 932 } 933 bp = g_alloc_bio(); 934 bp->bio_caller1 = zio; 935 switch (zio->io_type) { 936 case ZIO_TYPE_READ: 937 case ZIO_TYPE_WRITE: 938 zio->io_target_timestamp = zio_handle_io_delay(zio); 939 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? 
BIO_READ : BIO_WRITE; 940 bp->bio_data = zio->io_data; 941 bp->bio_offset = zio->io_offset; 942 bp->bio_length = zio->io_size; 943 break; 944 case ZIO_TYPE_FREE: 945 bp->bio_cmd = BIO_DELETE; 946 bp->bio_data = NULL; 947 bp->bio_offset = zio->io_offset; 948 bp->bio_length = zio->io_size; 949 break; 950 case ZIO_TYPE_IOCTL: 951 bp->bio_cmd = BIO_FLUSH; 952 bp->bio_flags |= BIO_ORDERED; 953 bp->bio_data = NULL; 954 bp->bio_offset = cp->provider->mediasize; 955 bp->bio_length = 0; 956 break; 957 } 958 bp->bio_done = vdev_geom_io_intr; 959 960 g_io_request(bp, cp); 961} 962 963static void 964vdev_geom_io_done(zio_t *zio) 965{ 966} 967 968static void 969vdev_geom_hold(vdev_t *vd) 970{ 971} 972 973static void 974vdev_geom_rele(vdev_t *vd) 975{ 976} 977 978vdev_ops_t vdev_geom_ops = { 979 vdev_geom_open, 980 vdev_geom_close, 981 vdev_default_asize, 982 vdev_geom_io_start, 983 vdev_geom_io_done, 984 NULL, 985 vdev_geom_hold, 986 vdev_geom_rele, 987 VDEV_TYPE_DISK, /* name of this vdev type */ 988 B_TRUE /* leaf vdev */ 989}; 990