vdev_geom.c revision 297108
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable = 0; 57TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 58SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 59 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 60/* Don't send BIO_DELETE. */ 61static int vdev_geom_bio_delete_disable = 0; 62TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 63SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 64 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 65 66static void 67vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 68{ 69 int error; 70 uint16_t rate; 71 72 error = g_getattr("GEOM::rotation_rate", cp, &rate); 73 if (error == 0) 74 vd->vdev_rotation_rate = rate; 75 else 76 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 77} 78 79static void 80vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 81{ 82 vdev_t *vd; 83 spa_t *spa; 84 char *physpath; 85 int error, physpath_len; 86 87 vd = cp->private; 88 if (vd == NULL) 89 return; 90 91 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 92 vdev_geom_set_rotation_rate(vd, cp); 93 return; 94 } 95 96 if (strcmp(attr, "GEOM::physpath") != 0) 97 return; 98 99 if (g_access(cp, 1, 0, 0) != 0) 100 return; 101 102 /* 103 * Record/Update physical path information for this device. 104 */ 105 spa = vd->vdev_spa; 106 physpath_len = MAXPATHLEN; 107 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 108 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 109 g_access(cp, -1, 0, 0); 110 if (error == 0) { 111 char *old_physpath; 112 113 old_physpath = vd->vdev_physpath; 114 vd->vdev_physpath = spa_strdup(physpath); 115 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 116 117 if (old_physpath != NULL) { 118 int held_lock; 119 120 held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER); 121 if (held_lock == 0) { 122 g_topology_unlock(); 123 spa_config_enter(spa, SCL_STATE, FTAG, 124 RW_WRITER); 125 } 126 127 spa_strfree(old_physpath); 128 129 if (held_lock == 0) { 130 spa_config_exit(spa, SCL_STATE, FTAG); 131 g_topology_lock(); 132 } 133 } 134 } 135 g_free(physpath); 136} 137 138static void 139vdev_geom_orphan(struct g_consumer *cp) 140{ 141 vdev_t *vd; 142 143 g_topology_assert(); 144 145 vd = cp->private; 146 if (vd == NULL) { 147 /* Vdev close in progress. Ignore the event. */ 148 return; 149 } 150 151 /* 152 * Orphan callbacks occur from the GEOM event thread. 153 * Concurrent with this call, new I/O requests may be 154 * working their way through GEOM about to find out 155 * (only once executed by the g_down thread) that we've 156 * been orphaned from our disk provider. These I/Os 157 * must be retired before we can detach our consumer. 158 * This is most easily achieved by acquiring the 159 * SPA ZIO configuration lock as a writer, but doing 160 * so with the GEOM topology lock held would cause 161 * a lock order reversal. Instead, rely on the SPA's 162 * async removal support to invoke a close on this 163 * vdev once it is safe to do so. 164 */ 165 vd->vdev_remove_wanted = B_TRUE; 166 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 167} 168 169static struct g_consumer * 170vdev_geom_attach(struct g_provider *pp, vdev_t *vd) 171{ 172 struct g_geom *gp; 173 struct g_consumer *cp; 174 175 g_topology_assert(); 176 177 ZFS_LOG(1, "Attaching to %s.", pp->name); 178 /* Do we have geom already? No? Create one. */ 179 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 180 if (gp->flags & G_GEOM_WITHER) 181 continue; 182 if (strcmp(gp->name, "zfs::vdev") != 0) 183 continue; 184 break; 185 } 186 if (gp == NULL) { 187 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 188 gp->orphan = vdev_geom_orphan; 189 gp->attrchanged = vdev_geom_attrchanged; 190 cp = g_new_consumer(gp); 191 if (g_attach(cp, pp) != 0) { 192 g_wither_geom(gp, ENXIO); 193 return (NULL); 194 } 195 if (g_access(cp, 1, 0, 1) != 0) { 196 g_wither_geom(gp, ENXIO); 197 return (NULL); 198 } 199 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 200 } else { 201 /* Check if we are already connected to this provider. */ 202 LIST_FOREACH(cp, &gp->consumer, consumer) { 203 if (cp->provider == pp) { 204 ZFS_LOG(1, "Found consumer for %s.", pp->name); 205 break; 206 } 207 } 208 if (cp == NULL) { 209 cp = g_new_consumer(gp); 210 if (g_attach(cp, pp) != 0) { 211 g_destroy_consumer(cp); 212 return (NULL); 213 } 214 if (g_access(cp, 1, 0, 1) != 0) { 215 g_detach(cp); 216 g_destroy_consumer(cp); 217 return (NULL); 218 } 219 ZFS_LOG(1, "Created consumer for %s.", pp->name); 220 } else { 221 if (g_access(cp, 1, 0, 1) != 0) 222 return (NULL); 223 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 224 } 225 } 226 227 /* 228 * BUG: cp may already belong to a vdev. This could happen if: 229 * 1) That vdev is a shared spare, or 230 * 2) We are trying to reopen a missing vdev and we are scanning by 231 * guid. In that case, we'll ultimately fail to open this consumer, 232 * but not until after setting the private field. 233 * The solution is to: 234 * 1) Don't set the private field until after the open succeeds, and 235 * 2) Set it to a linked list of vdevs, not just a single vdev 236 */ 237 cp->private = vd; 238 vd->vdev_tsd = cp; 239 240 /* Fetch initial physical path information for this device. */ 241 vdev_geom_attrchanged(cp, "GEOM::physpath"); 242 243 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 244 return (cp); 245} 246 247static void 248vdev_geom_close_locked(vdev_t *vd) 249{ 250 struct g_geom *gp; 251 struct g_consumer *cp; 252 253 g_topology_assert(); 254 255 cp = vd->vdev_tsd; 256 if (cp == NULL) 257 return; 258 259 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 260 KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__)); 261 vd->vdev_tsd = NULL; 262 vd->vdev_delayed_close = B_FALSE; 263 cp->private = NULL; 264 265 gp = cp->geom; 266 g_access(cp, -1, 0, -1); 267 /* Destroy consumer on last close. */ 268 if (cp->acr == 0 && cp->ace == 0) { 269 if (cp->acw > 0) 270 g_access(cp, 0, -cp->acw, 0); 271 if (cp->provider != NULL) { 272 ZFS_LOG(1, "Destroyed consumer to %s.", 273 cp->provider->name); 274 g_detach(cp); 275 } 276 g_destroy_consumer(cp); 277 } 278 /* Destroy geom if there are no consumers left. */ 279 if (LIST_EMPTY(&gp->consumer)) { 280 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 281 g_wither_geom(gp, ENXIO); 282 } 283} 284 285static void 286nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) 287{ 288 289 (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); 290 (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); 291} 292 293static int 294vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 295{ 296 struct bio *bp; 297 u_char *p; 298 off_t off, maxio; 299 int error; 300 301 ASSERT((offset % cp->provider->sectorsize) == 0); 302 ASSERT((size % cp->provider->sectorsize) == 0); 303 304 bp = g_alloc_bio(); 305 off = offset; 306 offset += size; 307 p = data; 308 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 309 error = 0; 310 311 for (; off < offset; off += maxio, p += maxio, size -= maxio) { 312 bzero(bp, sizeof(*bp)); 313 bp->bio_cmd = cmd; 314 bp->bio_done = NULL; 315 bp->bio_offset = off; 316 bp->bio_length = MIN(size, maxio); 317 bp->bio_data = p; 318 g_io_request(bp, cp); 319 error = biowait(bp, "vdev_geom_io"); 320 if (error != 0) 321 break; 322 } 323 324 g_destroy_bio(bp); 325 return (error); 326} 327 328static void 329vdev_geom_taste_orphan(struct g_consumer *cp) 330{ 331 332 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 333 cp->provider->name)); 334} 335 336static int 337vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 338{ 339 struct g_provider *pp; 340 vdev_label_t *label; 341 char *p, *buf; 342 size_t buflen; 343 uint64_t psize; 344 off_t offset, size; 345 uint64_t state, txg; 346 int error, l, len; 347 348 g_topology_assert_not(); 349 350 pp = cp->provider; 351 ZFS_LOG(1, "Reading config from %s...", pp->name); 352 353 psize = pp->mediasize; 354 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 355 356 size = sizeof(*label) + pp->sectorsize - 357 ((sizeof(*label) - 1) % pp->sectorsize) - 1; 358 359 label = kmem_alloc(size, KM_SLEEP); 360 buflen = sizeof(label->vl_vdev_phys.vp_nvlist); 361 362 *config = NULL; 363 for (l = 0; l < VDEV_LABELS; l++) { 364 365 offset = vdev_label_offset(psize, l, 0); 366 if ((offset % pp->sectorsize) != 0) 367 continue; 368 369 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 370 continue; 371 buf = label->vl_vdev_phys.vp_nvlist; 372 373 if (nvlist_unpack(buf, buflen, config, 0) != 0) 374 continue; 375 376 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 377 &state) != 0 || state > POOL_STATE_L2CACHE) { 378 nvlist_free(*config); 379 *config = NULL; 380 continue; 381 } 382 383 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 384 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 385 &txg) != 0 || txg == 0)) { 386 nvlist_free(*config); 387 *config = NULL; 388 continue; 389 } 390 391 break; 392 } 393 394 kmem_free(label, size); 395 return (*config == NULL ? ENOENT : 0); 396} 397 398static void 399resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 400{ 401 nvlist_t **new_configs; 402 uint64_t i; 403 404 if (id < *count) 405 return; 406 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 407 KM_SLEEP); 408 for (i = 0; i < *count; i++) 409 new_configs[i] = (*configs)[i]; 410 if (*configs != NULL) 411 kmem_free(*configs, *count * sizeof(void *)); 412 *configs = new_configs; 413 *count = id + 1; 414} 415 416static void 417process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 418 const char *name, uint64_t* known_pool_guid) 419{ 420 nvlist_t *vdev_tree; 421 uint64_t pool_guid; 422 uint64_t vdev_guid, known_guid; 423 uint64_t id, txg, known_txg; 424 char *pname; 425 int i; 426 427 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 428 strcmp(pname, name) != 0) 429 goto ignore; 430 431 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 432 goto ignore; 433 434 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 435 goto ignore; 436 437 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 438 goto ignore; 439 440 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 441 goto ignore; 442 443 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 444 445 if (*known_pool_guid != 0) { 446 if (pool_guid != *known_pool_guid) 447 goto ignore; 448 } else 449 *known_pool_guid = pool_guid; 450 451 resize_configs(configs, count, id); 452 453 if ((*configs)[id] != NULL) { 454 VERIFY(nvlist_lookup_uint64((*configs)[id], 455 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 456 if (txg <= known_txg) 457 goto ignore; 458 nvlist_free((*configs)[id]); 459 } 460 461 (*configs)[id] = cfg; 462 return; 463 464ignore: 465 nvlist_free(cfg); 466} 467 468static int 469vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) 470{ 471 int error; 472 473 if (pp->flags & G_PF_WITHER) 474 return (EINVAL); 475 g_attach(cp, pp); 476 error = g_access(cp, 1, 0, 0); 477 if (error == 0) { 478 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) 479 error = EINVAL; 480 else if (pp->mediasize < SPA_MINDEVSIZE) 481 error = EINVAL; 482 if (error != 0) 483 g_access(cp, -1, 0, 0); 484 } 485 if (error != 0) 486 g_detach(cp); 487 return (error); 488} 489 490static void 491vdev_geom_detach_taster(struct g_consumer *cp) 492{ 493 g_access(cp, -1, 0, 0); 494 g_detach(cp); 495} 496 497int 498vdev_geom_read_pool_label(const char *name, 499 nvlist_t ***configs, uint64_t *count) 500{ 501 struct g_class *mp; 502 struct g_geom *gp, *zgp; 503 struct g_provider *pp; 504 struct g_consumer *zcp; 505 nvlist_t *vdev_cfg; 506 uint64_t pool_guid; 507 int error; 508 509 DROP_GIANT(); 510 g_topology_lock(); 511 512 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 513 /* This orphan function should be never called. */ 514 zgp->orphan = vdev_geom_taste_orphan; 515 zcp = g_new_consumer(zgp); 516 517 *configs = NULL; 518 *count = 0; 519 pool_guid = 0; 520 LIST_FOREACH(mp, &g_classes, class) { 521 if (mp == &zfs_vdev_class) 522 continue; 523 LIST_FOREACH(gp, &mp->geom, geom) { 524 if (gp->flags & G_GEOM_WITHER) 525 continue; 526 LIST_FOREACH(pp, &gp->provider, provider) { 527 if (pp->flags & G_PF_WITHER) 528 continue; 529 if (vdev_geom_attach_taster(zcp, pp) != 0) 530 continue; 531 g_topology_unlock(); 532 error = vdev_geom_read_config(zcp, &vdev_cfg); 533 g_topology_lock(); 534 vdev_geom_detach_taster(zcp); 535 if (error) 536 continue; 537 ZFS_LOG(1, "successfully read vdev config"); 538 539 process_vdev_config(configs, count, 540 vdev_cfg, name, &pool_guid); 541 } 542 } 543 } 544 545 g_destroy_consumer(zcp); 546 g_destroy_geom(zgp); 547 g_topology_unlock(); 548 PICKUP_GIANT(); 549 550 return (*count > 0 ? 0 : ENOENT); 551} 552 553static void 554vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) 555{ 556 nvlist_t *config; 557 558 g_topology_assert_not(); 559 560 *pguid = 0; 561 *vguid = 0; 562 if (vdev_geom_read_config(cp, &config) == 0) { 563 nvlist_get_guids(config, pguid, vguid); 564 nvlist_free(config); 565 } 566} 567 568static struct g_consumer * 569vdev_geom_attach_by_guids(vdev_t *vd) 570{ 571 struct g_class *mp; 572 struct g_geom *gp, *zgp; 573 struct g_provider *pp; 574 struct g_consumer *cp, *zcp; 575 uint64_t pguid, vguid; 576 577 g_topology_assert(); 578 579 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 580 /* This orphan function should be never called. */ 581 zgp->orphan = vdev_geom_taste_orphan; 582 zcp = g_new_consumer(zgp); 583 584 cp = NULL; 585 LIST_FOREACH(mp, &g_classes, class) { 586 if (mp == &zfs_vdev_class) 587 continue; 588 LIST_FOREACH(gp, &mp->geom, geom) { 589 if (gp->flags & G_GEOM_WITHER) 590 continue; 591 LIST_FOREACH(pp, &gp->provider, provider) { 592 if (vdev_geom_attach_taster(zcp, pp) != 0) 593 continue; 594 g_topology_unlock(); 595 vdev_geom_read_guids(zcp, &pguid, &vguid); 596 g_topology_lock(); 597 vdev_geom_detach_taster(zcp); 598 /* 599 * Check that the label's vdev guid matches the 600 * desired guid. If the label has a pool guid, 601 * check that it matches too. (Inactive spares 602 * and L2ARCs do not have any pool guid in the 603 * label.) 604 */ 605 if ((pguid != 0 && 606 pguid != spa_guid(vd->vdev_spa)) || 607 vguid != vd->vdev_guid) 608 continue; 609 cp = vdev_geom_attach(pp, vd); 610 if (cp == NULL) { 611 printf("ZFS WARNING: Unable to " 612 "attach to %s.\n", pp->name); 613 continue; 614 } 615 break; 616 } 617 if (cp != NULL) 618 break; 619 } 620 if (cp != NULL) 621 break; 622 } 623end: 624 g_destroy_consumer(zcp); 625 g_destroy_geom(zgp); 626 return (cp); 627} 628 629static struct g_consumer * 630vdev_geom_open_by_guids(vdev_t *vd) 631{ 632 struct g_consumer *cp; 633 char *buf; 634 size_t len; 635 636 g_topology_assert(); 637 638 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 639 cp = vdev_geom_attach_by_guids(vd); 640 if (cp != NULL) { 641 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 642 buf = kmem_alloc(len, KM_SLEEP); 643 644 snprintf(buf, len, "/dev/%s", cp->provider->name); 645 spa_strfree(vd->vdev_path); 646 vd->vdev_path = buf; 647 648 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 649 (uintmax_t)spa_guid(vd->vdev_spa), 650 (uintmax_t)vd->vdev_guid, vd->vdev_path); 651 } else { 652 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 653 (uintmax_t)spa_guid(vd->vdev_spa), 654 (uintmax_t)vd->vdev_guid); 655 } 656 657 return (cp); 658} 659 660static struct g_consumer * 661vdev_geom_open_by_path(vdev_t *vd, int check_guid) 662{ 663 struct g_provider *pp; 664 struct g_consumer *cp; 665 uint64_t pguid, vguid; 666 667 g_topology_assert(); 668 669 cp = NULL; 670 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 671 if (pp != NULL) { 672 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 673 cp = vdev_geom_attach(pp, vd); 674 if (cp != NULL && check_guid && ISP2(pp->sectorsize) && 675 pp->sectorsize <= VDEV_PAD_SIZE) { 676 g_topology_unlock(); 677 vdev_geom_read_guids(cp, &pguid, &vguid); 678 g_topology_lock(); 679 if (pguid != spa_guid(vd->vdev_spa) || 680 vguid != vd->vdev_guid) { 681 vdev_geom_close_locked(vd); 682 cp = NULL; 683 ZFS_LOG(1, "guid mismatch for provider %s: " 684 "%ju:%ju != %ju:%ju.", vd->vdev_path, 685 (uintmax_t)spa_guid(vd->vdev_spa), 686 (uintmax_t)vd->vdev_guid, 687 (uintmax_t)pguid, (uintmax_t)vguid); 688 } else { 689 ZFS_LOG(1, "guid match for provider %s.", 690 vd->vdev_path); 691 } 692 } 693 } 694 695 return (cp); 696} 697 698static int 699vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 700 uint64_t *logical_ashift, uint64_t *physical_ashift) 701{ 702 struct g_provider *pp; 703 struct g_consumer *cp; 704 size_t bufsize; 705 int error; 706 707 /* 708 * We must have a pathname, and it must be absolute. 709 */ 710 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 711 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 712 return (EINVAL); 713 } 714 715 vd->vdev_tsd = NULL; 716 717 DROP_GIANT(); 718 g_topology_lock(); 719 error = 0; 720 721 if (vd->vdev_spa->spa_splitting_newspa || 722 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 723 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) { 724 /* 725 * We are dealing with a vdev that hasn't been previously 726 * opened (since boot), and we are not loading an 727 * existing pool configuration. This looks like a 728 * vdev add operation to a new or existing pool. 729 * Assume the user knows what he/she is doing and find 730 * GEOM provider by its name, ignoring GUID mismatches. 731 * 732 * XXPOLICY: It would be safer to only allow a device 733 * that is unlabeled or labeled but missing 734 * GUID information to be opened in this fashion, 735 * unless we are doing a split, in which case we 736 * should allow any guid. 737 */ 738 cp = vdev_geom_open_by_path(vd, 0); 739 } else { 740 /* 741 * Try using the recorded path for this device, but only 742 * accept it if its label data contains the expected GUIDs. 743 */ 744 cp = vdev_geom_open_by_path(vd, 1); 745 if (cp == NULL) { 746 /* 747 * The device at vd->vdev_path doesn't have the 748 * expected GUIDs. The disks might have merely 749 * moved around so try all other GEOM providers 750 * to find one with the right GUIDs. 751 */ 752 cp = vdev_geom_open_by_guids(vd); 753 } 754 } 755 756 if (cp == NULL) { 757 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 758 error = ENOENT; 759 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 760 !ISP2(cp->provider->sectorsize)) { 761 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 762 vd->vdev_path); 763 764 vdev_geom_close_locked(vd); 765 error = EINVAL; 766 cp = NULL; 767 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 768 int i; 769 770 for (i = 0; i < 5; i++) { 771 error = g_access(cp, 0, 1, 0); 772 if (error == 0) 773 break; 774 g_topology_unlock(); 775 tsleep(vd, 0, "vdev", hz / 2); 776 g_topology_lock(); 777 } 778 if (error != 0) { 779 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 780 vd->vdev_path, error); 781 vdev_geom_close_locked(vd); 782 cp = NULL; 783 } 784 } 785 786 g_topology_unlock(); 787 PICKUP_GIANT(); 788 if (cp == NULL) { 789 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 790 return (error); 791 } 792 pp = cp->provider; 793 794 /* 795 * Determine the actual size of the device. 796 */ 797 *max_psize = *psize = pp->mediasize; 798 799 /* 800 * Determine the device's minimum transfer size and preferred 801 * transfer size. 802 */ 803 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 804 *physical_ashift = 0; 805 if (pp->stripesize) 806 *physical_ashift = highbit(pp->stripesize) - 1; 807 808 /* 809 * Clear the nowritecache settings, so that on a vdev_reopen() 810 * we will try again. 811 */ 812 vd->vdev_nowritecache = B_FALSE; 813 814 /* 815 * Determine the device's rotation rate. 816 */ 817 vdev_geom_set_rotation_rate(vd, cp); 818 819 return (0); 820} 821 822static void 823vdev_geom_close(vdev_t *vd) 824{ 825 826 DROP_GIANT(); 827 g_topology_lock(); 828 vdev_geom_close_locked(vd); 829 g_topology_unlock(); 830 PICKUP_GIANT(); 831} 832 833static void 834vdev_geom_io_intr(struct bio *bp) 835{ 836 vdev_t *vd; 837 zio_t *zio; 838 839 zio = bp->bio_caller1; 840 vd = zio->io_vd; 841 zio->io_error = bp->bio_error; 842 if (zio->io_error == 0 && bp->bio_resid != 0) 843 zio->io_error = SET_ERROR(EIO); 844 845 switch(zio->io_error) { 846 case ENOTSUP: 847 /* 848 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 849 * that future attempts will never succeed. In this case 850 * we set a persistent flag so that we don't bother with 851 * requests in the future. 852 */ 853 switch(bp->bio_cmd) { 854 case BIO_FLUSH: 855 vd->vdev_nowritecache = B_TRUE; 856 break; 857 case BIO_DELETE: 858 vd->vdev_notrim = B_TRUE; 859 break; 860 } 861 break; 862 case ENXIO: 863 if (!vd->vdev_remove_wanted) { 864 /* 865 * If provider's error is set we assume it is being 866 * removed. 867 */ 868 if (bp->bio_to->error != 0) { 869 vd->vdev_remove_wanted = B_TRUE; 870 spa_async_request(zio->io_spa, 871 SPA_ASYNC_REMOVE); 872 } else if (!vd->vdev_delayed_close) { 873 vd->vdev_delayed_close = B_TRUE; 874 } 875 } 876 break; 877 } 878 g_destroy_bio(bp); 879 zio_delay_interrupt(zio); 880} 881 882static void 883vdev_geom_io_start(zio_t *zio) 884{ 885 vdev_t *vd; 886 struct g_consumer *cp; 887 struct bio *bp; 888 int error; 889 890 vd = zio->io_vd; 891 892 switch (zio->io_type) { 893 case ZIO_TYPE_IOCTL: 894 /* XXPOLICY */ 895 if (!vdev_readable(vd)) { 896 zio->io_error = SET_ERROR(ENXIO); 897 zio_interrupt(zio); 898 return; 899 } else { 900 switch (zio->io_cmd) { 901 case DKIOCFLUSHWRITECACHE: 902 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 903 break; 904 if (vd->vdev_nowritecache) { 905 zio->io_error = SET_ERROR(ENOTSUP); 906 break; 907 } 908 goto sendreq; 909 default: 910 zio->io_error = SET_ERROR(ENOTSUP); 911 } 912 } 913 914 zio_execute(zio); 915 return; 916 case ZIO_TYPE_FREE: 917 if (vd->vdev_notrim) { 918 zio->io_error = SET_ERROR(ENOTSUP); 919 } else if (!vdev_geom_bio_delete_disable) { 920 goto sendreq; 921 } 922 zio_execute(zio); 923 return; 924 } 925sendreq: 926 ASSERT(zio->io_type == ZIO_TYPE_READ || 927 zio->io_type == ZIO_TYPE_WRITE || 928 zio->io_type == ZIO_TYPE_FREE || 929 zio->io_type == ZIO_TYPE_IOCTL); 930 931 cp = vd->vdev_tsd; 932 if (cp == NULL) { 933 zio->io_error = SET_ERROR(ENXIO); 934 zio_interrupt(zio); 935 return; 936 } 937 bp = g_alloc_bio(); 938 bp->bio_caller1 = zio; 939 switch (zio->io_type) { 940 case ZIO_TYPE_READ: 941 case ZIO_TYPE_WRITE: 942 zio->io_target_timestamp = zio_handle_io_delay(zio); 943 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 944 bp->bio_data = zio->io_data; 945 bp->bio_offset = zio->io_offset; 946 bp->bio_length = zio->io_size; 947 break; 948 case ZIO_TYPE_FREE: 949 bp->bio_cmd = BIO_DELETE; 950 bp->bio_data = NULL; 951 bp->bio_offset = zio->io_offset; 952 bp->bio_length = zio->io_size; 953 break; 954 case ZIO_TYPE_IOCTL: 955 bp->bio_cmd = BIO_FLUSH; 956 bp->bio_flags |= BIO_ORDERED; 957 bp->bio_data = NULL; 958 bp->bio_offset = cp->provider->mediasize; 959 bp->bio_length = 0; 960 break; 961 } 962 bp->bio_done = vdev_geom_io_intr; 963 964 g_io_request(bp, cp); 965} 966 967static void 968vdev_geom_io_done(zio_t *zio) 969{ 970} 971 972static void 973vdev_geom_hold(vdev_t *vd) 974{ 975} 976 977static void 978vdev_geom_rele(vdev_t *vd) 979{ 980} 981 982vdev_ops_t vdev_geom_ops = { 983 vdev_geom_open, 984 vdev_geom_close, 985 vdev_default_asize, 986 vdev_geom_io_start, 987 vdev_geom_io_done, 988 NULL, 989 vdev_geom_hold, 990 vdev_geom_rele, 991 VDEV_TYPE_DISK, /* name of this vdev type */ 992 B_TRUE /* leaf vdev */ 993}; 994