vdev_geom.c revision 260339
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45struct g_class zfs_vdev_class = { 46 .name = "ZFS::VDEV", 47 .version = G_VERSION, 48}; 49 50DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 51 52SYSCTL_DECL(_vfs_zfs_vdev); 53/* Don't send BIO_FLUSH. */ 54static int vdev_geom_bio_flush_disable = 0; 55TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 56SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 57 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 58/* Don't send BIO_DELETE. */ 59static int vdev_geom_bio_delete_disable = 0; 60TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 61SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 62 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 63 64static void 65vdev_geom_orphan(struct g_consumer *cp) 66{ 67 vdev_t *vd; 68 69 g_topology_assert(); 70 71 vd = cp->private; 72 if (vd == NULL) 73 return; 74 75 /* 76 * Orphan callbacks occur from the GEOM event thread. 77 * Concurrent with this call, new I/O requests may be 78 * working their way through GEOM about to find out 79 * (only once executed by the g_down thread) that we've 80 * been orphaned from our disk provider. These I/Os 81 * must be retired before we can detach our consumer. 82 * This is most easily achieved by acquiring the 83 * SPA ZIO configuration lock as a writer, but doing 84 * so with the GEOM topology lock held would cause 85 * a lock order reversal. Instead, rely on the SPA's 86 * async removal support to invoke a close on this 87 * vdev once it is safe to do so. 88 */ 89 zfs_post_remove(vd->vdev_spa, vd); 90 vd->vdev_remove_wanted = B_TRUE; 91 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 92} 93 94static struct g_consumer * 95vdev_geom_attach(struct g_provider *pp) 96{ 97 struct g_geom *gp; 98 struct g_consumer *cp; 99 100 g_topology_assert(); 101 102 ZFS_LOG(1, "Attaching to %s.", pp->name); 103 /* Do we have geom already? No? Create one. */ 104 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 105 if (gp->flags & G_GEOM_WITHER) 106 continue; 107 if (strcmp(gp->name, "zfs::vdev") != 0) 108 continue; 109 break; 110 } 111 if (gp == NULL) { 112 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 113 gp->orphan = vdev_geom_orphan; 114 cp = g_new_consumer(gp); 115 if (g_attach(cp, pp) != 0) { 116 g_wither_geom(gp, ENXIO); 117 return (NULL); 118 } 119 if (g_access(cp, 1, 0, 1) != 0) { 120 g_wither_geom(gp, ENXIO); 121 return (NULL); 122 } 123 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 124 } else { 125 /* Check if we are already connected to this provider. */ 126 LIST_FOREACH(cp, &gp->consumer, consumer) { 127 if (cp->provider == pp) { 128 ZFS_LOG(1, "Found consumer for %s.", pp->name); 129 break; 130 } 131 } 132 if (cp == NULL) { 133 cp = g_new_consumer(gp); 134 if (g_attach(cp, pp) != 0) { 135 g_destroy_consumer(cp); 136 return (NULL); 137 } 138 if (g_access(cp, 1, 0, 1) != 0) { 139 g_detach(cp); 140 g_destroy_consumer(cp); 141 return (NULL); 142 } 143 ZFS_LOG(1, "Created consumer for %s.", pp->name); 144 } else { 145 if (g_access(cp, 1, 0, 1) != 0) 146 return (NULL); 147 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 148 } 149 } 150 return (cp); 151} 152 153static void 154vdev_geom_detach(void *arg, int flag __unused) 155{ 156 struct g_geom *gp; 157 struct g_consumer *cp; 158 159 g_topology_assert(); 160 cp = arg; 161 gp = cp->geom; 162 163 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 164 g_access(cp, -1, 0, -1); 165 /* Destroy consumer on last close. */ 166 if (cp->acr == 0 && cp->ace == 0) { 167 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); 168 if (cp->acw > 0) 169 g_access(cp, 0, -cp->acw, 0); 170 g_detach(cp); 171 g_destroy_consumer(cp); 172 } 173 /* Destroy geom if there are no consumers left. */ 174 if (LIST_EMPTY(&gp->consumer)) { 175 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 176 g_wither_geom(gp, ENXIO); 177 } 178} 179 180static uint64_t 181nvlist_get_guid(nvlist_t *list) 182{ 183 uint64_t value; 184 185 value = 0; 186 nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value); 187 return (value); 188} 189 190static int 191vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 192{ 193 struct bio *bp; 194 u_char *p; 195 off_t off, maxio; 196 int error; 197 198 ASSERT((offset % cp->provider->sectorsize) == 0); 199 ASSERT((size % cp->provider->sectorsize) == 0); 200 201 bp = g_alloc_bio(); 202 off = offset; 203 offset += size; 204 p = data; 205 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 206 error = 0; 207 208 for (; off < offset; off += maxio, p += maxio, size -= maxio) { 209 bzero(bp, sizeof(*bp)); 210 bp->bio_cmd = cmd; 211 bp->bio_done = NULL; 212 bp->bio_offset = off; 213 bp->bio_length = MIN(size, maxio); 214 bp->bio_data = p; 215 g_io_request(bp, cp); 216 error = biowait(bp, "vdev_geom_io"); 217 if (error != 0) 218 break; 219 } 220 221 g_destroy_bio(bp); 222 return (error); 223} 224 225static void 226vdev_geom_taste_orphan(struct g_consumer *cp) 227{ 228 229 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 230 cp->provider->name)); 231} 232 233static int 234vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 235{ 236 struct g_provider *pp; 237 vdev_label_t *label; 238 char *p, *buf; 239 size_t buflen; 240 uint64_t psize; 241 off_t offset, size; 242 uint64_t guid, state, txg; 243 int error, l, len; 244 245 g_topology_assert_not(); 246 247 pp = cp->provider; 248 ZFS_LOG(1, "Reading config from %s...", pp->name); 249 250 psize = pp->mediasize; 251 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 252 253 size = sizeof(*label) + pp->sectorsize - 254 ((sizeof(*label) - 1) % pp->sectorsize) - 1; 255 256 guid = 0; 257 label = kmem_alloc(size, KM_SLEEP); 258 buflen = sizeof(label->vl_vdev_phys.vp_nvlist); 259 260 *config = NULL; 261 for (l = 0; l < VDEV_LABELS; l++) { 262 263 offset = vdev_label_offset(psize, l, 0); 264 if ((offset % pp->sectorsize) != 0) 265 continue; 266 267 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 268 continue; 269 buf = label->vl_vdev_phys.vp_nvlist; 270 271 if (nvlist_unpack(buf, buflen, config, 0) != 0) 272 continue; 273 274 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 275 &state) != 0 || state > POOL_STATE_L2CACHE) { 276 nvlist_free(*config); 277 *config = NULL; 278 continue; 279 } 280 281 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 282 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 283 &txg) != 0 || txg == 0)) { 284 nvlist_free(*config); 285 *config = NULL; 286 continue; 287 } 288 289 break; 290 } 291 292 kmem_free(label, size); 293 return (*config == NULL ? ENOENT : 0); 294} 295 296static void 297resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 298{ 299 nvlist_t **new_configs; 300 uint64_t i; 301 302 if (id < *count) 303 return; 304 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 305 KM_SLEEP); 306 for (i = 0; i < *count; i++) 307 new_configs[i] = (*configs)[i]; 308 if (*configs != NULL) 309 kmem_free(*configs, *count * sizeof(void *)); 310 *configs = new_configs; 311 *count = id + 1; 312} 313 314static void 315process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 316 const char *name, uint64_t* known_pool_guid) 317{ 318 nvlist_t *vdev_tree; 319 uint64_t pool_guid; 320 uint64_t vdev_guid, known_guid; 321 uint64_t id, txg, known_txg; 322 char *pname; 323 int i; 324 325 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 326 strcmp(pname, name) != 0) 327 goto ignore; 328 329 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 330 goto ignore; 331 332 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 333 goto ignore; 334 335 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 336 goto ignore; 337 338 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 339 goto ignore; 340 341 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 342 343 if (*known_pool_guid != 0) { 344 if (pool_guid != *known_pool_guid) 345 goto ignore; 346 } else 347 *known_pool_guid = pool_guid; 348 349 resize_configs(configs, count, id); 350 351 if ((*configs)[id] != NULL) { 352 VERIFY(nvlist_lookup_uint64((*configs)[id], 353 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 354 if (txg <= known_txg) 355 goto ignore; 356 nvlist_free((*configs)[id]); 357 } 358 359 (*configs)[id] = cfg; 360 return; 361 362ignore: 363 nvlist_free(cfg); 364} 365 366static int 367vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) 368{ 369 int error; 370 371 if (pp->flags & G_PF_WITHER) 372 return (EINVAL); 373 g_attach(cp, pp); 374 error = g_access(cp, 1, 0, 0); 375 if (error == 0) { 376 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) 377 error = EINVAL; 378 else if (pp->mediasize < SPA_MINDEVSIZE) 379 error = EINVAL; 380 if (error != 0) 381 g_access(cp, -1, 0, 0); 382 } 383 if (error != 0) 384 g_detach(cp); 385 return (error); 386} 387 388static void 389vdev_geom_detach_taster(struct g_consumer *cp) 390{ 391 g_access(cp, -1, 0, 0); 392 g_detach(cp); 393} 394 395int 396vdev_geom_read_pool_label(const char *name, 397 nvlist_t ***configs, uint64_t *count) 398{ 399 struct g_class *mp; 400 struct g_geom *gp, *zgp; 401 struct g_provider *pp; 402 struct g_consumer *zcp; 403 nvlist_t *vdev_cfg; 404 uint64_t pool_guid; 405 int error; 406 407 DROP_GIANT(); 408 g_topology_lock(); 409 410 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 411 /* This orphan function should be never called. */ 412 zgp->orphan = vdev_geom_taste_orphan; 413 zcp = g_new_consumer(zgp); 414 415 *configs = NULL; 416 *count = 0; 417 pool_guid = 0; 418 LIST_FOREACH(mp, &g_classes, class) { 419 if (mp == &zfs_vdev_class) 420 continue; 421 LIST_FOREACH(gp, &mp->geom, geom) { 422 if (gp->flags & G_GEOM_WITHER) 423 continue; 424 LIST_FOREACH(pp, &gp->provider, provider) { 425 if (pp->flags & G_PF_WITHER) 426 continue; 427 if (vdev_geom_attach_taster(zcp, pp) != 0) 428 continue; 429 g_topology_unlock(); 430 error = vdev_geom_read_config(zcp, &vdev_cfg); 431 g_topology_lock(); 432 vdev_geom_detach_taster(zcp); 433 if (error) 434 continue; 435 ZFS_LOG(1, "successfully read vdev config"); 436 437 process_vdev_config(configs, count, 438 vdev_cfg, name, &pool_guid); 439 } 440 } 441 } 442 443 g_destroy_consumer(zcp); 444 g_destroy_geom(zgp); 445 g_topology_unlock(); 446 PICKUP_GIANT(); 447 448 return (*count > 0 ? 0 : ENOENT); 449} 450 451static uint64_t 452vdev_geom_read_guid(struct g_consumer *cp) 453{ 454 nvlist_t *config; 455 uint64_t guid; 456 457 g_topology_assert_not(); 458 459 guid = 0; 460 if (vdev_geom_read_config(cp, &config) == 0) { 461 guid = nvlist_get_guid(config); 462 nvlist_free(config); 463 } 464 return (guid); 465} 466 467static struct g_consumer * 468vdev_geom_attach_by_guid(uint64_t guid) 469{ 470 struct g_class *mp; 471 struct g_geom *gp, *zgp; 472 struct g_provider *pp; 473 struct g_consumer *cp, *zcp; 474 uint64_t pguid; 475 476 g_topology_assert(); 477 478 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 479 /* This orphan function should be never called. */ 480 zgp->orphan = vdev_geom_taste_orphan; 481 zcp = g_new_consumer(zgp); 482 483 cp = NULL; 484 LIST_FOREACH(mp, &g_classes, class) { 485 if (mp == &zfs_vdev_class) 486 continue; 487 LIST_FOREACH(gp, &mp->geom, geom) { 488 if (gp->flags & G_GEOM_WITHER) 489 continue; 490 LIST_FOREACH(pp, &gp->provider, provider) { 491 if (vdev_geom_attach_taster(zcp, pp) != 0) 492 continue; 493 g_topology_unlock(); 494 pguid = vdev_geom_read_guid(zcp); 495 g_topology_lock(); 496 vdev_geom_detach_taster(zcp); 497 if (pguid != guid) 498 continue; 499 cp = vdev_geom_attach(pp); 500 if (cp == NULL) { 501 printf("ZFS WARNING: Unable to attach to %s.\n", 502 pp->name); 503 continue; 504 } 505 break; 506 } 507 if (cp != NULL) 508 break; 509 } 510 if (cp != NULL) 511 break; 512 } 513end: 514 g_destroy_consumer(zcp); 515 g_destroy_geom(zgp); 516 return (cp); 517} 518 519static struct g_consumer * 520vdev_geom_open_by_guid(vdev_t *vd) 521{ 522 struct g_consumer *cp; 523 char *buf; 524 size_t len; 525 526 g_topology_assert(); 527 528 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 529 cp = vdev_geom_attach_by_guid(vd->vdev_guid); 530 if (cp != NULL) { 531 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 532 buf = kmem_alloc(len, KM_SLEEP); 533 534 snprintf(buf, len, "/dev/%s", cp->provider->name); 535 spa_strfree(vd->vdev_path); 536 vd->vdev_path = buf; 537 538 ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.", 539 (uintmax_t)vd->vdev_guid, vd->vdev_path); 540 } else { 541 ZFS_LOG(1, "Search by guid [%ju] failed.", 542 (uintmax_t)vd->vdev_guid); 543 } 544 545 return (cp); 546} 547 548static struct g_consumer * 549vdev_geom_open_by_path(vdev_t *vd, int check_guid) 550{ 551 struct g_provider *pp; 552 struct g_consumer *cp; 553 uint64_t guid; 554 555 g_topology_assert(); 556 557 cp = NULL; 558 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 559 if (pp != NULL) { 560 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 561 cp = vdev_geom_attach(pp); 562 if (cp != NULL && check_guid && ISP2(pp->sectorsize) && 563 pp->sectorsize <= VDEV_PAD_SIZE) { 564 g_topology_unlock(); 565 guid = vdev_geom_read_guid(cp); 566 g_topology_lock(); 567 if (guid != vd->vdev_guid) { 568 vdev_geom_detach(cp, 0); 569 cp = NULL; 570 ZFS_LOG(1, "guid mismatch for provider %s: " 571 "%ju != %ju.", vd->vdev_path, 572 (uintmax_t)vd->vdev_guid, (uintmax_t)guid); 573 } else { 574 ZFS_LOG(1, "guid match for provider %s.", 575 vd->vdev_path); 576 } 577 } 578 } 579 580 return (cp); 581} 582 583static int 584vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 585 uint64_t *logical_ashift, uint64_t *physical_ashift) 586{ 587 struct g_provider *pp; 588 struct g_consumer *cp; 589 size_t bufsize; 590 int error; 591 592 /* 593 * We must have a pathname, and it must be absolute. 594 */ 595 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 596 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 597 return (EINVAL); 598 } 599 600 vd->vdev_tsd = NULL; 601 602 DROP_GIANT(); 603 g_topology_lock(); 604 error = 0; 605 606 /* 607 * If we're creating or splitting a pool, just find the GEOM provider 608 * by its name and ignore GUID mismatches. 609 */ 610 if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 611 vd->vdev_spa->spa_splitting_newspa == B_TRUE) 612 cp = vdev_geom_open_by_path(vd, 0); 613 else { 614 cp = vdev_geom_open_by_path(vd, 1); 615 if (cp == NULL) { 616 /* 617 * The device at vd->vdev_path doesn't have the 618 * expected guid. The disks might have merely 619 * moved around so try all other GEOM providers 620 * to find one with the right guid. 621 */ 622 cp = vdev_geom_open_by_guid(vd); 623 } 624 } 625 626 if (cp == NULL) { 627 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 628 error = ENOENT; 629 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 630 !ISP2(cp->provider->sectorsize)) { 631 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 632 vd->vdev_path); 633 vdev_geom_detach(cp, 0); 634 error = EINVAL; 635 cp = NULL; 636 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 637 int i; 638 639 for (i = 0; i < 5; i++) { 640 error = g_access(cp, 0, 1, 0); 641 if (error == 0) 642 break; 643 g_topology_unlock(); 644 tsleep(vd, 0, "vdev", hz / 2); 645 g_topology_lock(); 646 } 647 if (error != 0) { 648 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 649 vd->vdev_path, error); 650 vdev_geom_detach(cp, 0); 651 cp = NULL; 652 } 653 } 654 g_topology_unlock(); 655 PICKUP_GIANT(); 656 if (cp == NULL) { 657 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 658 return (error); 659 } 660 661 cp->private = vd; 662 vd->vdev_tsd = cp; 663 pp = cp->provider; 664 665 /* 666 * Determine the actual size of the device. 667 */ 668 *max_psize = *psize = pp->mediasize; 669 670 /* 671 * Determine the device's minimum transfer size and preferred 672 * transfer size. 673 */ 674 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 675 *physical_ashift = 0; 676 if (pp->stripesize) 677 *physical_ashift = highbit(pp->stripesize) - 1; 678 679 /* 680 * Clear the nowritecache settings, so that on a vdev_reopen() 681 * we will try again. 682 */ 683 vd->vdev_nowritecache = B_FALSE; 684 685 if (vd->vdev_physpath != NULL) 686 spa_strfree(vd->vdev_physpath); 687 bufsize = sizeof("/dev/") + strlen(pp->name); 688 vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); 689 snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); 690 691 return (0); 692} 693 694static void 695vdev_geom_close(vdev_t *vd) 696{ 697 struct g_consumer *cp; 698 699 cp = vd->vdev_tsd; 700 if (cp == NULL) 701 return; 702 vd->vdev_tsd = NULL; 703 vd->vdev_delayed_close = B_FALSE; 704 cp->private = NULL; /* XXX locking */ 705 g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); 706} 707 708static void 709vdev_geom_io_intr(struct bio *bp) 710{ 711 vdev_t *vd; 712 zio_t *zio; 713 714 zio = bp->bio_caller1; 715 vd = zio->io_vd; 716 zio->io_error = bp->bio_error; 717 if (zio->io_error == 0 && bp->bio_resid != 0) 718 zio->io_error = EIO; 719 if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) { 720 /* 721 * If we get ENOTSUP, we know that no future 722 * attempts will ever succeed. In this case we 723 * set a persistent bit so that we don't bother 724 * with the ioctl in the future. 725 */ 726 vd->vdev_nowritecache = B_TRUE; 727 } 728 if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) { 729 /* 730 * If we get ENOTSUP, we know that no future 731 * attempts will ever succeed. In this case we 732 * set a persistent bit so that we don't bother 733 * with the ioctl in the future. 734 */ 735 vd->vdev_notrim = B_TRUE; 736 } 737 if (zio->io_error == EIO && !vd->vdev_remove_wanted) { 738 /* 739 * If provider's error is set we assume it is being 740 * removed. 741 */ 742 if (bp->bio_to->error != 0) { 743 /* 744 * We post the resource as soon as possible, instead of 745 * when the async removal actually happens, because the 746 * DE is using this information to discard previous I/O 747 * errors. 748 */ 749 /* XXX: zfs_post_remove() can sleep. */ 750 zfs_post_remove(zio->io_spa, vd); 751 vd->vdev_remove_wanted = B_TRUE; 752 spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); 753 } else if (!vd->vdev_delayed_close) { 754 vd->vdev_delayed_close = B_TRUE; 755 } 756 } 757 g_destroy_bio(bp); 758 zio_interrupt(zio); 759} 760 761static int 762vdev_geom_io_start(zio_t *zio) 763{ 764 vdev_t *vd; 765 struct g_consumer *cp; 766 struct bio *bp; 767 int error; 768 769 vd = zio->io_vd; 770 771 if (zio->io_type == ZIO_TYPE_IOCTL) { 772 /* XXPOLICY */ 773 if (!vdev_readable(vd)) { 774 zio->io_error = ENXIO; 775 return (ZIO_PIPELINE_CONTINUE); 776 } 777 778 switch (zio->io_cmd) { 779 case DKIOCFLUSHWRITECACHE: 780 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 781 break; 782 if (vd->vdev_nowritecache) { 783 zio->io_error = ENOTSUP; 784 break; 785 } 786 goto sendreq; 787 case DKIOCTRIM: 788 if (vdev_geom_bio_delete_disable) 789 break; 790 if (vd->vdev_notrim) { 791 zio->io_error = ENOTSUP; 792 break; 793 } 794 goto sendreq; 795 default: 796 zio->io_error = ENOTSUP; 797 } 798 799 return (ZIO_PIPELINE_CONTINUE); 800 } 801sendreq: 802 cp = vd->vdev_tsd; 803 if (cp == NULL) { 804 zio->io_error = ENXIO; 805 return (ZIO_PIPELINE_CONTINUE); 806 } 807 bp = g_alloc_bio(); 808 bp->bio_caller1 = zio; 809 switch (zio->io_type) { 810 case ZIO_TYPE_READ: 811 case ZIO_TYPE_WRITE: 812 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 813 bp->bio_data = zio->io_data; 814 bp->bio_offset = zio->io_offset; 815 bp->bio_length = zio->io_size; 816 break; 817 case ZIO_TYPE_IOCTL: 818 switch (zio->io_cmd) { 819 case DKIOCFLUSHWRITECACHE: 820 bp->bio_cmd = BIO_FLUSH; 821 bp->bio_flags |= BIO_ORDERED; 822 bp->bio_data = NULL; 823 bp->bio_offset = cp->provider->mediasize; 824 bp->bio_length = 0; 825 break; 826 case DKIOCTRIM: 827 bp->bio_cmd = BIO_DELETE; 828 bp->bio_data = NULL; 829 bp->bio_offset = zio->io_offset; 830 bp->bio_length = zio->io_size; 831 break; 832 } 833 break; 834 } 835 bp->bio_done = vdev_geom_io_intr; 836 837 g_io_request(bp, cp); 838 839 return (ZIO_PIPELINE_STOP); 840} 841 842static void 843vdev_geom_io_done(zio_t *zio) 844{ 845} 846 847static void 848vdev_geom_hold(vdev_t *vd) 849{ 850} 851 852static void 853vdev_geom_rele(vdev_t *vd) 854{ 855} 856 857vdev_ops_t vdev_geom_ops = { 858 vdev_geom_open, 859 vdev_geom_close, 860 vdev_default_asize, 861 vdev_geom_io_start, 862 vdev_geom_io_done, 863 NULL, 864 vdev_geom_hold, 865 vdev_geom_rele, 866 VDEV_TYPE_DISK, /* name of this vdev type */ 867 B_TRUE /* leaf vdev */ 868}; 869