vdev_geom.c revision 297078
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;

	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL)
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		cp = g_new_consumer(gp);
		if (g_attach(cp, pp) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		if (g_access(cp, 1, 0, 1) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			if (g_attach(cp, pp) != 0) {
				g_destroy_consumer(cp);
				return (NULL);
			}
			if (g_access(cp, 1, 0, 1) != 0) {
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			if (g_access(cp, 1, 0, 1) != 0)
				return (NULL);
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

static void
vdev_geom_detach(void *arg, int flag __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	cp = arg;
	gp = cp->geom;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		g_detach(cp);
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}

static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;
	p = data;
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *p, *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int error, l, len;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}

static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error == 0) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
			error = EINVAL;
		else if (pp->mediasize < SPA_MINDEVSIZE)
			error = EINVAL;
		if (error != 0)
			g_access(cp, -1, 0, 0);
	}
	if (error != 0)
		g_detach(cp);
	return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should be never called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
	nvlist_t *config;

	g_topology_assert_not();

	*pguid = 0;
	*vguid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		nvlist_get_guids(config, pguid, vguid);
		nvlist_free(config);
	}
}

static struct g_consumer *
vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid, vguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should be never called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				vdev_geom_read_guids(zcp, &pguid, &vguid);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				/*
				 * Check that the label's vdev guid matches the
				 * desired guid.  If the label has a pool guid,
				 * check that it matches too. (Inactive spares
				 * and L2ARCs do not have any pool guid in the
				 * label.)
				 */
				if ((pguid != 0 &&
				    pguid != pool_guid) ||
				    vguid != vdev_guid)
					continue;
				cp = vdev_geom_attach(pp);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to "
					    "attach to %s.\n", pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}
end:
	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t pguid, vguid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			vdev_geom_read_guids(cp, &pguid, &vguid);
			g_topology_lock();
			if (pguid != spa_guid(vd->vdev_spa) ||
			    vguid != vd->vdev_guid) {
				vdev_geom_detach(cp, 0);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
				    (uintmax_t)spa_guid(vd->vdev_spa),
				    (uintmax_t)vd->vdev_guid,
				    (uintmax_t)pguid, (uintmax_t)vguid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	    vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs. The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);
		vdev_geom_detach(cp, 0);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
			    vd->vdev_path, error);
			vdev_geom_detach(cp, 0);
			cp = NULL;
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	cp->private = vd;
	vd->vdev_tsd = cp;
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	if (vd->vdev_physpath != NULL)
		spa_strfree(vd->vdev_physpath);
	bufsize = sizeof("/dev/") + strlen(pp->name);
	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

	/*
	 * Determine the device's rotation rate.
	 */
	vdev_geom_set_rotation_rate(vd, cp);

	return (0);
}

static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	cp->private = NULL;	/* XXX locking */
	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}

static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch(zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch(bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_interrupt(zio);
}

static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};