/* vdev_geom.c revision 308058 */
1296781Sdes/* 2204861Sdes * CDDL HEADER START 3204861Sdes * 4204861Sdes * The contents of this file are subject to the terms of the 5204861Sdes * Common Development and Distribution License (the "License"). 6204861Sdes * You may not use this file except in compliance with the License. 7204861Sdes * 8204861Sdes * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9204861Sdes * or http://www.opensolaris.org/os/licensing. 10204861Sdes * See the License for the specific language governing permissions 11204861Sdes * and limitations under the License. 12204861Sdes * 13204861Sdes * When distributing Covered Code, include this CDDL HEADER in each 14204861Sdes * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15204861Sdes * If applicable, add the following below this CDDL HEADER, with the 16204861Sdes * fields enclosed by brackets "[]" replaced with your own identifying 17204861Sdes * information: Portions Copyright [yyyy] [name of copyright owner] 18204861Sdes * 19204861Sdes * CDDL HEADER END 20204861Sdes */ 21204861Sdes/* 22204861Sdes * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23204861Sdes * All rights reserved. 24204861Sdes * 25204861Sdes * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26204861Sdes */ 27204861Sdes 28204861Sdes#include <sys/zfs_context.h> 29204861Sdes#include <sys/param.h> 30204861Sdes#include <sys/kernel.h> 31204861Sdes#include <sys/bio.h> 32204861Sdes#include <sys/disk.h> 33204861Sdes#include <sys/spa.h> 34262566Sdes#include <sys/spa_impl.h> 35262566Sdes#include <sys/vdev_impl.h> 36204861Sdes#include <sys/fs/zfs.h> 37204861Sdes#include <sys/zio.h> 38204861Sdes#include <geom/geom.h> 39204861Sdes#include <geom/geom_int.h> 40204861Sdes 41295367Sdes/* 42204861Sdes * Virtual device vector for GEOM. 
43204861Sdes */ 44204861Sdes 45204861Sdesstatic g_attrchanged_t vdev_geom_attrchanged; 46204861Sdesstruct g_class zfs_vdev_class = { 47204861Sdes .name = "ZFS::VDEV", 48204861Sdes .version = G_VERSION, 49204861Sdes .attrchanged = vdev_geom_attrchanged, 50204861Sdes}; 51204861Sdes 52204861SdesDECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53204861Sdes 54204861SdesSYSCTL_DECL(_vfs_zfs_vdev); 55204861Sdes/* Don't send BIO_FLUSH. */ 56204861Sdesstatic int vdev_geom_bio_flush_disable = 0; 57204861SdesTUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 58204861SdesSYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 59204861Sdes &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 60204861Sdes/* Don't send BIO_DELETE. */ 61204861Sdesstatic int vdev_geom_bio_delete_disable = 0; 62204861SdesTUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 63204861SdesSYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 64204861Sdes &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 65204861Sdes 66204861Sdes/* Declare local functions */ 67204861Sdesstatic void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); 68204861Sdes 69204861Sdes/* 70204861Sdes * Thread local storage used to indicate when a thread is probing geoms 71204861Sdes * for their guids. If NULL, this thread is not tasting geoms. If non NULL, 72204861Sdes * it is looking for a replacement for the vdev_t* that is its value. 
73204861Sdes */ 74204861Sdesuint_t zfs_geom_probe_vdev_key; 75204861Sdes 76204861Sdesstatic void 77204861Sdesvdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 78204861Sdes{ 79204861Sdes int error; 80204861Sdes uint16_t rate; 81204861Sdes 82204861Sdes error = g_getattr("GEOM::rotation_rate", cp, &rate); 83204861Sdes if (error == 0) 84204861Sdes vd->vdev_rotation_rate = rate; 85204861Sdes else 86204861Sdes vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 87204861Sdes} 88204861Sdes 89204861Sdesstatic void 90204861Sdesvdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 91204861Sdes{ 92204861Sdes vdev_t *vd; 93204861Sdes spa_t *spa; 94204861Sdes char *physpath; 95204861Sdes int error, physpath_len; 96204861Sdes 97204861Sdes vd = cp->private; 98204861Sdes if (vd == NULL) 99204861Sdes return; 100204861Sdes 101204861Sdes if (strcmp(attr, "GEOM::rotation_rate") == 0) { 102204861Sdes vdev_geom_set_rotation_rate(vd, cp); 103204861Sdes return; 104204861Sdes } 105204861Sdes 106204861Sdes if (strcmp(attr, "GEOM::physpath") != 0) 107204861Sdes return; 108204861Sdes 109204861Sdes if (g_access(cp, 1, 0, 0) != 0) 110204861Sdes return; 111204861Sdes 112204861Sdes /* 113204861Sdes * Record/Update physical path information for this device. 
114204861Sdes */ 115204861Sdes spa = vd->vdev_spa; 116204861Sdes physpath_len = MAXPATHLEN; 117204861Sdes physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 118204861Sdes error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 119204861Sdes g_access(cp, -1, 0, 0); 120204861Sdes if (error == 0) { 121204861Sdes char *old_physpath; 122204861Sdes 123204861Sdes /* g_topology lock ensures that vdev has not been closed */ 124204861Sdes g_topology_assert(); 125255767Sdes old_physpath = vd->vdev_physpath; 126255767Sdes vd->vdev_physpath = spa_strdup(physpath); 127255767Sdes spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 128204861Sdes 129204861Sdes if (old_physpath != NULL) 130204861Sdes spa_strfree(old_physpath); 131204861Sdes } 132204861Sdes g_free(physpath); 133204861Sdes} 134204861Sdes 135204861Sdesstatic void 136204861Sdesvdev_geom_orphan(struct g_consumer *cp) 137204861Sdes{ 138204861Sdes vdev_t *vd; 139204861Sdes 140204861Sdes g_topology_assert(); 141204861Sdes 142204861Sdes vd = cp->private; 143204861Sdes if (vd == NULL) { 144204861Sdes /* Vdev close in progress. Ignore the event. */ 145204861Sdes return; 146204861Sdes } 147204861Sdes 148204861Sdes /* 149204861Sdes * Orphan callbacks occur from the GEOM event thread. 150204861Sdes * Concurrent with this call, new I/O requests may be 151204861Sdes * working their way through GEOM about to find out 152204861Sdes * (only once executed by the g_down thread) that we've 153204861Sdes * been orphaned from our disk provider. These I/Os 154204861Sdes * must be retired before we can detach our consumer. 155204861Sdes * This is most easily achieved by acquiring the 156204861Sdes * SPA ZIO configuration lock as a writer, but doing 157204861Sdes * so with the GEOM topology lock held would cause 158204861Sdes * a lock order reversal. Instead, rely on the SPA's 159204861Sdes * async removal support to invoke a close on this 160204861Sdes * vdev once it is safe to do so. 
161204861Sdes */ 162204861Sdes vd->vdev_remove_wanted = B_TRUE; 163204861Sdes spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 164204861Sdes} 165204861Sdes 166204861Sdesstatic struct g_consumer * 167204861Sdesvdev_geom_attach(struct g_provider *pp, vdev_t *vd) 168204861Sdes{ 169204861Sdes struct g_geom *gp; 170204861Sdes struct g_consumer *cp; 171204861Sdes int error; 172204861Sdes 173204861Sdes g_topology_assert(); 174204861Sdes 175204861Sdes ZFS_LOG(1, "Attaching to %s.", pp->name); 176204861Sdes 177204861Sdes if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { 178204861Sdes ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n", 179204861Sdes pp->name, pp->sectorsize); 180204861Sdes return (NULL); 181204861Sdes } else if (pp->mediasize < SPA_MINDEVSIZE) { 182204861Sdes ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n", 183204861Sdes pp->name, pp->mediasize); 184204861Sdes return (NULL); 185255767Sdes } 186255767Sdes 187204861Sdes /* Do we have geom already? No? Create one. 
*/ 188204861Sdes LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 189204861Sdes if (gp->flags & G_GEOM_WITHER) 190204861Sdes continue; 191215116Sdes if (strcmp(gp->name, "zfs::vdev") != 0) 192215116Sdes continue; 193215116Sdes break; 194215116Sdes } 195215116Sdes if (gp == NULL) { 196215116Sdes gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 197215116Sdes gp->orphan = vdev_geom_orphan; 198215116Sdes gp->attrchanged = vdev_geom_attrchanged; 199215116Sdes cp = g_new_consumer(gp); 200215116Sdes error = g_attach(cp, pp); 201215116Sdes if (error != 0) { 202215116Sdes ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, 203215116Sdes __LINE__, error); 204215116Sdes vdev_geom_detach(cp, B_FALSE); 205215116Sdes return (NULL); 206215116Sdes } 207215116Sdes error = g_access(cp, 1, 0, 1); 208215116Sdes if (error != 0) { 209215116Sdes ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__, 210215116Sdes __LINE__, error); 211215116Sdes vdev_geom_detach(cp, B_FALSE); 212215116Sdes return (NULL); 213215116Sdes } 214215116Sdes ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 215215116Sdes } else { 216215116Sdes /* Check if we are already connected to this provider. 
*/ 217215116Sdes LIST_FOREACH(cp, &gp->consumer, consumer) { 218215116Sdes if (cp->provider == pp) { 219204861Sdes ZFS_LOG(1, "Found consumer for %s.", pp->name); 220204861Sdes break; 221204861Sdes } 222204861Sdes } 223204861Sdes if (cp == NULL) { 224204861Sdes cp = g_new_consumer(gp); 225204861Sdes error = g_attach(cp, pp); 226204861Sdes if (error != 0) { 227204861Sdes ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", 228215116Sdes __func__, __LINE__, error); 229204861Sdes vdev_geom_detach(cp, B_FALSE); 230262566Sdes return (NULL); 231204861Sdes } 232204861Sdes error = g_access(cp, 1, 0, 1); 233204861Sdes if (error != 0) { 234204861Sdes ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 235204861Sdes __func__, __LINE__, error); 236204861Sdes vdev_geom_detach(cp, B_FALSE); 237204861Sdes return (NULL); 238204861Sdes } 239204861Sdes ZFS_LOG(1, "Created consumer for %s.", pp->name); 240295367Sdes } else { 241204861Sdes error = g_access(cp, 1, 0, 1); 242204861Sdes if (error != 0) { 243204861Sdes ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 244204861Sdes __func__, __LINE__, error); 245204861Sdes return (NULL); 246204861Sdes } 247204861Sdes ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 248204861Sdes } 249204861Sdes } 250204861Sdes 251204861Sdes /* 252204861Sdes * BUG: cp may already belong to a vdev. This could happen if: 253204861Sdes * 1) That vdev is a shared spare, or 254204861Sdes * 2) We are trying to reopen a missing vdev and we are scanning by 255204861Sdes * guid. In that case, we'll ultimately fail to open this consumer, 256204861Sdes * but not until after setting the private field. 
257204861Sdes * The solution is to: 258295367Sdes * 1) Don't set the private field until after the open succeeds, and 259295367Sdes * 2) Set it to a linked list of vdevs, not just a single vdev 260295367Sdes */ 261204861Sdes cp->private = vd; 262204861Sdes if (vd != NULL) 263295367Sdes vd->vdev_tsd = cp; 264295367Sdes 265295367Sdes cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 266295367Sdes return (cp); 267295367Sdes} 268295367Sdes 269295367Sdesstatic void 270295367Sdesvdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) 271295367Sdes{ 272295367Sdes struct g_geom *gp; 273295367Sdes vdev_t *vd; 274295367Sdes 275295367Sdes g_topology_assert(); 276255767Sdes 277295367Sdes ZFS_LOG(1, "Detaching consumer. Provider %s.", 278295367Sdes cp->provider && cp->provider->name ? cp->provider->name : "NULL"); 279204861Sdes 280204861Sdes vd = cp->private; 281204861Sdes if (vd != NULL) { 282204861Sdes vd->vdev_tsd = NULL; 283204861Sdes vd->vdev_delayed_close = B_FALSE; 284204861Sdes } 285204861Sdes cp->private = NULL; 286215116Sdes 287215116Sdes gp = cp->geom; 288215116Sdes if (open_for_read) 289215116Sdes g_access(cp, -1, 0, -1); 290204861Sdes /* Destroy consumer on last close. */ 291204861Sdes if (cp->acr == 0 && cp->ace == 0) { 292204861Sdes if (cp->acw > 0) 293204861Sdes g_access(cp, 0, -cp->acw, 0); 294204861Sdes if (cp->provider != NULL) { 295204861Sdes ZFS_LOG(1, "Destroying consumer to %s.", 296204861Sdes cp->provider->name ? cp->provider->name : "NULL"); 297204861Sdes g_detach(cp); 298204861Sdes } 299204861Sdes g_destroy_consumer(cp); 300204861Sdes } 301204861Sdes /* Destroy geom if there are no consumers left. 
*/ 302204861Sdes if (LIST_EMPTY(&gp->consumer)) { 303204861Sdes ZFS_LOG(1, "Destroyed geom %s.", gp->name); 304204861Sdes g_wither_geom(gp, ENXIO); 305204861Sdes } 306204861Sdes} 307204861Sdes 308204861Sdesstatic void 309204861Sdesvdev_geom_close_locked(vdev_t *vd) 310204861Sdes{ 311204861Sdes struct g_consumer *cp; 312204861Sdes 313204861Sdes g_topology_assert(); 314204861Sdes 315204861Sdes cp = vd->vdev_tsd; 316204861Sdes if (cp == NULL) 317204861Sdes return; 318204861Sdes 319204861Sdes ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 320204861Sdes 321204861Sdes vdev_geom_detach(cp, B_TRUE); 322204861Sdes} 323204861Sdes 324204861Sdesstatic void 325296781Sdesnvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) 326296781Sdes{ 327296781Sdes 328296781Sdes (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); 329204861Sdes (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); 330204861Sdes} 331204861Sdes 332204861Sdesstatic int 333204861Sdesvdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 334204861Sdes{ 335204861Sdes struct bio *bp; 336204861Sdes u_char *p; 337204861Sdes off_t off, maxio; 338204861Sdes int error; 339204861Sdes 340204861Sdes ASSERT((offset % cp->provider->sectorsize) == 0); 341204861Sdes ASSERT((size % cp->provider->sectorsize) == 0); 342255767Sdes 343204861Sdes bp = g_alloc_bio(); 344204861Sdes off = offset; 345204861Sdes offset += size; 346204861Sdes p = data; 347204861Sdes maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 348204861Sdes error = 0; 349204861Sdes 350204861Sdes for (; off < offset; off += maxio, p += maxio, size -= maxio) { 351204861Sdes bzero(bp, sizeof(*bp)); 352204861Sdes bp->bio_cmd = cmd; 353204861Sdes bp->bio_done = NULL; 354204861Sdes bp->bio_offset = off; 355204861Sdes bp->bio_length = MIN(size, maxio); 356204861Sdes bp->bio_data = p; 357204861Sdes g_io_request(bp, cp); 358204861Sdes error = biowait(bp, "vdev_geom_io"); 359204861Sdes if (error != 0) 
360204861Sdes break; 361204861Sdes } 362204861Sdes 363204861Sdes g_destroy_bio(bp); 364204861Sdes return (error); 365204861Sdes} 366204861Sdes 367204861Sdesstatic int 368204861Sdesvdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 369204861Sdes{ 370204861Sdes struct g_provider *pp; 371204861Sdes vdev_label_t *label; 372204861Sdes char *p, *buf; 373204861Sdes size_t buflen; 374204861Sdes uint64_t psize; 375204861Sdes off_t offset, size; 376204861Sdes uint64_t state, txg; 377204861Sdes int error, l, len; 378204861Sdes 379204861Sdes g_topology_assert_not(); 380295367Sdes 381295367Sdes pp = cp->provider; 382295367Sdes ZFS_LOG(1, "Reading config from %s...", pp->name); 383204861Sdes 384204861Sdes psize = pp->mediasize; 385204861Sdes psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 386204861Sdes 387204861Sdes size = sizeof(*label) + pp->sectorsize - 388204861Sdes ((sizeof(*label) - 1) % pp->sectorsize) - 1; 389204861Sdes 390204861Sdes label = kmem_alloc(size, KM_SLEEP); 391204861Sdes buflen = sizeof(label->vl_vdev_phys.vp_nvlist); 392204861Sdes 393204861Sdes *config = NULL; 394204861Sdes for (l = 0; l < VDEV_LABELS; l++) { 395204861Sdes 396204861Sdes offset = vdev_label_offset(psize, l, 0); 397204861Sdes if ((offset % pp->sectorsize) != 0) 398204861Sdes continue; 399262566Sdes 400295367Sdes if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 401262566Sdes continue; 402262566Sdes buf = label->vl_vdev_phys.vp_nvlist; 403204861Sdes 404262566Sdes if (nvlist_unpack(buf, buflen, config, 0) != 0) 405295367Sdes continue; 406204861Sdes 407262566Sdes if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 408262566Sdes &state) != 0 || state > POOL_STATE_L2CACHE) { 409262566Sdes nvlist_free(*config); 410262566Sdes *config = NULL; 411262566Sdes continue; 412262566Sdes } 413262566Sdes 414262566Sdes if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 415262566Sdes (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 416262566Sdes &txg) != 0 || 
txg == 0)) { 417262566Sdes nvlist_free(*config); 418262566Sdes *config = NULL; 419262566Sdes continue; 420262566Sdes } 421262566Sdes 422262566Sdes break; 423262566Sdes } 424262566Sdes 425262566Sdes kmem_free(label, size); 426262566Sdes return (*config == NULL ? ENOENT : 0); 427262566Sdes} 428262566Sdes 429262566Sdesstatic void 430262566Sdesresize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 431262566Sdes{ 432262566Sdes nvlist_t **new_configs; 433262566Sdes uint64_t i; 434262566Sdes 435262566Sdes if (id < *count) 436262566Sdes return; 437295367Sdes new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 438262566Sdes KM_SLEEP); 439262566Sdes for (i = 0; i < *count; i++) 440262566Sdes new_configs[i] = (*configs)[i]; 441262566Sdes if (*configs != NULL) 442295367Sdes kmem_free(*configs, *count * sizeof(void *)); 443262566Sdes *configs = new_configs; 444262566Sdes *count = id + 1; 445262566Sdes} 446262566Sdes 447262566Sdesstatic void 448262566Sdesprocess_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 449262566Sdes const char *name, uint64_t* known_pool_guid) 450295367Sdes{ 451262566Sdes nvlist_t *vdev_tree; 452295367Sdes uint64_t pool_guid; 453204861Sdes uint64_t vdev_guid, known_guid; 454262566Sdes uint64_t id, txg, known_txg; 455262566Sdes char *pname; 456204861Sdes int i; 457262566Sdes 458204861Sdes if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 459204861Sdes strcmp(pname, name) != 0) 460204861Sdes goto ignore; 461204861Sdes 462204861Sdes if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 463204861Sdes goto ignore; 464204861Sdes 465204861Sdes if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 466204861Sdes goto ignore; 467262566Sdes 468204861Sdes if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 469204861Sdes goto ignore; 470204861Sdes 471204861Sdes if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 472204861Sdes goto ignore; 
473204861Sdes 474204861Sdes VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 475204861Sdes 476204861Sdes if (*known_pool_guid != 0) { 477204861Sdes if (pool_guid != *known_pool_guid) 478204861Sdes goto ignore; 479204861Sdes } else 480204861Sdes *known_pool_guid = pool_guid; 481204861Sdes 482204861Sdes resize_configs(configs, count, id); 483204861Sdes 484204861Sdes if ((*configs)[id] != NULL) { 485204861Sdes VERIFY(nvlist_lookup_uint64((*configs)[id], 486295367Sdes ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 487295367Sdes if (txg <= known_txg) 488295367Sdes goto ignore; 489295367Sdes nvlist_free((*configs)[id]); 490295367Sdes } 491295367Sdes 492215116Sdes (*configs)[id] = cfg; 493215116Sdes return; 494215116Sdes 495215116Sdesignore: 496295367Sdes nvlist_free(cfg); 497295367Sdes} 498295367Sdes 499295367Sdesint 500295367Sdesvdev_geom_read_pool_label(const char *name, 501295367Sdes nvlist_t ***configs, uint64_t *count) 502295367Sdes{ 503262566Sdes struct g_class *mp; 504262566Sdes struct g_geom *gp; 505262566Sdes struct g_provider *pp; 506262566Sdes struct g_consumer *zcp; 507262566Sdes nvlist_t *vdev_cfg; 508204861Sdes uint64_t pool_guid; 509204861Sdes int error; 510204861Sdes 511262566Sdes DROP_GIANT(); 512262566Sdes g_topology_lock(); 513262566Sdes 514262566Sdes *configs = NULL; 515262566Sdes *count = 0; 516262566Sdes pool_guid = 0; 517262566Sdes LIST_FOREACH(mp, &g_classes, class) { 518262566Sdes if (mp == &zfs_vdev_class) 519262566Sdes continue; 520204861Sdes LIST_FOREACH(gp, &mp->geom, geom) { 521262566Sdes if (gp->flags & G_GEOM_WITHER) 522262566Sdes continue; 523262566Sdes LIST_FOREACH(pp, &gp->provider, provider) { 524262566Sdes if (pp->flags & G_PF_WITHER) 525262566Sdes continue; 526262566Sdes zcp = vdev_geom_attach(pp, NULL); 527262566Sdes if (zcp == NULL) 528262566Sdes continue; 529262566Sdes g_topology_unlock(); 530262566Sdes error = vdev_geom_read_config(zcp, &vdev_cfg); 531262566Sdes g_topology_lock(); 532262566Sdes 
vdev_geom_detach(zcp, B_TRUE); 533262566Sdes if (error) 534262566Sdes continue; 535262566Sdes ZFS_LOG(1, "successfully read vdev config"); 536262566Sdes 537262566Sdes process_vdev_config(configs, count, 538262566Sdes vdev_cfg, name, &pool_guid); 539262566Sdes } 540295367Sdes } 541262566Sdes } 542262566Sdes g_topology_unlock(); 543295367Sdes PICKUP_GIANT(); 544262566Sdes 545295367Sdes return (*count > 0 ? 0 : ENOENT); 546262566Sdes} 547204861Sdes 548295367Sdesstatic void 549295367Sdesvdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) 550204861Sdes{ 551204861Sdes nvlist_t *config; 552204861Sdes 553204861Sdes g_topology_assert_not(); 554262566Sdes 555262566Sdes *pguid = 0; 556204861Sdes *vguid = 0; 557204861Sdes if (vdev_geom_read_config(cp, &config) == 0) { 558255767Sdes nvlist_get_guids(config, pguid, vguid); 559204861Sdes nvlist_free(config); 560204861Sdes } 561204861Sdes} 562204861Sdes 563204861Sdesstatic boolean_t 564204861Sdesvdev_attach_ok(vdev_t *vd, struct g_provider *pp) 565204861Sdes{ 566204861Sdes uint64_t pool_guid; 567295367Sdes uint64_t vdev_guid; 568204861Sdes struct g_consumer *zcp; 569204861Sdes boolean_t pool_ok; 570204861Sdes boolean_t vdev_ok; 571204861Sdes 572204861Sdes zcp = vdev_geom_attach(pp, NULL); 573204861Sdes if (zcp == NULL) { 574204861Sdes ZFS_LOG(1, "Unable to attach tasting instance to %s.", 575204861Sdes pp->name); 576204861Sdes return (B_FALSE); 577204861Sdes } 578204861Sdes g_topology_unlock(); 579204861Sdes vdev_geom_read_guids(zcp, &pool_guid, &vdev_guid); 580204861Sdes g_topology_lock(); 581204861Sdes vdev_geom_detach(zcp, B_TRUE); 582204861Sdes 583204861Sdes /* 584204861Sdes * Check that the label's vdev guid matches the desired guid. If the 585204861Sdes * label has a pool guid, check that it matches too. (Inactive spares 586204861Sdes * and L2ARCs do not have any pool guid in the label.) 
587204861Sdes */ 588204861Sdes if ((pool_guid == 0 || pool_guid == spa_guid(vd->vdev_spa)) && 589204861Sdes vdev_guid == vd->vdev_guid) { 590204861Sdes ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path); 591204861Sdes return (B_TRUE); 592204861Sdes } else { 593204861Sdes ZFS_LOG(1, "guid mismatch for provider %s: " 594204861Sdes "%ju:%ju != %ju:%ju.", vd->vdev_path, 595204861Sdes (uintmax_t)spa_guid(vd->vdev_spa), 596204861Sdes (uintmax_t)vd->vdev_guid, 597204861Sdes (uintmax_t)pool_guid, (uintmax_t)vdev_guid); 598204861Sdes return (B_FALSE); 599204861Sdes } 600204861Sdes} 601204861Sdes 602204861Sdesstatic struct g_consumer * 603204861Sdesvdev_geom_attach_by_guids(vdev_t *vd) 604204861Sdes{ 605204861Sdes struct g_class *mp; 606204861Sdes struct g_geom *gp; 607204861Sdes struct g_provider *pp; 608204861Sdes struct g_consumer *cp; 609204861Sdes 610204861Sdes g_topology_assert(); 611204861Sdes 612204861Sdes cp = NULL; 613204861Sdes LIST_FOREACH(mp, &g_classes, class) { 614204861Sdes if (mp == &zfs_vdev_class) 615204861Sdes continue; 616204861Sdes LIST_FOREACH(gp, &mp->geom, geom) { 617204861Sdes if (gp->flags & G_GEOM_WITHER) 618204861Sdes continue; 619204861Sdes LIST_FOREACH(pp, &gp->provider, provider) { 620204861Sdes if (!vdev_attach_ok(vd, pp)) 621204861Sdes continue; 622204861Sdes cp = vdev_geom_attach(pp, vd); 623204861Sdes if (cp == NULL) { 624204861Sdes printf("ZFS WARNING: Unable to " 625204861Sdes "attach to %s.\n", pp->name); 626204861Sdes continue; 627204861Sdes } 628204861Sdes break; 629204861Sdes } 630204861Sdes if (cp != NULL) 631204861Sdes break; 632204861Sdes } 633204861Sdes if (cp != NULL) 634204861Sdes break; 635204861Sdes } 636204861Sdesend: 637204861Sdes return (cp); 638204861Sdes} 639204861Sdes 640204861Sdesstatic struct g_consumer * 641204861Sdesvdev_geom_open_by_guids(vdev_t *vd) 642204861Sdes{ 643204861Sdes struct g_consumer *cp; 644295367Sdes char *buf; 645295367Sdes size_t len; 646295367Sdes 647295367Sdes g_topology_assert(); 
648295367Sdes 649204861Sdes ZFS_LOG(1, "Searching by guids [%ju:%ju].", 650204861Sdes (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 651204861Sdes cp = vdev_geom_attach_by_guids(vd); 652204861Sdes if (cp != NULL) { 653204861Sdes len = strlen(cp->provider->name) + strlen("/dev/") + 1; 654204861Sdes buf = kmem_alloc(len, KM_SLEEP); 655204861Sdes 656204861Sdes snprintf(buf, len, "/dev/%s", cp->provider->name); 657204861Sdes spa_strfree(vd->vdev_path); 658204861Sdes vd->vdev_path = buf; 659204861Sdes 660204861Sdes ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 661204861Sdes (uintmax_t)spa_guid(vd->vdev_spa), 662204861Sdes (uintmax_t)vd->vdev_guid, vd->vdev_path); 663204861Sdes } else { 664204861Sdes ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 665204861Sdes (uintmax_t)spa_guid(vd->vdev_spa), 666204861Sdes (uintmax_t)vd->vdev_guid); 667204861Sdes } 668204861Sdes 669204861Sdes return (cp); 670204861Sdes} 671204861Sdes 672255767Sdesstatic struct g_consumer * 673255767Sdesvdev_geom_open_by_path(vdev_t *vd, int check_guid) 674255767Sdes{ 675204861Sdes struct g_provider *pp; 676204861Sdes struct g_consumer *cp; 677204861Sdes 678204861Sdes g_topology_assert(); 679204861Sdes 680204861Sdes cp = NULL; 681226046Sdes pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 682226046Sdes if (pp != NULL) { 683226046Sdes ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 684226046Sdes if (!check_guid || vdev_attach_ok(vd, pp)) 685226046Sdes cp = vdev_geom_attach(pp, vd); 686226046Sdes } 687226046Sdes 688226046Sdes return (cp); 689226046Sdes} 690226046Sdes 691226046Sdesstatic int 692226046Sdesvdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 693226046Sdes uint64_t *logical_ashift, uint64_t *physical_ashift) 694226046Sdes{ 695204861Sdes struct g_provider *pp; 696 struct g_consumer *cp; 697 size_t bufsize; 698 int error; 699 700 /* Set the TLS to indicate downstack that we should not access zvols*/ 701 
VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 702 703 /* 704 * We must have a pathname, and it must be absolute. 705 */ 706 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 707 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 708 return (EINVAL); 709 } 710 711 vd->vdev_tsd = NULL; 712 713 DROP_GIANT(); 714 g_topology_lock(); 715 error = 0; 716 717 if (vd->vdev_spa->spa_splitting_newspa || 718 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 719 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 720 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 721 /* 722 * We are dealing with a vdev that hasn't been previously 723 * opened (since boot), and we are not loading an 724 * existing pool configuration. This looks like a 725 * vdev add operation to a new or existing pool. 726 * Assume the user knows what he/she is doing and find 727 * GEOM provider by its name, ignoring GUID mismatches. 728 * 729 * XXPOLICY: It would be safer to only allow a device 730 * that is unlabeled or labeled but missing 731 * GUID information to be opened in this fashion, 732 * unless we are doing a split, in which case we 733 * should allow any guid. 734 */ 735 cp = vdev_geom_open_by_path(vd, 0); 736 } else { 737 /* 738 * Try using the recorded path for this device, but only 739 * accept it if its label data contains the expected GUIDs. 740 */ 741 cp = vdev_geom_open_by_path(vd, 1); 742 if (cp == NULL) { 743 /* 744 * The device at vd->vdev_path doesn't have the 745 * expected GUIDs. The disks might have merely 746 * moved around so try all other GEOM providers 747 * to find one with the right GUIDs. 
748 */ 749 cp = vdev_geom_open_by_guids(vd); 750 } 751 } 752 753 /* Clear the TLS now that tasting is done */ 754 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); 755 756 if (cp == NULL) { 757 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 758 error = ENOENT; 759 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 760 !ISP2(cp->provider->sectorsize)) { 761 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 762 vd->vdev_path); 763 764 vdev_geom_close_locked(vd); 765 error = EINVAL; 766 cp = NULL; 767 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 768 int i; 769 770 for (i = 0; i < 5; i++) { 771 error = g_access(cp, 0, 1, 0); 772 if (error == 0) 773 break; 774 g_topology_unlock(); 775 tsleep(vd, 0, "vdev", hz / 2); 776 g_topology_lock(); 777 } 778 if (error != 0) { 779 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 780 vd->vdev_path, error); 781 vdev_geom_close_locked(vd); 782 cp = NULL; 783 } 784 } 785 786 /* Fetch initial physical path information for this device. */ 787 if (cp != NULL) 788 vdev_geom_attrchanged(cp, "GEOM::physpath"); 789 790 g_topology_unlock(); 791 PICKUP_GIANT(); 792 if (cp == NULL) { 793 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 794 return (error); 795 } 796 pp = cp->provider; 797 798 /* 799 * Determine the actual size of the device. 800 */ 801 *max_psize = *psize = pp->mediasize; 802 803 /* 804 * Determine the device's minimum transfer size and preferred 805 * transfer size. 806 */ 807 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 808 *physical_ashift = 0; 809 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 810 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 811 *physical_ashift = highbit(pp->stripesize) - 1; 812 813 /* 814 * Clear the nowritecache settings, so that on a vdev_reopen() 815 * we will try again. 816 */ 817 vd->vdev_nowritecache = B_FALSE; 818 819 /* 820 * Determine the device's rotation rate. 
821 */ 822 vdev_geom_set_rotation_rate(vd, cp); 823 824 return (0); 825} 826 827static void 828vdev_geom_close(vdev_t *vd) 829{ 830 831 DROP_GIANT(); 832 g_topology_lock(); 833 vdev_geom_close_locked(vd); 834 g_topology_unlock(); 835 PICKUP_GIANT(); 836} 837 838static void 839vdev_geom_io_intr(struct bio *bp) 840{ 841 vdev_t *vd; 842 zio_t *zio; 843 844 zio = bp->bio_caller1; 845 vd = zio->io_vd; 846 zio->io_error = bp->bio_error; 847 if (zio->io_error == 0 && bp->bio_resid != 0) 848 zio->io_error = SET_ERROR(EIO); 849 850 switch(zio->io_error) { 851 case ENOTSUP: 852 /* 853 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 854 * that future attempts will never succeed. In this case 855 * we set a persistent flag so that we don't bother with 856 * requests in the future. 857 */ 858 switch(bp->bio_cmd) { 859 case BIO_FLUSH: 860 vd->vdev_nowritecache = B_TRUE; 861 break; 862 case BIO_DELETE: 863 vd->vdev_notrim = B_TRUE; 864 break; 865 } 866 break; 867 case ENXIO: 868 if (!vd->vdev_remove_wanted) { 869 /* 870 * If provider's error is set we assume it is being 871 * removed. 
872 */ 873 if (bp->bio_to->error != 0) { 874 vd->vdev_remove_wanted = B_TRUE; 875 spa_async_request(zio->io_spa, 876 SPA_ASYNC_REMOVE); 877 } else if (!vd->vdev_delayed_close) { 878 vd->vdev_delayed_close = B_TRUE; 879 } 880 } 881 break; 882 } 883 g_destroy_bio(bp); 884 zio_delay_interrupt(zio); 885} 886 887static void 888vdev_geom_io_start(zio_t *zio) 889{ 890 vdev_t *vd; 891 struct g_consumer *cp; 892 struct bio *bp; 893 int error; 894 895 vd = zio->io_vd; 896 897 switch (zio->io_type) { 898 case ZIO_TYPE_IOCTL: 899 /* XXPOLICY */ 900 if (!vdev_readable(vd)) { 901 zio->io_error = SET_ERROR(ENXIO); 902 zio_interrupt(zio); 903 return; 904 } else { 905 switch (zio->io_cmd) { 906 case DKIOCFLUSHWRITECACHE: 907 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 908 break; 909 if (vd->vdev_nowritecache) { 910 zio->io_error = SET_ERROR(ENOTSUP); 911 break; 912 } 913 goto sendreq; 914 default: 915 zio->io_error = SET_ERROR(ENOTSUP); 916 } 917 } 918 919 zio_execute(zio); 920 return; 921 case ZIO_TYPE_FREE: 922 if (vd->vdev_notrim) { 923 zio->io_error = SET_ERROR(ENOTSUP); 924 } else if (!vdev_geom_bio_delete_disable) { 925 goto sendreq; 926 } 927 zio_execute(zio); 928 return; 929 } 930sendreq: 931 ASSERT(zio->io_type == ZIO_TYPE_READ || 932 zio->io_type == ZIO_TYPE_WRITE || 933 zio->io_type == ZIO_TYPE_FREE || 934 zio->io_type == ZIO_TYPE_IOCTL); 935 936 cp = vd->vdev_tsd; 937 if (cp == NULL) { 938 zio->io_error = SET_ERROR(ENXIO); 939 zio_interrupt(zio); 940 return; 941 } 942 bp = g_alloc_bio(); 943 bp->bio_caller1 = zio; 944 switch (zio->io_type) { 945 case ZIO_TYPE_READ: 946 case ZIO_TYPE_WRITE: 947 zio->io_target_timestamp = zio_handle_io_delay(zio); 948 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? 
BIO_READ : BIO_WRITE; 949 bp->bio_data = zio->io_data; 950 bp->bio_offset = zio->io_offset; 951 bp->bio_length = zio->io_size; 952 break; 953 case ZIO_TYPE_FREE: 954 bp->bio_cmd = BIO_DELETE; 955 bp->bio_data = NULL; 956 bp->bio_offset = zio->io_offset; 957 bp->bio_length = zio->io_size; 958 break; 959 case ZIO_TYPE_IOCTL: 960 bp->bio_cmd = BIO_FLUSH; 961 bp->bio_flags |= BIO_ORDERED; 962 bp->bio_data = NULL; 963 bp->bio_offset = cp->provider->mediasize; 964 bp->bio_length = 0; 965 break; 966 } 967 bp->bio_done = vdev_geom_io_intr; 968 969 g_io_request(bp, cp); 970} 971 972static void 973vdev_geom_io_done(zio_t *zio) 974{ 975} 976 977static void 978vdev_geom_hold(vdev_t *vd) 979{ 980} 981 982static void 983vdev_geom_rele(vdev_t *vd) 984{ 985} 986 987vdev_ops_t vdev_geom_ops = { 988 vdev_geom_open, 989 vdev_geom_close, 990 vdev_default_asize, 991 vdev_geom_io_start, 992 vdev_geom_io_done, 993 NULL, 994 vdev_geom_hold, 995 vdev_geom_rele, 996 VDEV_TYPE_DISK, /* name of this vdev type */ 997 B_TRUE /* leaf vdev */ 998}; 999