/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2013 Joyent, Inc. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/refcount.h>
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <sys/sunldi.h>
#include <sys/efi_partition.h>
#include <sys/fm/fs/zfs.h>

/*
 * Virtual device vector for disks.
 */

extern ldi_ident_t zfs_li;

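/*
 * Ties an asynchronous buf_t to the zio that issued it.  The buf_t must
 * remain the first member: vdev_disk_io_intr() casts the buf_t it is
 * handed back to the enclosing vdev_disk_buf_t to recover the zio.
 */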
typedef struct vdev_disk_buf {
	buf_t	vdb_buf;
	zio_t	*vdb_io;
} vdev_disk_buf_t;

static void
vdev_disk_hold(vdev_t *vd)
{
	ddi_devid_t devid;
	char *minor;

	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
		return;

	/*
	 * Only prefetch path and devid info if the device has
	 * never been opened.
	 */
	if (vd->vdev_tsd != NULL)
		return;

	if (vd->vdev_wholedisk == -1ULL) {
		size_t len = strlen(vd->vdev_path) + 3;
		char *buf = kmem_alloc(len, KM_SLEEP);

		(void) snprintf(buf, len, "%ss0", vd->vdev_path);

		(void) ldi_vp_from_name(buf, &vd->vdev_name_vp);
		kmem_free(buf, len);
	}

	if (vd->vdev_name_vp == NULL)
		(void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp);

	if (vd->vdev_devid != NULL &&
	    ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) {
		(void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp);
		ddi_devid_str_free(minor);
		ddi_devid_free(devid);
	}
}

static void
vdev_disk_rele(vdev_t *vd)
{
	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));

	if (vd->vdev_name_vp) {
		VN_RELE_ASYNC(vd->vdev_name_vp,
		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
		vd->vdev_name_vp = NULL;
	}
	if (vd->vdev_devid_vp) {
		VN_RELE_ASYNC(vd->vdev_devid_vp,
		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
		vd->vdev_devid_vp = NULL;
	}
}

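/*
 * Determine how much space is available beyond the end of the last
 * partition by reading the EFI label and comparing the device's capacity
 * (in blocks) against the LBA of the alternate (backup) GPT header.
 * Only meaningful for whole-disk vdevs, where ZFS owns any trailing
 * space.  Returns the number of additional bytes available, or 0.
 */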
static uint64_t
vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz)
{
	ASSERT(vd->vdev_wholedisk);

	vdev_disk_t *dvd = vd->vdev_tsd;
	dk_efi_t dk_ioc;
	efi_gpt_t *efi;
	uint64_t avail_space = 0;
	int efisize = EFI_LABEL_SIZE * 2;

	dk_ioc.dki_data = kmem_alloc(efisize, KM_SLEEP);
	dk_ioc.dki_lba = 1;
	dk_ioc.dki_length = efisize;
	dk_ioc.dki_data_64 = (uint64_t)(uintptr_t)dk_ioc.dki_data;
	efi = dk_ioc.dki_data;

	if (ldi_ioctl(dvd->vd_lh, DKIOCGETEFI, (intptr_t)&dk_ioc,
	    FKIOCTL, kcred, NULL) == 0) {
		uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);

		zfs_dbgmsg("vdev %s, capacity %llu, altern lba %llu",
		    vd->vdev_path, capacity, efi_altern_lba);
		if (capacity > efi_altern_lba)
			avail_space = (capacity - efi_altern_lba) * blksz;
	}
	kmem_free(dk_ioc.dki_data, efisize);
	return (avail_space);
}

/*
 * We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when
 * even a fallback to DKIOCGMEDIAINFO fails.
 */
#ifdef DEBUG
#define	VDEV_DEBUG(...)	cmn_err(CE_NOTE, __VA_ARGS__)
#else
#define	VDEV_DEBUG(...)	/* Nothing... */
#endif

static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	spa_t *spa = vd->vdev_spa;
	vdev_disk_t *dvd;
	union {
		struct dk_minfo_ext ude;
		struct dk_minfo ud;
	} dks;
	struct dk_minfo_ext *dkmext = &dks.ude;
	struct dk_minfo *dkm = &dks.ud;
	int error;
	dev_t dev;
	int otyp;
	boolean_t validate_devid = B_FALSE;
	ddi_devid_t devid;
	uint64_t capacity = 0, blksz = 0, pbsize;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Reopen the device if it's not currently open.  Otherwise,
	 * just update the physical size of the device.
	 */
	if (vd->vdev_tsd != NULL) {
		ASSERT(vd->vdev_reopening);
		dvd = vd->vdev_tsd;
		goto skip_open;
	}

	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 */
	if (vd->vdev_devid != NULL) {
		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
		    &dvd->vd_minor) != 0) {
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (SET_ERROR(EINVAL));
		}
	}

	error = EINVAL;		/* presume failure */

	if (vd->vdev_path != NULL) {

		if (vd->vdev_wholedisk == -1ULL) {
			size_t len = strlen(vd->vdev_path) + 3;
			char *buf = kmem_alloc(len, KM_SLEEP);
			ldi_handle_t lh;

			(void) snprintf(buf, len, "%ss0", vd->vdev_path);

			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
			    &lh, zfs_li) == 0) {
				spa_strfree(vd->vdev_path);
				vd->vdev_path = buf;
				vd->vdev_wholedisk = 1ULL;
				(void) ldi_close(lh, spa_mode(spa), kcred);
			} else {
				kmem_free(buf, len);
			}
		}

		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
		    &dvd->vd_lh, zfs_li);

		/*
		 * Compare the devid to the stored value.
		 */
		if (error == 0 && vd->vdev_devid != NULL &&
		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
				error = SET_ERROR(EINVAL);
				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
				    kcred);
				dvd->vd_lh = NULL;
			}
			ddi_devid_free(devid);
		}

		/*
		 * If we succeeded in opening the device, but 'vdev_wholedisk'
		 * is not yet set, then this must be a slice.
		 */
		if (error == 0 && vd->vdev_wholedisk == -1ULL)
			vd->vdev_wholedisk = 0;
	}

	/*
	 * If we were unable to open by path, or the devid check fails, open by
	 * devid instead.
	 */
	if (error != 0 && vd->vdev_devid != NULL) {
		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
	}

	/*
	 * If all else fails, then try opening by physical path (if available)
	 * or the logical path (if we failed due to the devid check).  While
	 * not as reliable as the devid, this will give us something, and the
	 * higher level vdev validation will prevent us from opening the wrong
	 * device.
	 */
	if (error) {
		if (vd->vdev_devid != NULL)
			validate_devid = B_TRUE;

		if (vd->vdev_physpath != NULL &&
		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);

		/*
		 * Note that we don't support the legacy auto-wholedisk support
		 * as above.  This hasn't been used in a very long time and we
		 * don't need to propagate its oddities to this edge condition.
		 */
		if (error && vd->vdev_path != NULL)
			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);
	}

	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	/*
	 * Now that the device has been successfully opened, update the devid
	 * if necessary.
	 */
	if (validate_devid && spa_writeable(spa) &&
	    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
		if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
			char *vd_devid;

			vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor);
			zfs_dbgmsg("vdev %s: update devid from %s, "
			    "to %s", vd->vdev_path, vd->vdev_devid, vd_devid);
			spa_strfree(vd->vdev_devid);
			vd->vdev_devid = spa_strdup(vd_devid);
			ddi_devid_str_free(vd_devid);
		}
		ddi_devid_free(devid);
	}

	/*
	 * Once a device is opened, verify that the physical device path (if
	 * available) is up to date.
	 */
	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
		char *physpath, *minorname;

		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		minorname = NULL;
		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
		    (vd->vdev_physpath == NULL ||
		    strcmp(vd->vdev_physpath, physpath) != 0)) {
			if (vd->vdev_physpath)
				spa_strfree(vd->vdev_physpath);
			(void) strlcat(physpath, ":", MAXPATHLEN);
			(void) strlcat(physpath, minorname, MAXPATHLEN);
			vd->vdev_physpath = spa_strdup(physpath);
		}
		if (minorname)
			kmem_free(minorname, strlen(minorname) + 1);
		kmem_free(physpath, MAXPATHLEN);
	}

skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (SET_ERROR(EINVAL));
	}

	*max_psize = *psize;

	/*
	 * Determine the device's minimum transfer size.
	 * If the ioctl isn't supported, assume DEV_BSIZE.
	 */
	if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT,
	    (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) {
		capacity = dkmext->dki_capacity - 1;
		blksz = dkmext->dki_lbsize;
		pbsize = dkmext->dki_pbsize;
	} else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO,
	    (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) {
		VDEV_DEBUG(
		    "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n",
		    vd->vdev_path);
		capacity = dkm->dki_capacity - 1;
		blksz = dkm->dki_lbsize;
		pbsize = blksz;
	} else {
		VDEV_DEBUG("vdev_disk_open(\"%s\"): "
		    "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
		    vd->vdev_path, error);
		pbsize = DEV_BSIZE;
	}

	*ashift = highbit(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;

	if (vd->vdev_wholedisk == 1) {
		int wce = 1;

		if (error == 0) {
			/*
			 * If we have the capability to expand, we'd have
			 * found out via success from DKIOCGMEDIAINFO{,EXT}.
			 * Adjust max_psize upward accordingly since we know
			 * we own the whole disk now.
			 */
			*max_psize += vdev_disk_get_space(vd, capacity, blksz);
			zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
			    "max_psize %llu", vd->vdev_path, *psize,
			    *max_psize);
		}

		/*
		 * Since we own the whole disk, try to enable disk write
		 * caching.  We ignore errors because it's OK if we can't do
		 * it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);
	}

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}

static void
vdev_disk_close(vdev_t *vd)
{
	vdev_disk_t *dvd = vd->vdev_tsd;

	if (vd->vdev_reopening || dvd == NULL)
		return;

	if (dvd->vd_minor != NULL)
		ddi_devid_str_free(dvd->vd_minor);

	if (dvd->vd_devid != NULL)
		ddi_devid_free(dvd->vd_devid);

	if (dvd->vd_lh != NULL)
		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);

	vd->vdev_delayed_close = B_FALSE;
	kmem_free(dvd, sizeof (vdev_disk_t));
	vd->vdev_tsd = NULL;
}

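/*
 * Perform a synchronous physical I/O against the vdev.  In the context of
 * an active crash dump the normal strategy path cannot be used, so the
 * request is routed through ldi_dump(9F); otherwise it falls through to
 * vdev_disk_ldi_physio(), which does a blocking read or write that
 * bypasses the zio pipeline.
 */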
int
vdev_disk_physio(vdev_t *vd, caddr_t data,
    size_t size, uint64_t offset, int flags, boolean_t isdump)
{
	vdev_disk_t *dvd = vd->vdev_tsd;

	ASSERT(vd->vdev_ops == &vdev_disk_ops);

	/*
	 * If in the context of an active crash dump, use the ldi_dump(9F)
	 * call instead of ldi_strategy(9F) as usual.
	 */
	if (isdump) {
		ASSERT3P(dvd, !=, NULL);
		return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
		    lbtodb(size)));
	}

	return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
}

int
vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
    size_t size, uint64_t offset, int flags)
{
	buf_t *bp;
	int error = 0;

	if (vd_lh == NULL)
		return (SET_ERROR(EINVAL));

	ASSERT(flags & B_READ || flags & B_WRITE);

	bp = getrbuf(KM_SLEEP);
	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
	bp->b_bcount = size;
	bp->b_un.b_addr = (void *)data;
	bp->b_lblkno = lbtodb(offset);
	bp->b_bufsize = size;

	error = ldi_strategy(vd_lh, bp);
	ASSERT(error == 0);
	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
		error = SET_ERROR(EIO);
	freerbuf(bp);

	return (error);
}

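/*
 * I/O completion callback, invoked via b_iodone when a buf submitted by
 * vdev_disk_io_start() completes.  Because the buf_t is the first member
 * of vdev_disk_buf_t, the cast below recovers the enclosing structure and
 * with it the originating zio.
 */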
static void
vdev_disk_io_intr(buf_t *bp)
{
	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
	zio_t *zio = vdb->vdb_io;

	/*
	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
	 * Rather than teach the rest of the stack about other error
	 * possibilities (EFAULT, etc), we normalize the error value here.
	 */
	zio->io_error = (geterror(bp) != 0 ? EIO : 0);

	if (zio->io_error == 0 && bp->b_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	kmem_free(vdb, sizeof (vdev_disk_buf_t));

	zio_interrupt(zio);
}

static void
vdev_disk_ioctl_free(zio_t *zio)
{
	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
}

static const zio_vsd_ops_t vdev_disk_vsd_ops = {
	vdev_disk_ioctl_free,
	zio_vsd_default_cksum_report
};

static void
vdev_disk_ioctl_done(void *zio_arg, int error)
{
	zio_t *zio = zio_arg;

	zio->io_error = error;

	zio_interrupt(zio);
}

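/*
 * Start an I/O on behalf of the zio pipeline.  Ioctl zios (currently only
 * DKIOCFLUSHWRITECACHE) may complete asynchronously through
 * vdev_disk_ioctl_done(); read and write zios are submitted via
 * ldi_strategy(9F) with vdev_disk_io_intr() as the completion routine.
 */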
static int
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	vdev_disk_buf_t *vdb;
	struct dk_callback *dkc;
	buf_t *bp;
	int error;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {

		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
			zio->io_vsd_ops = &vdev_disk_vsd_ops;

			dkc->dkc_callback = vdev_disk_ioctl_done;
			dkc->dkc_flag = FLUSH_VOLATILE;
			dkc->dkc_cookie = zio;

			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return (ZIO_PIPELINE_STOP);
			}

			if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;

			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);

	vdb->vdb_io = zio;
	bp = &vdb->vdb_buf;

	bioinit(bp);
	bp->b_flags = B_BUSY | B_NOCACHE |
	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bp->b_flags |= B_FAILFAST;
	bp->b_bcount = zio->io_size;
	bp->b_un.b_addr = zio->io_data;
	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_iodone = (int (*)())vdev_disk_io_intr;

	/* ldi_strategy() will return non-zero only on programming errors */
	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);

	return (ZIO_PIPELINE_STOP);
}

static void
vdev_disk_io_done(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;

	/*
	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see
	 * if the device has been removed.  If this is the case, then we
	 * trigger an asynchronous removal of the device.  Otherwise, probe
	 * the device and make sure it's still accessible.
	 */
	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
		vdev_disk_t *dvd = vd->vdev_tsd;
		int state = DKIO_NONE;

		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
			/*
			 * We post the resource as soon as possible, instead
			 * of when the async removal actually happens, because
			 * the DE is using this information to discard
			 * previous I/O errors.
			 */
			zfs_post_remove(zio->io_spa, vd);
			vd->vdev_remove_wanted = B_TRUE;
			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
		} else if (!vd->vdev_delayed_close) {
			vd->vdev_delayed_close = B_TRUE;
		}
	}
}

vdev_ops_t vdev_disk_ops = {
	vdev_disk_open,
	vdev_disk_close,
	vdev_default_asize,
	vdev_disk_io_start,
	vdev_disk_io_done,
	NULL,			/* vdev_op_state_change */
	vdev_disk_hold,
	vdev_disk_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};

/*
 * Given the root disk device devid or pathname, read the label from
 * the device, and construct a configuration nvlist.
 */
int
vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
{
	ldi_handle_t vd_lh;
	vdev_label_t *label;
	uint64_t s, size;
	int l;
	ddi_devid_t tmpdevid;
	int error = -1;
	char *minor_name;

	/*
	 * Read the device label and build the nvlist.
	 */
	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
	    &minor_name) == 0) {
		error = ldi_open_by_devid(tmpdevid, minor_name,
		    FREAD, kcred, &vd_lh, zfs_li);
		ddi_devid_free(tmpdevid);
		ddi_devid_str_free(minor_name);
	}

	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
	    zfs_li)))
		return (error);

	if (ldi_get_size(vd_lh, &s)) {
		(void) ldi_close(vd_lh, FREAD, kcred);
		return (SET_ERROR(EIO));
	}

	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {
		uint64_t offset, state, txg = 0;

		/* read vdev label */
		offset = vdev_label_offset(size, l, 0);
		if (vdev_disk_ldi_physio(vd_lh, (caddr_t)label,
		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
			continue;

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
			*config = NULL;
			continue;
		}

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, sizeof (vdev_label_t));
	(void) ldi_close(vd_lh, FREAD, kcred);
	if (*config == NULL)
		error = SET_ERROR(EIDRM);

	return (error);
}