1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* 27 * Pool import support functions. 28 * 29 * To import a pool, we rely on reading the configuration information from the 30 * ZFS label of each device. If we successfully read the label, then we 31 * organize the configuration information in the following hierarchy: 32 * 33 * pool guid -> toplevel vdev guid -> label txg 34 * 35 * Duplicate entries matching this same tuple will be discarded. Once we have 36 * examined every device, we pick the best label txg config for each toplevel 37 * vdev. We then arrange these toplevel vdevs into a complete pool config, and 38 * update any paths that have changed. Finally, we attempt to import the pool 39 * using our derived config, and record the results. 40 */ 41 42#include <devid.h> 43#include <dirent.h> 44#include <errno.h> 45#include <libintl.h> 46#include <stdlib.h> 47#include <string.h> 48#include <sys/stat.h> 49#include <unistd.h> 50#include <fcntl.h> 51 52#include <sys/vdev_impl.h> 53 54#include "libzfs.h" 55#include "libzfs_impl.h" 56 57/* 58 * Intermediate structures used to gather configuration information. 59 */ 60typedef struct config_entry { 61 uint64_t ce_txg; 62 nvlist_t *ce_config; 63 struct config_entry *ce_next; 64} config_entry_t; 65 66typedef struct vdev_entry { 67 uint64_t ve_guid; 68 config_entry_t *ve_configs; 69 struct vdev_entry *ve_next; 70} vdev_entry_t; 71 72typedef struct pool_entry { 73 uint64_t pe_guid; 74 vdev_entry_t *pe_vdevs; 75 struct pool_entry *pe_next; 76} pool_entry_t; 77 78typedef struct name_entry { 79 char *ne_name; 80 uint64_t ne_guid; 81 struct name_entry *ne_next; 82} name_entry_t; 83 84typedef struct pool_list { 85 pool_entry_t *pools; 86 name_entry_t *names; 87} pool_list_t; 88 89static char * 90get_devid(const char *path) 91{ 92 int fd; 93 ddi_devid_t devid; 94 char *minor, *ret; 95 96 if ((fd = open(path, O_RDONLY)) < 0) 97 return (NULL); 98 99 minor = NULL; 100 ret = NULL; 101 if (devid_get(fd, &devid) == 0) { 102 if (devid_get_minor_name(fd, &minor) == 0) 103 ret = devid_str_encode(devid, minor); 104 if (minor != NULL) 105 devid_str_free(minor); 106 devid_free(devid); 107 } 108 (void) close(fd); 109 110 return (ret); 111} 112 113 114/* 115 * Go through and fix up any path and/or devid information for the given vdev 116 * configuration. 117 */ 118static int 119fix_paths(nvlist_t *nv, name_entry_t *names) 120{ 121 nvlist_t **child; 122 uint_t c, children; 123 uint64_t guid; 124 name_entry_t *ne, *best; 125 char *path, *devid; 126 int matched; 127 128 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 129 &child, &children) == 0) { 130 for (c = 0; c < children; c++) 131 if (fix_paths(child[c], names) != 0) 132 return (-1); 133 return (0); 134 } 135 136 /* 137 * This is a leaf (file or disk) vdev. In either case, go through 138 * the name list and see if we find a matching guid. If so, replace 139 * the path and see if we can calculate a new devid. 140 * 141 * There may be multiple names associated with a particular guid, in 142 * which case we have overlapping slices or multiple paths to the same 143 * disk. If this is the case, then we want to pick the path that is 144 * the most similar to the original, where "most similar" is the number 145 * of matching characters starting from the end of the path. This will 146 * preserve slice numbers even if the disks have been reorganized, and 147 * will also catch preferred disk names if multiple paths exist. 148 */ 149 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); 150 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) 151 path = NULL; 152 153 matched = 0; 154 best = NULL; 155 for (ne = names; ne != NULL; ne = ne->ne_next) { 156 if (ne->ne_guid == guid) { 157 const char *src, *dst; 158 int count; 159 160 if (path == NULL) { 161 best = ne; 162 break; 163 } 164 165 src = ne->ne_name + strlen(ne->ne_name) - 1; 166 dst = path + strlen(path) - 1; 167 for (count = 0; src >= ne->ne_name && dst >= path; 168 src--, dst--, count++) 169 if (*src != *dst) 170 break; 171 172 /* 173 * At this point, 'count' is the number of characters 174 * matched from the end. 175 */ 176 if (count > matched || best == NULL) { 177 best = ne; 178 matched = count; 179 } 180 } 181 } 182 183 if (best == NULL) 184 return (0); 185 186 if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) 187 return (-1); 188 189 if ((devid = get_devid(best->ne_name)) == NULL) { 190 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 191 } else { 192 if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) 193 return (-1); 194 devid_str_free(devid); 195 } 196 197 return (0); 198} 199 200/* 201 * Add the given configuration to the list of known devices. 202 */ 203static int 204add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, 205 nvlist_t *config) 206{ 207 uint64_t pool_guid, vdev_guid, top_guid, txg, state; 208 pool_entry_t *pe; 209 vdev_entry_t *ve; 210 config_entry_t *ce; 211 name_entry_t *ne; 212 213 /* 214 * If this is a hot spare not currently in use or level 2 cache 215 * device, add it to the list of names to translate, but don't do 216 * anything else. 217 */ 218 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 219 &state) == 0 && 220 (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && 221 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { 222 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) 223 return (-1); 224 225 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { 226 free(ne); 227 return (-1); 228 } 229 ne->ne_guid = vdev_guid; 230 ne->ne_next = pl->names; 231 pl->names = ne; 232 return (0); 233 } 234 235 /* 236 * If we have a valid config but cannot read any of these fields, then 237 * it means we have a half-initialized label. In vdev_label_init() 238 * we write a label with txg == 0 so that we can identify the device 239 * in case the user refers to the same disk later on. If we fail to 240 * create the pool, we'll be left with a label in this state 241 * which should not be considered part of a valid pool. 242 */ 243 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 244 &pool_guid) != 0 || 245 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, 246 &vdev_guid) != 0 || 247 nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, 248 &top_guid) != 0 || 249 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 250 &txg) != 0 || txg == 0) { 251 nvlist_free(config); 252 return (0); 253 } 254 255 /* 256 * First, see if we know about this pool. If not, then add it to the 257 * list of known pools. 258 */ 259 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { 260 if (pe->pe_guid == pool_guid) 261 break; 262 } 263 264 if (pe == NULL) { 265 if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { 266 nvlist_free(config); 267 return (-1); 268 } 269 pe->pe_guid = pool_guid; 270 pe->pe_next = pl->pools; 271 pl->pools = pe; 272 } 273 274 /* 275 * Second, see if we know about this toplevel vdev. Add it if its 276 * missing. 277 */ 278 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { 279 if (ve->ve_guid == top_guid) 280 break; 281 } 282 283 if (ve == NULL) { 284 if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { 285 nvlist_free(config); 286 return (-1); 287 } 288 ve->ve_guid = top_guid; 289 ve->ve_next = pe->pe_vdevs; 290 pe->pe_vdevs = ve; 291 } 292 293 /* 294 * Third, see if we have a config with a matching transaction group. If 295 * so, then we do nothing. Otherwise, add it to the list of known 296 * configs. 297 */ 298 for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { 299 if (ce->ce_txg == txg) 300 break; 301 } 302 303 if (ce == NULL) { 304 if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { 305 nvlist_free(config); 306 return (-1); 307 } 308 ce->ce_txg = txg; 309 ce->ce_config = config; 310 ce->ce_next = ve->ve_configs; 311 ve->ve_configs = ce; 312 } else { 313 nvlist_free(config); 314 } 315 316 /* 317 * At this point we've successfully added our config to the list of 318 * known configs. The last thing to do is add the vdev guid -> path 319 * mappings so that we can fix up the configuration as necessary before 320 * doing the import. 321 */ 322 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) 323 return (-1); 324 325 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { 326 free(ne); 327 return (-1); 328 } 329 330 ne->ne_guid = vdev_guid; 331 ne->ne_next = pl->names; 332 pl->names = ne; 333 334 return (0); 335} 336 337/* 338 * Returns true if the named pool matches the given GUID. 339 */ 340static int 341pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid, 342 boolean_t *isactive) 343{ 344 zpool_handle_t *zhp; 345 uint64_t theguid; 346 347 if (zpool_open_silent(hdl, name, &zhp) != 0) 348 return (-1); 349 350 if (zhp == NULL) { 351 *isactive = B_FALSE; 352 return (0); 353 } 354 355 verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, 356 &theguid) == 0); 357 358 zpool_close(zhp); 359 360 *isactive = (theguid == guid); 361 return (0); 362} 363 364static nvlist_t * 365refresh_config(libzfs_handle_t *hdl, nvlist_t *config) 366{ 367 nvlist_t *nvl; 368 zfs_cmd_t zc = { 0 }; 369 int err; 370 371 if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) 372 return (NULL); 373 374 if (zcmd_alloc_dst_nvlist(hdl, &zc, 375 zc.zc_nvlist_conf_size * 2) != 0) { 376 zcmd_free_nvlists(&zc); 377 return (NULL); 378 } 379 380 while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, 381 &zc)) != 0 && errno == ENOMEM) { 382 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { 383 zcmd_free_nvlists(&zc); 384 return (NULL); 385 } 386 } 387 388 if (err) { 389 zcmd_free_nvlists(&zc); 390 return (NULL); 391 } 392 393 if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) { 394 zcmd_free_nvlists(&zc); 395 return (NULL); 396 } 397 398 zcmd_free_nvlists(&zc); 399 return (nvl); 400} 401 402/* 403 * Determine if the vdev id is a hole in the namespace. 404 */ 405boolean_t 406vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) 407{ 408 for (int c = 0; c < holes; c++) { 409 410 /* Top-level is a hole */ 411 if (hole_array[c] == id) 412 return (B_TRUE); 413 } 414 return (B_FALSE); 415} 416 417/* 418 * Convert our list of pools into the definitive set of configurations. We 419 * start by picking the best config for each toplevel vdev. Once that's done, 420 * we assemble the toplevel vdevs into a full config for the pool. We make a 421 * pass to fix up any incorrect paths, and then add it to the main list to 422 * return to the user. 423 */ 424static nvlist_t * 425get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) 426{ 427 pool_entry_t *pe; 428 vdev_entry_t *ve; 429 config_entry_t *ce; 430 nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot; 431 nvlist_t **spares, **l2cache; 432 uint_t i, nspares, nl2cache; 433 boolean_t config_seen; 434 uint64_t best_txg; 435 char *name, *hostname; 436 uint64_t version, guid; 437 uint_t children = 0; 438 nvlist_t **child = NULL; 439 uint_t holes; 440 uint64_t *hole_array, max_id; 441 uint_t c; 442 boolean_t isactive; 443 uint64_t hostid; 444 nvlist_t *nvl; 445 boolean_t found_one = B_FALSE; 446 boolean_t valid_top_config = B_FALSE; 447 448 if (nvlist_alloc(&ret, 0, 0) != 0) 449 goto nomem; 450 451 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { 452 uint64_t id, max_txg = 0; 453 454 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) 455 goto nomem; 456 config_seen = B_FALSE; 457 458 /* 459 * Iterate over all toplevel vdevs. Grab the pool configuration 460 * from the first one we find, and then go through the rest and 461 * add them as necessary to the 'vdevs' member of the config. 462 */ 463 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { 464 465 /* 466 * Determine the best configuration for this vdev by 467 * selecting the config with the latest transaction 468 * group. 469 */ 470 best_txg = 0; 471 for (ce = ve->ve_configs; ce != NULL; 472 ce = ce->ce_next) { 473 474 if (ce->ce_txg > best_txg) { 475 tmp = ce->ce_config; 476 best_txg = ce->ce_txg; 477 } 478 } 479 480 /* 481 * We rely on the fact that the max txg for the 482 * pool will contain the most up-to-date information 483 * about the valid top-levels in the vdev namespace. 484 */ 485 if (best_txg > max_txg) { 486 (void) nvlist_remove(config, 487 ZPOOL_CONFIG_VDEV_CHILDREN, 488 DATA_TYPE_UINT64); 489 (void) nvlist_remove(config, 490 ZPOOL_CONFIG_HOLE_ARRAY, 491 DATA_TYPE_UINT64_ARRAY); 492 493 max_txg = best_txg; 494 hole_array = NULL; 495 holes = 0; 496 max_id = 0; 497 valid_top_config = B_FALSE; 498 499 if (nvlist_lookup_uint64(tmp, 500 ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { 501 verify(nvlist_add_uint64(config, 502 ZPOOL_CONFIG_VDEV_CHILDREN, 503 max_id) == 0); 504 valid_top_config = B_TRUE; 505 } 506 507 if (nvlist_lookup_uint64_array(tmp, 508 ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, 509 &holes) == 0) { 510 verify(nvlist_add_uint64_array(config, 511 ZPOOL_CONFIG_HOLE_ARRAY, 512 hole_array, holes) == 0); 513 } 514 } 515 516 if (!config_seen) { 517 /* 518 * Copy the relevant pieces of data to the pool 519 * configuration: 520 * 521 * version 522 * pool guid 523 * name 524 * pool state 525 * hostid (if available) 526 * hostname (if available) 527 */ 528 uint64_t state; 529 530 verify(nvlist_lookup_uint64(tmp, 531 ZPOOL_CONFIG_VERSION, &version) == 0); 532 if (nvlist_add_uint64(config, 533 ZPOOL_CONFIG_VERSION, version) != 0) 534 goto nomem; 535 verify(nvlist_lookup_uint64(tmp, 536 ZPOOL_CONFIG_POOL_GUID, &guid) == 0); 537 if (nvlist_add_uint64(config, 538 ZPOOL_CONFIG_POOL_GUID, guid) != 0) 539 goto nomem; 540 verify(nvlist_lookup_string(tmp, 541 ZPOOL_CONFIG_POOL_NAME, &name) == 0); 542 if (nvlist_add_string(config, 543 ZPOOL_CONFIG_POOL_NAME, name) != 0) 544 goto nomem; 545 verify(nvlist_lookup_uint64(tmp, 546 ZPOOL_CONFIG_POOL_STATE, &state) == 0); 547 if (nvlist_add_uint64(config, 548 ZPOOL_CONFIG_POOL_STATE, state) != 0) 549 goto nomem; 550 hostid = 0; 551 if (nvlist_lookup_uint64(tmp, 552 ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 553 if (nvlist_add_uint64(config, 554 ZPOOL_CONFIG_HOSTID, hostid) != 0) 555 goto nomem; 556 verify(nvlist_lookup_string(tmp, 557 ZPOOL_CONFIG_HOSTNAME, 558 &hostname) == 0); 559 if (nvlist_add_string(config, 560 ZPOOL_CONFIG_HOSTNAME, 561 hostname) != 0) 562 goto nomem; 563 } 564 565 config_seen = B_TRUE; 566 } 567 568 /* 569 * Add this top-level vdev to the child array. 570 */ 571 verify(nvlist_lookup_nvlist(tmp, 572 ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); 573 verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, 574 &id) == 0); 575 576 if (id >= children) { 577 nvlist_t **newchild; 578 579 newchild = zfs_alloc(hdl, (id + 1) * 580 sizeof (nvlist_t *)); 581 if (newchild == NULL) 582 goto nomem; 583 584 for (c = 0; c < children; c++) 585 newchild[c] = child[c]; 586 587 free(child); 588 child = newchild; 589 children = id + 1; 590 } 591 if (nvlist_dup(nvtop, &child[id], 0) != 0) 592 goto nomem; 593 594 } 595 596 /* 597 * If we have information about all the top-levels then 598 * clean up the nvlist which we've constructed. This 599 * means removing any extraneous devices that are 600 * beyond the valid range or adding devices to the end 601 * of our array which appear to be missing. 602 */ 603 if (valid_top_config) { 604 if (max_id < children) { 605 for (c = max_id; c < children; c++) 606 nvlist_free(child[c]); 607 children = max_id; 608 } else if (max_id > children) { 609 nvlist_t **newchild; 610 611 newchild = zfs_alloc(hdl, (max_id) * 612 sizeof (nvlist_t *)); 613 if (newchild == NULL) 614 goto nomem; 615 616 for (c = 0; c < children; c++) 617 newchild[c] = child[c]; 618 619 free(child); 620 child = newchild; 621 children = max_id; 622 } 623 } 624 625 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 626 &guid) == 0); 627 628 /* 629 * The vdev namespace may contain holes as a result of 630 * device removal. We must add them back into the vdev 631 * tree before we process any missing devices. 632 */ 633 if (holes > 0) { 634 ASSERT(valid_top_config); 635 636 for (c = 0; c < children; c++) { 637 nvlist_t *holey; 638 639 if (child[c] != NULL || 640 !vdev_is_hole(hole_array, holes, c)) 641 continue; 642 643 if (nvlist_alloc(&holey, NV_UNIQUE_NAME, 644 0) != 0) 645 goto nomem; 646 647 /* 648 * Holes in the namespace are treated as 649 * "hole" top-level vdevs and have a 650 * special flag set on them. 651 */ 652 if (nvlist_add_string(holey, 653 ZPOOL_CONFIG_TYPE, 654 VDEV_TYPE_HOLE) != 0 || 655 nvlist_add_uint64(holey, 656 ZPOOL_CONFIG_ID, c) != 0 || 657 nvlist_add_uint64(holey, 658 ZPOOL_CONFIG_GUID, 0ULL) != 0) 659 goto nomem; 660 child[c] = holey; 661 } 662 } 663 664 /* 665 * Look for any missing top-level vdevs. If this is the case, 666 * create a faked up 'missing' vdev as a placeholder. We cannot 667 * simply compress the child array, because the kernel performs 668 * certain checks to make sure the vdev IDs match their location 669 * in the configuration. 670 */ 671 for (c = 0; c < children; c++) { 672 if (child[c] == NULL) { 673 nvlist_t *missing; 674 if (nvlist_alloc(&missing, NV_UNIQUE_NAME, 675 0) != 0) 676 goto nomem; 677 if (nvlist_add_string(missing, 678 ZPOOL_CONFIG_TYPE, 679 VDEV_TYPE_MISSING) != 0 || 680 nvlist_add_uint64(missing, 681 ZPOOL_CONFIG_ID, c) != 0 || 682 nvlist_add_uint64(missing, 683 ZPOOL_CONFIG_GUID, 0ULL) != 0) { 684 nvlist_free(missing); 685 goto nomem; 686 } 687 child[c] = missing; 688 } 689 } 690 691 /* 692 * Put all of this pool's top-level vdevs into a root vdev. 693 */ 694 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) 695 goto nomem; 696 if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 697 VDEV_TYPE_ROOT) != 0 || 698 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || 699 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || 700 nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 701 child, children) != 0) { 702 nvlist_free(nvroot); 703 goto nomem; 704 } 705 706 for (c = 0; c < children; c++) 707 nvlist_free(child[c]); 708 free(child); 709 children = 0; 710 child = NULL; 711 712 /* 713 * Go through and fix up any paths and/or devids based on our 714 * known list of vdev GUID -> path mappings. 715 */ 716 if (fix_paths(nvroot, pl->names) != 0) { 717 nvlist_free(nvroot); 718 goto nomem; 719 } 720 721 /* 722 * Add the root vdev to this pool's configuration. 723 */ 724 if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 725 nvroot) != 0) { 726 nvlist_free(nvroot); 727 goto nomem; 728 } 729 nvlist_free(nvroot); 730 731 /* 732 * zdb uses this path to report on active pools that were 733 * imported or created using -R. 734 */ 735 if (active_ok) 736 goto add_pool; 737 738 /* 739 * Determine if this pool is currently active, in which case we 740 * can't actually import it. 741 */ 742 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 743 &name) == 0); 744 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 745 &guid) == 0); 746 747 if (pool_active(hdl, name, guid, &isactive) != 0) 748 goto error; 749 750 if (isactive) { 751 nvlist_free(config); 752 config = NULL; 753 continue; 754 } 755 756 if ((nvl = refresh_config(hdl, config)) == NULL) { 757 nvlist_free(config); 758 config = NULL; 759 continue; 760 } 761 762 nvlist_free(config); 763 config = nvl; 764 765 /* 766 * Go through and update the paths for spares, now that we have 767 * them. 768 */ 769 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 770 &nvroot) == 0); 771 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 772 &spares, &nspares) == 0) { 773 for (i = 0; i < nspares; i++) { 774 if (fix_paths(spares[i], pl->names) != 0) 775 goto nomem; 776 } 777 } 778 779 /* 780 * Update the paths for l2cache devices. 781 */ 782 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 783 &l2cache, &nl2cache) == 0) { 784 for (i = 0; i < nl2cache; i++) { 785 if (fix_paths(l2cache[i], pl->names) != 0) 786 goto nomem; 787 } 788 } 789 790 /* 791 * Restore the original information read from the actual label. 792 */ 793 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID, 794 DATA_TYPE_UINT64); 795 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME, 796 DATA_TYPE_STRING); 797 if (hostid != 0) { 798 verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, 799 hostid) == 0); 800 verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, 801 hostname) == 0); 802 } 803 804add_pool: 805 /* 806 * Add this pool to the list of configs. 807 */ 808 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 809 &name) == 0); 810 if (nvlist_add_nvlist(ret, name, config) != 0) 811 goto nomem; 812 813 found_one = B_TRUE; 814 nvlist_free(config); 815 config = NULL; 816 } 817 818 if (!found_one) { 819 nvlist_free(ret); 820 ret = NULL; 821 } 822 823 return (ret); 824 825nomem: 826 (void) no_memory(hdl); 827error: 828 nvlist_free(config); 829 nvlist_free(ret); 830 for (c = 0; c < children; c++) 831 nvlist_free(child[c]); 832 free(child); 833 834 return (NULL); 835} 836 837/* 838 * Return the offset of the given label. 839 */ 840static uint64_t 841label_offset(uint64_t size, int l) 842{ 843 ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); 844 return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 845 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); 846} 847 848/* 849 * Given a file descriptor, read the label information and return an nvlist 850 * describing the configuration, if there is one. 851 */ 852int 853zpool_read_label(int fd, nvlist_t **config) 854{ 855 struct stat64 statbuf; 856 int l; 857 vdev_label_t *label; 858 uint64_t state, txg, size; 859 860 *config = NULL; 861 862 if (fstat64(fd, &statbuf) == -1) 863 return (0); 864 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); 865 866 if ((label = malloc(sizeof (vdev_label_t))) == NULL) 867 return (-1); 868 869 for (l = 0; l < VDEV_LABELS; l++) { 870 if (pread64(fd, label, sizeof (vdev_label_t), 871 label_offset(size, l)) != sizeof (vdev_label_t)) 872 continue; 873 874 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, 875 sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) 876 continue; 877 878 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 879 &state) != 0 || state > POOL_STATE_L2CACHE) { 880 nvlist_free(*config); 881 continue; 882 } 883 884 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 885 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 886 &txg) != 0 || txg == 0)) { 887 nvlist_free(*config); 888 continue; 889 } 890 891 free(label); 892 return (0); 893 } 894 895 free(label); 896 *config = NULL; 897 return (0); 898} 899 900/* 901 * Given a file descriptor, clear (zero) the label information. This function 902 * is currently only used in the appliance stack as part of the ZFS sysevent 903 * module. 904 */ 905int 906zpool_clear_label(int fd) 907{ 908 struct stat64 statbuf; 909 int l; 910 vdev_label_t *label; 911 uint64_t size; 912 913 if (fstat64(fd, &statbuf) == -1) 914 return (0); 915 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); 916 917 if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) 918 return (-1); 919 920 for (l = 0; l < VDEV_LABELS; l++) { 921 if (pwrite64(fd, label, sizeof (vdev_label_t), 922 label_offset(size, l)) != sizeof (vdev_label_t)) 923 return (-1); 924 } 925 926 free(label); 927 return (0); 928} 929 930/* 931 * Given a list of directories to search, find all pools stored on disk. This 932 * includes partial pools which are not available to import. If no args are 933 * given (argc is 0), then the default directory (/dev/dsk) is searched. 934 * poolname or guid (but not both) are provided by the caller when trying 935 * to import a specific pool. 936 */ 937static nvlist_t * 938zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) 939{ 940 int i, dirs = iarg->paths; 941 DIR *dirp = NULL; 942 struct dirent64 *dp; 943 char path[MAXPATHLEN]; 944 char *end, **dir = iarg->path; 945 size_t pathleft; 946 struct stat64 statbuf; 947 nvlist_t *ret = NULL, *config; 948 static char *default_dir = "/dev/dsk"; 949 int fd; 950 pool_list_t pools = { 0 }; 951 pool_entry_t *pe, *penext; 952 vdev_entry_t *ve, *venext; 953 config_entry_t *ce, *cenext; 954 name_entry_t *ne, *nenext; 955 956 if (dirs == 0) { 957 dirs = 1; 958 dir = &default_dir; 959 } 960 961 /* 962 * Go through and read the label configuration information from every 963 * possible device, organizing the information according to pool GUID 964 * and toplevel GUID. 965 */ 966 for (i = 0; i < dirs; i++) { 967 char *rdsk; 968 int dfd; 969 970 /* use realpath to normalize the path */ 971 if (realpath(dir[i], path) == 0) { 972 (void) zfs_error_fmt(hdl, EZFS_BADPATH, 973 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]); 974 goto error; 975 } 976 end = &path[strlen(path)]; 977 *end++ = '/'; 978 *end = 0; 979 pathleft = &path[sizeof (path)] - end; 980 981 /* 982 * Using raw devices instead of block devices when we're 983 * reading the labels skips a bunch of slow operations during 984 * close(2) processing, so we replace /dev/dsk with /dev/rdsk. 985 */ 986 if (strcmp(path, "/dev/dsk/") == 0) 987 rdsk = "/dev/rdsk/"; 988 else 989 rdsk = path; 990 991 if ((dfd = open64(rdsk, O_RDONLY)) < 0 || 992 (dirp = fdopendir(dfd)) == NULL) { 993 zfs_error_aux(hdl, strerror(errno)); 994 (void) zfs_error_fmt(hdl, EZFS_BADPATH, 995 dgettext(TEXT_DOMAIN, "cannot open '%s'"), 996 rdsk); 997 goto error; 998 } 999 1000 /* 1001 * This is not MT-safe, but we have no MT consumers of libzfs 1002 */ 1003 while ((dp = readdir64(dirp)) != NULL) { 1004 const char *name = dp->d_name; 1005 if (name[0] == '.' && 1006 (name[1] == 0 || (name[1] == '.' && name[2] == 0))) 1007 continue; 1008 1009 (void)snprintf(path, sizeof (path), "%s/%s", 1010 rdsk, dp->d_name); 1011 1012 if ((fd = open(path, O_RDONLY)) < 0) 1013 continue; 1014 1015 /* 1016 * Ignore failed stats. We only want regular 1017 * files, character devs and block devs. 1018 */ 1019 if (fstat64(fd, &statbuf) != 0 || 1020 (!S_ISREG(statbuf.st_mode) && 1021 !S_ISCHR(statbuf.st_mode) && 1022 !S_ISBLK(statbuf.st_mode))) { 1023 (void) close(fd); 1024 continue; 1025 } 1026 1027 if ((zpool_read_label(fd, &config)) != 0) { 1028 (void) close(fd); 1029 (void) no_memory(hdl); 1030 goto error; 1031 } 1032 1033 (void) close(fd); 1034 1035 if (config != NULL) { 1036 boolean_t matched = B_TRUE; 1037 1038 if (iarg->poolname != NULL) { 1039 char *pname; 1040 1041 matched = nvlist_lookup_string(config, 1042 ZPOOL_CONFIG_POOL_NAME, 1043 &pname) == 0 && 1044 strcmp(iarg->poolname, pname) == 0; 1045 } else if (iarg->guid != 0) { 1046 uint64_t this_guid; 1047 1048 matched = nvlist_lookup_uint64(config, 1049 ZPOOL_CONFIG_POOL_GUID, 1050 &this_guid) == 0 && 1051 iarg->guid == this_guid; 1052 } 1053 if (!matched) { 1054 nvlist_free(config); 1055 config = NULL; 1056 continue; 1057 } 1058 /* use the non-raw path for the config */ 1059 (void) strlcpy(end, name, pathleft); 1060 if (add_config(hdl, &pools, path, config) != 0) 1061 goto error; 1062 } 1063 } 1064 1065 (void) closedir(dirp); 1066 dirp = NULL; 1067 } 1068 1069 ret = get_configs(hdl, &pools, iarg->can_be_active); 1070 1071error: 1072 for (pe = pools.pools; pe != NULL; pe = penext) { 1073 penext = pe->pe_next; 1074 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { 1075 venext = ve->ve_next; 1076 for (ce = ve->ve_configs; ce != NULL; ce = cenext) { 1077 cenext = ce->ce_next; 1078 if (ce->ce_config) 1079 nvlist_free(ce->ce_config); 1080 free(ce); 1081 } 1082 free(ve); 1083 } 1084 free(pe); 1085 } 1086 1087 for (ne = pools.names; ne != NULL; ne = nenext) { 1088 nenext = ne->ne_next; 1089 if (ne->ne_name) 1090 free(ne->ne_name); 1091 free(ne); 1092 } 1093 1094 if (dirp) 1095 (void) closedir(dirp); 1096 1097 return (ret); 1098} 1099 1100nvlist_t * 1101zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) 1102{ 1103 importargs_t iarg = { 0 }; 1104 1105 iarg.paths = argc; 1106 iarg.path = argv; 1107 1108 return (zpool_find_import_impl(hdl, &iarg)); 1109} 1110 1111/* 1112 * Given a cache file, return the contents as a list of importable pools. 1113 * poolname or guid (but not both) are provided by the caller when trying 1114 * to import a specific pool. 1115 */ 1116nvlist_t * 1117zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile, 1118 char *poolname, uint64_t guid) 1119{ 1120 char *buf; 1121 int fd; 1122 struct stat64 statbuf; 1123 nvlist_t *raw, *src, *dst; 1124 nvlist_t *pools; 1125 nvpair_t *elem; 1126 char *name; 1127 uint64_t this_guid; 1128 boolean_t active; 1129 1130 verify(poolname == NULL || guid == 0); 1131 1132 if ((fd = open(cachefile, O_RDONLY)) < 0) { 1133 zfs_error_aux(hdl, "%s", strerror(errno)); 1134 (void) zfs_error(hdl, EZFS_BADCACHE, 1135 dgettext(TEXT_DOMAIN, "failed to open cache file")); 1136 return (NULL); 1137 } 1138 1139 if (fstat64(fd, &statbuf) != 0) { 1140 zfs_error_aux(hdl, "%s", strerror(errno)); 1141 (void) close(fd); 1142 (void) zfs_error(hdl, EZFS_BADCACHE, 1143 dgettext(TEXT_DOMAIN, "failed to get size of cache file")); 1144 return (NULL); 1145 } 1146 1147 if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) { 1148 (void) close(fd); 1149 return (NULL); 1150 } 1151 1152 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { 1153 (void) close(fd); 1154 free(buf); 1155 (void) zfs_error(hdl, EZFS_BADCACHE, 1156 dgettext(TEXT_DOMAIN, 1157 "failed to read cache file contents")); 1158 return (NULL); 1159 } 1160 1161 (void) close(fd); 1162 1163 if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { 1164 free(buf); 1165 (void) zfs_error(hdl, EZFS_BADCACHE, 1166 dgettext(TEXT_DOMAIN, 1167 "invalid or corrupt cache file contents")); 1168 return (NULL); 1169 } 1170 1171 free(buf); 1172 1173 /* 1174 * Go through and get the current state of the pools and refresh their 1175 * state. 1176 */ 1177 if (nvlist_alloc(&pools, 0, 0) != 0) { 1178 (void) no_memory(hdl); 1179 nvlist_free(raw); 1180 return (NULL); 1181 } 1182 1183 elem = NULL; 1184 while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) { 1185 verify(nvpair_value_nvlist(elem, &src) == 0); 1186 1187 verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME, 1188 &name) == 0); 1189 if (poolname != NULL && strcmp(poolname, name) != 0) 1190 continue; 1191 1192 verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID, 1193 &this_guid) == 0); 1194 if (guid != 0) { 1195 verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID, 1196 &this_guid) == 0); 1197 if (guid != this_guid) 1198 continue; 1199 } 1200 1201 if (pool_active(hdl, name, this_guid, &active) != 0) { 1202 nvlist_free(raw); 1203 nvlist_free(pools); 1204 return (NULL); 1205 } 1206 1207 if (active) 1208 continue; 1209 1210 if ((dst = refresh_config(hdl, src)) == NULL) { 1211 nvlist_free(raw); 1212 nvlist_free(pools); 1213 return (NULL); 1214 } 1215 1216 if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { 1217 (void) no_memory(hdl); 1218 nvlist_free(dst); 1219 nvlist_free(raw); 1220 nvlist_free(pools); 1221 return (NULL); 1222 } 1223 nvlist_free(dst); 1224 } 1225 1226 nvlist_free(raw); 1227 return (pools); 1228} 1229 1230static int 1231name_or_guid_exists(zpool_handle_t *zhp, void *data) 1232{ 1233 importargs_t *import = data; 1234 int found = 0; 1235 1236 if (import->poolname != NULL) { 1237 char *pool_name; 1238 1239 verify(nvlist_lookup_string(zhp->zpool_config, 1240 ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); 1241 if (strcmp(pool_name, import->poolname) == 0) 1242 found = 1; 1243 } else { 1244 uint64_t pool_guid; 1245 1246 verify(nvlist_lookup_uint64(zhp->zpool_config, 1247 ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); 1248 if (pool_guid == import->guid) 1249 found = 1; 1250 } 1251 1252 zpool_close(zhp); 1253 return (found); 1254} 1255 1256nvlist_t * 1257zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) 1258{ 1259 verify(import->poolname == NULL || import->guid == 0); 1260 1261 if (import->unique) 1262 import->exists = zpool_iter(hdl, name_or_guid_exists, import); 1263 1264 if (import->cachefile != NULL) 1265 return (zpool_find_import_cached(hdl, import->cachefile, 1266 import->poolname, import->guid)); 1267 1268 return (zpool_find_import_impl(hdl, import)); 1269} 1270 1271boolean_t 1272find_guid(nvlist_t *nv, uint64_t guid) 1273{ 1274 uint64_t tmp; 1275 nvlist_t **child; 1276 uint_t c, children; 1277 1278 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); 1279 if (tmp == guid) 1280 return (B_TRUE); 1281 1282 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 1283 &child, &children) == 0) { 1284 for (c = 0; c < children; c++) 1285 if (find_guid(child[c], guid)) 1286 return (B_TRUE); 1287 } 1288 1289 return (B_FALSE); 1290} 1291 1292typedef struct aux_cbdata { 1293 const char *cb_type; 1294 uint64_t cb_guid; 1295 zpool_handle_t *cb_zhp; 1296} aux_cbdata_t; 1297 1298static int 1299find_aux(zpool_handle_t *zhp, void *data) 1300{ 1301 aux_cbdata_t *cbp = data; 1302 nvlist_t **list; 1303 uint_t i, count; 1304 uint64_t guid; 1305 nvlist_t *nvroot; 1306 1307 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, 1308 &nvroot) == 0); 1309 1310 if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type, 1311 &list, &count) == 0) { 1312 for (i = 0; i < count; i++) { 1313 verify(nvlist_lookup_uint64(list[i], 1314 ZPOOL_CONFIG_GUID, &guid) == 0); 1315 if (guid == cbp->cb_guid) { 1316 cbp->cb_zhp = zhp; 1317 return (1); 1318 } 1319 } 1320 } 1321 1322 zpool_close(zhp); 1323 return (0); 1324} 1325 1326/* 1327 * Determines if the pool is in use. If so, it returns true and the state of 1328 * the pool as well as the name of the pool. Both strings are allocated and 1329 * must be freed by the caller. 1330 */ 1331int 1332zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, 1333 boolean_t *inuse) 1334{ 1335 nvlist_t *config; 1336 char *name; 1337 boolean_t ret; 1338 uint64_t guid, vdev_guid; 1339 zpool_handle_t *zhp; 1340 nvlist_t *pool_config; 1341 uint64_t stateval, isspare; 1342 aux_cbdata_t cb = { 0 }; 1343 boolean_t isactive; 1344 1345 *inuse = B_FALSE; 1346 1347 if (zpool_read_label(fd, &config) != 0) { 1348 (void) no_memory(hdl); 1349 return (-1); 1350 } 1351 1352 if (config == NULL) 1353 return (0); 1354 1355 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 1356 &stateval) == 0); 1357 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, 1358 &vdev_guid) == 0); 1359 1360 if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) { 1361 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 1362 &name) == 0); 1363 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 1364 &guid) == 0); 1365 } 1366 1367 switch (stateval) { 1368 case POOL_STATE_EXPORTED: 1369 ret = B_TRUE; 1370 break; 1371 1372 case POOL_STATE_ACTIVE: 1373 /* 1374 * For an active pool, we have to determine if it's really part 1375 * of a currently active pool (in which case the pool will exist 1376 * and the guid will be the same), or whether it's part of an 1377 * active pool that was disconnected without being explicitly 1378 * exported. 1379 */ 1380 if (pool_active(hdl, name, guid, &isactive) != 0) { 1381 nvlist_free(config); 1382 return (-1); 1383 } 1384 1385 if (isactive) { 1386 /* 1387 * Because the device may have been removed while 1388 * offlined, we only report it as active if the vdev is 1389 * still present in the config. Otherwise, pretend like 1390 * it's not in use. 1391 */ 1392 if ((zhp = zpool_open_canfail(hdl, name)) != NULL && 1393 (pool_config = zpool_get_config(zhp, NULL)) 1394 != NULL) { 1395 nvlist_t *nvroot; 1396 1397 verify(nvlist_lookup_nvlist(pool_config, 1398 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1399 ret = find_guid(nvroot, vdev_guid); 1400 } else { 1401 ret = B_FALSE; 1402 } 1403 1404 /* 1405 * If this is an active spare within another pool, we 1406 * treat it like an unused hot spare. This allows the 1407 * user to create a pool with a hot spare that currently 1408 * in use within another pool. Since we return B_TRUE, 1409 * libdiskmgt will continue to prevent generic consumers 1410 * from using the device. 1411 */ 1412 if (ret && nvlist_lookup_uint64(config, 1413 ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) 1414 stateval = POOL_STATE_SPARE; 1415 1416 if (zhp != NULL) 1417 zpool_close(zhp); 1418 } else { 1419 stateval = POOL_STATE_POTENTIALLY_ACTIVE; 1420 ret = B_TRUE; 1421 } 1422 break; 1423 1424 case POOL_STATE_SPARE: 1425 /* 1426 * For a hot spare, it can be either definitively in use, or 1427 * potentially active. To determine if it's in use, we iterate 1428 * over all pools in the system and search for one with a spare 1429 * with a matching guid. 1430 * 1431 * Due to the shared nature of spares, we don't actually report 1432 * the potentially active case as in use. This means the user 1433 * can freely create pools on the hot spares of exported pools, 1434 * but to do otherwise makes the resulting code complicated, and 1435 * we end up having to deal with this case anyway. 1436 */ 1437 cb.cb_zhp = NULL; 1438 cb.cb_guid = vdev_guid; 1439 cb.cb_type = ZPOOL_CONFIG_SPARES; 1440 if (zpool_iter(hdl, find_aux, &cb) == 1) { 1441 name = (char *)zpool_get_name(cb.cb_zhp); 1442 ret = TRUE; 1443 } else { 1444 ret = FALSE; 1445 } 1446 break; 1447 1448 case POOL_STATE_L2CACHE: 1449 1450 /* 1451 * Check if any pool is currently using this l2cache device. 1452 */ 1453 cb.cb_zhp = NULL; 1454 cb.cb_guid = vdev_guid; 1455 cb.cb_type = ZPOOL_CONFIG_L2CACHE; 1456 if (zpool_iter(hdl, find_aux, &cb) == 1) { 1457 name = (char *)zpool_get_name(cb.cb_zhp); 1458 ret = TRUE; 1459 } else { 1460 ret = FALSE; 1461 } 1462 break; 1463 1464 default: 1465 ret = B_FALSE; 1466 } 1467 1468 1469 if (ret) { 1470 if ((*namestr = zfs_strdup(hdl, name)) == NULL) { 1471 if (cb.cb_zhp) 1472 zpool_close(cb.cb_zhp); 1473 nvlist_free(config); 1474 return (-1); 1475 } 1476 *state = (pool_state_t)stateval; 1477 } 1478 1479 if (cb.cb_zhp) 1480 zpool_close(cb.cb_zhp); 1481 1482 nvlist_free(config); 1483 *inuse = ret; 1484 return (0); 1485} 1486