libzfs_import.c revision 278177
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28/*
29 * Pool import support functions.
30 *
31 * To import a pool, we rely on reading the configuration information from the
32 * ZFS label of each device.  If we successfully read the label, then we
33 * organize the configuration information in the following hierarchy:
34 *
35 * 	pool guid -> toplevel vdev guid -> label txg
36 *
37 * Duplicate entries matching this same tuple will be discarded.  Once we have
38 * examined every device, we pick the best label txg config for each toplevel
39 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
40 * update any paths that have changed.  Finally, we attempt to import the pool
41 * using our derived config, and record the results.
42 */
43
44#include <ctype.h>
45#include <devid.h>
46#include <dirent.h>
47#include <errno.h>
48#include <libintl.h>
49#include <stddef.h>
50#include <stdlib.h>
51#include <string.h>
52#include <sys/stat.h>
53#include <unistd.h>
54#include <fcntl.h>
55#include <thread_pool.h>
56#include <libgeom.h>
57
58#include <sys/vdev_impl.h>
59
60#include "libzfs.h"
61#include "libzfs_impl.h"
62
63/*
64 * Intermediate structures used to gather configuration information.
65 */
/*
 * One label configuration seen for a toplevel vdev.  Entries for the same
 * vdev are chained through ce_next and are distinguished by ce_txg.
 */
typedef struct config_entry {
	/* value of ZPOOL_CONFIG_POOL_TXG read from the label */
	uint64_t		ce_txg;
	/* the label nvlist handed to add_config() */
	nvlist_t		*ce_config;
	/* next config recorded for the same toplevel vdev */
	struct config_entry	*ce_next;
} config_entry_t;
71
/*
 * One toplevel vdev of a pool, identified by its guid, together with every
 * distinct-txg config that has been collected for it.
 */
typedef struct vdev_entry {
	/* value of ZPOOL_CONFIG_TOP_GUID read from the label */
	uint64_t		ve_guid;
	/* list of configs seen for this toplevel vdev */
	config_entry_t		*ve_configs;
	/* next toplevel vdev of the same pool */
	struct vdev_entry	*ve_next;
} vdev_entry_t;
77
/*
 * One pool discovered while scanning, identified by its guid, together with
 * the toplevel vdevs found for it so far.
 */
typedef struct pool_entry {
	/* value of ZPOOL_CONFIG_POOL_GUID read from the label */
	uint64_t		pe_guid;
	/* list of toplevel vdevs seen for this pool */
	vdev_entry_t		*pe_vdevs;
	/* next discovered pool */
	struct pool_entry	*pe_next;
} pool_entry_t;
83
/*
 * A vdev guid -> device path mapping.  fix_paths() consults the list of
 * these to rewrite the path stored in a leaf vdev's config.
 */
typedef struct name_entry {
	/* copy of the path the label was read from */
	char			*ne_name;
	/* value of ZPOOL_CONFIG_GUID read from that label */
	uint64_t		ne_guid;
	/* next mapping in the list */
	struct name_entry	*ne_next;
} name_entry_t;
89
/*
 * Everything gathered during a scan: the pool -> vdev -> config hierarchy
 * and the flat list of guid -> path mappings.
 */
typedef struct pool_list {
	/* head of the list of discovered pools */
	pool_entry_t		*pools;
	/* head of the list of guid -> path mappings */
	name_entry_t		*names;
} pool_list_t;
94
/*
 * Produce the encoded device id string for the device at 'path'.
 *
 * Returns NULL when the device cannot be opened, when no devid or minor
 * name can be obtained for it, or when the library is built without
 * have_devid.  A non-NULL result is released with devid_str_free().
 */
static char *
get_devid(const char *path)
{
#ifdef have_devid
	ddi_devid_t id;
	char *minor_name = NULL;
	char *encoded = NULL;
	int devfd;

	devfd = open(path, O_RDONLY);
	if (devfd < 0)
		return (NULL);

	if (devid_get(devfd, &id) != 0) {
		(void) close(devfd);
		return (NULL);
	}

	if (devid_get_minor_name(devfd, &minor_name) == 0)
		encoded = devid_str_encode(id, minor_name);
	if (minor_name != NULL)
		devid_str_free(minor_name);
	devid_free(id);

	(void) close(devfd);

	return (encoded);
#else
	return (NULL);
#endif
}
122
123
124/*
125 * Go through and fix up any path and/or devid information for the given vdev
126 * configuration.
127 */
128static int
129fix_paths(nvlist_t *nv, name_entry_t *names)
130{
131	nvlist_t **child;
132	uint_t c, children;
133	uint64_t guid;
134	name_entry_t *ne, *best;
135	char *path, *devid;
136	int matched;
137
138	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
139	    &child, &children) == 0) {
140		for (c = 0; c < children; c++)
141			if (fix_paths(child[c], names) != 0)
142				return (-1);
143		return (0);
144	}
145
146	/*
147	 * This is a leaf (file or disk) vdev.  In either case, go through
148	 * the name list and see if we find a matching guid.  If so, replace
149	 * the path and see if we can calculate a new devid.
150	 *
151	 * There may be multiple names associated with a particular guid, in
152	 * which case we have overlapping slices or multiple paths to the same
153	 * disk.  If this is the case, then we want to pick the path that is
154	 * the most similar to the original, where "most similar" is the number
155	 * of matching characters starting from the end of the path.  This will
156	 * preserve slice numbers even if the disks have been reorganized, and
157	 * will also catch preferred disk names if multiple paths exist.
158	 */
159	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
160	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
161		path = NULL;
162
163	matched = 0;
164	best = NULL;
165	for (ne = names; ne != NULL; ne = ne->ne_next) {
166		if (ne->ne_guid == guid) {
167			const char *src, *dst;
168			int count;
169
170			if (path == NULL) {
171				best = ne;
172				break;
173			}
174
175			src = ne->ne_name + strlen(ne->ne_name) - 1;
176			dst = path + strlen(path) - 1;
177			for (count = 0; src >= ne->ne_name && dst >= path;
178			    src--, dst--, count++)
179				if (*src != *dst)
180					break;
181
182			/*
183			 * At this point, 'count' is the number of characters
184			 * matched from the end.
185			 */
186			if (count > matched || best == NULL) {
187				best = ne;
188				matched = count;
189			}
190		}
191	}
192
193	if (best == NULL)
194		return (0);
195
196	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
197		return (-1);
198
199	if ((devid = get_devid(best->ne_name)) == NULL) {
200		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
201	} else {
202		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
203			devid_str_free(devid);
204			return (-1);
205		}
206		devid_str_free(devid);
207	}
208
209	return (0);
210}
211
212/*
213 * Add the given configuration to the list of known devices.
214 */
215static int
216add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
217    nvlist_t *config)
218{
219	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
220	pool_entry_t *pe;
221	vdev_entry_t *ve;
222	config_entry_t *ce;
223	name_entry_t *ne;
224
225	/*
226	 * If this is a hot spare not currently in use or level 2 cache
227	 * device, add it to the list of names to translate, but don't do
228	 * anything else.
229	 */
230	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
231	    &state) == 0 &&
232	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
233	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
234		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
235			return (-1);
236
237		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
238			free(ne);
239			return (-1);
240		}
241		ne->ne_guid = vdev_guid;
242		ne->ne_next = pl->names;
243		pl->names = ne;
244		return (0);
245	}
246
247	/*
248	 * If we have a valid config but cannot read any of these fields, then
249	 * it means we have a half-initialized label.  In vdev_label_init()
250	 * we write a label with txg == 0 so that we can identify the device
251	 * in case the user refers to the same disk later on.  If we fail to
252	 * create the pool, we'll be left with a label in this state
253	 * which should not be considered part of a valid pool.
254	 */
255	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
256	    &pool_guid) != 0 ||
257	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
258	    &vdev_guid) != 0 ||
259	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
260	    &top_guid) != 0 ||
261	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
262	    &txg) != 0 || txg == 0) {
263		nvlist_free(config);
264		return (0);
265	}
266
267	/*
268	 * First, see if we know about this pool.  If not, then add it to the
269	 * list of known pools.
270	 */
271	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
272		if (pe->pe_guid == pool_guid)
273			break;
274	}
275
276	if (pe == NULL) {
277		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
278			nvlist_free(config);
279			return (-1);
280		}
281		pe->pe_guid = pool_guid;
282		pe->pe_next = pl->pools;
283		pl->pools = pe;
284	}
285
286	/*
287	 * Second, see if we know about this toplevel vdev.  Add it if its
288	 * missing.
289	 */
290	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
291		if (ve->ve_guid == top_guid)
292			break;
293	}
294
295	if (ve == NULL) {
296		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
297			nvlist_free(config);
298			return (-1);
299		}
300		ve->ve_guid = top_guid;
301		ve->ve_next = pe->pe_vdevs;
302		pe->pe_vdevs = ve;
303	}
304
305	/*
306	 * Third, see if we have a config with a matching transaction group.  If
307	 * so, then we do nothing.  Otherwise, add it to the list of known
308	 * configs.
309	 */
310	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
311		if (ce->ce_txg == txg)
312			break;
313	}
314
315	if (ce == NULL) {
316		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
317			nvlist_free(config);
318			return (-1);
319		}
320		ce->ce_txg = txg;
321		ce->ce_config = config;
322		ce->ce_next = ve->ve_configs;
323		ve->ve_configs = ce;
324	} else {
325		nvlist_free(config);
326	}
327
328	/*
329	 * At this point we've successfully added our config to the list of
330	 * known configs.  The last thing to do is add the vdev guid -> path
331	 * mappings so that we can fix up the configuration as necessary before
332	 * doing the import.
333	 */
334	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
335		return (-1);
336
337	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
338		free(ne);
339		return (-1);
340	}
341
342	ne->ne_guid = vdev_guid;
343	ne->ne_next = pl->names;
344	pl->names = ne;
345
346	return (0);
347}
348
349/*
350 * Returns true if the named pool matches the given GUID.
351 */
352static int
353pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
354    boolean_t *isactive)
355{
356	zpool_handle_t *zhp;
357	uint64_t theguid;
358
359	if (zpool_open_silent(hdl, name, &zhp) != 0)
360		return (-1);
361
362	if (zhp == NULL) {
363		*isactive = B_FALSE;
364		return (0);
365	}
366
367	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
368	    &theguid) == 0);
369
370	zpool_close(zhp);
371
372	*isactive = (theguid == guid);
373	return (0);
374}
375
376static nvlist_t *
377refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
378{
379	nvlist_t *nvl;
380	zfs_cmd_t zc = { 0 };
381	int err;
382
383	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
384		return (NULL);
385
386	if (zcmd_alloc_dst_nvlist(hdl, &zc,
387	    zc.zc_nvlist_conf_size * 2) != 0) {
388		zcmd_free_nvlists(&zc);
389		return (NULL);
390	}
391
392	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
393	    &zc)) != 0 && errno == ENOMEM) {
394		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
395			zcmd_free_nvlists(&zc);
396			return (NULL);
397		}
398	}
399
400	if (err) {
401		zcmd_free_nvlists(&zc);
402		return (NULL);
403	}
404
405	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
406		zcmd_free_nvlists(&zc);
407		return (NULL);
408	}
409
410	zcmd_free_nvlists(&zc);
411	return (nvl);
412}
413
414/*
415 * Determine if the vdev id is a hole in the namespace.
416 */
417boolean_t
418vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
419{
420	for (int c = 0; c < holes; c++) {
421
422		/* Top-level is a hole */
423		if (hole_array[c] == id)
424			return (B_TRUE);
425	}
426	return (B_FALSE);
427}
428
/*
 * Convert our list of pools into the definitive set of configurations.  We
 * start by picking the best config for each toplevel vdev.  Once that's done,
 * we assemble the toplevel vdevs into a full config for the pool.  We make a
 * pass to fix up any incorrect paths, and then add it to the main list to
 * return to the user.
 *
 * If 'active_ok' is set, each assembled config is added as-is: the check for
 * an already-active pool and the refresh_config() round trip are both
 * skipped.  Otherwise pools that are active, and pools for which
 * refresh_config() returns NULL, are silently left out of the result.
 *
 * Returns an nvlist mapping pool name -> pool config, or NULL if no pool was
 * added or an error occurred.
 */
static nvlist_t *
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname;
	uint64_t guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint_t holes;
	uint64_t *hole_array, max_id;
	uint_t c;
	boolean_t isactive;
	uint64_t hostid;
	nvlist_t *nvl;
	boolean_t found_one = B_FALSE;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Iterate over all toplevel vdevs.  Grab the pool configuration
		 * from the first one we find, and then go through the rest and
		 * add them as necessary to the 'vdevs' member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 *
			 * NOTE(review): 'tmp' is only assigned when some
			 * config has txg > 0.  add_config() rejects txg == 0
			 * labels, so this presumably always holds -- confirm.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 *
			 * max_txg restarts at 0 for every pool, so (given
			 * best_txg > 0) this branch runs for the first vdev
			 * of each pool and resets hole_array, holes, max_id
			 * and valid_top_config before they are read below.
			 */
			if (best_txg > max_txg) {
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	comment (if available)
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state, version;
				char *comment = NULL;

				version = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version);
				guid = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid);
				name = fnvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME);
				fnvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name);

				if (nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_COMMENT, &comment) == 0)
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_COMMENT, comment);

				state = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state);

				/*
				 * hostid doubles as the "hostname is valid"
				 * flag when the label values are restored
				 * after refresh_config() further down.
				 */
				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid);
					hostname = fnvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME);
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME, hostname);
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/*
			 * Grow the array so that slot 'id' exists.  Slots
			 * that are never filled are later compared against
			 * NULL, so this presumably relies on zfs_alloc()
			 * returning zeroed memory -- confirm.
			 */
			if (id >= children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}
			/*
			 * NOTE(review): if two vdev entries reported the same
			 * id, the earlier child[id] would be overwritten here
			 * without being freed -- confirm this cannot happen.
			 */
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed. This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal. We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				/* Only empty slots listed as holes qualify. */
				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(holey);
					goto nomem;
				}
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs.  If this is the case,
		 * create a faked up 'missing' vdev as a placeholder.  We cannot
		 * simply compress the child array, because the kernel performs
		 * certain checks to make sure the vdev IDs match their location
		 * in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Release our copies of the children and reset the array so
		 * that the next pool starts empty and the error path below
		 * has nothing left to free.
		 */
		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		/* An active pool is not reported; move on to the next one. */
		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		/*
		 * A pool whose config cannot be refreshed is dropped rather
		 * than failing the whole scan.
		 */
		if ((nvl = refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		/* From here on work with the refreshed config. */
		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		found_one = B_TRUE;
		nvlist_free(config);
		config = NULL;
	}

	/* Report "nothing found" as NULL rather than as an empty list. */
	if (!found_one) {
		nvlist_free(ret);
		ret = NULL;
	}

	return (ret);

nomem:
	(void) no_memory(hdl);
error:
	/* Release the partially built config, result list and child array. */
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}
851
852/*
853 * Return the offset of the given label.
854 */
855static uint64_t
856label_offset(uint64_t size, int l)
857{
858	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
859	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
860	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
861}
862
863/*
864 * Given a file descriptor, read the label information and return an nvlist
865 * describing the configuration, if there is one.
866 */
867int
868zpool_read_label(int fd, nvlist_t **config)
869{
870	struct stat64 statbuf;
871	int l;
872	vdev_label_t *label;
873	uint64_t state, txg, size;
874
875	*config = NULL;
876
877	if (fstat64(fd, &statbuf) == -1)
878		return (0);
879	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
880
881	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
882		return (-1);
883
884	for (l = 0; l < VDEV_LABELS; l++) {
885		if (pread64(fd, label, sizeof (vdev_label_t),
886		    label_offset(size, l)) != sizeof (vdev_label_t))
887			continue;
888
889		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
890		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
891			continue;
892
893		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
894		    &state) != 0 || state > POOL_STATE_L2CACHE) {
895			nvlist_free(*config);
896			continue;
897		}
898
899		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
900		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
901		    &txg) != 0 || txg == 0)) {
902			nvlist_free(*config);
903			continue;
904		}
905
906		free(label);
907		return (0);
908	}
909
910	free(label);
911	*config = NULL;
912	return (0);
913}
914
/*
 * One candidate device node found while scanning a directory.  Nodes live
 * in an AVL tree ordered by slice_cache_compare() and are processed by
 * zpool_open_func().
 */
typedef struct rdsk_node {
	/* entry name, opened relative to rn_dfd */
	char *rn_name;
	/* descriptor of the directory containing rn_name */
	int rn_dfd;
	/* library handle, used for error reporting */
	libzfs_handle_t *rn_hdl;
	/* label config stored by zpool_open_func(); may be NULL */
	nvlist_t *rn_config;
	/* the tree this node belongs to */
	avl_tree_t *rn_avl;
	/* AVL linkage */
	avl_node_t rn_node;
	/* set when this node is known not to hold a pool; it is then skipped */
	boolean_t rn_nozpool;
} rdsk_node_t;
924
925static int
926slice_cache_compare(const void *arg1, const void *arg2)
927{
928	const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
929	const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
930	char *nm1slice, *nm2slice;
931	int rv;
932
933	/*
934	 * slices zero and two are the most likely to provide results,
935	 * so put those first
936	 */
937	nm1slice = strstr(nm1, "s0");
938	nm2slice = strstr(nm2, "s0");
939	if (nm1slice && !nm2slice) {
940		return (-1);
941	}
942	if (!nm1slice && nm2slice) {
943		return (1);
944	}
945	nm1slice = strstr(nm1, "s2");
946	nm2slice = strstr(nm2, "s2");
947	if (nm1slice && !nm2slice) {
948		return (-1);
949	}
950	if (!nm1slice && nm2slice) {
951		return (1);
952	}
953
954	rv = strcmp(nm1, nm2);
955	if (rv == 0)
956		return (0);
957	return (rv > 0 ? 1 : -1);
958}
959
#ifdef sun
/*
 * Mark the node named "<diskname><partno>" in tree 'r' as unable to hold a
 * pool when the partition is too small, i.e. when 'size' (in blocks of
 * 'blksz' bytes) is below SPA_MINDEVSIZE / blksz.  Does nothing when the
 * partition is large enough or no such node is in the tree.
 */
static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    diskaddr_t size, uint_t blksz)
{
	char slicename[MAXNAMELEN];
	rdsk_node_t key;
	rdsk_node_t *hit;

	(void) snprintf(slicename, MAXNAMELEN, "%s%u", diskname, partno);
	key.rn_name = slicename;

	/*
	 * protect against division by zero for disk labels that
	 * contain a bogus sector size
	 */
	if (blksz == 0)
		blksz = DEV_BSIZE;

	/* big enough to contain a zpool: leave the node alone */
	if (size >= (SPA_MINDEVSIZE / blksz))
		return;

	hit = avl_find(r, &key, NULL);
	if (hit != NULL)
		hit->rn_nozpool = B_TRUE;
}
#endif	/* sun */
984
985static void
986nozpool_all_slices(avl_tree_t *r, const char *sname)
987{
988#ifdef sun
989	char diskname[MAXNAMELEN];
990	char *ptr;
991	int i;
992
993	(void) strncpy(diskname, sname, MAXNAMELEN);
994	if (((ptr = strrchr(diskname, 's')) == NULL) &&
995	    ((ptr = strrchr(diskname, 'p')) == NULL))
996		return;
997	ptr[0] = 's';
998	ptr[1] = '\0';
999	for (i = 0; i < NDKMAP; i++)
1000		check_one_slice(r, diskname, i, 0, 1);
1001	ptr[0] = 'p';
1002	for (i = 0; i <= FD_NUMPART; i++)
1003		check_one_slice(r, diskname, i, 0, 1);
1004#endif	/* sun */
1005}
1006
#ifdef sun
/*
 * Read the disk label through 'fd' and mark the sibling slice nodes of
 * 'sname' in tree 'r' that are too small to hold a pool.  'sname' must end
 * in 's' followed by a digit; otherwise nothing is done.  A VTOC label is
 * tried first, then an EFI label; if neither can be read no node is marked.
 */
static void
check_slices(avl_tree_t *r, int fd, const char *sname)
{
	struct extvtoc vtoc;
	struct dk_gpt *gpt;
	char diskname[MAXNAMELEN];
	char *ptr;
	int i;

	/*
	 * snprintf() always terminates the copy; strncpy() would leave
	 * diskname unterminated for names of MAXNAMELEN or more characters.
	 * The cast keeps isdigit() defined for chars with the high bit set.
	 */
	(void) snprintf(diskname, sizeof (diskname), "%s", sname);
	if ((ptr = strrchr(diskname, 's')) == NULL ||
	    !isdigit((unsigned char)ptr[1]))
		return;
	ptr[1] = '\0';

	if (read_extvtoc(fd, &vtoc) >= 0) {
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
		/*
		 * on x86 we'll still have leftover links that point
		 * to slices s[9-15], so use NDKMAP instead
		 */
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
		/* nodes p[1-4] are never used with EFI labels */
		ptr[0] = 'p';
		for (i = 1; i <= FD_NUMPART; i++)
			check_one_slice(r, diskname, i, 0, 1);
		efi_free(gpt);
	}
}
#endif	/* sun */
1042
1043static void
1044zpool_open_func(void *arg)
1045{
1046	rdsk_node_t *rn = arg;
1047	struct stat64 statbuf;
1048	nvlist_t *config;
1049	int fd;
1050
1051	if (rn->rn_nozpool)
1052		return;
1053	if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
1054		/* symlink to a device that's no longer there */
1055		if (errno == ENOENT)
1056			nozpool_all_slices(rn->rn_avl, rn->rn_name);
1057		return;
1058	}
1059	/*
1060	 * Ignore failed stats.  We only want regular
1061	 * files, character devs and block devs.
1062	 */
1063	if (fstat64(fd, &statbuf) != 0 ||
1064	    (!S_ISREG(statbuf.st_mode) &&
1065	    !S_ISCHR(statbuf.st_mode) &&
1066	    !S_ISBLK(statbuf.st_mode))) {
1067		(void) close(fd);
1068		return;
1069	}
1070	/* this file is too small to hold a zpool */
1071#ifdef sun
1072	if (S_ISREG(statbuf.st_mode) &&
1073	    statbuf.st_size < SPA_MINDEVSIZE) {
1074		(void) close(fd);
1075		return;
1076	} else if (!S_ISREG(statbuf.st_mode)) {
1077		/*
1078		 * Try to read the disk label first so we don't have to
1079		 * open a bunch of minor nodes that can't have a zpool.
1080		 */
1081		check_slices(rn->rn_avl, fd, rn->rn_name);
1082	}
1083#else	/* !sun */
1084	if (statbuf.st_size < SPA_MINDEVSIZE) {
1085		(void) close(fd);
1086		return;
1087	}
1088#endif	/* sun */
1089
1090	if ((zpool_read_label(fd, &config)) != 0) {
1091		(void) close(fd);
1092		(void) no_memory(rn->rn_hdl);
1093		return;
1094	}
1095	(void) close(fd);
1096
1097
1098	rn->rn_config = config;
1099	if (config != NULL) {
1100		assert(rn->rn_nozpool == B_FALSE);
1101	}
1102}
1103
1104/*
1105 * Given a file descriptor, clear (zero) the label information.  This function
1106 * is used in the appliance stack as part of the ZFS sysevent module and
1107 * to implement the "zpool labelclear" command.
1108 */
1109int
1110zpool_clear_label(int fd)
1111{
1112	struct stat64 statbuf;
1113	int l;
1114	vdev_label_t *label;
1115	uint64_t size;
1116
1117	if (fstat64(fd, &statbuf) == -1)
1118		return (0);
1119	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1120
1121	if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
1122		return (-1);
1123
1124	for (l = 0; l < VDEV_LABELS; l++) {
1125		if (pwrite64(fd, label, sizeof (vdev_label_t),
1126		    label_offset(size, l)) != sizeof (vdev_label_t)) {
1127			free(label);
1128			return (-1);
1129		}
1130	}
1131
1132	free(label);
1133	return (0);
1134}
1135
/*
 * Given a list of directories to search, find all pools stored on disk.  This
 * includes partial pools which are not available to import.  If no
 * directories are given (iarg->paths is 0), then the default directory
 * (/dev) is searched.  Either poolname or guid (but not both) is provided by
 * the caller when trying to import a specific pool.
 */
1143static nvlist_t *
1144zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1145{
1146	int i, dirs = iarg->paths;
1147	struct dirent64 *dp;
1148	char path[MAXPATHLEN];
1149	char *end, **dir = iarg->path;
1150	size_t pathleft;
1151	nvlist_t *ret = NULL;
1152	static char *default_dir = "/dev";
1153	pool_list_t pools = { 0 };
1154	pool_entry_t *pe, *penext;
1155	vdev_entry_t *ve, *venext;
1156	config_entry_t *ce, *cenext;
1157	name_entry_t *ne, *nenext;
1158	avl_tree_t slice_cache;
1159	rdsk_node_t *slice;
1160	void *cookie;
1161
1162	if (dirs == 0) {
1163		dirs = 1;
1164		dir = &default_dir;
1165	}
1166
1167	/*
1168	 * Go through and read the label configuration information from every
1169	 * possible device, organizing the information according to pool GUID
1170	 * and toplevel GUID.
1171	 */
1172	for (i = 0; i < dirs; i++) {
1173		tpool_t *t;
1174		char *rdsk;
1175		int dfd;
1176		boolean_t config_failed = B_FALSE;
1177		DIR *dirp;
1178
1179		/* use realpath to normalize the path */
1180		if (realpath(dir[i], path) == 0) {
1181			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
1182			    dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1183			goto error;
1184		}
1185		end = &path[strlen(path)];
1186		*end++ = '/';
1187		*end = 0;
1188		pathleft = &path[sizeof (path)] - end;
1189
1190		/*
1191		 * Using raw devices instead of block devices when we're
1192		 * reading the labels skips a bunch of slow operations during
1193		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1194		 */
1195		if (strcmp(path, "/dev/dsk/") == 0)
1196			rdsk = "/dev/";
1197		else
1198			rdsk = path;
1199
1200		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1201		    (dirp = fdopendir(dfd)) == NULL) {
1202			if (dfd >= 0)
1203				(void) close(dfd);
1204			zfs_error_aux(hdl, strerror(errno));
1205			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
1206			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1207			    rdsk);
1208			goto error;
1209		}
1210
1211		avl_create(&slice_cache, slice_cache_compare,
1212		    sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
1213
1214		if (strcmp(rdsk, "/dev/") == 0) {
1215			struct gmesh mesh;
1216			struct gclass *mp;
1217			struct ggeom *gp;
1218			struct gprovider *pp;
1219
1220			errno = geom_gettree(&mesh);
1221			if (errno != 0) {
1222				zfs_error_aux(hdl, strerror(errno));
1223				(void) zfs_error_fmt(hdl, EZFS_BADPATH,
1224				    dgettext(TEXT_DOMAIN, "cannot get GEOM tree"));
1225				goto error;
1226			}
1227
1228			LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
1229		        	LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
1230					LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
1231						slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1232						slice->rn_name = zfs_strdup(hdl, pp->lg_name);
1233						slice->rn_avl = &slice_cache;
1234						slice->rn_dfd = dfd;
1235						slice->rn_hdl = hdl;
1236						slice->rn_nozpool = B_FALSE;
1237						avl_add(&slice_cache, slice);
1238					}
1239				}
1240			}
1241
1242			geom_deletetree(&mesh);
1243			goto skipdir;
1244		}
1245
1246		/*
1247		 * This is not MT-safe, but we have no MT consumers of libzfs
1248		 */
1249		while ((dp = readdir64(dirp)) != NULL) {
1250			const char *name = dp->d_name;
1251			if (name[0] == '.' &&
1252			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1253				continue;
1254
1255			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1256			slice->rn_name = zfs_strdup(hdl, name);
1257			slice->rn_avl = &slice_cache;
1258			slice->rn_dfd = dfd;
1259			slice->rn_hdl = hdl;
1260			slice->rn_nozpool = B_FALSE;
1261			avl_add(&slice_cache, slice);
1262		}
1263skipdir:
1264		/*
1265		 * create a thread pool to do all of this in parallel;
1266		 * rn_nozpool is not protected, so this is racy in that
1267		 * multiple tasks could decide that the same slice can
1268		 * not hold a zpool, which is benign.  Also choose
1269		 * double the number of processors; we hold a lot of
1270		 * locks in the kernel, so going beyond this doesn't
1271		 * buy us much.
1272		 */
1273		t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1274		    0, NULL);
1275		for (slice = avl_first(&slice_cache); slice;
1276		    (slice = avl_walk(&slice_cache, slice,
1277		    AVL_AFTER)))
1278			(void) tpool_dispatch(t, zpool_open_func, slice);
1279		tpool_wait(t);
1280		tpool_destroy(t);
1281
1282		cookie = NULL;
1283		while ((slice = avl_destroy_nodes(&slice_cache,
1284		    &cookie)) != NULL) {
1285			if (slice->rn_config != NULL && !config_failed) {
1286				nvlist_t *config = slice->rn_config;
1287				boolean_t matched = B_TRUE;
1288
1289				if (iarg->poolname != NULL) {
1290					char *pname;
1291
1292					matched = nvlist_lookup_string(config,
1293					    ZPOOL_CONFIG_POOL_NAME,
1294					    &pname) == 0 &&
1295					    strcmp(iarg->poolname, pname) == 0;
1296				} else if (iarg->guid != 0) {
1297					uint64_t this_guid;
1298
1299					matched = nvlist_lookup_uint64(config,
1300					    ZPOOL_CONFIG_POOL_GUID,
1301					    &this_guid) == 0 &&
1302					    iarg->guid == this_guid;
1303				}
1304				if (!matched) {
1305					nvlist_free(config);
1306				} else {
1307					/*
1308					 * use the non-raw path for the config
1309					 */
1310					(void) strlcpy(end, slice->rn_name,
1311					    pathleft);
1312					if (add_config(hdl, &pools, path,
1313					    config) != 0)
1314						config_failed = B_TRUE;
1315				}
1316			}
1317			free(slice->rn_name);
1318			free(slice);
1319		}
1320		avl_destroy(&slice_cache);
1321
1322		(void) closedir(dirp);
1323
1324		if (config_failed)
1325			goto error;
1326	}
1327
1328	ret = get_configs(hdl, &pools, iarg->can_be_active);
1329
1330error:
1331	for (pe = pools.pools; pe != NULL; pe = penext) {
1332		penext = pe->pe_next;
1333		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1334			venext = ve->ve_next;
1335			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1336				cenext = ce->ce_next;
1337				if (ce->ce_config)
1338					nvlist_free(ce->ce_config);
1339				free(ce);
1340			}
1341			free(ve);
1342		}
1343		free(pe);
1344	}
1345
1346	for (ne = pools.names; ne != NULL; ne = nenext) {
1347		nenext = ne->ne_next;
1348		free(ne->ne_name);
1349		free(ne);
1350	}
1351
1352	return (ret);
1353}
1354
1355nvlist_t *
1356zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
1357{
1358	importargs_t iarg = { 0 };
1359
1360	iarg.paths = argc;
1361	iarg.path = argv;
1362
1363	return (zpool_find_import_impl(hdl, &iarg));
1364}
1365
1366/*
1367 * Given a cache file, return the contents as a list of importable pools.
1368 * poolname or guid (but not both) are provided by the caller when trying
1369 * to import a specific pool.
1370 */
1371nvlist_t *
1372zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
1373    char *poolname, uint64_t guid)
1374{
1375	char *buf;
1376	int fd;
1377	struct stat64 statbuf;
1378	nvlist_t *raw, *src, *dst;
1379	nvlist_t *pools;
1380	nvpair_t *elem;
1381	char *name;
1382	uint64_t this_guid;
1383	boolean_t active;
1384
1385	verify(poolname == NULL || guid == 0);
1386
1387	if ((fd = open(cachefile, O_RDONLY)) < 0) {
1388		zfs_error_aux(hdl, "%s", strerror(errno));
1389		(void) zfs_error(hdl, EZFS_BADCACHE,
1390		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
1391		return (NULL);
1392	}
1393
1394	if (fstat64(fd, &statbuf) != 0) {
1395		zfs_error_aux(hdl, "%s", strerror(errno));
1396		(void) close(fd);
1397		(void) zfs_error(hdl, EZFS_BADCACHE,
1398		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
1399		return (NULL);
1400	}
1401
1402	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
1403		(void) close(fd);
1404		return (NULL);
1405	}
1406
1407	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1408		(void) close(fd);
1409		free(buf);
1410		(void) zfs_error(hdl, EZFS_BADCACHE,
1411		    dgettext(TEXT_DOMAIN,
1412		    "failed to read cache file contents"));
1413		return (NULL);
1414	}
1415
1416	(void) close(fd);
1417
1418	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
1419		free(buf);
1420		(void) zfs_error(hdl, EZFS_BADCACHE,
1421		    dgettext(TEXT_DOMAIN,
1422		    "invalid or corrupt cache file contents"));
1423		return (NULL);
1424	}
1425
1426	free(buf);
1427
1428	/*
1429	 * Go through and get the current state of the pools and refresh their
1430	 * state.
1431	 */
1432	if (nvlist_alloc(&pools, 0, 0) != 0) {
1433		(void) no_memory(hdl);
1434		nvlist_free(raw);
1435		return (NULL);
1436	}
1437
1438	elem = NULL;
1439	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
1440		src = fnvpair_value_nvlist(elem);
1441
1442		name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
1443		if (poolname != NULL && strcmp(poolname, name) != 0)
1444			continue;
1445
1446		this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
1447		if (guid != 0 && guid != this_guid)
1448			continue;
1449
1450		if (pool_active(hdl, name, this_guid, &active) != 0) {
1451			nvlist_free(raw);
1452			nvlist_free(pools);
1453			return (NULL);
1454		}
1455
1456		if (active)
1457			continue;
1458
1459		if ((dst = refresh_config(hdl, src)) == NULL) {
1460			nvlist_free(raw);
1461			nvlist_free(pools);
1462			return (NULL);
1463		}
1464
1465		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
1466			(void) no_memory(hdl);
1467			nvlist_free(dst);
1468			nvlist_free(raw);
1469			nvlist_free(pools);
1470			return (NULL);
1471		}
1472		nvlist_free(dst);
1473	}
1474
1475	nvlist_free(raw);
1476	return (pools);
1477}
1478
1479static int
1480name_or_guid_exists(zpool_handle_t *zhp, void *data)
1481{
1482	importargs_t *import = data;
1483	int found = 0;
1484
1485	if (import->poolname != NULL) {
1486		char *pool_name;
1487
1488		verify(nvlist_lookup_string(zhp->zpool_config,
1489		    ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
1490		if (strcmp(pool_name, import->poolname) == 0)
1491			found = 1;
1492	} else {
1493		uint64_t pool_guid;
1494
1495		verify(nvlist_lookup_uint64(zhp->zpool_config,
1496		    ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
1497		if (pool_guid == import->guid)
1498			found = 1;
1499	}
1500
1501	zpool_close(zhp);
1502	return (found);
1503}
1504
1505nvlist_t *
1506zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
1507{
1508	verify(import->poolname == NULL || import->guid == 0);
1509
1510	if (import->unique)
1511		import->exists = zpool_iter(hdl, name_or_guid_exists, import);
1512
1513	if (import->cachefile != NULL)
1514		return (zpool_find_import_cached(hdl, import->cachefile,
1515		    import->poolname, import->guid));
1516
1517	return (zpool_find_import_impl(hdl, import));
1518}
1519
1520boolean_t
1521find_guid(nvlist_t *nv, uint64_t guid)
1522{
1523	uint64_t tmp;
1524	nvlist_t **child;
1525	uint_t c, children;
1526
1527	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
1528	if (tmp == guid)
1529		return (B_TRUE);
1530
1531	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1532	    &child, &children) == 0) {
1533		for (c = 0; c < children; c++)
1534			if (find_guid(child[c], guid))
1535				return (B_TRUE);
1536	}
1537
1538	return (B_FALSE);
1539}
1540
1541typedef struct aux_cbdata {
1542	const char	*cb_type;
1543	uint64_t	cb_guid;
1544	zpool_handle_t	*cb_zhp;
1545} aux_cbdata_t;
1546
1547static int
1548find_aux(zpool_handle_t *zhp, void *data)
1549{
1550	aux_cbdata_t *cbp = data;
1551	nvlist_t **list;
1552	uint_t i, count;
1553	uint64_t guid;
1554	nvlist_t *nvroot;
1555
1556	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1557	    &nvroot) == 0);
1558
1559	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
1560	    &list, &count) == 0) {
1561		for (i = 0; i < count; i++) {
1562			verify(nvlist_lookup_uint64(list[i],
1563			    ZPOOL_CONFIG_GUID, &guid) == 0);
1564			if (guid == cbp->cb_guid) {
1565				cbp->cb_zhp = zhp;
1566				return (1);
1567			}
1568		}
1569	}
1570
1571	zpool_close(zhp);
1572	return (0);
1573}
1574
1575/*
1576 * Determines if the pool is in use.  If so, it returns true and the state of
1577 * the pool as well as the name of the pool.  Both strings are allocated and
1578 * must be freed by the caller.
1579 */
1580int
1581zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
1582    boolean_t *inuse)
1583{
1584	nvlist_t *config;
1585	char *name;
1586	boolean_t ret;
1587	uint64_t guid, vdev_guid;
1588	zpool_handle_t *zhp;
1589	nvlist_t *pool_config;
1590	uint64_t stateval, isspare;
1591	aux_cbdata_t cb = { 0 };
1592	boolean_t isactive;
1593
1594	*inuse = B_FALSE;
1595
1596	if (zpool_read_label(fd, &config) != 0) {
1597		(void) no_memory(hdl);
1598		return (-1);
1599	}
1600
1601	if (config == NULL)
1602		return (0);
1603
1604	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1605	    &stateval) == 0);
1606	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
1607	    &vdev_guid) == 0);
1608
1609	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
1610		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1611		    &name) == 0);
1612		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1613		    &guid) == 0);
1614	}
1615
1616	switch (stateval) {
1617	case POOL_STATE_EXPORTED:
1618		/*
1619		 * A pool with an exported state may in fact be imported
1620		 * read-only, so check the in-core state to see if it's
1621		 * active and imported read-only.  If it is, set
1622		 * its state to active.
1623		 */
1624		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
1625		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
1626			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
1627				stateval = POOL_STATE_ACTIVE;
1628
1629			/*
1630			 * All we needed the zpool handle for is the
1631			 * readonly prop check.
1632			 */
1633			zpool_close(zhp);
1634		}
1635
1636		ret = B_TRUE;
1637		break;
1638
1639	case POOL_STATE_ACTIVE:
1640		/*
1641		 * For an active pool, we have to determine if it's really part
1642		 * of a currently active pool (in which case the pool will exist
1643		 * and the guid will be the same), or whether it's part of an
1644		 * active pool that was disconnected without being explicitly
1645		 * exported.
1646		 */
1647		if (pool_active(hdl, name, guid, &isactive) != 0) {
1648			nvlist_free(config);
1649			return (-1);
1650		}
1651
1652		if (isactive) {
1653			/*
1654			 * Because the device may have been removed while
1655			 * offlined, we only report it as active if the vdev is
1656			 * still present in the config.  Otherwise, pretend like
1657			 * it's not in use.
1658			 */
1659			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
1660			    (pool_config = zpool_get_config(zhp, NULL))
1661			    != NULL) {
1662				nvlist_t *nvroot;
1663
1664				verify(nvlist_lookup_nvlist(pool_config,
1665				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1666				ret = find_guid(nvroot, vdev_guid);
1667			} else {
1668				ret = B_FALSE;
1669			}
1670
1671			/*
1672			 * If this is an active spare within another pool, we
1673			 * treat it like an unused hot spare.  This allows the
1674			 * user to create a pool with a hot spare that currently
1675			 * in use within another pool.  Since we return B_TRUE,
1676			 * libdiskmgt will continue to prevent generic consumers
1677			 * from using the device.
1678			 */
1679			if (ret && nvlist_lookup_uint64(config,
1680			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
1681				stateval = POOL_STATE_SPARE;
1682
1683			if (zhp != NULL)
1684				zpool_close(zhp);
1685		} else {
1686			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
1687			ret = B_TRUE;
1688		}
1689		break;
1690
1691	case POOL_STATE_SPARE:
1692		/*
1693		 * For a hot spare, it can be either definitively in use, or
1694		 * potentially active.  To determine if it's in use, we iterate
1695		 * over all pools in the system and search for one with a spare
1696		 * with a matching guid.
1697		 *
1698		 * Due to the shared nature of spares, we don't actually report
1699		 * the potentially active case as in use.  This means the user
1700		 * can freely create pools on the hot spares of exported pools,
1701		 * but to do otherwise makes the resulting code complicated, and
1702		 * we end up having to deal with this case anyway.
1703		 */
1704		cb.cb_zhp = NULL;
1705		cb.cb_guid = vdev_guid;
1706		cb.cb_type = ZPOOL_CONFIG_SPARES;
1707		if (zpool_iter(hdl, find_aux, &cb) == 1) {
1708			name = (char *)zpool_get_name(cb.cb_zhp);
1709			ret = B_TRUE;
1710		} else {
1711			ret = B_FALSE;
1712		}
1713		break;
1714
1715	case POOL_STATE_L2CACHE:
1716
1717		/*
1718		 * Check if any pool is currently using this l2cache device.
1719		 */
1720		cb.cb_zhp = NULL;
1721		cb.cb_guid = vdev_guid;
1722		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
1723		if (zpool_iter(hdl, find_aux, &cb) == 1) {
1724			name = (char *)zpool_get_name(cb.cb_zhp);
1725			ret = B_TRUE;
1726		} else {
1727			ret = B_FALSE;
1728		}
1729		break;
1730
1731	default:
1732		ret = B_FALSE;
1733	}
1734
1735
1736	if (ret) {
1737		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
1738			if (cb.cb_zhp)
1739				zpool_close(cb.cb_zhp);
1740			nvlist_free(config);
1741			return (-1);
1742		}
1743		*state = (pool_state_t)stateval;
1744	}
1745
1746	if (cb.cb_zhp)
1747		zpool_close(cb.cb_zhp);
1748
1749	nvlist_free(config);
1750	*inuse = ret;
1751	return (0);
1752}
1753