/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright (c) 2016, Intel Corporation.
 */

/*
 * Pool import support functions.
 *
 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
 * these commands are expected to run in the global zone, we can assume
 * that the devices are all readable when called.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */
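
/*
 * Illustrative sketch (not part of this file's code path): once labels have
 * been grouped by (pool guid, toplevel vdev guid), picking the "best" config
 * for a toplevel vdev reduces to keeping the candidate with the highest
 * label txg, roughly:
 *
 *	if (label_txg > best_txg) {
 *		best_txg = label_txg;
 *		best_config = config;
 *	}
 */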

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKFLSBUF */
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>

#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
#include <libzfs.h>

#include "zutil_import.h"

#ifdef HAVE_LIBUDEV
#include <libudev.h>
#include <sched.h>
#endif
#include <blkid/blkid.h>

#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * Skip devices with well-known names: opening them can have side effects
 * that must be avoided.
 *
 * hpet        - High Precision Event Timer
 * watchdog[N] - Watchdog devices must be closed in a special way.
 */
static boolean_t
should_skip_dev(const char *dev)
{
	return ((strcmp(dev, "watchdog") == 0) ||
	    (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
	    (strcmp(dev, "hpet") == 0));
}
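
/*
 * Examples (illustrative): "watchdog", "watchdog0", and "hpet" are skipped;
 * "sda" is not, and neither is "watchdogs", since the character following
 * the "watchdog" prefix is not a digit.
 */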

int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, BLKFLSBUF));
}
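
/*
 * Example usage (illustrative only; the device path is hypothetical): drop
 * any cached copies of a block device's contents before re-reading its
 * labels.
 *
 *	int fd = open("/dev/sdb", O_RDONLY | O_CLOEXEC);
 *	if (fd >= 0) {
 *		(void) zfs_dev_flush(fd);
 *		(void) close(fd);
 *	}
 */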

void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	libpc_handle_t *hdl = rn->rn_hdl;
	struct stat64 statbuf;
	nvlist_t *config;
	uint64_t vdev_guid = 0;
	int error;
	int num_labels = 0;
	int fd;

	if (should_skip_dev(zfs_basename(rn->rn_name)))
		return;

	/*
	 * Ignore failed stats.  We only want regular files and block devices.
	 * Ignore files that are too small to hold a zpool.
	 */
	if (stat64(rn->rn_name, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
	    (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
		return;

	/*
	 * Preferentially open using O_DIRECT to bypass the block device
	 * cache, which may be stale for multipath devices.  An EINVAL errno
	 * indicates O_DIRECT is unsupported, so fall back to plain O_RDONLY.
	 */
	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
	if ((fd < 0) && (errno == EINVAL))
		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
	if ((fd < 0) && (errno == EACCES))
		hdl->lpc_open_access_error = B_TRUE;
	if (fd < 0)
		return;

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	/*
	 * Check that the vdev is for the expected guid.  Additional entries
	 * are speculatively added based on the paths stored in the labels.
	 * Entries with valid paths but incorrect guids must be removed.
	 */
	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	rn->rn_config = config;
	rn->rn_num_labels = num_labels;

	/*
	 * Add additional entries for paths described by this label.
	 */
	if (rn->rn_labelpaths) {
		const char *path = NULL;
		const char *devid = NULL;
		rdsk_node_t *slice;
		avl_index_t where;
		int error;

		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
			return;

		/*
		 * Allow devlinks to stabilize so all paths are available.
		 */
		zpool_disk_wait(rn->rn_name);

		if (path != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, path);
			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}

		if (devid != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			error = asprintf(&slice->rn_name, "%s%s",
			    DEV_BYID_PATH, devid);
			if (error == -1) {
				free(slice);
				return;
			}

			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}
	}
}

static const char * const
zpool_default_import_path[] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE: device names can change */
};

const char * const *
zpool_default_search_paths(size_t *count)
{
	*count = ARRAY_SIZE(zpool_default_import_path);
	return (zpool_default_import_path);
}
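
/*
 * Example usage (illustrative): walk the default search paths in priority
 * order.
 *
 *	size_t count;
 *	const char * const *paths = zpool_default_search_paths(&count);
 *	for (size_t i = 0; i < count; i++)
 *		(void) printf("%s\n", paths[i]);
 */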

/*
 * Given a full path to a device, determine whether that device appears in
 * the import search path.  If it does, return the first match and store the
 * index in the passed 'order' variable; otherwise return an error.
 */
static int
zfs_path_order(const char *name, int *order)
{
	const char *env = getenv("ZPOOL_IMPORT_PATH");

	if (env) {
		for (int i = 0; ; ++i) {
			env += strspn(env, ":");
			size_t dirlen = strcspn(env, ":");
			if (dirlen) {
				if (strncmp(name, env, dirlen) == 0) {
					*order = i;
					return (0);
				}

				env += dirlen;
			} else
				break;
		}
	} else {
		for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path);
		    ++i) {
			if (strncmp(name, zpool_default_import_path[i],
			    strlen(zpool_default_import_path[i])) == 0) {
				*order = i;
				return (0);
			}
		}
	}

	return (ENOENT);
}
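
/*
 * Example (illustrative): with ZPOOL_IMPORT_PATH=/dev/mapper:/dev/disk/by-id
 * in the environment, "/dev/mapper/mpatha" yields order 0 and
 * "/dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f" yields order 1; a path
 * under neither directory returns ENOENT.
 *
 *	int order;
 *	if (zfs_path_order("/dev/mapper/mpatha", &order) == 0)
 *		(void) printf("search path index: %d\n", order);
 */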

/*
 * Use libblkid to quickly enumerate all known zfs devices.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	rdsk_node_t *slice;
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;
	avl_index_t where;
	int error;

	*slice_cache = NULL;

	error = blkid_get_cache(&cache, NULL);
	if (error != 0)
		return (error);

	error = blkid_probe_all_new(cache);
	if (error != 0) {
		blkid_put_cache(cache);
		return (error);
	}

	iter = blkid_dev_iterate_begin(cache);
	if (iter == NULL) {
		blkid_put_cache(cache);
		return (EINVAL);
	}

	/* Only const char *s since 2.32 */
	error = blkid_dev_set_search(iter,
	    (char *)"TYPE", (char *)"zfs_member");
	if (error != 0) {
		blkid_dev_iterate_end(iter);
		blkid_put_cache(cache);
		return (error);
	}

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

	while (blkid_dev_next(iter, &dev) == 0) {
		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
		slice->rn_vdev_guid = 0;
		slice->rn_lock = lock;
		slice->rn_avl = *slice_cache;
		slice->rn_hdl = hdl;
		slice->rn_labelpaths = B_TRUE;

		error = zfs_path_order(slice->rn_name, &slice->rn_order);
		if (error == 0)
			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
		else
			slice->rn_order = IMPORT_ORDER_DEFAULT;

		pthread_mutex_lock(lock);
		if (avl_find(*slice_cache, slice, &where)) {
			free(slice->rn_name);
			free(slice);
		} else {
			avl_insert(*slice_cache, slice, where);
		}
		pthread_mutex_unlock(lock);
	}

	blkid_dev_iterate_end(iter);
	blkid_put_cache(cache);

	return (0);
}
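
/*
 * Example usage (illustrative; assumes 'hdl' is an already-initialized
 * libpc_handle_t): enumerate candidate devices and probe each one.  Error
 * handling and tree teardown are omitted for brevity.
 *
 *	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
 *	avl_tree_t *cache;
 *	if (zpool_find_import_blkid(hdl, &lock, &cache) == 0) {
 *		for (rdsk_node_t *rn = avl_first(cache); rn != NULL;
 *		    rn = AVL_NEXT(cache, rn))
 *			zpool_open_func(rn);
 *	}
 */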

/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 */

typedef struct vdev_dev_strs {
	char	vds_devid[128];
	char	vds_devphys[128];
} vdev_dev_strs_t;

#ifdef HAVE_LIBUDEV

/*
 * Obtain the persistent device id string (describes what)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	struct udev_list_entry *entry;
	const char *bus;
	char devbyid[MAXPATHLEN];

	/* The bus-based by-id path is preferred */
	bus = udev_device_get_property_value(dev, "ID_BUS");

	if (bus == NULL) {
		const char *dm_uuid;

		/*
		 * For multipath nodes use the persistent uuid based identifier
		 *
		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
		 */
		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
		if (dm_uuid != NULL) {
			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
			return (0);
		}

		/*
		 * For volumes use the persistent /dev/zvol/dataset identifier
		 */
		entry = udev_device_get_devlinks_list_entry(dev);
		while (entry != NULL) {
			const char *name;

			name = udev_list_entry_get_name(entry);
			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
				(void) strlcpy(bufptr, name, buflen);
				return (0);
			}
			entry = udev_list_entry_get_next(entry);
		}

		/*
		 * NVMe 'by-id' symlinks are similar to the bus case
		 */
		struct udev_device *parent;

		parent = udev_device_get_parent_with_subsystem_devtype(dev,
		    "nvme", NULL);
		if (parent != NULL)
			bus = "nvme";	/* continue with bus symlink search */
		else
			return (ENODATA);
	}

	/*
	 * Locate the bus-specific by-id link
	 */
	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		const char *name;

		name = udev_list_entry_get_name(entry);
		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
			name += strlen(DEV_BYID_PATH);
			(void) strlcpy(bufptr, name, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * Obtain the persistent physical location string (describes where)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	const char *physpath = NULL;
	struct udev_list_entry *entry;

	/*
	 * Normal disks use ID_PATH for their physical path.
	 */
	physpath = udev_device_get_property_value(dev, "ID_PATH");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * Device mapper devices are virtual and don't have a physical
	 * path.  For them we use ID_VDEV instead, which is set up via the
	 * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
	 * to a virtual device.  If you don't have vdev_id.conf set up,
	 * you cannot use multipath autoreplace with device mapper.
	 */
	physpath = udev_device_get_property_value(dev, "ID_VDEV");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	/*
	 * For all other devices fall back to using the by-uuid name.
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}
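
/*
 * Example usage (illustrative; "sda" is a hypothetical disk): look up the
 * devid and physical path strings for a block device.  Error handling is
 * abbreviated.
 *
 *	struct udev *udev = udev_new();
 *	struct udev_device *dev =
 *	    udev_device_new_from_subsystem_sysname(udev, "block", "sda");
 *	char devid[128], physpath[128];
 *	if (dev != NULL &&
 *	    zfs_device_get_devid(dev, devid, sizeof (devid)) == 0 &&
 *	    zfs_device_get_physical(dev, physpath, sizeof (physpath)) == 0)
 *		(void) printf("%s at %s\n", devid, physpath);
 *	udev_device_unref(dev);
 *	udev_unref(udev);
 */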

/*
 * A disk is considered a multipath whole disk when:
 *	DEVNAME key value has "dm-"
 *	DM_NAME key value has "mpath" prefix
 *	DM_UUID key exists
 *	ID_PART_TABLE_TYPE key does not exist or is not gpt
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname, *type, *uuid;

	devname = udev_device_get_property_value(dev, "DEVNAME");
	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	uuid = udev_device_get_property_value(dev, "DM_UUID");

	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
	    (uuid != NULL)) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}

#else

int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

#endif /* HAVE_LIBUDEV */

/*
 * Wait up to timeout_ms for udev to set up the device node.  The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device can be accessed reliably.  Depending on
 * the complexity of the udev rules, this process could take several seconds.
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname = NULL;
	int ret = ENODEV;
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	start = gethrtime();
	settle = 0;

	do {
		if (sysname == NULL) {
			if (realpath(path, nodepath) != NULL) {
				sysname = strrchr(nodepath, '/') + 1;
			} else {
				(void) usleep(sleep_ms * MILLISEC);
				continue;
			}
		}

		dev = udev_device_new_from_subsystem_sysname(udev,
		    "block", sysname);
		if ((dev != NULL) && udev_device_is_ready(dev)) {
			struct udev_list_entry *links, *link = NULL;

			ret = 0;
			links = udev_device_get_devlinks_list_entry(dev);

			udev_list_entry_foreach(link, links) {
				struct stat64 statbuf;
				const char *name;

				name = udev_list_entry_get_name(link);
				errno = 0;
				if (stat64(name, &statbuf) == 0 && errno == 0)
					continue;

				settle = 0;
				ret = ENODEV;
				break;
			}

			if (ret == 0) {
				if (settle == 0) {
					settle = gethrtime();
				} else if (NSEC2MSEC(gethrtime() - settle) >=
				    settle_ms) {
					udev_device_unref(dev);
					break;
				}
			}
		}

		udev_device_unref(dev);
		(void) usleep(sleep_ms * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	udev_unref(udev);

	return (ret);
#else
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
#endif /* HAVE_LIBUDEV */
}
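
/*
 * Example usage (illustrative; the by-id name is hypothetical): wait up to
 * five seconds for a freshly labeled disk's device node and links to settle.
 *
 *	const char *path = "/dev/disk/by-id/scsi-SAMPLE0000000000001-part1";
 *	if (zpool_label_disk_wait(path, 5000) != 0)
 *		(void) fprintf(stderr, "device never appeared\n");
 */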

/*
 * Simplified version of zpool_label_disk_wait() where we wait for a device
 * to appear using the default timeouts.
 */
int
zpool_disk_wait(const char *path)
{
	int timeout;
	timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
	    DISK_LABEL_WAIT);

	return (zpool_label_disk_wait(path, timeout));
}

/*
 * Encode the persistent device strings
 * used for the vdev disk label
 */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break;	/* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev;

	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	(void) path;
	(void) ds;
	(void) wholedisk;
	return (ENOENT);
#endif
}

/*
 * Rescan the enclosure sysfs path used for turning on enclosure LEDs, and
 * store it in the nvlist (if applicable).  For example:
 *    vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 *
 * If an old path was in the nvlist and the rescan cannot find a new path,
 * then keep the old path, since the disk may have been removed.
 *
 * path: The vdev path (value from ZPOOL_CONFIG_PATH)
 * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
 */
void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
    const char *key)
{
	char *upath, *spath;
	const char *oldpath = NULL;

	(void) nvlist_lookup_string(nv, key, &oldpath);

	/* Add enclosure sysfs path (if disk is in an enclosure). */
	upath = zfs_get_underlying_path(path);
	spath = zfs_get_enclosure_sysfs_path(upath);

	if (spath) {
		(void) nvlist_add_string(nv, key, spath);
	} else {
		/*
		 * We couldn't dynamically scan the disk's enclosure sysfs
		 * path.  This could be because the disk went away.  If there's
		 * an old enclosure sysfs path in the nvlist, then keep using
		 * it.
		 */
		if (!oldpath) {
			(void) nvlist_remove_all(nv, key);
		}
	}

	free(upath);
	free(spath);
}

/*
 * This will get called for each leaf vdev.
 */
static int
sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
{
	(void) hdl_data, (void) data;

	const char *path = NULL;
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return (1);

	/* Rescan our enclosure sysfs path for this vdev */
	update_vdev_config_dev_sysfs_path(nv, path,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	return (0);
}

/*
 * Given an nvlist for our pool (with vdev tree), iterate over all the
 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
 */
void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
	nvlist_t *nvroot = NULL;
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
}

/*
 * Update a leaf vdev's persistent device strings
 *
 * - only applies for a dedicated leaf vdev (aka whole disk)
 * - updated during pool create|add|attach|import
 * - used for device matching during auto-{online,expand,replace}
 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
 *
 * single device node example:
 *	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
 *	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 *
 * multipath device node example:
 *	devid:		'dm-uuid-mpath-35000c5006304de3f'
 *
 * We also store the enclosure sysfs path for turning on enclosure LEDs
 * (if applicable):
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	vdev_dev_strs_t vds;
	const char *env, *type, *path;
	uint64_t wholedisk = 0;

	/*
	 * For the benefit of legacy ZFS implementations, allow
	 * for opting out of devid strings in the vdev label.
	 *
	 * example use:
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * explanation:
	 * Older OpenZFS implementations had issues when attempting to
	 * display pool config VDEV names if a "devid" NVP value is
	 * present in the pool's config.
	 *
	 * For example, a pool that originated on illumos platform would
	 * have a devid value in the config and "zpool status" would fail
	 * when listing the config.
	 *
	 * A pool can be stripped of any "devid" values on import or
	 * prevented from adding them on zpool create|add by setting
	 * ZFS_VDEV_DEVID_OPT_OUT.
	 */
	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    strcmp(type, VDEV_TYPE_DISK) != 0) {
		return;
	}
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/*
	 * Update device string values in the config nvlist.
	 */
	if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
		if (vds.vds_devphys[0] != '\0') {
			(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
			    vds.vds_devphys);
		}
		update_vdev_config_dev_sysfs_path(nv, path,
		    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	} else {
		/* Clear out any stale entries. */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}
}