vdev_geom.c revision 299958
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

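/*
 * Query the provider's GEOM::rotation_rate attribute and record it in
 * the vdev, falling back to VDEV_RATE_UNKNOWN if it is unavailable.
 */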
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

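/*
 * GEOM attribute-change callback.  Refreshes the vdev's cached rotation
 * rate or physical path when the corresponding provider attribute
 * changes.
 */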
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;
	spa_t *spa;
	char *physpath;
	int error, physpath_len;

	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	if (g_access(cp, 1, 0, 0) != 0)
		return;

	/*
	 * Record/Update physical path information for this device.
	 */
	spa = vd->vdev_spa;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		if (old_physpath != NULL)
			spa_strfree(old_physpath);
	}
	g_free(physpath);
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL) {
		/* Vdev close in progress.  Ignore the event. */
		return;
	}

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

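/*
 * Attach a consumer to the given provider on behalf of a vdev, creating
 * the "zfs::vdev" geom and/or a new consumer as needed, and reusing an
 * existing consumer when one is already connected to the provider.
 * Returns the consumer, or NULL on failure.
 */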
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we already have a geom?  If not, create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				g_destroy_consumer(cp);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	/*
	 * BUG: cp may already belong to a vdev.  This could happen if:
	 * 1) That vdev is a shared spare, or
	 * 2) We are trying to reopen a missing vdev and we are scanning by
	 *    guid.  In that case, we'll ultimately fail to open this consumer,
	 *    but not until after setting the private field.
	 * The solution is to:
	 * 1) Don't set the private field until after the open succeeds, and
	 * 2) Set it to a linked list of vdevs, not just a single vdev
	 */
	cp->private = vd;
	vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

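/*
 * Release the vdev's access to its consumer, detaching and destroying
 * the consumer on last close and withering the geom once no consumers
 * remain.  The caller must hold the GEOM topology lock.
 */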
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	cp->private = NULL;

	gp = cp->geom;
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroyed consumer to %s.",
			    cp->provider->name);
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

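/*
 * Extract the pool and vdev GUIDs from a label config nvlist.  Missing
 * entries (e.g. on inactive spares and L2ARC devices) leave the
 * caller-supplied values unchanged.
 */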
static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
	(void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}

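/*
 * Synchronously issue a read or write of [offset, offset + size) to the
 * consumer, splitting the request into MAXPHYS-sized chunks.  Both
 * offset and size must be multiples of the provider's sector size.
 */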
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;
	p = data;
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

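/*
 * Scan the provider's vdev labels and unpack the first valid config
 * nvlist found.  Returns 0 and a config that the caller must free on
 * success, or ENOENT if no usable label was found.
 */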
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int l;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {
		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}

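/*
 * Grow the configs array so that it can hold an entry for top-level
 * vdev "id", preserving any existing entries.
 */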
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

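/*
 * Slot the config for one top-level vdev into the configs array by its
 * vdev id, keeping only the config with the newest txg for each slot
 * and discarding configs from other pools or with mismatched names.
 */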
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	uint64_t id, txg, known_txg;
	char *pname;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

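/*
 * Temporarily attach the tasting consumer to a provider and open it
 * read-only, rejecting withering providers and those whose sector size
 * or media size a ZFS vdev could not use.
 */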
static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error == 0) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
			error = EINVAL;
		else if (pp->mediasize < SPA_MINDEVSIZE)
			error = EINVAL;
		if (error != 0)
			g_access(cp, -1, 0, 0);
	}
	if (error != 0)
		g_detach(cp);
	return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

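/*
 * Walk all non-ZFS GEOM providers, tasting each for a vdev label that
 * names the given pool, and collect the newest config per top-level
 * vdev in "configs".  Returns 0 if at least one config was found.
 */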
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

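/*
 * Read the pool and vdev GUIDs from the label of the device behind the
 * given consumer; both are returned as 0 if no config can be read.
 */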
static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
	nvlist_t *config;

	g_topology_assert_not();

	*pguid = 0;
	*vguid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		nvlist_get_guids(config, pguid, vguid);
		nvlist_free(config);
	}
}

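/*
 * Taste every eligible GEOM provider, looking for one whose label
 * matches the vdev's GUID (and pool GUID, when the label has one), and
 * attach to the first match found.
 */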
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid, vguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				vdev_geom_read_guids(zcp, &pguid, &vguid);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				/*
				 * Check that the label's vdev guid matches the
				 * desired guid.  If the label has a pool guid,
				 * check that it matches too.  (Inactive spares
				 * and L2ARCs do not have any pool guid in the
				 * label.)
				 */
				if ((pguid != 0 &&
				     pguid != spa_guid(vd->vdev_spa)) ||
				    vguid != vd->vdev_guid)
					continue;
				cp = vdev_geom_attach(pp, vd);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to "
					    "attach to %s.\n", pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

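/*
 * Open the vdev by searching all providers for its GUIDs, updating
 * vd->vdev_path to name the provider that was actually found.
 */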
static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

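/*
 * Look up the provider named by vd->vdev_path and attach to it,
 * optionally verifying that its label carries the expected pool and
 * vdev GUIDs.
 */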
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t pguid, vguid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp, vd);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			vdev_geom_read_guids(cp, &pguid, &vguid);
			g_topology_lock();
			/*
			 * Check that the label's vdev guid matches the
			 * desired guid.  If the label has a pool guid,
			 * check that it matches too.  (Inactive spares
			 * and L2ARCs do not have any pool guid in the
			 * label.)
			 */
			if ((pguid != 0 &&
			    pguid != spa_guid(vd->vdev_spa)) ||
			    vguid != vd->vdev_guid) {
				vdev_geom_close_locked(vd);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
				    (uintmax_t)spa_guid(vd->vdev_spa),
				    (uintmax_t)vd->vdev_guid,
				    (uintmax_t)pguid, (uintmax_t)vguid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

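/*
 * vdev_ops open entry point.  Locate and open the underlying provider
 * (by path and/or GUIDs), obtain write access if the pool is writable,
 * and report the device's size, ashift, and rotation rate.
 */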
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he or she is doing and find
		 * the GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs.  The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);

		vdev_geom_close_locked(vd);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		/*
		 * Retry for a few seconds in case write access is
		 * temporarily unavailable.
		 */
		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing "
			    "(error=%d).\n", vd->vdev_path, error);
			vdev_geom_close_locked(vd);
			cp = NULL;
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL)
		vdev_geom_attrchanged(cp, "GEOM::physpath");

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache setting, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/*
	 * Determine the device's rotation rate.
	 */
	vdev_geom_set_rotation_rate(vd, cp);

	return (0);
}

static void
vdev_geom_close(vdev_t *vd)
{

	DROP_GIANT();
	g_topology_lock();
	vdev_geom_close_locked(vd);
	g_topology_unlock();
	PICKUP_GIANT();
}

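/*
 * Completion callback for bios issued by vdev_geom_io_start().  Maps
 * the bio's error onto the zio, and handles ENOTSUP (disable future
 * flush or trim requests) and ENXIO (device disappearing) specially.
 */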
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If the provider's error is set we assume it is
			 * being removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}

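/*
 * vdev_ops io_start entry point.  Translate the zio into a bio (READ,
 * WRITE, DELETE, or FLUSH) and hand it to GEOM; ioctls other than
 * DKIOCFLUSHWRITECACHE are rejected with ENOTSUP.
 */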
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush ||
				    vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ?
		    BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};