vdev_geom.c revision 284193
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

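/*
 * Update the vdev's cached notion of its media's rotation rate from the
 * consumer's GEOM::rotation_rate attribute, if the provider reports one.
 */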
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

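/*
 * GEOM attribute-change callback: refresh the cached rotation rate when
 * the provider reports a new GEOM::rotation_rate value.
 */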
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;

	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL)
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	zfs_post_remove(vd->vdev_spa, vd);
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

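/*
 * Attach a consumer to the given provider, reusing the shared "zfs::vdev"
 * geom and any existing consumer already connected to that provider.
 * Access is acquired as read + exclusive (r1w0e1); write access is only
 * requested later, in vdev_geom_open(), if the pool is opened writable.
 */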
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have a geom already?  If not, create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		cp = g_new_consumer(gp);
		if (g_attach(cp, pp) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		if (g_access(cp, 1, 0, 1) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			if (g_attach(cp, pp) != 0) {
				g_destroy_consumer(cp);
				return (NULL);
			}
			if (g_access(cp, 1, 0, 1) != 0) {
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			if (g_access(cp, 1, 0, 1) != 0)
				return (NULL);
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

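/*
 * Drop our read and exclusive access counts on the consumer and, on last
 * close, destroy the consumer and then the geom once no consumers remain.
 * Runs with the topology lock held; vdev_geom_close() schedules it on the
 * GEOM event thread via g_post_event().
 */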
static void
vdev_geom_detach(void *arg, int flag __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	cp = arg;
	gp = cp->geom;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		g_detach(cp);
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static uint64_t
nvlist_get_guid(nvlist_t *list)
{
	uint64_t value;

	value = 0;
	nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
	return (value);
}

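/*
 * Synchronously issue a read or write of a sector-aligned region,
 * splitting it into MAXPHYS-sized chunks and waiting for each with
 * biowait().  Used for label I/O while tasting providers.
 */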
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;
	p = data;
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

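/*
 * Try each of the vdev labels in turn and unpack the first one holding a
 * plausible config: the pool state must be recognized and, except for
 * spares and L2ARC devices, the label must carry a non-zero txg.
 * Returns ENOENT if no label yields a config.
 */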
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int l;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}

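/*
 * Grow the configs array so it can hold an entry at index "id",
 * preserving the existing entries.
 */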
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

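/*
 * Slot the config into the array at its top-level vdev id, but only if
 * it names the pool we are looking for, agrees with the pool GUID seen
 * so far, and is newer (by txg) than any config already recorded for
 * that id.  Consumes cfg: rejected configs are freed.
 */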
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	uint64_t id, txg, known_txg;
	char *pname;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

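/*
 * Temporarily attach the tasting consumer to a provider with read-only
 * access, rejecting withering providers and those whose sector size or
 * media size cannot hold a ZFS label.
 */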
static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error == 0) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
			error = EINVAL;
		else if (pp->mediasize < SPA_MINDEVSIZE)
			error = EINVAL;
		if (error != 0)
			g_access(cp, -1, 0, 0);
	}
	if (error != 0)
		g_detach(cp);
	return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

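/*
 * Walk every provider of every GEOM class (except our own), taste each
 * one, and collect the best label config per top-level vdev for the
 * named pool.  Returns ENOENT if no matching label was found.
 */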
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

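/* Read the vdev GUID from whatever valid label config the consumer offers. */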
static uint64_t
vdev_geom_read_guid(struct g_consumer *cp)
{
	nvlist_t *config;
	uint64_t guid;

	g_topology_assert_not();

	guid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		guid = nvlist_get_guid(config);
		nvlist_free(config);
	}
	return (guid);
}

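/*
 * Taste every eligible provider in the system and attach to the first
 * one whose label carries the requested vdev GUID.
 */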
static struct g_consumer *
vdev_geom_attach_by_guid(uint64_t guid)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				pguid = vdev_geom_read_guid(zcp);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (pguid != guid)
					continue;
				cp = vdev_geom_attach(pp);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to attach to %s.\n",
					    pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}
	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

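/*
 * Search all providers for the vdev's GUID and, on success, point
 * vd->vdev_path at the provider that now backs the vdev.
 */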
static struct g_consumer *
vdev_geom_open_by_guid(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guid(vd->vdev_guid);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.",
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju] failed.",
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

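/*
 * Attach to the provider named by vd->vdev_path, optionally verifying
 * that its label GUID matches the vdev's.  The GUID check is skipped for
 * providers whose sector size could not hold a label anyway; those are
 * rejected later in vdev_geom_open().
 */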
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t guid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			guid = vdev_geom_read_guid(cp);
			g_topology_lock();
			if (guid != vd->vdev_guid) {
				vdev_geom_detach(cp, 0);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju != %ju.", vd->vdev_path,
				    (uintmax_t)vd->vdev_guid, (uintmax_t)guid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

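/*
 * Open the vdev: find its provider (by path, falling back to a GUID
 * search on mismatch unless the pool is being created or split), acquire
 * write access if needed, and report the device's size and alignment
 * back to the common vdev code.
 */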
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	/*
	 * If we're creating or splitting a pool, just find the GEOM provider
	 * by its name and ignore GUID mismatches.
	 */
	if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_splitting_newspa == B_TRUE)
		cp = vdev_geom_open_by_path(vd, 0);
	else {
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected guid.  The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right guid.
			 */
			cp = vdev_geom_open_by_guid(vd);
		}
	}

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);
		vdev_geom_detach(cp, 0);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
			    vd->vdev_path, error);
			vdev_geom_detach(cp, 0);
			cp = NULL;
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	cp->private = vd;
	vd->vdev_tsd = cp;
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache setting, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	if (vd->vdev_physpath != NULL)
		spa_strfree(vd->vdev_physpath);
	bufsize = sizeof("/dev/") + strlen(pp->name);
	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

	/*
	 * Determine the device's rotation rate.
	 */
	vdev_geom_set_rotation_rate(vd, cp);

	return (0);
}

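/*
 * Close the vdev.  The consumer is not detached here; that is deferred
 * to the GEOM event thread via g_post_event(), since vdev_geom_detach()
 * requires the topology lock.
 */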
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	cp->private = NULL;	/* XXX locking */
	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}

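/*
 * I/O completion callback.  Translates BIO errors into ZIO errors,
 * latches "no write cache" / "no TRIM" on ENOTSUP, and requests
 * asynchronous removal of the vdev when ENXIO indicates the provider
 * is going away.
 */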
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If the provider's error is set we assume it is
			 * being removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_interrupt(zio);
}

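/*
 * Issue a ZIO to GEOM: cache-flush ioctls become BIO_FLUSH, frees become
 * BIO_DELETE, and reads/writes map directly to BIO_READ/BIO_WRITE.
 * Completion is reported from vdev_geom_io_intr().
 */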
static int
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);

	return (ZIO_PIPELINE_STOP);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};