vdev_geom.c revision 270312
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
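
/*
 * Usage note for the two knobs above: each is both a boot-time tunable
 * (TUNABLE_INT) and a read/write run-time sysctl (CTLFLAG_RW).  For
 * example, to stop passing TRIM requests through to providers at
 * runtime:
 *
 *	# sysctl vfs.zfs.vdev.bio_delete_disable=1
 *
 * or to set the same knob from loader.conf(5) at boot:
 *
 *	vfs.zfs.vdev.bio_delete_disable="1"
 */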

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL)
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	zfs_post_remove(vd->vdev_spa, vd);
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have a geom already?  If not, create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		cp = g_new_consumer(gp);
		if (g_attach(cp, pp) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		if (g_access(cp, 1, 0, 1) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			if (g_attach(cp, pp) != 0) {
				g_destroy_consumer(cp);
				return (NULL);
			}
			if (g_access(cp, 1, 0, 1) != 0) {
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			if (g_access(cp, 1, 0, 1) != 0)
				return (NULL);
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}
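
/*
 * Note on the g_access(cp, 1, 0, 1) calls above: the three integer
 * arguments are deltas applied to the consumer's read, write and
 * exclusive access counts, so each successful attach takes one read
 * reference and one exclusive reference.  The write reference is added
 * later, in vdev_geom_open() via g_access(cp, 0, 1, 0), and only when
 * the pool is opened for writing.
 */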

static void
vdev_geom_detach(void *arg, int flag __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	cp = arg;
	gp = cp->geom;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		g_detach(cp);
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static uint64_t
nvlist_get_guid(nvlist_t *list)
{
	uint64_t value;

	value = 0;
	nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
	return (value);
}

static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;
	p = data;
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}
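
/*
 * A worked example of the chunking above, assuming the then-common
 * MAXPHYS of 128 KiB (the exact value is configuration-dependent) and
 * 512-byte sectors: maxio = 131072 - (131072 % 512) = 131072, so a
 * 320 KiB label read is issued as three synchronous bios of 128 KiB,
 * 128 KiB and 64 KiB.  The single bio is reused (bzero and refill) for
 * each chunk, and biowait() blocks until that chunk completes before
 * the next one is sent.
 */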

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *p, *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t guid, state, txg;
	int error, l, len;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* Round the label size up to a multiple of the sector size. */
	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	guid = 0;
	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}
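
/*
 * Label placement background for the loop above (a sketch of the
 * standard ZFS on-disk layout, not logic defined in this file): ZFS
 * keeps VDEV_LABELS (4) copies of the 256 KiB label, two at the start
 * of the device and two at the end, so vdev_label_offset() resolves to
 * approximately:
 *
 *	offset = l * sizeof(vdev_label_t) + (l < VDEV_LABELS / 2 ?
 *	    0 : psize - VDEV_LABELS * sizeof(vdev_label_t));
 *
 * which is why psize is first truncated with P2ALIGN to a multiple of
 * the label size before the per-label offsets are computed.
 */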

static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;
	uint64_t id, txg, known_txg;
	char *pname;
	int i;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error == 0) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
			error = EINVAL;
		else if (pp->mediasize < SPA_MINDEVSIZE)
			error = EINVAL;
		if (error != 0)
			g_access(cp, -1, 0, 0);
	}
	if (error != 0)
		g_detach(cp);
	return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}
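
/*
 * Note the lock dance in the taste loop above: the GEOM topology lock
 * must be held to walk g_classes and to attach/detach the tasting
 * consumer, but it is dropped around vdev_geom_read_config() because
 * label reads sleep in biowait() (vdev_geom_read_config() begins with
 * g_topology_assert_not()).  Our own ZFS::VDEV class is skipped, since
 * its geoms are pure consumers and expose no providers to taste.
 */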

static uint64_t
vdev_geom_read_guid(struct g_consumer *cp)
{
	nvlist_t *config;
	uint64_t guid;

	g_topology_assert_not();

	guid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		guid = nvlist_get_guid(config);
		nvlist_free(config);
	}
	return (guid);
}

static struct g_consumer *
vdev_geom_attach_by_guid(uint64_t guid)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				pguid = vdev_geom_read_guid(zcp);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (pguid != guid)
					continue;
				cp = vdev_geom_attach(pp);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to attach to %s.\n",
					    pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guid(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guid(vd->vdev_guid);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.",
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju] failed.",
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t guid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			guid = vdev_geom_read_guid(cp);
			g_topology_lock();
			if (guid != vd->vdev_guid) {
				vdev_geom_detach(cp, 0);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju != %ju.", vd->vdev_path,
				    (uintmax_t)vd->vdev_guid, (uintmax_t)guid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	/*
	 * If we're creating or splitting a pool, just find the GEOM provider
	 * by its name and ignore GUID mismatches.
	 */
	if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_splitting_newspa == B_TRUE)
		cp = vdev_geom_open_by_path(vd, 0);
	else {
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected guid.  The disks might have merely
			 * moved around, so try all other GEOM providers
			 * to find one with the right guid.
			 */
			cp = vdev_geom_open_by_guid(vd);
		}
	}

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);
		vdev_geom_detach(cp, 0);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
			    vd->vdev_path, error);
			vdev_geom_detach(cp, 0);
			cp = NULL;
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	cp->private = vd;
	vd->vdev_tsd = cp;
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache setting, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	if (vd->vdev_physpath != NULL)
		spa_strfree(vd->vdev_physpath);
	bufsize = sizeof("/dev/") + strlen(pp->name);
	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

	return (0);
}
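
/*
 * Worked example for the ashift computation above: highbit() returns
 * the 1-based position of the most significant set bit, so for a
 * 512-byte-sector provider logical_ashift = highbit(512) - 1 = 9,
 * while an "Advanced Format" disk reporting stripesize 4096 yields
 * physical_ashift = highbit(4096) - 1 = 12 (4 KiB-aligned allocations
 * are preferred even though 512-byte I/O is accepted).
 */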

static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	cp->private = NULL;	/* XXX locking */
	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}

static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE, we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * such requests in the future.
		 */
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If the provider's error is set, we assume it is
			 * being removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_interrupt(zio);
}

static int
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;
	int error;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	}
sendreq:
	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);

	return (ZIO_PIPELINE_STOP);
}
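
/*
 * Summary of the translation performed above: ZIOs map onto GEOM bios
 * as ZIO_TYPE_READ -> BIO_READ, ZIO_TYPE_WRITE -> BIO_WRITE,
 * ZIO_TYPE_FREE -> BIO_DELETE (TRIM/UNMAP), and the
 * DKIOCFLUSHWRITECACHE ioctl -> BIO_FLUSH with BIO_ORDERED set.  In
 * every case completion is reported asynchronously from
 * vdev_geom_io_intr(), so vdev_geom_io_start() always returns
 * ZIO_PIPELINE_STOP to keep the ZIO pipeline from advancing the zio
 * itself.
 */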

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,			/* vdev_op_state_change */
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};