/* vdev_geom.c — FreeBSD revision 330522 */
1184054Slulf/*
2184054Slulf * CDDL HEADER START
3184054Slulf *
4184054Slulf * The contents of this file are subject to the terms of the
5184054Slulf * Common Development and Distribution License (the "License").
6184054Slulf * You may not use this file except in compliance with the License.
7184054Slulf *
8184054Slulf * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9184054Slulf * or http://www.opensolaris.org/os/licensing.
10184054Slulf * See the License for the specific language governing permissions
11184054Slulf * and limitations under the License.
12184054Slulf *
13184054Slulf * When distributing Covered Code, include this CDDL HEADER in each
14184054Slulf * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15184054Slulf * If applicable, add the following below this CDDL HEADER, with the
16184054Slulf * fields enclosed by brackets "[]" replaced with your own identifying
17184054Slulf * information: Portions Copyright [yyyy] [name of copyright owner]
18184054Slulf *
19184054Slulf * CDDL HEADER END
20184054Slulf */
21184054Slulf/*
22184054Slulf * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23184054Slulf * All rights reserved.
24184054Slulf *
25184054Slulf * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26184054Slulf */
27184054Slulf
28184054Slulf#include <sys/zfs_context.h>
29184054Slulf#include <sys/param.h>
30184054Slulf#include <sys/kernel.h>
31184054Slulf#include <sys/bio.h>
32184054Slulf#include <sys/disk.h>
33184054Slulf#include <sys/spa.h>
34184054Slulf#include <sys/spa_impl.h>
35184054Slulf#include <sys/vdev_impl.h>
36184054Slulf#include <sys/fs/zfs.h>
37184054Slulf#include <sys/zio.h>
38184054Slulf#include <geom/geom.h>
39184054Slulf#include <geom/geom_int.h>
40184054Slulf
41184054Slulf/*
42184054Slulf * Virtual device vector for GEOM.
43184054Slulf */
44184054Slulf
/* Forward declaration; definition below, after the helpers it calls. */
static g_attrchanged_t vdev_geom_attrchanged;

/* GEOM class descriptor for ZFS vdev consumers. */
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

/*
 * One element of the per-consumer vdev list.  A single GEOM consumer may
 * back several vdevs (e.g. during pool split), so each consumer carries a
 * list of the vdev_t's attached to it.
 */
struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem)	elems;
	vdev_t				*vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
/*
 * The list head is stored directly in g_consumer.private, so the two
 * must have identical size; fail the build otherwise.
 */
_Static_assert(sizeof(((struct g_consumer*)NULL)->private)
    == sizeof(struct consumer_priv_t*),
    "consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;
85184054Slulf
86184054Slulfstatic void
87184054Slulfvdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
88184054Slulf{
89184054Slulf	int error;
90184054Slulf	uint16_t rate;
91184054Slulf
92184054Slulf	error = g_getattr("GEOM::rotation_rate", cp, &rate);
93184054Slulf	if (error == 0)
94184054Slulf		vd->vdev_rotation_rate = rate;
95184054Slulf	else
96184054Slulf		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
97184054Slulf}
98184054Slulf
/*
 * Refresh vd->vdev_physpath from the GEOM::physpath attribute of the
 * consumer backing the vdev.  If the path changed (or, when
 * do_null_update is true, was previously unset), schedule an async
 * config update so the new path is persisted in the pool config.
 * Caller must hold the GEOM topology lock.
 */
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
		       boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		/* Install the new path before freeing the old one. */
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
						vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
	if (needs_update)
		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);

}
136184054Slulf
137184054Slulfstatic void
138184054Slulfvdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
139184054Slulf{
140184054Slulf	char *old_physpath;
141184054Slulf	struct consumer_priv_t *priv;
142184054Slulf	struct consumer_vdev_elem *elem;
143184054Slulf	int error;
144184054Slulf
145184054Slulf	priv = (struct consumer_priv_t*)&cp->private;
146184054Slulf	if (SLIST_EMPTY(priv))
147184054Slulf		return;
148184054Slulf
149184054Slulf	SLIST_FOREACH(elem, priv, elems) {
150184054Slulf		vdev_t *vd = elem->vd;
151184054Slulf		if (strcmp(attr, "GEOM::rotation_rate") == 0) {
152184054Slulf			vdev_geom_set_rotation_rate(vd, cp);
153184054Slulf			return;
154184054Slulf		}
155184054Slulf		if (strcmp(attr, "GEOM::physpath") == 0) {
156184054Slulf			vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE);
157184054Slulf			return;
158184054Slulf		}
159184054Slulf	}
160184054Slulf}
161184054Slulf
162184054Slulfstatic void
163184054Slulfvdev_geom_orphan(struct g_consumer *cp)
164184054Slulf{
165184054Slulf	struct consumer_priv_t *priv;
166184054Slulf	struct consumer_vdev_elem *elem;
167184054Slulf
168184054Slulf	g_topology_assert();
169184054Slulf
170184054Slulf	priv = (struct consumer_priv_t*)&cp->private;
171184054Slulf	if (SLIST_EMPTY(priv))
172184054Slulf		/* Vdev close in progress.  Ignore the event. */
173184054Slulf		return;
174184054Slulf
175184054Slulf	/*
176184054Slulf	 * Orphan callbacks occur from the GEOM event thread.
177184054Slulf	 * Concurrent with this call, new I/O requests may be
178184054Slulf	 * working their way through GEOM about to find out
179184054Slulf	 * (only once executed by the g_down thread) that we've
180184054Slulf	 * been orphaned from our disk provider.  These I/Os
181184054Slulf	 * must be retired before we can detach our consumer.
182184054Slulf	 * This is most easily achieved by acquiring the
183184054Slulf	 * SPA ZIO configuration lock as a writer, but doing
184184054Slulf	 * so with the GEOM topology lock held would cause
185184054Slulf	 * a lock order reversal.  Instead, rely on the SPA's
186184054Slulf	 * async removal support to invoke a close on this
187184054Slulf	 * vdev once it is safe to do so.
188184054Slulf	 */
189184054Slulf	SLIST_FOREACH(elem, priv, elems) {
190184054Slulf		vdev_t *vd = elem->vd;
191184054Slulf
192184054Slulf		vd->vdev_remove_wanted = B_TRUE;
193184054Slulf		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
194184054Slulf	}
195184054Slulf}
196184054Slulf
197184054Slulfstatic struct g_consumer *
198184054Slulfvdev_geom_attach(struct g_provider *pp, vdev_t *vd)
199184054Slulf{
200184054Slulf	struct g_geom *gp;
201184054Slulf	struct g_consumer *cp;
202184054Slulf	int error;
203184054Slulf
204184054Slulf	g_topology_assert();
205184054Slulf
206184054Slulf	ZFS_LOG(1, "Attaching to %s.", pp->name);
207184054Slulf
208184054Slulf	if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
209184054Slulf		ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n",
210184054Slulf		    pp->name, pp->sectorsize);
211184054Slulf		return (NULL);
212184054Slulf	} else if (pp->mediasize < SPA_MINDEVSIZE) {
213184054Slulf		ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n",
214184054Slulf		    pp->name, pp->mediasize);
215184054Slulf		return (NULL);
216184054Slulf	}
217184054Slulf
218184054Slulf	/* Do we have geom already? No? Create one. */
219184054Slulf	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
220184054Slulf		if (gp->flags & G_GEOM_WITHER)
221184054Slulf			continue;
222184054Slulf		if (strcmp(gp->name, "zfs::vdev") != 0)
223184054Slulf			continue;
224184054Slulf		break;
225184054Slulf	}
226184054Slulf	if (gp == NULL) {
227184054Slulf		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
228184054Slulf		gp->orphan = vdev_geom_orphan;
229184054Slulf		gp->attrchanged = vdev_geom_attrchanged;
230184054Slulf		cp = g_new_consumer(gp);
231184054Slulf		error = g_attach(cp, pp);
232184054Slulf		if (error != 0) {
233184054Slulf			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
234184054Slulf			    __LINE__, error);
235184054Slulf			vdev_geom_detach(cp, B_FALSE);
236184054Slulf			return (NULL);
237184054Slulf		}
238184054Slulf		error = g_access(cp, 1, 0, 1);
239184054Slulf		if (error != 0) {
240184054Slulf			ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__,
241184054Slulf			       __LINE__, error);
242184054Slulf			vdev_geom_detach(cp, B_FALSE);
243184054Slulf			return (NULL);
244184054Slulf		}
245184054Slulf		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
246184054Slulf	} else {
247184054Slulf		/* Check if we are already connected to this provider. */
248184054Slulf		LIST_FOREACH(cp, &gp->consumer, consumer) {
249184054Slulf			if (cp->provider == pp) {
250184054Slulf				ZFS_LOG(1, "Found consumer for %s.", pp->name);
251184054Slulf				break;
252184054Slulf			}
253184054Slulf		}
254184054Slulf		if (cp == NULL) {
255184054Slulf			cp = g_new_consumer(gp);
256184054Slulf			error = g_attach(cp, pp);
257184054Slulf			if (error != 0) {
258184054Slulf				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
259184054Slulf				    __func__, __LINE__, error);
260184054Slulf				vdev_geom_detach(cp, B_FALSE);
261184054Slulf				return (NULL);
262184054Slulf			}
263184054Slulf			error = g_access(cp, 1, 0, 1);
264184054Slulf			if (error != 0) {
265184054Slulf				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
266184054Slulf				    __func__, __LINE__, error);
267184054Slulf				vdev_geom_detach(cp, B_FALSE);
268184054Slulf				return (NULL);
269184054Slulf			}
270184054Slulf			ZFS_LOG(1, "Created consumer for %s.", pp->name);
271184054Slulf		} else {
272184054Slulf			error = g_access(cp, 1, 0, 1);
273184054Slulf			if (error != 0) {
274184054Slulf				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
275184054Slulf				    __func__, __LINE__, error);
276184054Slulf				return (NULL);
277184054Slulf			}
278184054Slulf			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
279184054Slulf		}
280184054Slulf	}
281184054Slulf
282184054Slulf	if (vd != NULL)
283184054Slulf		vd->vdev_tsd = cp;
284184054Slulf
285184054Slulf	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
286184054Slulf	return (cp);
287184054Slulf}
288184054Slulf
/*
 * Release our access to consumer cp, destroying the consumer when its
 * read/exclusive counts reach zero and withering the geom when its last
 * consumer is gone.  open_for_read indicates the caller holds the
 * (1, 0, 1) access acquired by vdev_geom_attach() and it should be
 * dropped here.  Caller must hold the topology lock.
 */
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		/* Drop any remaining write access before detaching. */
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}
319184054Slulf
/*
 * Close the GEOM consumer backing vd: remove vd from the consumer's
 * vdev list, clear vd->vdev_tsd, and drop our access via
 * vdev_geom_detach().  No-op if the vdev has no consumer.  Caller must
 * hold the topology lock.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t*)&cp->private;
	vd->vdev_tsd = NULL;
	/* SAFE variant: we free elements while walking the list. */
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}
347184054Slulf
348184054Slulf/*
349184054Slulf * Issue one or more bios to the vdev in parallel
350184054Slulf * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
351184054Slulf * operation is described by parallel entries from each array.  There may be
352184054Slulf * more bios actually issued than entries in the array
353184054Slulf */
/*
 * Issue one or more bios to the vdev in parallel
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
 * operation is described by parallel entries from each array.  There may be
 * more bios actually issued than entries in the array
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	/* Largest per-bio transfer: MAXPHYS rounded down to a sector. */
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands ? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof(struct bio*);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT((off % cp->provider->sectorsize) == 0);
		ASSERT((s % cp->provider->sectorsize) == 0);

		/* Split each command into maxio-sized chunks. */
		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;	/* synchronous: biowait below */
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT(j == n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		/* Walk chunks in the same order they were issued above. */
		for (; off < end; off += maxio, s -= maxio, j++) {
			/* Record the first error seen for this command. */
			errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}
409184054Slulf
410184054Slulf/*
411184054Slulf * Read the vdev config from a device.  Return the number of valid labels that
412184054Slulf * were found.  The vdev config will be returned in config if and only if at
413184054Slulf * least one valid label was found.
414184054Slulf */
/*
 * Read the vdev config from a device.  Return the number of valid labels that
 * were found.  The vdev config will be returned in config if and only if at
 * least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	/* I/O below sleeps, so the topology lock must not be held. */
	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	/* Usable size, truncated to a whole number of labels. */
	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* Read size: sizeof(vdev_phys_t) rounded up to a full sector. */
	size = sizeof(*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof(vdev_lists[0]->vp_nvlist);

	*config = NULL;
	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		/* Skip the boot block; read starting at the vdev_phys. */
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT(offsets[l] % pp->sectorsize == 0);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		/* Reject labels without a sane pool state. */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		/* Non-spare/L2ARC labels must carry a nonzero txg. */
		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		nlabels++;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (nlabels);
}
494184054Slulf
495184054Slulfstatic void
496184054Slulfresize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
497184054Slulf{
498184054Slulf	nvlist_t **new_configs;
499184054Slulf	uint64_t i;
500184054Slulf
501184054Slulf	if (id < *count)
502184054Slulf		return;
503184054Slulf	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
504184054Slulf	    KM_SLEEP);
505184054Slulf	for (i = 0; i < *count; i++)
506184054Slulf		new_configs[i] = (*configs)[i];
507184054Slulf	if (*configs != NULL)
508184054Slulf		kmem_free(*configs, *count * sizeof(void *));
509184054Slulf	*configs = new_configs;
510184054Slulf	*count = id + 1;
511184054Slulf}
512184054Slulf
513184054Slulfstatic void
514184054Slulfprocess_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
515184054Slulf    const char *name, uint64_t* known_pool_guid)
516184054Slulf{
517184054Slulf	nvlist_t *vdev_tree;
518184054Slulf	uint64_t pool_guid;
519184054Slulf	uint64_t vdev_guid, known_guid;
520184054Slulf	uint64_t id, txg, known_txg;
521184054Slulf	char *pname;
522184054Slulf	int i;
523184054Slulf
524184054Slulf	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
525184054Slulf	    strcmp(pname, name) != 0)
526184054Slulf		goto ignore;
527184054Slulf
528184054Slulf	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
529184054Slulf		goto ignore;
530184054Slulf
531184054Slulf	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
532184054Slulf		goto ignore;
533184054Slulf
534184054Slulf	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
535184054Slulf		goto ignore;
536184054Slulf
537184054Slulf	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
538184054Slulf		goto ignore;
539184054Slulf
540184054Slulf	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
541184054Slulf
542184054Slulf	if (*known_pool_guid != 0) {
543184054Slulf		if (pool_guid != *known_pool_guid)
544184054Slulf			goto ignore;
545184054Slulf	} else
546184054Slulf		*known_pool_guid = pool_guid;
547184054Slulf
548184054Slulf	resize_configs(configs, count, id);
549184054Slulf
550184054Slulf	if ((*configs)[id] != NULL) {
551184054Slulf		VERIFY(nvlist_lookup_uint64((*configs)[id],
552184054Slulf		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
553184054Slulf		if (txg <= known_txg)
554184054Slulf			goto ignore;
555184054Slulf		nvlist_free((*configs)[id]);
556184054Slulf	}
557184054Slulf
558184054Slulf	(*configs)[id] = cfg;
559184054Slulf	return;
560184054Slulf
561184054Slulfignore:
562184054Slulf	nvlist_free(cfg);
563184054Slulf}
564184054Slulf
565184054Slulfint
566184054Slulfvdev_geom_read_pool_label(const char *name,
567184054Slulf    nvlist_t ***configs, uint64_t *count)
568184054Slulf{
569184054Slulf	struct g_class *mp;
570184054Slulf	struct g_geom *gp;
571184054Slulf	struct g_provider *pp;
572184054Slulf	struct g_consumer *zcp;
573184054Slulf	nvlist_t *vdev_cfg;
574184054Slulf	uint64_t pool_guid;
575184054Slulf	int error, nlabels;
576184054Slulf
577184054Slulf	DROP_GIANT();
578184054Slulf	g_topology_lock();
579184054Slulf
580184054Slulf	*configs = NULL;
581184054Slulf	*count = 0;
582184054Slulf	pool_guid = 0;
583184054Slulf	LIST_FOREACH(mp, &g_classes, class) {
584184054Slulf		if (mp == &zfs_vdev_class)
585184054Slulf			continue;
586184054Slulf		LIST_FOREACH(gp, &mp->geom, geom) {
587184054Slulf			if (gp->flags & G_GEOM_WITHER)
588184054Slulf				continue;
589184054Slulf			LIST_FOREACH(pp, &gp->provider, provider) {
590184054Slulf				if (pp->flags & G_PF_WITHER)
591184054Slulf					continue;
592184054Slulf				zcp = vdev_geom_attach(pp, NULL);
593184054Slulf				if (zcp == NULL)
594184054Slulf					continue;
595184054Slulf				g_topology_unlock();
596184054Slulf				nlabels = vdev_geom_read_config(zcp, &vdev_cfg);
597184054Slulf				g_topology_lock();
598184054Slulf				vdev_geom_detach(zcp, B_TRUE);
599184054Slulf				if (nlabels == 0)
600184054Slulf					continue;
601184054Slulf				ZFS_LOG(1, "successfully read vdev config");
602184054Slulf
603184054Slulf				process_vdev_config(configs, count,
604184054Slulf				    vdev_cfg, name, &pool_guid);
605184054Slulf			}
606184054Slulf		}
607184054Slulf	}
608184054Slulf	g_topology_unlock();
609184054Slulf	PICKUP_GIANT();
610184054Slulf
611184054Slulf	return (*count > 0 ? 0 : ENOENT);
612184054Slulf}
613184054Slulf
/*
 * Result of comparing a provider's labels against a vdev's guids.
 * Ordered by preference: higher values are better matches, so callers
 * can compare with '>' when choosing among candidate providers.
 */
enum match {
	NO_MATCH = 0,		/* No matching labels found */
	TOPGUID_MATCH = 1,	/* Labels match top guid, not vdev guid */
	ZERO_MATCH = 1,		/* Base for ZERO_MATCH + nlabels; never returned itself */
	ONE_MATCH = 2,		/* 1 label matching the vdev_guid */
	TWO_MATCH = 3,		/* 2 labels matching the vdev_guid */
	THREE_MATCH = 4,	/* 3 labels matching the vdev_guid */
	FULL_MATCH = 5		/* all labels match the vdev_guid */
};
623184054Slulf
/*
 * Taste provider pp and grade how well its labels match vdev vd: pool
 * guid must agree (when present), and the vdev guid match strength is
 * returned as ZERO_MATCH + number of valid labels, with TOPGUID_MATCH as
 * a weaker fallback for a racing top-level detach.  Called with the
 * topology lock held; temporarily drops it around the label read.
 */
static enum match
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	nvlist_t *config;
	uint64_t pool_guid, top_guid, vdev_guid;
	struct g_consumer *cp;
	int nlabels;

	cp = vdev_geom_attach(pp, NULL);
	if (cp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (NO_MATCH);
	}
	/* vdev_geom_read_config() sleeps; drop the topology lock. */
	g_topology_unlock();
	nlabels = vdev_geom_read_config(cp, &config);
	if (nlabels == 0) {
		g_topology_lock();
		vdev_geom_detach(cp, B_TRUE);
		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
		return (NO_MATCH);
	}
	g_topology_lock();
	vdev_geom_detach(cp, B_TRUE);

	/* Missing guids stay 0; handled by the checks below. */
	pool_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
	top_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
	vdev_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	nvlist_free(config);

	/*
	 * Check that the label's pool guid matches the desired guid.
	 * Inactive spares and L2ARCs do not have any pool guid in the label.
	 */
	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
		    pp->name,
		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
		return (NO_MATCH);
	}

	/*
	 * Check that the label's vdev guid matches the desired guid.
	 * The second condition handles possible race on vdev detach, when
	 * remaining vdev receives GUID of destroyed top level mirror vdev.
	 */
	if (vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", pp->name);
		return (ZERO_MATCH + nlabels);
	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
		return (TOPGUID_MATCH);
	}
	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
	return (NO_MATCH);
}
684184054Slulf
685184054Slulfstatic struct g_consumer *
686184054Slulfvdev_geom_attach_by_guids(vdev_t *vd)
687184054Slulf{
688184054Slulf	struct g_class *mp;
689184054Slulf	struct g_geom *gp;
690184054Slulf	struct g_provider *pp, *best_pp;
691184054Slulf	struct g_consumer *cp;
692184054Slulf	enum match match, best_match;
693184054Slulf
694184054Slulf	g_topology_assert();
695184054Slulf
696184054Slulf	cp = NULL;
697184054Slulf	best_pp = NULL;
698184054Slulf	best_match = NO_MATCH;
699184054Slulf	LIST_FOREACH(mp, &g_classes, class) {
700184054Slulf		if (mp == &zfs_vdev_class)
701184054Slulf			continue;
702184054Slulf		LIST_FOREACH(gp, &mp->geom, geom) {
703184054Slulf			if (gp->flags & G_GEOM_WITHER)
704184054Slulf				continue;
705184054Slulf			LIST_FOREACH(pp, &gp->provider, provider) {
706184054Slulf				match = vdev_attach_ok(vd, pp);
707184054Slulf				if (match > best_match) {
708184054Slulf					best_match = match;
709184054Slulf					best_pp = pp;
710184054Slulf				}
711184054Slulf				if (match == FULL_MATCH)
712184054Slulf					goto out;
713184054Slulf			}
714184054Slulf		}
715184054Slulf	}
716184054Slulf
717184054Slulfout:
718184054Slulf	if (best_pp) {
719184054Slulf		cp = vdev_geom_attach(best_pp, vd);
720184054Slulf		if (cp == NULL) {
721184054Slulf			printf("ZFS WARNING: Unable to attach to %s.\n",
722184054Slulf			    best_pp->name);
723184054Slulf		}
724184054Slulf	}
725184054Slulf	return (cp);
726184054Slulf}
727184054Slulf
728184054Slulfstatic struct g_consumer *
729184054Slulfvdev_geom_open_by_guids(vdev_t *vd)
730184054Slulf{
731184054Slulf	struct g_consumer *cp;
732184054Slulf	char *buf;
733184054Slulf	size_t len;
734184054Slulf
735184054Slulf	g_topology_assert();
736184054Slulf
737184054Slulf	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
738184054Slulf		(uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
739184054Slulf	cp = vdev_geom_attach_by_guids(vd);
740184054Slulf	if (cp != NULL) {
741184054Slulf		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
742184054Slulf		buf = kmem_alloc(len, KM_SLEEP);
743184054Slulf
744184054Slulf		snprintf(buf, len, "/dev/%s", cp->provider->name);
745184054Slulf		spa_strfree(vd->vdev_path);
746184054Slulf		vd->vdev_path = buf;
747184054Slulf
748184054Slulf		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
749184054Slulf		    (uintmax_t)spa_guid(vd->vdev_spa),
750184054Slulf		    (uintmax_t)vd->vdev_guid, cp->provider->name);
751184054Slulf	} else {
752184054Slulf		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
753184054Slulf		    (uintmax_t)spa_guid(vd->vdev_spa),
754184054Slulf		    (uintmax_t)vd->vdev_guid);
755184054Slulf	}
756184054Slulf
757184054Slulf	return (cp);
758184054Slulf}
759184054Slulf
760184054Slulfstatic struct g_consumer *
761184054Slulfvdev_geom_open_by_path(vdev_t *vd, int check_guid)
762184054Slulf{
763184054Slulf	struct g_provider *pp;
764184054Slulf	struct g_consumer *cp;
765184054Slulf
766184054Slulf	g_topology_assert();
767184054Slulf
768184054Slulf	cp = NULL;
769184054Slulf	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
770184054Slulf	if (pp != NULL) {
771184054Slulf		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
772184054Slulf		if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH)
773184054Slulf			cp = vdev_geom_attach(pp, vd);
774184054Slulf	}
775184054Slulf
776184054Slulf	return (cp);
777184054Slulf}
778184054Slulf
779184054Slulfstatic int
780184054Slulfvdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
781184054Slulf    uint64_t *logical_ashift, uint64_t *physical_ashift)
782184054Slulf{
783184054Slulf	struct g_provider *pp;
784184054Slulf	struct g_consumer *cp;
785184054Slulf	size_t bufsize;
786184054Slulf	int error;
787184054Slulf
788184054Slulf	/* Set the TLS to indicate downstack that we should not access zvols*/
789184054Slulf	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);
790184054Slulf
791184054Slulf	/*
792184054Slulf	 * We must have a pathname, and it must be absolute.
793184054Slulf	 */
794184054Slulf	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
795184054Slulf		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
796184054Slulf		return (EINVAL);
797184054Slulf	}
798184054Slulf
799184054Slulf	/*
800184054Slulf	 * Reopen the device if it's not currently open. Otherwise,
801184054Slulf	 * just update the physical size of the device.
802184054Slulf	 */
803184054Slulf	if ((cp = vd->vdev_tsd) != NULL) {
804184054Slulf		ASSERT(vd->vdev_reopening);
805184054Slulf		goto skip_open;
806184054Slulf	}
807184054Slulf
808184054Slulf	DROP_GIANT();
809184054Slulf	g_topology_lock();
810184054Slulf	error = 0;
811184054Slulf
812184054Slulf	if (vd->vdev_spa->spa_splitting_newspa ||
813184054Slulf	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
814184054Slulf	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
815184054Slulf	     vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
816184054Slulf		/*
817184054Slulf		 * We are dealing with a vdev that hasn't been previously
818184054Slulf		 * opened (since boot), and we are not loading an
819184054Slulf		 * existing pool configuration.  This looks like a
820184054Slulf		 * vdev add operation to a new or existing pool.
821184054Slulf		 * Assume the user knows what he/she is doing and find
822184054Slulf		 * GEOM provider by its name, ignoring GUID mismatches.
823184054Slulf		 *
824184054Slulf		 * XXPOLICY: It would be safer to only allow a device
825184054Slulf		 *           that is unlabeled or labeled but missing
826184054Slulf		 *           GUID information to be opened in this fashion,
827184054Slulf		 *           unless we are doing a split, in which case we
828184054Slulf		 *           should allow any guid.
829184054Slulf		 */
830184054Slulf		cp = vdev_geom_open_by_path(vd, 0);
831184054Slulf	} else {
832184054Slulf		/*
833184054Slulf		 * Try using the recorded path for this device, but only
834184054Slulf		 * accept it if its label data contains the expected GUIDs.
835184054Slulf		 */
836184054Slulf		cp = vdev_geom_open_by_path(vd, 1);
837184054Slulf		if (cp == NULL) {
838184054Slulf			/*
839184054Slulf			 * The device at vd->vdev_path doesn't have the
840184054Slulf			 * expected GUIDs. The disks might have merely
841184054Slulf			 * moved around so try all other GEOM providers
842184054Slulf			 * to find one with the right GUIDs.
843184054Slulf			 */
844184054Slulf			cp = vdev_geom_open_by_guids(vd);
845184054Slulf		}
846184054Slulf	}
847184054Slulf
848184054Slulf	/* Clear the TLS now that tasting is done */
849184054Slulf	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);
850184054Slulf
851184054Slulf	if (cp == NULL) {
852184054Slulf		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
853184054Slulf		error = ENOENT;
854184054Slulf	} else {
855184054Slulf		struct consumer_priv_t *priv;
856184054Slulf		struct consumer_vdev_elem *elem;
857184054Slulf		int spamode;
858184054Slulf
859184054Slulf		priv = (struct consumer_priv_t*)&cp->private;
860184054Slulf		if (cp->private == NULL)
861184054Slulf			SLIST_INIT(priv);
862184054Slulf		elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO);
863184054Slulf		elem->vd = vd;
864184054Slulf		SLIST_INSERT_HEAD(priv, elem, elems);
865184054Slulf
866184054Slulf		spamode = spa_mode(vd->vdev_spa);
867184054Slulf		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
868184054Slulf		    !ISP2(cp->provider->sectorsize)) {
869184054Slulf			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
870184054Slulf			    cp->provider->name);
871184054Slulf
872184054Slulf			vdev_geom_close_locked(vd);
873184054Slulf			error = EINVAL;
874184054Slulf			cp = NULL;
875184054Slulf		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
876184054Slulf			int i;
877184054Slulf
878184054Slulf			for (i = 0; i < 5; i++) {
879184054Slulf				error = g_access(cp, 0, 1, 0);
880184054Slulf				if (error == 0)
881184054Slulf					break;
882184054Slulf				g_topology_unlock();
883184054Slulf				tsleep(vd, 0, "vdev", hz / 2);
884184054Slulf				g_topology_lock();
885184054Slulf			}
886184054Slulf			if (error != 0) {
887184054Slulf				printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
888184054Slulf				    cp->provider->name, error);
889184054Slulf				vdev_geom_close_locked(vd);
890184054Slulf				cp = NULL;
891184054Slulf			}
892184054Slulf		}
893184054Slulf	}
894184054Slulf
895184054Slulf	/* Fetch initial physical path information for this device. */
896184054Slulf	if (cp != NULL) {
897184054Slulf		vdev_geom_attrchanged(cp, "GEOM::physpath");
898184054Slulf
899184054Slulf		/* Set other GEOM characteristics */
900184054Slulf		vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE);
901184054Slulf		vdev_geom_set_rotation_rate(vd, cp);
902184054Slulf	}
903184054Slulf
904184054Slulf	g_topology_unlock();
905184054Slulf	PICKUP_GIANT();
906184054Slulf	if (cp == NULL) {
907184054Slulf		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
908184054Slulf		return (error);
909184054Slulf	}
910184054Slulfskip_open:
911184054Slulf	pp = cp->provider;
912184054Slulf
913184054Slulf	/*
914184054Slulf	 * Determine the actual size of the device.
915184054Slulf	 */
916184054Slulf	*max_psize = *psize = pp->mediasize;
917184054Slulf
918184054Slulf	/*
919184054Slulf	 * Determine the device's minimum transfer size and preferred
920184054Slulf	 * transfer size.
921184054Slulf	 */
922184054Slulf	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
923184054Slulf	*physical_ashift = 0;
924184054Slulf	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
925184054Slulf	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
926184054Slulf		*physical_ashift = highbit(pp->stripesize) - 1;
927184054Slulf
928184054Slulf	/*
929184054Slulf	 * Clear the nowritecache settings, so that on a vdev_reopen()
930184054Slulf	 * we will try again.
931184054Slulf	 */
932184054Slulf	vd->vdev_nowritecache = B_FALSE;
933184054Slulf
934184054Slulf	return (0);
935184054Slulf}
936184054Slulf
937184054Slulfstatic void
938184054Slulfvdev_geom_close(vdev_t *vd)
939184054Slulf{
940184054Slulf	struct g_consumer *cp;
941184054Slulf
942184054Slulf	cp = vd->vdev_tsd;
943184054Slulf
944184054Slulf	DROP_GIANT();
945184054Slulf	g_topology_lock();
946184054Slulf
947184054Slulf	if (!vd->vdev_reopening ||
948184054Slulf	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
949184054Slulf	    (cp->provider != NULL && cp->provider->error != 0))))
950184054Slulf		vdev_geom_close_locked(vd);
951184054Slulf
952184054Slulf	g_topology_unlock();
953184054Slulf	PICKUP_GIANT();
954184054Slulf}
955184054Slulf
956184054Slulfstatic void
957184054Slulfvdev_geom_io_intr(struct bio *bp)
958184054Slulf{
959184054Slulf	vdev_t *vd;
960184054Slulf	zio_t *zio;
961184054Slulf
962184054Slulf	zio = bp->bio_caller1;
963184054Slulf	vd = zio->io_vd;
964184054Slulf	zio->io_error = bp->bio_error;
965184054Slulf	if (zio->io_error == 0 && bp->bio_resid != 0)
966184054Slulf		zio->io_error = SET_ERROR(EIO);
967184054Slulf
968184054Slulf	switch(zio->io_error) {
969184054Slulf	case ENOTSUP:
970184054Slulf		/*
971184054Slulf		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
972184054Slulf		 * that future attempts will never succeed. In this case
973184054Slulf		 * we set a persistent flag so that we don't bother with
974184054Slulf		 * requests in the future.
975184054Slulf		 */
976184054Slulf		switch(bp->bio_cmd) {
977184054Slulf		case BIO_FLUSH:
978184054Slulf			vd->vdev_nowritecache = B_TRUE;
979184054Slulf			break;
980184054Slulf		case BIO_DELETE:
981184054Slulf			vd->vdev_notrim = B_TRUE;
982184054Slulf			break;
983184054Slulf		}
984184054Slulf		break;
985184054Slulf	case ENXIO:
986184054Slulf		if (!vd->vdev_remove_wanted) {
987184054Slulf			/*
988184054Slulf			 * If provider's error is set we assume it is being
989184054Slulf			 * removed.
990184054Slulf			 */
991184054Slulf			if (bp->bio_to->error != 0) {
992184054Slulf				vd->vdev_remove_wanted = B_TRUE;
993184054Slulf				spa_async_request(zio->io_spa,
994184054Slulf				    SPA_ASYNC_REMOVE);
995184054Slulf			} else if (!vd->vdev_delayed_close) {
996184054Slulf				vd->vdev_delayed_close = B_TRUE;
997184054Slulf			}
998184054Slulf		}
999184054Slulf		break;
1000184054Slulf	}
1001184054Slulf	g_destroy_bio(bp);
1002184054Slulf	zio_delay_interrupt(zio);
1003184054Slulf}
1004184054Slulf
1005184054Slulfstatic void
1006184054Slulfvdev_geom_io_start(zio_t *zio)
1007184054Slulf{
1008184054Slulf	vdev_t *vd;
1009184054Slulf	struct g_consumer *cp;
1010184054Slulf	struct bio *bp;
1011184054Slulf	int error;
1012184054Slulf
1013184054Slulf	vd = zio->io_vd;
1014184054Slulf
1015184054Slulf	switch (zio->io_type) {
1016184054Slulf	case ZIO_TYPE_IOCTL:
1017184054Slulf		/* XXPOLICY */
1018184054Slulf		if (!vdev_readable(vd)) {
1019184054Slulf			zio->io_error = SET_ERROR(ENXIO);
1020184054Slulf			zio_interrupt(zio);
1021184054Slulf			return;
1022184054Slulf		} else {
1023184054Slulf			switch (zio->io_cmd) {
1024184054Slulf			case DKIOCFLUSHWRITECACHE:
1025184054Slulf				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
1026184054Slulf					break;
1027184054Slulf				if (vd->vdev_nowritecache) {
1028184054Slulf					zio->io_error = SET_ERROR(ENOTSUP);
1029184054Slulf					break;
1030184054Slulf				}
1031184054Slulf				goto sendreq;
1032184054Slulf			default:
1033184054Slulf				zio->io_error = SET_ERROR(ENOTSUP);
1034184054Slulf			}
1035184054Slulf		}
1036184054Slulf
1037184054Slulf		zio_execute(zio);
1038184054Slulf		return;
1039184054Slulf	case ZIO_TYPE_FREE:
1040184054Slulf		if (vd->vdev_notrim) {
1041184054Slulf			zio->io_error = SET_ERROR(ENOTSUP);
1042184054Slulf		} else if (!vdev_geom_bio_delete_disable) {
1043184054Slulf			goto sendreq;
1044184054Slulf		}
1045184054Slulf		zio_execute(zio);
1046184054Slulf		return;
1047184054Slulf	}
1048184054Slulfsendreq:
1049184054Slulf	ASSERT(zio->io_type == ZIO_TYPE_READ ||
1050184054Slulf	    zio->io_type == ZIO_TYPE_WRITE ||
1051184054Slulf	    zio->io_type == ZIO_TYPE_FREE ||
1052184054Slulf	    zio->io_type == ZIO_TYPE_IOCTL);
1053184054Slulf
1054184054Slulf	cp = vd->vdev_tsd;
1055184054Slulf	if (cp == NULL) {
1056184054Slulf		zio->io_error = SET_ERROR(ENXIO);
1057184054Slulf		zio_interrupt(zio);
1058184054Slulf		return;
1059184054Slulf	}
1060184054Slulf	bp = g_alloc_bio();
1061184054Slulf	bp->bio_caller1 = zio;
1062184054Slulf	switch (zio->io_type) {
1063184054Slulf	case ZIO_TYPE_READ:
1064184054Slulf	case ZIO_TYPE_WRITE:
1065184054Slulf		zio->io_target_timestamp = zio_handle_io_delay(zio);
1066184054Slulf		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
1067184054Slulf		bp->bio_data = zio->io_data;
1068184054Slulf		bp->bio_offset = zio->io_offset;
1069184054Slulf		bp->bio_length = zio->io_size;
1070184054Slulf		break;
1071184054Slulf	case ZIO_TYPE_FREE:
1072184054Slulf		bp->bio_cmd = BIO_DELETE;
1073184054Slulf		bp->bio_data = NULL;
1074184054Slulf		bp->bio_offset = zio->io_offset;
1075184054Slulf		bp->bio_length = zio->io_size;
1076184054Slulf		break;
1077184054Slulf	case ZIO_TYPE_IOCTL:
1078184054Slulf		bp->bio_cmd = BIO_FLUSH;
1079184054Slulf		bp->bio_flags |= BIO_ORDERED;
1080184054Slulf		bp->bio_data = NULL;
1081184054Slulf		bp->bio_offset = cp->provider->mediasize;
1082184054Slulf		bp->bio_length = 0;
1083184054Slulf		break;
1084184054Slulf	}
1085184054Slulf	bp->bio_done = vdev_geom_io_intr;
1086184054Slulf
1087184054Slulf	g_io_request(bp, cp);
1088184054Slulf}
1089184054Slulf
/*
 * Nothing to do at io_done time for GEOM vdevs; all completion work
 * happens in vdev_geom_io_intr().
 */
static void
vdev_geom_io_done(zio_t *zio)
{
}
1094184054Slulf
/* Hold is a no-op for GEOM-backed vdevs. */
static void
vdev_geom_hold(vdev_t *vd)
{
}
1099184054Slulf
/* Rele is a no-op for GEOM-backed vdevs. */
static void
vdev_geom_rele(vdev_t *vd)
{
}
1104184054Slulf
/*
 * Operations vector for GEOM-backed leaf vdevs.  Entries are
 * positional; NOTE(review): field names are declared in vdev_impl.h
 * (not visible here) — confirm the per-slot comments against the
 * vdev_ops_t definition.
 */
vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,			/* presumably the state-change hook; unused here */
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};
1117184054Slulf