g_mirror.c revision 260385
/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/geom/mirror/g_mirror.c 260385 2014-01-07 01:32:23Z scottl $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
TUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
    &g_mirror_disconnect_on_failure);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
TUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");

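/*
 * Wrapper around msleep(9) that logs going to sleep and waking up at
 * debug level 4, so the threads' sleep points can be traced.
 */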
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;
static void g_mirror_init(struct g_class *mp);
static void g_mirror_fini(struct g_class *mp);

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device status
 * from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
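	/*
	 * Drop the softc lock so the worker thread can take it and process
	 * the event, then sleep until the event is marked DONE.  Each sleep
	 * is bounded to 5 seconds and the DONE flag is re-checked, so a
	 * missed wakeup only delays us instead of hanging the thread.
	 */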
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event is sent (inside g_access()), we can
		 * post an event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how retaste events are ignored for consumers opened
		 * for writing: the consumer is detached and destroyed only
		 * after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
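			/*
			 * Hold sc_events_mtx across the wakeup so we cannot
			 * race a waiter that has tested the DONE flag but
			 * has not yet gone to sleep on the event.
			 */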
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	g_topology_unlock();
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_xunlock(&sc->sc_lock);
	sx_destroy(&sc->sc_lock);
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that the same disk as the given one will be returned.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL)
		mirror_metadata_encode(md, sector);
	error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
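	/*
	 * The provider is (or will remain) open for writing: only go idle
	 * once g_mirror_idletime seconds have passed since the last write;
	 * otherwise return how many seconds are left before the components
	 * can be marked clean.
	 */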
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

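	/*
	 * Completion of a regular request issued to a component: tag the
	 * bio and requeue it for the worker thread, which performs all
	 * bookkeeping under the softc lock.
	 */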
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
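		/*
		 * For WRITE and DELETE, drop the failed child from the
		 * parent's accounting: the request can still complete
		 * successfully as long as another component finished it.
		 */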
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently disconnected
	 * we will not be able to read the dump after the reboot if it is
	 * connected and synchronized later. Can we do something better?
	 */
	sc = bp->bio_to->geom->softc;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	bioq_init(&queue);
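	/*
	 * Clone the flush for every active component first, so we can fail
	 * cleanly with ENOMEM before anything has been sent down.
	 */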
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (g_handleattr_int(bp, "GEOM::candelete", 1))
			return;
		else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static int
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (1);
		}
	}
	return (0);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static int
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Put the request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Put the synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Release delayed regular requests that no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
#if 0
		/*
		 * wakeup() is not needed, because this function is called from
		 * the worker thread.
		 */
		wakeup(&sc->sc_queue);
#endif
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed synchronization requests that no longer collide with
 * regular requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ request
 * is sent to the active provider and then a WRITE request (with the data just
 * read) is sent to the provider being synchronized. When the WRITE is
 * finished, a new synchronization request is sent.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		free(bp->bio_data, M_MIRROR);
		g_destroy_bio(bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;
		off_t offset;
		void *data;
		int i;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset == sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			if (sync->ds_bios != NULL) {
				i = (int)(uintptr_t)bp->bio_caller1;
				sync->ds_bios[i] = NULL;
			}
			free(bp->bio_data, M_MIRROR);
			g_destroy_bio(bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
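		/*
		 * Reuse the just-completed bio in place: save the data
		 * pointer across the bzero() and reinitialize the bio as a
		 * READ at the next synchronization offset.
		 */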
		data = bp->bio_data;
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset among the in-flight requests. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
			/*
			 * Checkpoint the progress in the metadata roughly
			 * every 100 MAXPHYS-sized requests, using the
			 * smallest in-flight offset so no data is skipped
			 * after a restart.
			 */
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
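/*
 * TRACK_SIZE is the distance (in bytes) within which a disk head is
 * considered close to the request offset; LOAD_SCALE converts a
 * consumer's outstanding request count into the same units as the
 * head-proximity bonuses used below.
 */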

static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember the last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/*
	 * Update the load estimates: an exponentially weighted moving
	 * average (7/8 old value, 1/8 current queue depth scaled by
	 * LOAD_SCALE) of each consumer's outstanding request count.
	 */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

1592
1593static void
1594g_mirror_register_request(struct bio *bp)
1595{
1596	struct g_mirror_softc *sc;
1597
1598	sc = bp->bio_to->geom->softc;
1599	switch (bp->bio_cmd) {
1600	case BIO_READ:
1601		switch (sc->sc_balance) {
1602		case G_MIRROR_BALANCE_LOAD:
1603			g_mirror_request_load(sc, bp);
1604			break;
1605		case G_MIRROR_BALANCE_PREFER:
1606			g_mirror_request_prefer(sc, bp);
1607			break;
1608		case G_MIRROR_BALANCE_ROUND_ROBIN:
1609			g_mirror_request_round_robin(sc, bp);
1610			break;
1611		case G_MIRROR_BALANCE_SPLIT:
1612			g_mirror_request_split(sc, bp);
1613			break;
1614		}
1615		return;
1616	case BIO_WRITE:
1617	case BIO_DELETE:
1618	    {
1619		struct g_mirror_disk *disk;
1620		struct g_mirror_disk_sync *sync;
1621		struct bio_queue_head queue;
1622		struct g_consumer *cp;
1623		struct bio *cbp;
1624
1625		/*
1626		 * Delay the request if it is colliding with a synchronization
1627		 * request.
1628		 */
1629		if (g_mirror_sync_collision(sc, bp)) {
1630			g_mirror_regular_delay(sc, bp);
1631			return;
1632		}
1633
1634		if (sc->sc_idle)
1635			g_mirror_unidle(sc);
1636		else
1637			sc->sc_last_write = time_uptime;
1638
		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		while ((cbp = bioq_takefirst(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * whether new synchronization requests collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
		free(sc, M_MIRROR);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, 1);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, 0);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Check if we can mark the array as CLEAN and, if we cannot,
		 * how many seconds we should wait before trying again.
		 */
		timeout = g_mirror_idle(sc, -1);
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_first(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
				mtx_lock(&sc->sc_queue_mtx);
			}
			sx_xunlock(&sc->sc_lock);
			/*
			 * XXX: We can miss an event here, because an event
			 *      can be added without sx-device-lock and without
			 *      mtx-queue-lock. Maybe I should just stop using
			 *      dedicated mutex for events synchronization and
			 *      stick with the queue lock?
			 *      The event will hang here until next I/O request
			 *      or next event is received.
			 */
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
			    timeout * hz);
			sx_xlock(&sc->sc_lock);
			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
			continue;
		}
		bioq_remove(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);

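		/*
		 * Dispatch: a bio coming from the sync geom is a completed
		 * synchronization READ; a bio not addressed to our provider
		 * is a completed regular request or sync WRITE issued to a
		 * component; anything else is a new request from above.
		 */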
1871		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1872		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1873			g_mirror_sync_request(bp);	/* READ */
1874		} else if (bp->bio_to != sc->sc_provider) {
1875			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1876				g_mirror_regular_request(bp);
1877			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1878				g_mirror_sync_request(bp);	/* WRITE */
1879			else {
1880				KASSERT(0,
1881				    ("Invalid request cflags=0x%hhx to=%s.",
1882				    bp->bio_cflags, bp->bio_to->name));
1883			}
1884		} else {
1885			g_mirror_register_request(bp);
1886		}
1887		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1888	}
1889}
1890
1891static void
1892g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1893{
1894
1895	sx_assert(&sc->sc_lock, SX_LOCKED);
1896
1897	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1898		return;
1899	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1900		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1901		    g_mirror_get_diskname(disk), sc->sc_name);
1902		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1903	} else if (sc->sc_idle &&
1904	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1905		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1906		    g_mirror_get_diskname(disk), sc->sc_name);
1907		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1908	}
1909}
1910
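/*
 * Start synchronization of a disk: attach a dedicated consumer to the
 * mirror provider and fire off the initial batch of sync requests.
 */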
1911static void
1912g_mirror_sync_start(struct g_mirror_disk *disk)
1913{
1914	struct g_mirror_softc *sc;
1915	struct g_consumer *cp;
1916	struct bio *bp;
1917	int error, i;
1918
1919	g_topology_assert_not();
1920	sc = disk->d_softc;
1921	sx_assert(&sc->sc_lock, SX_LOCKED);
1922
1923	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1924	    ("Disk %s is not marked for synchronization.",
1925	    g_mirror_get_diskname(disk)));
1926	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1927	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1928	    sc->sc_state));
1929
1930	sx_xunlock(&sc->sc_lock);
1931	g_topology_lock();
1932	cp = g_new_consumer(sc->sc_sync.ds_geom);
1933	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
1934	error = g_attach(cp, sc->sc_provider);
1935	KASSERT(error == 0,
1936	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1937	error = g_access(cp, 1, 0, 0);
1938	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1939	g_topology_unlock();
1940	sx_xlock(&sc->sc_lock);
1941
1942	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1943	    g_mirror_get_diskname(disk));
1944	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1945		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1946	KASSERT(disk->d_sync.ds_consumer == NULL,
1947	    ("Sync consumer already exists (device=%s, disk=%s).",
1948	    sc->sc_name, g_mirror_get_diskname(disk)));
1949
1950	disk->d_sync.ds_consumer = cp;
1951	disk->d_sync.ds_consumer->private = disk;
1952	disk->d_sync.ds_consumer->index = 0;
1953
1954	/*
1955	 * Allocate memory for synchronization bios and initialize them.
1956	 */
1957	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
1958	    M_MIRROR, M_WAITOK);
1959	for (i = 0; i < g_mirror_syncreqs; i++) {
1960		bp = g_alloc_bio();
1961		disk->d_sync.ds_bios[i] = bp;
1962		bp->bio_parent = NULL;
1963		bp->bio_cmd = BIO_READ;
1964		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1965		bp->bio_cflags = 0;
1966		bp->bio_offset = disk->d_sync.ds_offset;
1967		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1968		disk->d_sync.ds_offset += bp->bio_length;
1969		bp->bio_done = g_mirror_sync_done;
1970		bp->bio_from = disk->d_sync.ds_consumer;
1971		bp->bio_to = sc->sc_provider;
1972		bp->bio_caller1 = (void *)(uintptr_t)i;
1973	}
1974
1975	/* Increase the number of disks in SYNCHRONIZING state. */
1976	sc->sc_sync.ds_ndisks++;
1977	/* Set the number of in-flight synchronization requests. */
1978	disk->d_sync.ds_inflight = g_mirror_syncreqs;
1979
1980	/*
1981	 * Fire off first synchronization requests.
1982	 */
1983	for (i = 0; i < g_mirror_syncreqs; i++) {
1984		bp = disk->d_sync.ds_bios[i];
1985		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1986		disk->d_sync.ds_consumer->index++;
1987		/*
1988		 * Delay the request if it is colliding with a regular request.
1989		 */
1990		if (g_mirror_regular_collision(sc, bp))
1991			g_mirror_sync_delay(sc, bp);
1992		else
1993			g_io_request(bp, disk->d_sync.ds_consumer);
1994	}
1995}
1996
1997/*
1998 * Stop synchronization process.
1999 * type: 0 - synchronization finished
2000 *       1 - synchronization stopped
2001 */
2002static void
2003g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2004{
2005	struct g_mirror_softc *sc;
2006	struct g_consumer *cp;
2007
2008	g_topology_assert_not();
2009	sc = disk->d_softc;
2010	sx_assert(&sc->sc_lock, SX_LOCKED);
2011
2012	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2013	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2014	    g_mirror_disk_state2str(disk->d_state)));
2015	if (disk->d_sync.ds_consumer == NULL)
2016		return;
2017
2018	if (type == 0) {
2019		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2020		    sc->sc_name, g_mirror_get_diskname(disk));
2021	} else /* if (type == 1) */ {
2022		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2023		    sc->sc_name, g_mirror_get_diskname(disk));
2024	}
2025	free(disk->d_sync.ds_bios, M_MIRROR);
2026	disk->d_sync.ds_bios = NULL;
2027	cp = disk->d_sync.ds_consumer;
2028	disk->d_sync.ds_consumer = NULL;
2029	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2030	sc->sc_sync.ds_ndisks--;
2031	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2032	g_topology_lock();
2033	g_mirror_kill_consumer(sc, cp);
2034	g_topology_unlock();
2035	sx_xlock(&sc->sc_lock);
2036}
2037
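/*
 * Create and announce the mirror/<name> provider and start
 * synchronization for any disks that need it.
 */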
2038static void
2039g_mirror_launch_provider(struct g_mirror_softc *sc)
2040{
2041	struct g_mirror_disk *disk;
2042	struct g_provider *pp, *dp;
2043
2044	sx_assert(&sc->sc_lock, SX_LOCKED);
2045
2046	g_topology_lock();
2047	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2048	pp->flags |= G_PF_DIRECT_RECEIVE;
2049	pp->mediasize = sc->sc_mediasize;
2050	pp->sectorsize = sc->sc_sectorsize;
2051	pp->stripesize = 0;
2052	pp->stripeoffset = 0;
2053
	/* Splitting of unmapped BIOs could work, but isn't implemented now. */
2055	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2056		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2057
2058	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2059		if (disk->d_consumer && disk->d_consumer->provider) {
2060			dp = disk->d_consumer->provider;
2061			if (dp->stripesize > pp->stripesize) {
2062				pp->stripesize = dp->stripesize;
2063				pp->stripeoffset = dp->stripeoffset;
2064			}
			/*
			 * A provider underneath us doesn't support
			 * unmapped I/O.
			 */
2066			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2067				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2068				    "because of %s.", dp->name);
2069				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2070			}
2071		}
2072	}
2073	sc->sc_provider = pp;
2074	g_error_provider(pp, 0);
2075	g_topology_unlock();
2076	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2077	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2078	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2079		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2080			g_mirror_sync_start(disk);
2081	}
2082}
2083
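/*
 * Withdraw the provider: fail all queued requests with ENXIO, orphan
 * the provider and stop any synchronization still in progress.
 */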
2084static void
2085g_mirror_destroy_provider(struct g_mirror_softc *sc)
2086{
2087	struct g_mirror_disk *disk;
2088	struct bio *bp;
2089
2090	g_topology_assert_not();
2091	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2092	    sc->sc_name));
2093
2094	g_topology_lock();
2095	g_error_provider(sc->sc_provider, ENXIO);
2096	mtx_lock(&sc->sc_queue_mtx);
2097	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL)
2098		g_io_deliver(bp, ENXIO);
2099	mtx_unlock(&sc->sc_queue_mtx);
2100	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
2101	    sc->sc_provider->name);
2102	sc->sc_provider->flags |= G_PF_WITHER;
2103	g_orphan_provider(sc->sc_provider, ENXIO);
2104	g_topology_unlock();
2105	sc->sc_provider = NULL;
2106	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2107		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2108			g_mirror_sync_stop(disk, 1);
2109	}
2110}
2111
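/*
 * Callout handler which forces the device to start after the
 * configured timeout, even if not all components have shown up.
 */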
2112static void
2113g_mirror_go(void *arg)
2114{
2115	struct g_mirror_softc *sc;
2116
2117	sc = arg;
2118	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2119	g_mirror_event_send(sc, 0,
2120	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2121}
2122
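/*
 * Decide the initial state of a newly connected disk by comparing its
 * syncid with the device's syncid.
 */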
2123static u_int
2124g_mirror_determine_state(struct g_mirror_disk *disk)
2125{
2126	struct g_mirror_softc *sc;
2127	u_int state;
2128
2129	sc = disk->d_softc;
2130	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2131		if ((disk->d_flags &
2132		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2133			/* Disk does not need synchronization. */
2134			state = G_MIRROR_DISK_STATE_ACTIVE;
2135		} else {
2136			if ((sc->sc_flags &
2137			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2138			    (disk->d_flags &
2139			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2140				/*
2141				 * We can start synchronization from
2142				 * the stored offset.
2143				 */
2144				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2145			} else {
2146				state = G_MIRROR_DISK_STATE_STALE;
2147			}
2148		}
2149	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because even if it was synchronized, it was
		 * synchronized against disks with a different syncid.
		 */
2155		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2156		disk->d_sync.ds_offset = 0;
2157		disk->d_sync.ds_offset_done = 0;
2158		disk->d_sync.ds_syncid = sc->sc_syncid;
2159		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2160		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2161			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2162		} else {
2163			state = G_MIRROR_DISK_STATE_STALE;
2164		}
2165	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were writes, the mirror is broken, sorry.
		 * The best choice here is not to touch this disk and
		 * to inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrived!! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
2178		g_mirror_destroy_disk(disk);
2179		state = G_MIRROR_DISK_STATE_NONE;
2180		/* Return immediately, because disk was destroyed. */
2181		return (state);
2182	}
2183	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2184	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2185	return (state);
2186}
2187
2188/*
2189 * Update device state.
2190 */
2191static void
2192g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
2193{
2194	struct g_mirror_disk *disk;
2195	u_int state;
2196
2197	sx_assert(&sc->sc_lock, SX_XLOCKED);
2198
2199	switch (sc->sc_state) {
2200	case G_MIRROR_DEVICE_STATE_STARTING:
2201	    {
2202		struct g_mirror_disk *pdisk, *tdisk;
2203		u_int dirty, ndisks, genid, syncid;
2204
2205		KASSERT(sc->sc_provider == NULL,
2206		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2207		/*
2208		 * Are we ready? We are, if all disks are connected or
2209		 * if we have any disks and 'force' is true.
2210		 */
2211		ndisks = g_mirror_ndisks(sc, -1);
2212		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2213			;
2214		} else if (ndisks == 0) {
			/*
			 * Disks went away during the starting phase,
			 * so destroy the device.
			 */
2219			callout_drain(&sc->sc_callout);
2220			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2221			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2222			    sc->sc_rootmount);
2223			root_mount_rel(sc->sc_rootmount);
2224			sc->sc_rootmount = NULL;
2225			return;
2226		} else {
2227			return;
2228		}
2229
2230		/*
2231		 * Activate all disks with the biggest syncid.
2232		 */
2233		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * a timeout, so don't bother canceling it.
			 */
2238			ndisks = 0;
2239			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2240				if ((disk->d_flags &
2241				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2242					ndisks++;
2243				}
2244			}
2245			if (ndisks == 0) {
2246				/* No valid disks found, destroy device. */
2247				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2248				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2249				    __LINE__, sc->sc_rootmount);
2250				root_mount_rel(sc->sc_rootmount);
2251				sc->sc_rootmount = NULL;
2252				return;
2253			}
2254		} else {
2255			/* Cancel timeout. */
2256			callout_drain(&sc->sc_callout);
2257		}
2258
2259		/*
2260		 * Find the biggest genid.
2261		 */
2262		genid = 0;
2263		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2264			if (disk->d_genid > genid)
2265				genid = disk->d_genid;
2266		}
2267		sc->sc_genid = genid;
2268		/*
2269		 * Remove all disks without the biggest genid.
2270		 */
2271		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2272			if (disk->d_genid < genid) {
2273				G_MIRROR_DEBUG(0,
2274				    "Component %s (device %s) broken, skipping.",
2275				    g_mirror_get_diskname(disk), sc->sc_name);
2276				g_mirror_destroy_disk(disk);
2277			}
2278		}
2279
2280		/*
2281		 * Find the biggest syncid.
2282		 */
2283		syncid = 0;
2284		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2285			if (disk->d_sync.ds_syncid > syncid)
2286				syncid = disk->d_sync.ds_syncid;
2287		}
2288
		/*
		 * Here we need to look for dirty disks and, if all disks
		 * with the biggest syncid are dirty, choose the one with
		 * the biggest priority and rebuild the rest.
		 */
2294		/*
2295		 * Find the number of dirty disks with the biggest syncid.
2296		 * Find the number of disks with the biggest syncid.
2297		 * While here, find a disk with the biggest priority.
2298		 */
2299		dirty = ndisks = 0;
2300		pdisk = NULL;
2301		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2302			if (disk->d_sync.ds_syncid != syncid)
2303				continue;
2304			if ((disk->d_flags &
2305			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2306				continue;
2307			}
2308			ndisks++;
2309			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2310				dirty++;
2311				if (pdisk == NULL ||
2312				    pdisk->d_priority < disk->d_priority) {
2313					pdisk = disk;
2314				}
2315			}
2316		}
2317		if (dirty == 0) {
2318			/* No dirty disks at all, great. */
2319		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except
			 * the one with the biggest priority.
			 */
2324			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2325			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2326			    "master disk for synchronization.",
2327			    g_mirror_get_diskname(pdisk), sc->sc_name);
2328			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2329				if (disk->d_sync.ds_syncid != syncid)
2330					continue;
2331				if ((disk->d_flags &
2332				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2333					continue;
2334				}
2335				KASSERT((disk->d_flags &
2336				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2337				    ("Disk %s isn't marked as dirty.",
2338				    g_mirror_get_diskname(disk)));
2339				/* Skip the disk with the biggest priority. */
2340				if (disk == pdisk)
2341					continue;
2342				disk->d_sync.ds_syncid = 0;
2343			}
2344		} else if (dirty < ndisks) {
2345			/*
2346			 * Force synchronization for all dirty disks.
2347			 * We have some non-dirty disks.
2348			 */
2349			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2350				if (disk->d_sync.ds_syncid != syncid)
2351					continue;
2352				if ((disk->d_flags &
2353				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2354					continue;
2355				}
2356				if ((disk->d_flags &
2357				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2358					continue;
2359				}
2360				disk->d_sync.ds_syncid = 0;
2361			}
2362		}
2363
2364		/* Reset hint. */
2365		sc->sc_hint = NULL;
2366		sc->sc_syncid = syncid;
2367		if (force) {
2368			/* Remember to bump syncid on first write. */
2369			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2370		}
2371		state = G_MIRROR_DEVICE_STATE_RUNNING;
2372		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2373		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2374		    g_mirror_device_state2str(state));
2375		sc->sc_state = state;
2376		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2377			state = g_mirror_determine_state(disk);
2378			g_mirror_event_send(disk, state,
2379			    G_MIRROR_EVENT_DONTWAIT);
2380			if (state == G_MIRROR_DISK_STATE_STALE)
2381				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2382		}
2383		break;
2384	    }
2385	case G_MIRROR_DEVICE_STATE_RUNNING:
2386		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2387		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2388			/*
2389			 * No active disks or no disks at all,
2390			 * so destroy device.
2391			 */
2392			if (sc->sc_provider != NULL)
2393				g_mirror_destroy_provider(sc);
2394			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2395			break;
2396		} else if (g_mirror_ndisks(sc,
2397		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2398		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2399			/*
2400			 * We have active disks, launch provider if it doesn't
2401			 * exist.
2402			 */
2403			if (sc->sc_provider == NULL)
2404				g_mirror_launch_provider(sc);
2405			if (sc->sc_rootmount != NULL) {
2406				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2407				    __LINE__, sc->sc_rootmount);
2408				root_mount_rel(sc->sc_rootmount);
2409				sc->sc_rootmount = NULL;
2410			}
2411		}
2412		/*
2413		 * Genid should be bumped immediately, so do it here.
2414		 */
2415		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2416			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2417			g_mirror_bump_genid(sc);
2418		}
2419		break;
2420	default:
2421		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2422		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2423		break;
2424	}
2425}
2426
2427/*
2428 * Update disk state and device state if needed.
2429 */
2430#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2431	"Disk %s state changed from %s to %s (device %s).",		\
2432	g_mirror_get_diskname(disk),					\
2433	g_mirror_disk_state2str(disk->d_state),				\
2434	g_mirror_disk_state2str(state), sc->sc_name)
2435static int
2436g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2437{
2438	struct g_mirror_softc *sc;
2439
2440	sc = disk->d_softc;
2441	sx_assert(&sc->sc_lock, SX_XLOCKED);
2442
2443again:
2444	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2445	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2446	    g_mirror_disk_state2str(state));
2447	switch (state) {
2448	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. A new disk arrives.
		 */
2453		/* Previous state should be NONE. */
2454		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2455		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2456		    g_mirror_disk_state2str(disk->d_state)));
2457		DISK_STATE_CHANGED();
2458
2459		disk->d_state = state;
2460		if (LIST_EMPTY(&sc->sc_disks))
2461			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2462		else {
2463			struct g_mirror_disk *dp;
2464
2465			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2466				if (disk->d_priority >= dp->d_priority) {
2467					LIST_INSERT_BEFORE(dp, disk, d_next);
2468					dp = NULL;
2469					break;
2470				}
2471				if (LIST_NEXT(dp, d_next) == NULL)
2472					break;
2473			}
2474			if (dp != NULL)
2475				LIST_INSERT_AFTER(dp, disk, d_next);
2476		}
2477		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2478		    sc->sc_name, g_mirror_get_diskname(disk));
2479		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2480			break;
2481		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2482		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2483		    g_mirror_device_state2str(sc->sc_state),
2484		    g_mirror_get_diskname(disk),
2485		    g_mirror_disk_state2str(disk->d_state)));
2486		state = g_mirror_determine_state(disk);
2487		if (state != G_MIRROR_DISK_STATE_NONE)
2488			goto again;
2489		break;
2490	case G_MIRROR_DISK_STATE_ACTIVE:
		/*
		 * Possible scenarios:
		 * 1. A new disk does not need synchronization.
		 * 2. The synchronization process finished successfully.
		 */
2496		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2497		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2498		    g_mirror_device_state2str(sc->sc_state),
2499		    g_mirror_get_diskname(disk),
2500		    g_mirror_disk_state2str(disk->d_state)));
2501		/* Previous state should be NEW or SYNCHRONIZING. */
2502		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2503		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2504		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2505		    g_mirror_disk_state2str(disk->d_state)));
2506		DISK_STATE_CHANGED();
2507
2508		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2509			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2510			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2511			g_mirror_sync_stop(disk, 0);
2512		}
2513		disk->d_state = state;
2514		disk->d_sync.ds_offset = 0;
2515		disk->d_sync.ds_offset_done = 0;
2516		g_mirror_update_idle(sc, disk);
2517		g_mirror_update_metadata(disk);
2518		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2519		    sc->sc_name, g_mirror_get_diskname(disk));
2520		break;
2521	case G_MIRROR_DISK_STATE_STALE:
		/*
		 * Possible scenarios:
		 * 1. A stale disk was connected.
		 */
2526		/* Previous state should be NEW. */
2527		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2528		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2529		    g_mirror_disk_state2str(disk->d_state)));
2530		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2531		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2532		    g_mirror_device_state2str(sc->sc_state),
2533		    g_mirror_get_diskname(disk),
2534		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * The STALE state is only possible if the device is
		 * marked NOAUTOSYNC.
		 */
2539		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2540		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2541		    g_mirror_device_state2str(sc->sc_state),
2542		    g_mirror_get_diskname(disk),
2543		    g_mirror_disk_state2str(disk->d_state)));
2544		DISK_STATE_CHANGED();
2545
2546		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2547		disk->d_state = state;
2548		g_mirror_update_metadata(disk);
2549		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2550		    sc->sc_name, g_mirror_get_diskname(disk));
2551		break;
2552	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		/*
		 * Possible scenarios:
		 * 1. A disk which needs synchronization was connected.
		 */
2557		/* Previous state should be NEW. */
2558		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2559		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2560		    g_mirror_disk_state2str(disk->d_state)));
2561		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2562		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2563		    g_mirror_device_state2str(sc->sc_state),
2564		    g_mirror_get_diskname(disk),
2565		    g_mirror_disk_state2str(disk->d_state)));
2566		DISK_STATE_CHANGED();
2567
2568		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2569			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2570		disk->d_state = state;
2571		if (sc->sc_provider != NULL) {
2572			g_mirror_sync_start(disk);
2573			g_mirror_update_metadata(disk);
2574		}
2575		break;
2576	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. The device wasn't running yet, but a disk disappeared.
		 * 2. A disk was active and disappeared.
		 * 3. A disk disappeared during the synchronization process.
		 */
2583		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2584			/*
2585			 * Previous state should be ACTIVE, STALE or
2586			 * SYNCHRONIZING.
2587			 */
2588			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2589			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2590			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2591			    ("Wrong disk state (%s, %s).",
2592			    g_mirror_get_diskname(disk),
2593			    g_mirror_disk_state2str(disk->d_state)));
2594		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2595			/* Previous state should be NEW. */
2596			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2597			    ("Wrong disk state (%s, %s).",
2598			    g_mirror_get_diskname(disk),
2599			    g_mirror_disk_state2str(disk->d_state)));
			/*
			 * Cancel the pending syncid bump if a disk
			 * disappeared in the STARTING state.
			 */
2604			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2605				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2606#ifdef	INVARIANTS
2607		} else {
2608			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2609			    sc->sc_name,
2610			    g_mirror_device_state2str(sc->sc_state),
2611			    g_mirror_get_diskname(disk),
2612			    g_mirror_disk_state2str(disk->d_state)));
2613#endif
2614		}
2615		DISK_STATE_CHANGED();
2616		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2617		    sc->sc_name, g_mirror_get_diskname(disk));
2618
2619		g_mirror_destroy_disk(disk);
2620		break;
2621	case G_MIRROR_DISK_STATE_DESTROY:
2622	    {
2623		int error;
2624
2625		error = g_mirror_clear_metadata(disk);
2626		if (error != 0)
2627			return (error);
2628		DISK_STATE_CHANGED();
2629		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2630		    sc->sc_name, g_mirror_get_diskname(disk));
2631
2632		g_mirror_destroy_disk(disk);
2633		sc->sc_ndisks--;
2634		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2635			g_mirror_update_metadata(disk);
2636		}
2637		break;
2638	    }
2639	default:
2640		KASSERT(1 == 0, ("Unknown state (%u).", state));
2641		break;
2642	}
2643	return (0);
2644}
2645#undef	DISK_STATE_CHANGED
2646
2647int
2648g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2649{
2650	struct g_provider *pp;
2651	u_char *buf;
2652	int error;
2653
2654	g_topology_assert();
2655
2656	error = g_access(cp, 1, 0, 0);
2657	if (error != 0)
2658		return (error);
2659	pp = cp->provider;
2660	g_topology_unlock();
	/* Metadata is stored in the last sector. */
2662	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2663	    &error);
2664	g_topology_lock();
2665	g_access(cp, -1, 0, 0);
2666	if (buf == NULL) {
2667		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2668		    cp->provider->name, error);
2669		return (error);
2670	}
2671
2672	/* Decode metadata. */
2673	error = mirror_metadata_decode(buf, md);
2674	g_free(buf);
2675	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2676		return (EINVAL);
2677	if (md->md_version > G_MIRROR_VERSION) {
2678		G_MIRROR_DEBUG(0,
2679		    "Kernel module is too old to handle metadata from %s.",
2680		    cp->provider->name);
2681		return (EINVAL);
2682	}
2683	if (error != 0) {
2684		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2685		    cp->provider->name);
2686		return (error);
2687	}
2688
2689	return (0);
2690}
2691
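/*
 * Sanity-check the metadata read from a component against the
 * configuration of an existing device.
 */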
2692static int
2693g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2694    struct g_mirror_metadata *md)
2695{
2696
2697	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2698		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2699		    pp->name, md->md_did);
2700		return (EEXIST);
2701	}
2702	if (md->md_all != sc->sc_ndisks) {
2703		G_MIRROR_DEBUG(1,
2704		    "Invalid '%s' field on disk %s (device %s), skipping.",
2705		    "md_all", pp->name, sc->sc_name);
2706		return (EINVAL);
2707	}
2708	if (md->md_slice != sc->sc_slice) {
2709		G_MIRROR_DEBUG(1,
2710		    "Invalid '%s' field on disk %s (device %s), skipping.",
2711		    "md_slice", pp->name, sc->sc_name);
2712		return (EINVAL);
2713	}
2714	if (md->md_balance != sc->sc_balance) {
2715		G_MIRROR_DEBUG(1,
2716		    "Invalid '%s' field on disk %s (device %s), skipping.",
2717		    "md_balance", pp->name, sc->sc_name);
2718		return (EINVAL);
2719	}
2720	if (md->md_mediasize != sc->sc_mediasize) {
2721		G_MIRROR_DEBUG(1,
2722		    "Invalid '%s' field on disk %s (device %s), skipping.",
2723		    "md_mediasize", pp->name, sc->sc_name);
2724		return (EINVAL);
2725	}
2726	if (sc->sc_mediasize > pp->mediasize) {
2727		G_MIRROR_DEBUG(1,
2728		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2729		    sc->sc_name);
2730		return (EINVAL);
2731	}
2732	if (md->md_sectorsize != sc->sc_sectorsize) {
2733		G_MIRROR_DEBUG(1,
2734		    "Invalid '%s' field on disk %s (device %s), skipping.",
2735		    "md_sectorsize", pp->name, sc->sc_name);
2736		return (EINVAL);
2737	}
2738	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2739		G_MIRROR_DEBUG(1,
2740		    "Invalid sector size of disk %s (device %s), skipping.",
2741		    pp->name, sc->sc_name);
2742		return (EINVAL);
2743	}
2744	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2745		G_MIRROR_DEBUG(1,
2746		    "Invalid device flags on disk %s (device %s), skipping.",
2747		    pp->name, sc->sc_name);
2748		return (EINVAL);
2749	}
2750	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2751		G_MIRROR_DEBUG(1,
2752		    "Invalid disk flags on disk %s (device %s), skipping.",
2753		    pp->name, sc->sc_name);
2754		return (EINVAL);
2755	}
2756	return (0);
2757}
2758
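/*
 * Validate the metadata and attach a new disk to the device,
 * upgrading on-disk metadata written by an older version if needed.
 */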
2759int
2760g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2761    struct g_mirror_metadata *md)
2762{
2763	struct g_mirror_disk *disk;
2764	int error;
2765
2766	g_topology_assert_not();
2767	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2768
2769	error = g_mirror_check_metadata(sc, pp, md);
2770	if (error != 0)
2771		return (error);
2772	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2773	    md->md_genid < sc->sc_genid) {
2774		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2775		    pp->name, sc->sc_name);
2776		return (EINVAL);
2777	}
2778	disk = g_mirror_init_disk(sc, pp, md, &error);
2779	if (disk == NULL)
2780		return (error);
2781	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2782	    G_MIRROR_EVENT_WAIT);
2783	if (error != 0)
2784		return (error);
2785	if (md->md_version < G_MIRROR_VERSION) {
2786		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2787		    pp->name, md->md_version, G_MIRROR_VERSION);
2788		g_mirror_update_metadata(disk);
2789	}
2790	return (0);
2791}
2792
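/*
 * GEOM event callback which destroys the device once its last
 * consumer has closed it (scheduled from g_mirror_access() below).
 */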
2793static void
2794g_mirror_destroy_delayed(void *arg, int flag)
2795{
2796	struct g_mirror_softc *sc;
2797	int error;
2798
2799	if (flag == EV_CANCEL) {
2800		G_MIRROR_DEBUG(1, "Destroying canceled.");
2801		return;
2802	}
2803	sc = arg;
2804	g_topology_unlock();
2805	sx_xlock(&sc->sc_lock);
2806	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2807	    ("DESTROY flag set on %s.", sc->sc_name));
2808	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2809	    ("DESTROYING flag not set on %s.", sc->sc_name));
2810	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2811	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2812	if (error != 0) {
2813		G_MIRROR_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
2814		sx_xunlock(&sc->sc_lock);
2815	}
2816	g_topology_lock();
2817}
2818
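/*
 * Access method for the mirror provider. On the last close of a
 * device marked DESTROYING, schedule its delayed destruction.
 */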
2819static int
2820g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2821{
2822	struct g_mirror_softc *sc;
2823	int dcr, dcw, dce, error = 0;
2824
2825	g_topology_assert();
2826	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2827	    acw, ace);
2828
2829	sc = pp->geom->softc;
2830	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
2831		return (0);
2832	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2833
2834	dcr = pp->acr + acr;
2835	dcw = pp->acw + acw;
2836	dce = pp->ace + ace;
2837
2838	g_topology_unlock();
2839	sx_xlock(&sc->sc_lock);
2840	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2841	    LIST_EMPTY(&sc->sc_disks)) {
2842		if (acr > 0 || acw > 0 || ace > 0)
2843			error = ENXIO;
2844		goto end;
2845	}
2846	if (dcw == 0)
2847		g_mirror_idle(sc, dcw);
2848	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
2849		if (acr > 0 || acw > 0 || ace > 0) {
2850			error = ENXIO;
2851			goto end;
2852		}
2853		if (dcr == 0 && dcw == 0 && dce == 0) {
2854			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
2855			    sc, NULL);
2856		}
2857	}
2858end:
2859	sx_xunlock(&sc->sc_lock);
2860	g_topology_lock();
2861	return (error);
2862}
2863
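/*
 * Create the softc, the action and synchronization geoms and the
 * worker thread for a new device, and arm the startup timeout.
 */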
2864static struct g_geom *
2865g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2866{
2867	struct g_mirror_softc *sc;
2868	struct g_geom *gp;
2869	int error, timeout;
2870
2871	g_topology_assert();
2872	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2873	    md->md_mid);
2874
	/* At least one disk is required. */
2876	if (md->md_all < 1)
2877		return (NULL);
2878	/*
2879	 * Action geom.
2880	 */
2881	gp = g_new_geomf(mp, "%s", md->md_name);
2882	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2883	gp->start = g_mirror_start;
2884	gp->orphan = g_mirror_orphan;
2885	gp->access = g_mirror_access;
2886	gp->dumpconf = g_mirror_dumpconf;
2887
2888	sc->sc_id = md->md_mid;
2889	sc->sc_slice = md->md_slice;
2890	sc->sc_balance = md->md_balance;
2891	sc->sc_mediasize = md->md_mediasize;
2892	sc->sc_sectorsize = md->md_sectorsize;
2893	sc->sc_ndisks = md->md_all;
2894	sc->sc_flags = md->md_mflags;
2895	sc->sc_bump_id = 0;
2896	sc->sc_idle = 1;
2897	sc->sc_last_write = time_uptime;
2898	sc->sc_writes = 0;
2899	sx_init(&sc->sc_lock, "gmirror:lock");
2900	bioq_init(&sc->sc_queue);
2901	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2902	bioq_init(&sc->sc_regular_delayed);
2903	bioq_init(&sc->sc_inflight);
2904	bioq_init(&sc->sc_sync_delayed);
2905	LIST_INIT(&sc->sc_disks);
2906	TAILQ_INIT(&sc->sc_events);
2907	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2908	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2909	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
2910	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2911	gp->softc = sc;
2912	sc->sc_geom = gp;
2913	sc->sc_provider = NULL;
2914	/*
2915	 * Synchronization geom.
2916	 */
2917	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2918	gp->softc = sc;
2919	gp->orphan = g_mirror_orphan;
2920	sc->sc_sync.ds_geom = gp;
2921	sc->sc_sync.ds_ndisks = 0;
2922	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2923	    "g_mirror %s", md->md_name);
2924	if (error != 0) {
2925		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2926		    sc->sc_name);
2927		g_destroy_geom(sc->sc_sync.ds_geom);
2928		mtx_destroy(&sc->sc_done_mtx);
2929		mtx_destroy(&sc->sc_events_mtx);
2930		mtx_destroy(&sc->sc_queue_mtx);
2931		sx_destroy(&sc->sc_lock);
2932		g_destroy_geom(sc->sc_geom);
2933		free(sc, M_MIRROR);
2934		return (NULL);
2935	}
2936
2937	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
2938	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
2939
2940	sc->sc_rootmount = root_mount_hold("GMIRROR");
2941	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
	/*
	 * Schedule the startup timeout.
	 */
2945	timeout = g_mirror_timeout * hz;
2946	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2947	return (sc->sc_geom);
2948}
2949
2950int
2951g_mirror_destroy(struct g_mirror_softc *sc, int how)
2952{
2953	struct g_mirror_disk *disk;
2954	struct g_provider *pp;
2955
2956	g_topology_assert_not();
2957	if (sc == NULL)
2958		return (ENXIO);
2959	sx_assert(&sc->sc_lock, SX_XLOCKED);
2960
2961	pp = sc->sc_provider;
2962	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2963		switch (how) {
2964		case G_MIRROR_DESTROY_SOFT:
2965			G_MIRROR_DEBUG(1,
2966			    "Device %s is still open (r%dw%de%d).", pp->name,
2967			    pp->acr, pp->acw, pp->ace);
2968			return (EBUSY);
2969		case G_MIRROR_DESTROY_DELAYED:
2970			G_MIRROR_DEBUG(1,
2971			    "Device %s will be destroyed on last close.",
2972			    pp->name);
2973			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2974				if (disk->d_state ==
2975				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2976					g_mirror_sync_stop(disk, 1);
2977				}
2978			}
2979			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
2980			return (EBUSY);
2981		case G_MIRROR_DESTROY_HARD:
2982			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2983			    "can't be definitely removed.", pp->name);
2984		}
2985	}
2986
2987	g_topology_lock();
2988	if (sc->sc_geom->softc == NULL) {
2989		g_topology_unlock();
2990		return (0);
2991	}
2992	sc->sc_geom->softc = NULL;
2993	sc->sc_sync.ds_geom->softc = NULL;
2994	g_topology_unlock();
2995
2996	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2997	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2998	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2999	sx_xunlock(&sc->sc_lock);
3000	mtx_lock(&sc->sc_queue_mtx);
3001	wakeup(sc);
3002	mtx_unlock(&sc->sc_queue_mtx);
3003	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3004	while (sc->sc_worker != NULL)
3005		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3006	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3007	sx_xlock(&sc->sc_lock);
3008	g_mirror_destroy_device(sc);
3009	free(sc, M_MIRROR);
3010	return (0);
3011}
3012
3013static void
3014g_mirror_taste_orphan(struct g_consumer *cp)
3015{
3016
3017	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3018	    cp->provider->name));
3019}
3020
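/*
 * Taste method: read and validate metadata from the given provider,
 * then add the disk to an existing device or create a new one.
 */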
3021static struct g_geom *
3022g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3023{
3024	struct g_mirror_metadata md;
3025	struct g_mirror_softc *sc;
3026	struct g_consumer *cp;
3027	struct g_geom *gp;
3028	int error;
3029
3030	g_topology_assert();
3031	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3032	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3033
3034	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should never be called.
	 */
3038	gp->orphan = g_mirror_taste_orphan;
3039	cp = g_new_consumer(gp);
3040	g_attach(cp, pp);
3041	error = g_mirror_read_metadata(cp, &md);
3042	g_detach(cp);
3043	g_destroy_consumer(cp);
3044	g_destroy_geom(gp);
3045	if (error != 0)
3046		return (NULL);
3047	gp = NULL;
3048
3049	if (md.md_provider[0] != '\0' &&
3050	    !g_compare_names(md.md_provider, pp->name))
3051		return (NULL);
3052	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3053		return (NULL);
3054	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3055		G_MIRROR_DEBUG(0,
3056		    "Device %s: provider %s marked as inactive, skipping.",
3057		    md.md_name, pp->name);
3058		return (NULL);
3059	}
3060	if (g_mirror_debug >= 2)
3061		mirror_metadata_dump(&md);
3062
	/*
	 * Let's check whether the device already exists.
	 */
3066	sc = NULL;
3067	LIST_FOREACH(gp, &mp->geom, geom) {
3068		sc = gp->softc;
3069		if (sc == NULL)
3070			continue;
3071		if (sc->sc_sync.ds_geom == gp)
3072			continue;
3073		if (strcmp(md.md_name, sc->sc_name) != 0)
3074			continue;
3075		if (md.md_mid != sc->sc_id) {
3076			G_MIRROR_DEBUG(0, "Device %s already configured.",
3077			    sc->sc_name);
3078			return (NULL);
3079		}
3080		break;
3081	}
3082	if (gp == NULL) {
3083		gp = g_mirror_create(mp, &md);
3084		if (gp == NULL) {
3085			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3086			    md.md_name);
3087			return (NULL);
3088		}
3089		sc = gp->softc;
3090	}
3091	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3092	g_topology_unlock();
3093	sx_xlock(&sc->sc_lock);
3094	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3095	error = g_mirror_add_disk(sc, pp, &md);
3096	if (error != 0) {
3097		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3098		    pp->name, gp->name, error);
3099		if (LIST_EMPTY(&sc->sc_disks)) {
3100			g_cancel_event(sc);
3101			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3102			g_topology_lock();
3103			return (NULL);
3104		}
3105		gp = NULL;
3106	}
3107	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3108	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3109		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3110		g_topology_lock();
3111		return (NULL);
3112	}
3113	sx_xunlock(&sc->sc_lock);
3114	g_topology_lock();
3115	return (gp);
3116}
3117
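/*
 * Class method used to destroy a single geom, e.g. when the class
 * is being unloaded.
 */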
3118static int
3119g_mirror_destroy_geom(struct gctl_req *req __unused,
3120    struct g_class *mp __unused, struct g_geom *gp)
3121{
3122	struct g_mirror_softc *sc;
3123	int error;
3124
3125	g_topology_unlock();
3126	sc = gp->softc;
3127	sx_xlock(&sc->sc_lock);
3128	g_cancel_event(sc);
3129	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3130	if (error != 0)
3131		sx_xunlock(&sc->sc_lock);
3132	g_topology_lock();
3133	return (error);
3134}
3135
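/*
 * Dump device and component status into the GEOM configuration XML,
 * which is consumed by userland tools such as gmirror(8).
 */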
3136static void
3137g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3138    struct g_consumer *cp, struct g_provider *pp)
3139{
3140	struct g_mirror_softc *sc;
3141
3142	g_topology_assert();
3143
3144	sc = gp->softc;
3145	if (sc == NULL)
3146		return;
3147	/* Skip synchronization geom. */
3148	if (gp == sc->sc_sync.ds_geom)
3149		return;
3150	if (pp != NULL) {
3151		/* Nothing here. */
3152	} else if (cp != NULL) {
3153		struct g_mirror_disk *disk;
3154
3155		disk = cp->private;
3156		if (disk == NULL)
3157			return;
3158		g_topology_unlock();
3159		sx_xlock(&sc->sc_lock);
3160		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3161		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3162			sbuf_printf(sb, "%s<Synchronized>", indent);
3163			if (disk->d_sync.ds_offset == 0)
3164				sbuf_printf(sb, "0%%");
3165			else {
3166				sbuf_printf(sb, "%u%%",
3167				    (u_int)((disk->d_sync.ds_offset * 100) /
3168				    sc->sc_provider->mediasize));
3169			}
3170			sbuf_printf(sb, "</Synchronized>\n");
3171			if (disk->d_sync.ds_offset > 0) {
3172				sbuf_printf(sb, "%s<BytesSynced>%jd"
3173				    "</BytesSynced>\n", indent,
3174				    (intmax_t)disk->d_sync.ds_offset);
3175			}
3176		}
3177		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3178		    disk->d_sync.ds_syncid);
3179		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3180		    disk->d_genid);
3181		sbuf_printf(sb, "%s<Flags>", indent);
3182		if (disk->d_flags == 0)
3183			sbuf_printf(sb, "NONE");
3184		else {
3185			int first = 1;
3186
3187#define	ADD_FLAG(flag, name)	do {					\
3188	if ((disk->d_flags & (flag)) != 0) {				\
3189		if (!first)						\
3190			sbuf_printf(sb, ", ");				\
3191		else							\
3192			first = 0;					\
3193		sbuf_printf(sb, name);					\
3194	}								\
3195} while (0)
3196			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3197			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3198			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3199			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3200			    "SYNCHRONIZING");
3201			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3202			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3203#undef	ADD_FLAG
3204		}
3205		sbuf_printf(sb, "</Flags>\n");
3206		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3207		    disk->d_priority);
3208		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3209		    g_mirror_disk_state2str(disk->d_state));
3210		sx_xunlock(&sc->sc_lock);
3211		g_topology_lock();
3212	} else {
3213		g_topology_unlock();
3214		sx_xlock(&sc->sc_lock);
3215		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3216		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3217		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3218		sbuf_printf(sb, "%s<Flags>", indent);
3219		if (sc->sc_flags == 0)
3220			sbuf_printf(sb, "NONE");
3221		else {
3222			int first = 1;
3223
3224#define	ADD_FLAG(flag, name)	do {					\
3225	if ((sc->sc_flags & (flag)) != 0) {				\
3226		if (!first)						\
3227			sbuf_printf(sb, ", ");				\
3228		else							\
3229			first = 0;					\
3230		sbuf_printf(sb, name);					\
3231	}								\
3232} while (0)
3233			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3234			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3235#undef	ADD_FLAG
3236		}
3237		sbuf_printf(sb, "</Flags>\n");
3238		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3239		    (u_int)sc->sc_slice);
3240		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3241		    balance_name(sc->sc_balance));
3242		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3243		    sc->sc_ndisks);
3244		sbuf_printf(sb, "%s<State>", indent);
3245		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3246			sbuf_printf(sb, "%s", "STARTING");
3247		else if (sc->sc_ndisks ==
3248		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3249			sbuf_printf(sb, "%s", "COMPLETE");
3250		else
3251			sbuf_printf(sb, "%s", "DEGRADED");
3252		sbuf_printf(sb, "</State>\n");
3253		sx_xunlock(&sc->sc_lock);
3254		g_topology_lock();
3255	}
3256}
3257
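/*
 * Shutdown event handler: idle each device so its components are
 * marked clean, then request delayed destruction of every mirror.
 */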
3258static void
3259g_mirror_shutdown_post_sync(void *arg, int howto)
3260{
3261	struct g_class *mp;
3262	struct g_geom *gp, *gp2;
3263	struct g_mirror_softc *sc;
3264	int error;
3265
3266	mp = arg;
3267	DROP_GIANT();
3268	g_topology_lock();
3269	g_mirror_shutdown = 1;
3270	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3271		if ((sc = gp->softc) == NULL)
3272			continue;
3273		/* Skip synchronization geom. */
3274		if (gp == sc->sc_sync.ds_geom)
3275			continue;
3276		g_topology_unlock();
3277		sx_xlock(&sc->sc_lock);
3278		g_mirror_idle(sc, -1);
3279		g_cancel_event(sc);
3280		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3281		if (error != 0)
3282			sx_xunlock(&sc->sc_lock);
3283		g_topology_lock();
3284	}
3285	g_topology_unlock();
3286	PICKUP_GIANT();
3287}
3288
3289static void
3290g_mirror_init(struct g_class *mp)
3291{
3292
3293	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3294	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3295	if (g_mirror_post_sync == NULL)
3296		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3297}
3298
3299static void
3300g_mirror_fini(struct g_class *mp)
3301{
3302
3303	if (g_mirror_post_sync != NULL)
3304		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3305}
3306
3307DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3308