geom_vinum_drive.c revision 152971
1/*-
2 * Copyright (c) 2004, 2005 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 152971 2005-11-30 22:15:00Z sobomax $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/errno.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/kthread.h>
36#include <sys/libkern.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/sbuf.h>
42#include <sys/systm.h>
43#include <sys/time.h>
44
45#include <geom/geom.h>
46#include <geom/vinum/geom_vinum_var.h>
47#include <geom/vinum/geom_vinum.h>
48#include <geom/vinum/geom_vinum_share.h>
49
/* Forward declarations of file-local functions used before their definition. */
static void	gv_drive_dead(void *arg, int flag);
static void	gv_drive_worker(void *arg);
52
53void
54gv_config_new_drive(struct gv_drive *d)
55{
56	struct gv_hdr *vhdr;
57	struct gv_freelist *fl;
58
59	KASSERT(d != NULL, ("config_new_drive: NULL d"));
60
61	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
62	vhdr->magic = GV_MAGIC;
63	vhdr->config_length = GV_CFG_LEN;
64
65	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
66	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
67	microtime(&vhdr->label.date_of_birth);
68
69	d->hdr = vhdr;
70
71	LIST_INIT(&d->subdisks);
72	LIST_INIT(&d->freelist);
73
74	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
75	fl->offset = GV_DATA_START;
76	fl->size = d->avail;
77	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
78	d->freelist_entries = 1;
79
80	TAILQ_INIT(&d->bqueue);
81	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
82	kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
83	d->flags |= GV_DRIVE_THREAD_ACTIVE;
84}
85
86void
87gv_save_config_all(struct gv_softc *sc)
88{
89	struct gv_drive *d;
90
91	g_topology_assert();
92
93	LIST_FOREACH(d, &sc->drives, drive) {
94		if (d->geom == NULL)
95			continue;
96		gv_save_config(NULL, d, sc);
97	}
98}
99
100/* Save the vinum configuration back to disk. */
101void
102gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
103{
104	struct g_geom *gp;
105	struct g_consumer *cp2;
106	struct gv_hdr *vhdr, *hdr;
107	struct sbuf *sb;
108	int error;
109
110	g_topology_assert();
111
112	KASSERT(d != NULL, ("gv_save_config: null d"));
113	KASSERT(sc != NULL, ("gv_save_config: null sc"));
114
115	/*
116	 * We can't save the config on a drive that isn't up, but drives that
117	 * were just created aren't officially up yet, so we check a special
118	 * flag.
119	 */
120	if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN))
121		return;
122
123	if (cp == NULL) {
124		gp = d->geom;
125		KASSERT(gp != NULL, ("gv_save_config: null gp"));
126		cp2 = LIST_FIRST(&gp->consumer);
127		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
128	} else
129		cp2 = cp;
130
131	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
132	vhdr->magic = GV_MAGIC;
133	vhdr->config_length = GV_CFG_LEN;
134
135	hdr = d->hdr;
136	if (hdr == NULL) {
137		printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
138		g_free(vhdr);
139		return;
140	}
141	microtime(&hdr->label.last_update);
142	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
143
144	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
145	gv_format_config(sc, sb, 1, NULL);
146	sbuf_finish(sb);
147
148	error = g_access(cp2, 0, 1, 0);
149	if (error) {
150		printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
151		    d->name, error);
152		sbuf_delete(sb);
153		g_free(vhdr);
154		return;
155	}
156	g_topology_unlock();
157
158	do {
159		error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
160		if (error) {
161			printf("GEOM_VINUM: writing vhdr failed on drive %s, "
162			    "errno %d", d->name, error);
163			break;
164		}
165
166		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
167		    GV_CFG_LEN);
168		if (error) {
169			printf("GEOM_VINUM: writing first config copy failed "
170			    "on drive %s, errno %d", d->name, error);
171			break;
172		}
173
174		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
175		    sbuf_data(sb), GV_CFG_LEN);
176		if (error)
177			printf("GEOM_VINUM: writing second config copy failed "
178			    "on drive %s, errno %d", d->name, error);
179	} while (0);
180
181	g_topology_lock();
182	g_access(cp2, 0, -1, 0);
183	sbuf_delete(sb);
184	g_free(vhdr);
185
186	if (d->geom != NULL)
187		gv_drive_modify(d);
188}
189
190/* This resembles g_slice_access(). */
191static int
192gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
193{
194	struct g_geom *gp;
195	struct g_consumer *cp;
196	struct g_provider *pp2;
197	struct gv_drive *d;
198	struct gv_sd *s, *s2;
199	int error;
200
201	gp = pp->geom;
202	cp = LIST_FIRST(&gp->consumer);
203	if (cp == NULL)
204		return (0);
205
206	d = gp->softc;
207	if (d == NULL)
208		return (0);
209
210	s = pp->private;
211	KASSERT(s != NULL, ("gv_drive_access: NULL s"));
212
213	LIST_FOREACH(s2, &d->subdisks, from_drive) {
214		if (s == s2)
215			continue;
216		if (s->drive_offset + s->size <= s2->drive_offset)
217			continue;
218		if (s2->drive_offset + s2->size <= s->drive_offset)
219			continue;
220
221		/* Overlap. */
222		pp2 = s2->provider;
223		KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
224		if ((pp->acw + dw) > 0 && pp2->ace > 0)
225			return (EPERM);
226		if ((pp->ace + de) > 0 && pp2->acw > 0)
227			return (EPERM);
228	}
229
230	error = g_access(cp, dr, dw, de);
231	return (error);
232}
233
234static void
235gv_drive_done(struct bio *bp)
236{
237	struct gv_drive *d;
238	struct gv_bioq *bq;
239
240	/* Put the BIO on the worker queue again. */
241	d = bp->bio_from->geom->softc;
242	bp->bio_cflags |= GV_BIO_DONE;
243	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
244	bq->bp = bp;
245	mtx_lock(&d->bqueue_mtx);
246	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
247	wakeup(d);
248	mtx_unlock(&d->bqueue_mtx);
249}
250
251
252static void
253gv_drive_start(struct bio *bp)
254{
255	struct gv_drive *d;
256	struct gv_sd *s;
257	struct gv_bioq *bq;
258
259	switch (bp->bio_cmd) {
260	case BIO_READ:
261	case BIO_WRITE:
262	case BIO_DELETE:
263		break;
264	case BIO_GETATTR:
265	default:
266		g_io_deliver(bp, EOPNOTSUPP);
267		return;
268	}
269
270	s = bp->bio_to->private;
271	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
272		g_io_deliver(bp, ENXIO);
273		return;
274	}
275
276	d = bp->bio_to->geom->softc;
277
278	/*
279	 * Put the BIO on the worker queue, where the worker thread will pick
280	 * it up.
281	 */
282	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
283	bq->bp = bp;
284	mtx_lock(&d->bqueue_mtx);
285	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
286	wakeup(d);
287	mtx_unlock(&d->bqueue_mtx);
288
289}
290
291static void
292gv_drive_worker(void *arg)
293{
294	struct bio *bp, *cbp;
295	struct g_geom *gp;
296	struct g_provider *pp;
297	struct gv_drive *d;
298	struct gv_sd *s;
299	struct gv_bioq *bq, *bq2;
300	int error;
301
302	d = arg;
303
304	mtx_lock(&d->bqueue_mtx);
305	for (;;) {
306		/* We were signaled to exit. */
307		if (d->flags & GV_DRIVE_THREAD_DIE)
308			break;
309
310		/* Take the first BIO from out queue. */
311		bq = TAILQ_FIRST(&d->bqueue);
312		if (bq == NULL) {
313			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
314			continue;
315 		}
316		TAILQ_REMOVE(&d->bqueue, bq, queue);
317		mtx_unlock(&d->bqueue_mtx);
318
319		bp = bq->bp;
320		g_free(bq);
321		pp = bp->bio_to;
322		gp = pp->geom;
323
324		/* Completed request. */
325		if (bp->bio_cflags & GV_BIO_DONE) {
326			error = bp->bio_error;
327
328			/* Deliver the original request. */
329			g_std_done(bp);
330
331			/* The request had an error, we need to clean up. */
332			if (error != 0) {
333				g_topology_lock();
334				gv_set_drive_state(d, GV_DRIVE_DOWN,
335				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
336				g_topology_unlock();
337				g_post_event(gv_drive_dead, d, M_WAITOK, d,
338				    NULL);
339			}
340
341		/* New request, needs to be sent downwards. */
342		} else {
343			s = pp->private;
344
345			if ((s->state == GV_SD_DOWN) ||
346			    (s->state == GV_SD_STALE)) {
347				g_io_deliver(bp, ENXIO);
348				mtx_lock(&d->bqueue_mtx);
349				continue;
350			}
351			if (bp->bio_offset > s->size) {
352				g_io_deliver(bp, EINVAL);
353				mtx_lock(&d->bqueue_mtx);
354				continue;
355			}
356
357			cbp = g_clone_bio(bp);
358			if (cbp == NULL) {
359				g_io_deliver(bp, ENOMEM);
360				mtx_lock(&d->bqueue_mtx);
361				continue;
362			}
363			if (cbp->bio_offset + cbp->bio_length > s->size)
364				cbp->bio_length = s->size -
365				    cbp->bio_offset;
366			cbp->bio_done = gv_drive_done;
367			cbp->bio_offset += s->drive_offset;
368			g_io_request(cbp, LIST_FIRST(&gp->consumer));
369		}
370
371		mtx_lock(&d->bqueue_mtx);
372	}
373
374	TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) {
375		TAILQ_REMOVE(&d->bqueue, bq, queue);
376		mtx_unlock(&d->bqueue_mtx);
377		bp = bq->bp;
378		g_free(bq);
379		if (bp->bio_cflags & GV_BIO_DONE)
380			g_std_done(bp);
381		else
382			g_io_deliver(bp, ENXIO);
383		mtx_lock(&d->bqueue_mtx);
384	}
385	mtx_unlock(&d->bqueue_mtx);
386	d->flags |= GV_DRIVE_THREAD_DEAD;
387
388	kthread_exit(ENXIO);
389}
390
391
392static void
393gv_drive_orphan(struct g_consumer *cp)
394{
395	struct g_geom *gp;
396	struct gv_drive *d;
397
398	g_topology_assert();
399	gp = cp->geom;
400	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
401	d = gp->softc;
402	if (d != NULL) {
403		gv_set_drive_state(d, GV_DRIVE_DOWN,
404		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
405		g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
406	} else
407		g_wither_geom(gp, ENXIO);
408}
409
410static struct g_geom *
411gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
412{
413	struct g_geom *gp, *gp2;
414	struct g_consumer *cp;
415	struct gv_drive *d;
416	struct gv_sd *s;
417	struct gv_softc *sc;
418	struct gv_freelist *fl;
419	struct gv_hdr *vhdr;
420	int error;
421	char *buf, errstr[ERRBUFSIZ];
422
423	vhdr = NULL;
424	d = NULL;
425
426	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
427	g_topology_assert();
428
429	/* Find the VINUM class and its associated geom. */
430	gp2 = find_vinum_geom();
431	if (gp2 == NULL)
432		return (NULL);
433	sc = gp2->softc;
434
435	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
436	gp->start = gv_drive_start;
437	gp->orphan = gv_drive_orphan;
438	gp->access = gv_drive_access;
439	gp->start = gv_drive_start;
440
441	cp = g_new_consumer(gp);
442	g_attach(cp, pp);
443	error = g_access(cp, 1, 0, 0);
444	if (error) {
445		g_detach(cp);
446		g_destroy_consumer(cp);
447		g_destroy_geom(gp);
448		return (NULL);
449	}
450
451	g_topology_unlock();
452
453	/* Now check if the provided slice is a valid vinum drive. */
454	do {
455		vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL);
456		if (vhdr == NULL)
457			break;
458		if (vhdr->magic != GV_MAGIC) {
459			g_free(vhdr);
460			break;
461		}
462
463		/* A valid vinum drive, let's parse the on-disk information. */
464		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL);
465		if (buf == NULL) {
466			g_free(vhdr);
467			break;
468		}
469		g_topology_lock();
470		gv_parse_config(sc, buf, 1);
471		g_free(buf);
472
473		/*
474		 * Let's see if this drive is already known in the
475		 * configuration.
476		 */
477		d = gv_find_drive(sc, vhdr->label.name);
478
479		/* We already know about this drive. */
480		if (d != NULL) {
481			/* Check if this drive already has a geom. */
482			if (d->geom != NULL) {
483				g_topology_unlock();
484				break;
485			}
486			bcopy(vhdr, d->hdr, sizeof(*vhdr));
487
488		/* This is a new drive. */
489		} else {
490			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
491
492			/* Initialize all needed variables. */
493			d->size = pp->mediasize - GV_DATA_START;
494			d->avail = d->size;
495			d->hdr = vhdr;
496			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
497			LIST_INIT(&d->subdisks);
498			LIST_INIT(&d->freelist);
499
500			/* We also need a freelist entry. */
501			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
502			fl->offset = GV_DATA_START;
503			fl->size = d->avail;
504			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
505			d->freelist_entries = 1;
506
507			TAILQ_INIT(&d->bqueue);
508
509			/* Save it into the main configuration. */
510			LIST_INSERT_HEAD(&sc->drives, d, drive);
511		}
512
513		/*
514		 * Create a bio queue mutex and a worker thread, if necessary.
515		 */
516		if (mtx_initialized(&d->bqueue_mtx) == 0)
517			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
518
519		if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
520			kthread_create(gv_drive_worker, d, NULL, 0, 0,
521			    "gv_d %s", d->name);
522			d->flags |= GV_DRIVE_THREAD_ACTIVE;
523		}
524
525		g_access(cp, -1, 0, 0);
526
527		gp->softc = d;
528		d->geom = gp;
529		d->vinumconf = sc;
530		strncpy(d->device, pp->name, GV_MAXDRIVENAME);
531
532		/*
533		 * Find out which subdisks belong to this drive and crosslink
534		 * them.
535		 */
536		LIST_FOREACH(s, &sc->subdisks, sd) {
537			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
538				/* XXX: errors ignored */
539				gv_sd_to_drive(sc, d, s, errstr,
540				    sizeof(errstr));
541		}
542
543		/* This drive is now up for sure. */
544		gv_set_drive_state(d, GV_DRIVE_UP, 0);
545
546		/*
547		 * If there are subdisks on this drive, we need to create
548		 * providers for them.
549		 */
550		if (d->sdcount)
551			gv_drive_modify(d);
552
553		return (gp);
554
555	} while (0);
556
557	g_topology_lock();
558	g_access(cp, -1, 0, 0);
559
560	g_detach(cp);
561	g_destroy_consumer(cp);
562	g_destroy_geom(gp);
563	return (NULL);
564}
565
566/*
567 * Modify the providers for the given drive 'd'.  It is assumed that the
568 * subdisk list of 'd' is already correctly set up.
569 */
570void
571gv_drive_modify(struct gv_drive *d)
572{
573	struct g_geom *gp;
574	struct g_consumer *cp;
575	struct g_provider *pp, *pp2;
576	struct gv_sd *s;
577
578	KASSERT(d != NULL, ("gv_drive_modify: null d"));
579	gp = d->geom;
580	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
581	cp = LIST_FIRST(&gp->consumer);
582	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
583	pp = cp->provider;
584	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
585
586	g_topology_assert();
587
588	LIST_FOREACH(s, &d->subdisks, from_drive) {
589		/* This subdisk already has a provider. */
590		if (s->provider != NULL)
591			continue;
592		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
593		pp2->mediasize = s->size;
594		pp2->sectorsize = pp->sectorsize;
595		g_error_provider(pp2, 0);
596		s->provider = pp2;
597		pp2->private = s;
598	}
599}
600
601static void
602gv_drive_dead(void *arg, int flag)
603{
604	struct g_geom *gp;
605	struct g_consumer *cp;
606	struct gv_drive *d;
607	struct gv_sd *s;
608
609	g_topology_assert();
610	KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
611
612	if (flag == EV_CANCEL)
613		return;
614
615	d = arg;
616	if (d->state != GV_DRIVE_DOWN)
617		return;
618
619	g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
620
621	gp = d->geom;
622	if (gp == NULL)
623		return;
624
625	LIST_FOREACH(cp, &gp->consumer, consumer) {
626		if (cp->nstart != cp->nend) {
627			printf("GEOM_VINUM: dead drive '%s' has still "
628			    "active requests, can't detach consumer\n",
629			    d->name);
630			g_post_event(gv_drive_dead, d, M_WAITOK, d,
631			    NULL);
632			return;
633		}
634		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
635			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
636	}
637
638	printf("GEOM_VINUM: lost drive '%s'\n", d->name);
639	d->geom = NULL;
640	LIST_FOREACH(s, &d->subdisks, from_drive) {
641		s->provider = NULL;
642		s->consumer = NULL;
643	}
644	gv_kill_drive_thread(d);
645	gp->softc = NULL;
646	g_wither_geom(gp, ENXIO);
647}
648
649static int
650gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
651    struct g_geom *gp)
652{
653	struct gv_drive *d;
654
655	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
656	g_topology_assert();
657
658	d = gp->softc;
659	gv_kill_drive_thread(d);
660
661	g_wither_geom(gp, ENXIO);
662	return (0);
663}
664
665#define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
666
667static struct g_class g_vinum_drive_class = {
668	.name = VINUMDRIVE_CLASS_NAME,
669	.version = G_VERSION,
670	.taste = gv_drive_taste,
671	.destroy_geom = gv_drive_destroy_geom
672};
673
674DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
675