geom_vinum_drive.c revision 135173
1/*-
2 * Copyright (c) 2004 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 135173 2004-09-13 21:01:36Z le $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/errno.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/kthread.h>
36#include <sys/libkern.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/sbuf.h>
42#include <sys/systm.h>
43#include <sys/time.h>
44
45#include <geom/geom.h>
46#include <geom/vinum/geom_vinum_var.h>
47#include <geom/vinum/geom_vinum.h>
48#include <geom/vinum/geom_vinum_share.h>
49
/* Per-drive worker thread entry point; 'arg' is the struct gv_drive. */
static void	gv_drive_worker(void *arg);
/* Create providers for the subdisks of 'd' that do not have one yet. */
void	gv_drive_modify(struct gv_drive *d);
52
53void
54gv_config_new_drive(struct gv_drive *d)
55{
56	struct gv_hdr *vhdr;
57	struct gv_freelist *fl;
58
59	KASSERT(d != NULL, ("config_new_drive: NULL d"));
60
61	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
62	vhdr->magic = GV_MAGIC;
63	vhdr->config_length = GV_CFG_LEN;
64
65	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
66	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
67	microtime(&vhdr->label.date_of_birth);
68
69	d->hdr = vhdr;
70
71	LIST_INIT(&d->subdisks);
72	LIST_INIT(&d->freelist);
73
74	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
75	fl->offset = GV_DATA_START;
76	fl->size = d->avail;
77	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
78	d->freelist_entries = 1;
79
80	TAILQ_INIT(&d->bqueue);
81	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
82	kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
83	d->flags |= GV_DRIVE_THREAD_ACTIVE;
84}
85
86void
87gv_save_config_all(struct gv_softc *sc)
88{
89	struct gv_drive *d;
90
91	g_topology_assert();
92
93	LIST_FOREACH(d, &sc->drives, drive) {
94		if (d->geom == NULL)
95			continue;
96		gv_save_config(NULL, d, sc);
97	}
98}
99
100/* Save the vinum configuration back to disk. */
101void
102gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
103{
104	struct g_geom *gp;
105	struct g_consumer *cp2;
106	struct gv_hdr *vhdr, *hdr;
107	struct sbuf *sb;
108	int error;
109
110	g_topology_assert();
111
112	KASSERT(d != NULL, ("gv_save_config: null d"));
113	KASSERT(sc != NULL, ("gv_save_config: null sc"));
114
115	if (cp == NULL) {
116		gp = d->geom;
117		KASSERT(gp != NULL, ("gv_save_config: null gp"));
118		cp2 = LIST_FIRST(&gp->consumer);
119		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
120	} else
121		cp2 = cp;
122
123	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
124	vhdr->magic = GV_MAGIC;
125	vhdr->config_length = GV_CFG_LEN;
126
127	hdr = d->hdr;
128	if (hdr == NULL) {
129		printf("NULL hdr!!!\n");
130		g_free(vhdr);
131		return;
132	}
133	microtime(&hdr->label.last_update);
134	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
135
136	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
137	gv_format_config(sc, sb, 1, NULL);
138	sbuf_finish(sb);
139
140	error = g_access(cp2, 0, 1, 0);
141	if (error) {
142		printf("g_access failed: %d\n", error);
143		sbuf_delete(sb);
144		return;
145	}
146	g_topology_unlock();
147
148	do {
149		error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
150		if (error) {
151			printf("writing vhdr failed: %d", error);
152			break;
153		}
154
155		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
156		    GV_CFG_LEN);
157		if (error) {
158			printf("writing first config copy failed: %d", error);
159			break;
160		}
161
162		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
163		    sbuf_data(sb), GV_CFG_LEN);
164		if (error)
165			printf("writing second config copy failed: %d", error);
166	} while (0);
167
168	g_topology_lock();
169	g_access(cp2, 0, -1, 0);
170	sbuf_delete(sb);
171	g_free(vhdr);
172
173	if (d->geom != NULL)
174		gv_drive_modify(d);
175}
176
177/* This resembles g_slice_access(). */
178static int
179gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
180{
181	struct g_geom *gp;
182	struct g_consumer *cp;
183	struct g_provider *pp2;
184	struct gv_drive *d;
185	struct gv_sd *s, *s2;
186	int error;
187
188	gp = pp->geom;
189	cp = LIST_FIRST(&gp->consumer);
190	if (cp == NULL)
191		return (0);
192
193	d = gp->softc;
194
195	s = pp->private;
196	KASSERT(s != NULL, ("gv_drive_access: NULL s"));
197
198	LIST_FOREACH(s2, &d->subdisks, from_drive) {
199		if (s == s2)
200			continue;
201		if (s->drive_offset + s->size <= s2->drive_offset)
202			continue;
203		if (s2->drive_offset + s2->size <= s->drive_offset)
204			continue;
205
206		/* Overlap. */
207		pp2 = s2->provider;
208		KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
209		if ((pp->acw + dw) > 0 && pp2->ace > 0) {
210			printf("FOOO: permission denied - e\n");
211			return (EPERM);
212		}
213		if ((pp->ace + de) > 0 && pp2->acw > 0) {
214			printf("FOOO: permission denied - w\n");
215			return (EPERM);
216		}
217	}
218
219#if 0
220	/* On first open, grab an extra "exclusive" bit */
221	if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0)
222		de++;
223	/* ... and let go of it on last close */
224	if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1)
225		de--;
226#endif
227	error = g_access(cp, dr, dw, de);
228	if (error) {
229		printf("FOOO: g_access failed: %d\n", error);
230	}
231	return (error);
232}
233
234static void
235gv_drive_done(struct bio *bp)
236{
237	struct gv_drive *d;
238	struct gv_bioq *bq;
239
240	/* Put the BIO on the worker queue again. */
241	d = bp->bio_from->geom->softc;
242	bp->bio_cflags |= GV_BIO_DONE;
243	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
244	bq->bp = bp;
245	mtx_lock(&d->bqueue_mtx);
246	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
247	wakeup(d);
248	mtx_unlock(&d->bqueue_mtx);
249}
250
251
252static void
253gv_drive_start(struct bio *bp)
254{
255	struct gv_drive *d;
256	struct gv_sd *s;
257	struct gv_bioq *bq;
258
259	switch (bp->bio_cmd) {
260	case BIO_READ:
261	case BIO_WRITE:
262	case BIO_DELETE:
263		break;
264	case BIO_GETATTR:
265	default:
266		g_io_deliver(bp, EOPNOTSUPP);
267		return;
268	}
269
270	s = bp->bio_to->private;
271	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
272		g_io_deliver(bp, ENXIO);
273		return;
274	}
275
276	d = bp->bio_to->geom->softc;
277
278	/*
279	 * Put the BIO on the worker queue, where the worker thread will pick
280	 * it up.
281	 */
282	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
283	bq->bp = bp;
284	mtx_lock(&d->bqueue_mtx);
285	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
286	wakeup(d);
287	mtx_unlock(&d->bqueue_mtx);
288
289}
290
291static void
292gv_drive_worker(void *arg)
293{
294	struct bio *bp, *cbp;
295	struct g_geom *gp;
296	struct g_provider *pp;
297	struct g_consumer *cp;
298	struct gv_drive *d;
299	struct gv_sd *s;
300	struct gv_bioq *bq, *bq2;
301	int error;
302
303	d = arg;
304
305	mtx_lock(&d->bqueue_mtx);
306	for (;;) {
307		/* We were signaled to exit. */
308		if (d->flags & GV_DRIVE_THREAD_DIE)
309			break;
310
311		/* Take the first BIO from out queue. */
312		bq = TAILQ_FIRST(&d->bqueue);
313		if (bq == NULL) {
314			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
315			continue;
316 		}
317		TAILQ_REMOVE(&d->bqueue, bq, queue);
318		mtx_unlock(&d->bqueue_mtx);
319
320		bp = bq->bp;
321		g_free(bq);
322		pp = bp->bio_to;
323		gp = pp->geom;
324
325		/* Completed request. */
326		if (bp->bio_cflags & GV_BIO_DONE) {
327			error = bp->bio_error;
328
329			/* Deliver the original request. */
330			g_std_done(bp);
331
332			/* The request had an error, we need to clean up. */
333			if (error != 0) {
334				g_topology_lock();
335				cp = LIST_FIRST(&gp->consumer);
336				if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
337					g_access(cp, -cp->acr, -cp->acw,
338					    -cp->ace);
339				gv_set_drive_state(d, GV_DRIVE_DOWN,
340				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
341				if (cp->nstart == cp->nend) {
342					g_detach(cp);
343					g_destroy_consumer(cp);
344				}
345				g_topology_unlock();
346			}
347
348		/* New request, needs to be sent downwards. */
349		} else {
350			s = pp->private;
351
352			if ((s->state == GV_SD_DOWN) ||
353			    (s->state == GV_SD_STALE)) {
354				g_io_deliver(bp, ENXIO);
355				mtx_lock(&d->bqueue_mtx);
356				continue;
357			}
358			if (bp->bio_offset > s->size) {
359				g_io_deliver(bp, EINVAL);
360				mtx_lock(&d->bqueue_mtx);
361				continue;
362			}
363
364			cbp = g_clone_bio(bp);
365			if (cbp == NULL) {
366				g_io_deliver(bp, ENOMEM);
367				mtx_lock(&d->bqueue_mtx);
368				continue;
369			}
370			if (cbp->bio_offset + cbp->bio_length > s->size)
371				cbp->bio_length = s->size -
372				    cbp->bio_offset;
373			cbp->bio_done = gv_drive_done;
374			cbp->bio_offset += s->drive_offset;
375			g_io_request(cbp, LIST_FIRST(&gp->consumer));
376		}
377
378		mtx_lock(&d->bqueue_mtx);
379	}
380
381	TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) {
382		TAILQ_REMOVE(&d->bqueue, bq, queue);
383		mtx_unlock(&d->bqueue_mtx);
384		bp = bq->bp;
385		g_free(bq);
386		if (bp->bio_cflags & GV_BIO_DONE)
387			g_std_done(bp);
388		else
389			g_io_deliver(bp, ENXIO);
390		mtx_lock(&d->bqueue_mtx);
391	}
392	mtx_unlock(&d->bqueue_mtx);
393	d->flags |= GV_DRIVE_THREAD_DEAD;
394
395	kthread_exit(ENXIO);
396}
397
398
399static void
400gv_drive_orphan(struct g_consumer *cp)
401{
402	struct g_geom *gp;
403	struct gv_drive *d;
404	struct gv_sd *s;
405	int error;
406
407	g_topology_assert();
408	gp = cp->geom;
409	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
410	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
411		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
412	error = cp->provider->error;
413	if (error == 0)
414		error = ENXIO;
415	g_detach(cp);
416	g_destroy_consumer(cp);
417	if (!LIST_EMPTY(&gp->consumer))
418		return;
419	d = gp->softc;
420	if (d != NULL) {
421		printf("gvinum: lost drive '%s'\n", d->name);
422		d->geom = NULL;
423		LIST_FOREACH(s, &d->subdisks, from_drive) {
424			s->provider = NULL;
425			s->consumer = NULL;
426		}
427		gv_kill_drive_thread(d);
428		gv_set_drive_state(d, GV_DRIVE_DOWN,
429		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
430	}
431	gp->softc = NULL;
432	g_wither_geom(gp, error);
433}
434
435static struct g_geom *
436gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
437{
438	struct g_geom *gp, *gp2;
439	struct g_consumer *cp;
440	struct gv_drive *d;
441	struct gv_sd *s;
442	struct gv_softc *sc;
443	struct gv_freelist *fl;
444	struct gv_hdr *vhdr;
445	int error;
446	char *buf, errstr[ERRBUFSIZ];
447
448	vhdr = NULL;
449	d = NULL;
450
451	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
452	g_topology_assert();
453
454	if (pp->sectorsize == 0)
455		return(NULL);
456
457	/* Find the VINUM class and its associated geom. */
458	gp2 = find_vinum_geom();
459	if (gp2 == NULL)
460		return (NULL);
461	sc = gp2->softc;
462
463	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
464	gp->start = gv_drive_start;
465	gp->orphan = gv_drive_orphan;
466	gp->access = gv_drive_access;
467	gp->start = gv_drive_start;
468
469	cp = g_new_consumer(gp);
470	g_attach(cp, pp);
471	error = g_access(cp, 1, 0, 0);
472	if (error) {
473		g_detach(cp);
474		g_destroy_consumer(cp);
475		g_destroy_geom(gp);
476		return (NULL);
477	}
478
479	g_topology_unlock();
480
481	/* Now check if the provided slice is a valid vinum drive. */
482	do {
483		vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, &error);
484		if (vhdr == NULL || error != 0)
485			break;
486		if (vhdr->magic != GV_MAGIC) {
487			g_free(vhdr);
488			break;
489		}
490
491		/*
492		 * We have found a valid vinum drive.  Let's see if it is
493		 * already known in the configuration.  There's a chance that
494		 * the VINUMDRIVE class tastes before the VINUM class could
495		 * taste, so parse the configuration here too, just to be on
496		 * the safe side.
497		 */
498		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, &error);
499		if (buf == NULL || error != 0) {
500			g_free(vhdr);
501			break;
502		}
503		g_topology_lock();
504		gv_parse_config(sc, buf, 1);
505		g_free(buf);
506
507		d = gv_find_drive(sc, vhdr->label.name);
508
509		/* We already know about this drive. */
510		if (d != NULL) {
511			/* Check if this drive already has a geom. */
512			if (d->geom != NULL) {
513				g_topology_unlock();
514				break;
515			}
516			bcopy(vhdr, d->hdr, sizeof(*vhdr));
517
518		/* This is a new drive. */
519		} else {
520			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
521
522			/* Initialize all needed variables. */
523			d->size = pp->mediasize - GV_DATA_START;
524			d->avail = d->size;
525			d->hdr = vhdr;
526			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
527			LIST_INIT(&d->subdisks);
528			LIST_INIT(&d->freelist);
529
530			/* We also need a freelist entry. */
531			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
532			fl->offset = GV_DATA_START;
533			fl->size = d->avail;
534			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
535			d->freelist_entries = 1;
536
537			TAILQ_INIT(&d->bqueue);
538			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
539			kthread_create(gv_drive_worker, d, NULL, 0, 0,
540			    "gv_d %s", d->name);
541			d->flags |= GV_DRIVE_THREAD_ACTIVE;
542
543			/* Save it into the main configuration. */
544			LIST_INSERT_HEAD(&sc->drives, d, drive);
545		}
546
547		g_access(cp, -1, 0, 0);
548
549		gp->softc = d;
550		d->geom = gp;
551		d->vinumconf = sc;
552		strncpy(d->device, pp->name, GV_MAXDRIVENAME);
553
554		/*
555		 * Find out which subdisks belong to this drive and crosslink
556		 * them.
557		 */
558		LIST_FOREACH(s, &sc->subdisks, sd) {
559			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
560				/* XXX: errors ignored */
561				gv_sd_to_drive(sc, d, s, errstr,
562				    sizeof(errstr));
563		}
564
565		/* This drive is now up for sure. */
566		gv_set_drive_state(d, GV_DRIVE_UP, 0);
567
568		/*
569		 * If there are subdisks on this drive, we need to create
570		 * providers for them.
571		 */
572		if (d->sdcount)
573			gv_drive_modify(d);
574
575		return (gp);
576
577	} while (0);
578
579	g_topology_lock();
580	g_access(cp, -1, 0, 0);
581
582	g_detach(cp);
583	g_destroy_consumer(cp);
584	g_destroy_geom(gp);
585	return (NULL);
586}
587
588/*
589 * Modify the providers for the given drive 'd'.  It is assumed that the
590 * subdisk list of 'd' is already correctly set up.
591 */
592void
593gv_drive_modify(struct gv_drive *d)
594{
595	struct g_geom *gp;
596	struct g_consumer *cp;
597	struct g_provider *pp, *pp2;
598	struct gv_sd *s;
599	int nsd;
600
601	KASSERT(d != NULL, ("gv_drive_modify: null d"));
602	gp = d->geom;
603	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
604	cp = LIST_FIRST(&gp->consumer);
605	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
606	pp = cp->provider;
607	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
608
609	g_topology_assert();
610
611	nsd = 0;
612	LIST_FOREACH(s, &d->subdisks, from_drive) {
613		/* This subdisk already has a provider. */
614		if (s->provider != NULL)
615			continue;
616		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
617		pp2->mediasize = s->size;
618		pp2->sectorsize = pp->sectorsize;
619		g_error_provider(pp2, 0);
620		s->provider = pp2;
621		pp2->private = s;
622	}
623}
624
625static int
626gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
627    struct g_geom *gp)
628{
629	struct gv_drive *d;
630
631	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
632	g_topology_assert();
633
634	d = gp->softc;
635	gv_kill_drive_thread(d);
636
637	g_wither_geom(gp, ENXIO);
638	return (0);
639}
640
641#define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
642
643static struct g_class g_vinum_drive_class = {
644	.name = VINUMDRIVE_CLASS_NAME,
645	.version = G_VERSION,
646	.taste = gv_drive_taste,
647	.destroy_geom = gv_drive_destroy_geom
648};
649
650DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
651