geom_dev.c revision 114216
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * $FreeBSD: head/sys/geom/geom_dev.c 114216 2003-04-29 13:36:06Z kan $
36 */
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/kernel.h>
42#include <sys/conf.h>
43#include <sys/bio.h>
44#include <sys/lock.h>
45#include <sys/mutex.h>
46#include <sys/errno.h>
47#include <sys/time.h>
48#include <sys/disk.h>
49#include <sys/fcntl.h>
50#include <sys/limits.h>
51#include <geom/geom.h>
52#include <geom/geom_int.h>
53
54static d_open_t		g_dev_open;
55static d_close_t	g_dev_close;
56static d_strategy_t	g_dev_strategy;
57static d_ioctl_t	g_dev_ioctl;
58
59static struct cdevsw g_dev_cdevsw = {
60	.d_open =	g_dev_open,
61	.d_close =	g_dev_close,
62	.d_read =	physread,
63	.d_write =	physwrite,
64	.d_ioctl =	g_dev_ioctl,
65	.d_strategy =	g_dev_strategy,
66	.d_name =	"g_dev",
67	.d_maj =	GEOM_MAJOR,
68	.d_flags =	D_DISK | D_TRACKCLOSE,
69};
70
71static g_taste_t g_dev_taste;
72static g_orphan_t g_dev_orphan;
73
74static struct g_class g_dev_class	= {
75	.name = "DEV",
76	.taste = g_dev_taste,
77	G_CLASS_INITIALIZER
78};
79
80int
81g_dev_print(void)
82{
83	struct g_geom *gp;
84
85	if (LIST_EMPTY(&g_dev_class.geom))
86		return (0);
87	printf("List of GEOM disk devices:\n  ");
88	LIST_FOREACH(gp, &g_dev_class.geom, geom)
89		printf(" %s", gp->name);
90	printf("\n");
91	return (1);
92}
93
94/*
95 * XXX: This is disgusting and wrong in every way imaginable:  The only reason
96 * XXX: we have a clone function is because of the root-mount hack we currently
97 * XXX: employ.  An improvment would be to unregister this cloner once we know
98 * XXX: we no longer need it.  Ideally, root-fs would be mounted through DEVFS
99 * XXX: eliminating the need for this hack.
100 */
101static void
102g_dev_clone(void *arg __unused, char *name, int namelen __unused, dev_t *dev)
103{
104	struct g_geom *gp;
105
106	if (*dev != NODEV)
107		return;
108
109	g_waitidle();
110
111	/* g_topology_lock(); */
112	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
113		if (strcmp(gp->name, name))
114			continue;
115		*dev = gp->softc;
116		g_trace(G_T_TOPOLOGY, "g_dev_clone(%s) = %p", name, *dev);
117		return;
118	}
119	/* g_topology_unlock(); */
120	return;
121}
122
123static void
124g_dev_register_cloner(void *foo __unused)
125{
126	static int once;
127
128	/* XXX: why would this happen more than once ?? */
129	if (!once) {
130		EVENTHANDLER_REGISTER(dev_clone, g_dev_clone, 0, 1000);
131		once++;
132	}
133}
134
135SYSINIT(geomdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,g_dev_register_cloner,NULL);
136
137static struct g_geom *
138g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
139{
140	struct g_geom *gp;
141	struct g_consumer *cp;
142	static int unit = GEOM_MINOR_PROVIDERS;
143	int error;
144	dev_t dev;
145
146	g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
147	g_topology_assert();
148	LIST_FOREACH(cp, &pp->consumers, consumers)
149		if (cp->geom->class == mp)
150			return (NULL);
151	gp = g_new_geomf(mp, pp->name);
152	gp->orphan = g_dev_orphan;
153	cp = g_new_consumer(gp);
154	error = g_attach(cp, pp);
155	KASSERT(error == 0,
156	    ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));
157	/*
158	 * XXX: I'm not 100% sure we can call make_dev(9) without Giant
159	 * yet.  Once we can, we don't need to drop topology here either.
160	 */
161	g_topology_unlock();
162	mtx_lock(&Giant);
163	dev = make_dev(&g_dev_cdevsw, unit2minor(unit++),
164	    UID_ROOT, GID_OPERATOR, 0640, gp->name);
165	if (pp->flags & G_PF_CANDELETE)
166		dev->si_flags |= SI_CANDELETE;
167	mtx_unlock(&Giant);
168	g_topology_lock();
169	dev->si_iosize_max = MAXPHYS;
170	dev->si_stripesize = pp->stripesize;
171	dev->si_stripeoffset = pp->stripeoffset;
172	gp->softc = dev;
173	dev->si_drv1 = gp;
174	dev->si_drv2 = cp;
175	return (gp);
176}
177
178static int
179g_dev_open(dev_t dev, int flags, int fmt, struct thread *td)
180{
181	struct g_geom *gp;
182	struct g_consumer *cp;
183	int error, r, w, e;
184
185	gp = dev->si_drv1;
186	cp = dev->si_drv2;
187	if (gp == NULL || cp == NULL || gp->softc != dev)
188		return(ENXIO);		/* g_dev_taste() not done yet */
189
190	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
191	    gp->name, flags, fmt, td);
192	r = flags & FREAD ? 1 : 0;
193	w = flags & FWRITE ? 1 : 0;
194#ifdef notyet
195	e = flags & O_EXCL ? 1 : 0;
196#else
197	e = 0;
198#endif
199	DROP_GIANT();
200	g_topology_lock();
201	if (dev->si_devsw == NULL)
202		error = ENXIO;		/* We were orphaned */
203	else
204		error = g_access_rel(cp, r, w, e);
205	g_topology_unlock();
206	PICKUP_GIANT();
207	g_waitidle();
208	if (!error)
209		dev->si_bsize_phys = cp->provider->sectorsize;
210	return(error);
211}
212
213static int
214g_dev_close(dev_t dev, int flags, int fmt, struct thread *td)
215{
216	struct g_geom *gp;
217	struct g_consumer *cp;
218	int error, r, w, e;
219
220	gp = dev->si_drv1;
221	cp = dev->si_drv2;
222	if (gp == NULL || cp == NULL)
223		return(ENXIO);
224	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
225	    gp->name, flags, fmt, td);
226	r = flags & FREAD ? -1 : 0;
227	w = flags & FWRITE ? -1 : 0;
228#ifdef notyet
229	e = flags & O_EXCL ? -1 : 0;
230#else
231	e = 0;
232#endif
233	DROP_GIANT();
234	g_topology_lock();
235	if (dev->si_devsw == NULL)
236		error = ENXIO;		/* We were orphaned */
237	else
238		error = g_access_rel(cp, r, w, e);
239	KASSERT((cp->acr || cp->acw) || (cp->nstart == cp->nend),
240	    ("final g_dev_close() with outstanding bios"));
241	g_topology_unlock();
242	PICKUP_GIANT();
243	g_waitidle();
244	return (error);
245}
246
247/*
248 * XXX: Until we have unmessed the ioctl situation, there is a race against
249 * XXX: a concurrent orphanization.  We cannot close it by holding topology
250 * XXX: since that would prevent us from doing our job, and stalling events
251 * XXX: will break (actually: stall) the BSD disklabel hacks.
252 */
253static int
254g_dev_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
255{
256	struct g_geom *gp, *gp2;
257	struct g_consumer *cp;
258	struct g_provider *pp2;
259	struct g_kerneldump kd;
260	int i, error;
261	u_int u;
262	struct g_ioctl *gio;
263
264	gp = dev->si_drv1;
265	cp = dev->si_drv2;
266	pp2 = cp->provider;
267	gp2 = pp2->geom;
268	gio = NULL;
269
270	error = 0;
271	KASSERT(cp->acr || cp->acw,
272	    ("Consumer with zero access count in g_dev_ioctl"));
273	DROP_GIANT();
274
275	gio = NULL;
276	i = IOCPARM_LEN(cmd);
277	switch (cmd) {
278	case DIOCGSECTORSIZE:
279		*(u_int *)data = cp->provider->sectorsize;
280		if (*(u_int *)data == 0)
281			error = ENOENT;
282		break;
283	case DIOCGMEDIASIZE:
284		*(off_t *)data = cp->provider->mediasize;
285		if (*(off_t *)data == 0)
286			error = ENOENT;
287		break;
288	case DIOCGFWSECTORS:
289		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
290		if (error == 0 && *(u_int *)data == 0)
291			error = ENOENT;
292		break;
293	case DIOCGFWHEADS:
294		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
295		if (error == 0 && *(u_int *)data == 0)
296			error = ENOENT;
297		break;
298	case DIOCGFRONTSTUFF:
299		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
300		break;
301	case DIOCSKERNELDUMP:
302		u = *((u_int *)data);
303		if (!u) {
304			set_dumper(NULL);
305			error = 0;
306			break;
307		}
308		kd.offset = 0;
309		kd.length = OFF_MAX;
310		i = sizeof kd;
311		error = g_io_getattr("GEOM::kerneldump", cp, &i, &kd);
312		if (!error)
313			dev->si_flags |= SI_DUMPDEV;
314		break;
315
316	default:
317		gio = g_malloc(sizeof *gio, M_WAITOK | M_ZERO);
318		gio->cmd = cmd;
319		gio->data = data;
320		gio->fflag = fflag;
321		gio->td = td;
322		i = sizeof *gio;
323		/*
324		 * We always issue ioctls as getattr since the direction of data
325		 * movement in ioctl is no indication of the ioctl being a "set"
326		 * or "get" type ioctl or if such simplistic terms even apply
327		 */
328		error = g_io_getattr("GEOM::ioctl", cp, &i, gio);
329		break;
330	}
331
332	PICKUP_GIANT();
333	if (error == EDIRIOCTL) {
334		KASSERT(gio != NULL, ("NULL gio but EDIRIOCTL"));
335		KASSERT(gio->func != NULL, ("NULL function but EDIRIOCTL"));
336		error = (gio->func)(gio->dev, cmd, data, fflag, td);
337	}
338	g_waitidle();
339	if (gio != NULL && (error == EOPNOTSUPP || error == ENOIOCTL)) {
340		if (g_debugflags & G_T_TOPOLOGY) {
341			i = IOCGROUP(cmd);
342			printf("IOCTL(0x%lx) \"%s\"", cmd, gp->name);
343			if (i > ' ' && i <= '~')
344				printf(" '%c'", (int)IOCGROUP(cmd));
345			else
346				printf(" 0x%lx", IOCGROUP(cmd));
347			printf("/%ld ", cmd & 0xff);
348			if (cmd & IOC_IN)
349				printf("I");
350			if (cmd & IOC_OUT)
351				printf("O");
352			printf("(%ld) = ENOIOCTL\n", IOCPARM_LEN(cmd));
353		}
354		error = ENOTTY;
355	}
356	if (gio != NULL)
357		g_free(gio);
358	return (error);
359}
360
361static void
362g_dev_done(struct bio *bp2)
363{
364	struct bio *bp;
365
366	bp = bp2->bio_parent;
367	bp->bio_error = bp2->bio_error;
368	if (bp->bio_error != 0) {
369		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
370		    bp2, bp->bio_error);
371		bp->bio_flags |= BIO_ERROR;
372	} else {
373		g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd",
374		    bp2, bp, bp->bio_resid, (intmax_t)bp2->bio_completed);
375	}
376	bp->bio_resid = bp->bio_bcount - bp2->bio_completed;
377	g_destroy_bio(bp2);
378	mtx_lock(&Giant);
379	biodone(bp);
380	mtx_unlock(&Giant);
381}
382
383static void
384g_dev_strategy(struct bio *bp)
385{
386	struct g_geom *gp;
387	struct g_consumer *cp;
388	struct bio *bp2;
389	dev_t dev;
390
391	KASSERT(bp->bio_cmd == BIO_READ ||
392	        bp->bio_cmd == BIO_WRITE ||
393	        bp->bio_cmd == BIO_DELETE,
394		("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd));
395	dev = bp->bio_dev;
396	gp = dev->si_drv1;
397	cp = dev->si_drv2;
398	KASSERT(cp->acr || cp->acw,
399	    ("Consumer with zero access count in g_dev_strategy"));
400
401	bp2 = g_clone_bio(bp);
402	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
403	bp2->bio_offset = (off_t)bp->bio_blkno << DEV_BSHIFT;
404	KASSERT(bp2->bio_offset >= 0,
405	    ("Negative bio_offset (%jd) on bio %p",
406	    (intmax_t)bp2->bio_offset, bp));
407	bp2->bio_length = (off_t)bp->bio_bcount;
408	bp2->bio_done = g_dev_done;
409	g_trace(G_T_BIO,
410	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
411	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
412	    bp2->bio_data, bp2->bio_cmd);
413	g_io_request(bp2, cp);
414	KASSERT(cp->acr || cp->acw,
415	    ("g_dev_strategy raced with g_dev_close and lost"));
416
417}
418
419/*
420 * g_dev_orphan()
421 *
422 * Called from below when the provider orphaned us.
423 * - Clear any dump settings.
424 * - Destroy the dev_t to prevent any more request from coming in.  The
425 *   provider is already marked with an error, so anything which comes in
426 *   in the interrim will be returned immediately.
427 * - Wait for any outstanding I/O to finish.
428 * - Set our access counts to zero, whatever they were.
429 * - Detach and self-destruct.
430 */
431
432static void
433g_dev_orphan(struct g_consumer *cp)
434{
435	struct g_geom *gp;
436	dev_t dev;
437
438	g_topology_assert();
439	gp = cp->geom;
440	dev = gp->softc;
441	g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, gp->name);
442
443	/* Reset any dump-area set on this device */
444	if (dev->si_flags & SI_DUMPDEV)
445		set_dumper(NULL);
446
447	/* Destroy the dev_t so we get no more requests */
448	destroy_dev(dev);
449
450	/* Wait for the cows to come home */
451	while (cp->nstart != cp->nend)
452		msleep(&dev, NULL, PRIBIO, "gdevorphan", hz / 10);
453
454	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
455		g_access_rel(cp, -cp->acr, -cp->acw, -cp->ace);
456
457	g_detach(cp);
458	g_destroy_consumer(cp);
459	g_destroy_geom(gp);
460}
461
462DECLARE_GEOM_CLASS(g_dev_class, g_dev);
463