geom_dev.c revision 243333
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/geom/geom_dev.c 243333 2012-11-20 12:32:18Z jh $");
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/malloc.h>
42#include <sys/kernel.h>
43#include <sys/conf.h>
44#include <sys/ctype.h>
45#include <sys/bio.h>
46#include <sys/bus.h>
47#include <sys/lock.h>
48#include <sys/mutex.h>
49#include <sys/proc.h>
50#include <sys/errno.h>
51#include <sys/time.h>
52#include <sys/disk.h>
53#include <sys/fcntl.h>
54#include <sys/limits.h>
55#include <geom/geom.h>
56#include <geom/geom_int.h>
57#include <machine/stdarg.h>
58
59/*
60 * Use the consumer private field to reference a physdev alias (if any).
61 */
62#define cp_alias_dev	private
63
64static d_open_t		g_dev_open;
65static d_close_t	g_dev_close;
66static d_strategy_t	g_dev_strategy;
67static d_ioctl_t	g_dev_ioctl;
68
69static struct cdevsw g_dev_cdevsw = {
70	.d_version =	D_VERSION,
71	.d_open =	g_dev_open,
72	.d_close =	g_dev_close,
73	.d_read =	physread,
74	.d_write =	physwrite,
75	.d_ioctl =	g_dev_ioctl,
76	.d_strategy =	g_dev_strategy,
77	.d_name =	"g_dev",
78	.d_flags =	D_DISK | D_TRACKCLOSE,
79};
80
81static g_taste_t g_dev_taste;
82static g_orphan_t g_dev_orphan;
83static g_attrchanged_t g_dev_attrchanged;
84
85static struct g_class g_dev_class	= {
86	.name = "DEV",
87	.version = G_VERSION,
88	.taste = g_dev_taste,
89	.orphan = g_dev_orphan,
90	.attrchanged = g_dev_attrchanged
91};
92
93void
94g_dev_print(void)
95{
96	struct g_geom *gp;
97	char const *p = "";
98
99	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
100		printf("%s%s", p, gp->name);
101		p = " ";
102	}
103	printf("\n");
104}
105
106static void
107g_dev_attrchanged(struct g_consumer *cp, const char *attr)
108{
109	struct cdev *dev;
110	char buf[SPECNAMELEN + 6];
111
112	if (strcmp(attr, "GEOM::media") == 0) {
113		dev = cp->geom->softc;
114		snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
115		devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK);
116		dev = cp->cp_alias_dev;
117		if (dev != NULL) {
118			snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
119			devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf,
120			    M_WAITOK);
121		}
122		return;
123	}
124
125	if (strcmp(attr, "GEOM::physpath") != 0)
126		return;
127
128	if (g_access(cp, 1, 0, 0) == 0) {
129		char *physpath;
130		int error, physpath_len;
131
132		physpath_len = MAXPATHLEN;
133		physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
134		error =
135		    g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
136		g_access(cp, -1, 0, 0);
137		if (error == 0 && strlen(physpath) != 0) {
138			struct cdev *old_alias_dev;
139			struct cdev **alias_devp;
140
141			dev = cp->geom->softc;
142			old_alias_dev = cp->cp_alias_dev;
143			alias_devp = (struct cdev **)&cp->cp_alias_dev;
144			make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp,
145			    dev, old_alias_dev, physpath);
146		} else if (cp->cp_alias_dev) {
147			destroy_dev((struct cdev *)cp->cp_alias_dev);
148			cp->cp_alias_dev = NULL;
149		}
150		g_free(physpath);
151	}
152}
153
154struct g_provider *
155g_dev_getprovider(struct cdev *dev)
156{
157	struct g_consumer *cp;
158
159	g_topology_assert();
160	if (dev == NULL)
161		return (NULL);
162	if (dev->si_devsw != &g_dev_cdevsw)
163		return (NULL);
164	cp = dev->si_drv2;
165	return (cp->provider);
166}
167
168static struct g_geom *
169g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
170{
171	struct g_geom *gp;
172	struct g_consumer *cp;
173	int error, len;
174	struct cdev *dev, *adev;
175	char buf[64], *val;
176
177	g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
178	g_topology_assert();
179	gp = g_new_geomf(mp, "%s", pp->name);
180	cp = g_new_consumer(gp);
181	error = g_attach(cp, pp);
182	KASSERT(error == 0,
183	    ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));
184	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev,
185	    &g_dev_cdevsw, NULL, UID_ROOT, GID_OPERATOR, 0640, "%s", gp->name);
186	if (error != 0) {
187		printf("%s: make_dev_p() failed (gp->name=%s, error=%d)\n",
188		    __func__, gp->name, error);
189		g_detach(cp);
190		g_destroy_consumer(cp);
191		g_destroy_geom(gp);
192		return (NULL);
193	}
194
195	/* Search for device alias name and create it if found. */
196	adev = NULL;
197	for (len = MIN(strlen(gp->name), sizeof(buf) - 15); len > 0; len--) {
198		snprintf(buf, sizeof(buf), "kern.devalias.%s", gp->name);
199		buf[14 + len] = 0;
200		val = getenv(buf);
201		if (val != NULL) {
202			snprintf(buf, sizeof(buf), "%s%s",
203			    val, gp->name + len);
204			freeenv(val);
205			make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
206			    &adev, dev, "%s", buf);
207			break;
208		}
209	}
210
211	dev->si_iosize_max = MAXPHYS;
212	gp->softc = dev;
213	dev->si_drv1 = gp;
214	dev->si_drv2 = cp;
215	if (adev != NULL) {
216		adev->si_iosize_max = MAXPHYS;
217		adev->si_drv1 = gp;
218		adev->si_drv2 = cp;
219	}
220
221	g_dev_attrchanged(cp, "GEOM::physpath");
222
223	return (gp);
224}
225
226static int
227g_dev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
228{
229	struct g_geom *gp;
230	struct g_consumer *cp;
231	int error, r, w, e;
232
233	gp = dev->si_drv1;
234	cp = dev->si_drv2;
235	if (gp == NULL || cp == NULL || gp->softc != dev)
236		return(ENXIO);		/* g_dev_taste() not done yet */
237
238	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
239	    gp->name, flags, fmt, td);
240
241	r = flags & FREAD ? 1 : 0;
242	w = flags & FWRITE ? 1 : 0;
243#ifdef notyet
244	e = flags & O_EXCL ? 1 : 0;
245#else
246	e = 0;
247#endif
248	if (w) {
249		/*
250		 * When running in very secure mode, do not allow
251		 * opens for writing of any disks.
252		 */
253		error = securelevel_ge(td->td_ucred, 2);
254		if (error)
255			return (error);
256	}
257	g_topology_lock();
258	if (dev->si_devsw == NULL)
259		error = ENXIO;		/* We were orphaned */
260	else
261		error = g_access(cp, r, w, e);
262	g_topology_unlock();
263	return(error);
264}
265
266static int
267g_dev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
268{
269	struct g_geom *gp;
270	struct g_consumer *cp;
271	int error, r, w, e, i;
272
273	gp = dev->si_drv1;
274	cp = dev->si_drv2;
275	if (gp == NULL || cp == NULL)
276		return(ENXIO);
277	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
278	    gp->name, flags, fmt, td);
279	r = flags & FREAD ? -1 : 0;
280	w = flags & FWRITE ? -1 : 0;
281#ifdef notyet
282	e = flags & O_EXCL ? -1 : 0;
283#else
284	e = 0;
285#endif
286	g_topology_lock();
287	if (dev->si_devsw == NULL)
288		error = ENXIO;		/* We were orphaned */
289	else
290		error = g_access(cp, r, w, e);
291	for (i = 0; i < 10 * hz;) {
292		if (cp->acr != 0 || cp->acw != 0)
293			break;
294 		if (cp->nstart == cp->nend)
295			break;
296		pause("gdevwclose", hz / 10);
297		i += hz / 10;
298	}
299	if (cp->acr == 0 && cp->acw == 0 && cp->nstart != cp->nend) {
300		printf("WARNING: Final close of geom_dev(%s) %s %s\n",
301		    gp->name,
302		    "still has outstanding I/O after 10 seconds.",
303		    "Completing close anyway, panic may happen later.");
304	}
305	g_topology_unlock();
306	return (error);
307}
308
309/*
310 * XXX: Until we have unmessed the ioctl situation, there is a race against
311 * XXX: a concurrent orphanization.  We cannot close it by holding topology
312 * XXX: since that would prevent us from doing our job, and stalling events
313 * XXX: will break (actually: stall) the BSD disklabel hacks.
314 */
315static int
316g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
317{
318	struct g_geom *gp;
319	struct g_consumer *cp;
320	struct g_provider *pp;
321	struct g_kerneldump kd;
322	off_t offset, length, chunk;
323	int i, error;
324	u_int u;
325
326	gp = dev->si_drv1;
327	cp = dev->si_drv2;
328	pp = cp->provider;
329
330	error = 0;
331	KASSERT(cp->acr || cp->acw,
332	    ("Consumer with zero access count in g_dev_ioctl"));
333
334	i = IOCPARM_LEN(cmd);
335	switch (cmd) {
336	case DIOCGSECTORSIZE:
337		*(u_int *)data = cp->provider->sectorsize;
338		if (*(u_int *)data == 0)
339			error = ENOENT;
340		break;
341	case DIOCGMEDIASIZE:
342		*(off_t *)data = cp->provider->mediasize;
343		if (*(off_t *)data == 0)
344			error = ENOENT;
345		break;
346	case DIOCGFWSECTORS:
347		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
348		if (error == 0 && *(u_int *)data == 0)
349			error = ENOENT;
350		break;
351	case DIOCGFWHEADS:
352		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
353		if (error == 0 && *(u_int *)data == 0)
354			error = ENOENT;
355		break;
356	case DIOCGFRONTSTUFF:
357		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
358		break;
359	case DIOCSKERNELDUMP:
360		u = *((u_int *)data);
361		if (!u) {
362			set_dumper(NULL, NULL);
363			error = 0;
364			break;
365		}
366		kd.offset = 0;
367		kd.length = OFF_MAX;
368		i = sizeof kd;
369		error = g_io_getattr("GEOM::kerneldump", cp, &i, &kd);
370		if (!error) {
371			error = set_dumper(&kd.di, devtoname(dev));
372			if (!error)
373				dev->si_flags |= SI_DUMPDEV;
374		}
375		break;
376	case DIOCGFLUSH:
377		error = g_io_flush(cp);
378		break;
379	case DIOCGDELETE:
380		offset = ((off_t *)data)[0];
381		length = ((off_t *)data)[1];
382		if ((offset % cp->provider->sectorsize) != 0 ||
383		    (length % cp->provider->sectorsize) != 0 || length <= 0) {
384			printf("%s: offset=%jd length=%jd\n", __func__, offset,
385			    length);
386			error = EINVAL;
387			break;
388		}
389		while (length > 0) {
390			chunk = length;
391			if (chunk > 65536 * cp->provider->sectorsize)
392				chunk = 65536 * cp->provider->sectorsize;
393			error = g_delete_data(cp, offset, chunk);
394			length -= chunk;
395			offset += chunk;
396			if (error)
397				break;
398			/*
399			 * Since the request size is unbounded, the service
400			 * time is likewise.  We make this ioctl interruptible
401			 * by checking for signals for each bio.
402			 */
403			if (SIGPENDING(td))
404				break;
405		}
406		break;
407	case DIOCGIDENT:
408		error = g_io_getattr("GEOM::ident", cp, &i, data);
409		break;
410	case DIOCGPROVIDERNAME:
411		if (pp == NULL)
412			return (ENOENT);
413		strlcpy(data, pp->name, i);
414		break;
415	case DIOCGSTRIPESIZE:
416		*(off_t *)data = cp->provider->stripesize;
417		break;
418	case DIOCGSTRIPEOFFSET:
419		*(off_t *)data = cp->provider->stripeoffset;
420		break;
421	case DIOCGPHYSPATH:
422		error = g_io_getattr("GEOM::physpath", cp, &i, data);
423		if (error == 0 && *(char *)data == '\0')
424			error = ENOENT;
425		break;
426	default:
427		if (cp->provider->geom->ioctl != NULL) {
428			error = cp->provider->geom->ioctl(cp->provider, cmd, data, fflag, td);
429		} else {
430			error = ENOIOCTL;
431		}
432	}
433
434	return (error);
435}
436
437static void
438g_dev_done(struct bio *bp2)
439{
440	struct bio *bp;
441
442	bp = bp2->bio_parent;
443	bp->bio_error = bp2->bio_error;
444	if (bp->bio_error != 0) {
445		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
446		    bp2, bp->bio_error);
447		bp->bio_flags |= BIO_ERROR;
448	} else {
449		g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd",
450		    bp2, bp, bp->bio_resid, (intmax_t)bp2->bio_completed);
451	}
452	bp->bio_resid = bp->bio_length - bp2->bio_completed;
453	bp->bio_completed = bp2->bio_completed;
454	g_destroy_bio(bp2);
455	biodone(bp);
456}
457
458static void
459g_dev_strategy(struct bio *bp)
460{
461	struct g_consumer *cp;
462	struct bio *bp2;
463	struct cdev *dev;
464
465	KASSERT(bp->bio_cmd == BIO_READ ||
466	        bp->bio_cmd == BIO_WRITE ||
467	        bp->bio_cmd == BIO_DELETE,
468		("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd));
469	dev = bp->bio_dev;
470	cp = dev->si_drv2;
471	KASSERT(cp->acr || cp->acw,
472	    ("Consumer with zero access count in g_dev_strategy"));
473#ifdef INVARIANTS
474	if ((bp->bio_offset % cp->provider->sectorsize) != 0 ||
475	    (bp->bio_bcount % cp->provider->sectorsize) != 0) {
476		bp->bio_resid = bp->bio_bcount;
477		biofinish(bp, NULL, EINVAL);
478		return;
479	}
480#endif
481	for (;;) {
482		/*
483		 * XXX: This is not an ideal solution, but I belive it to
484		 * XXX: deadlock safe, all things considered.
485		 */
486		bp2 = g_clone_bio(bp);
487		if (bp2 != NULL)
488			break;
489		pause("gdstrat", hz / 10);
490	}
491	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
492	bp2->bio_done = g_dev_done;
493	g_trace(G_T_BIO,
494	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
495	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
496	    bp2->bio_data, bp2->bio_cmd);
497	g_io_request(bp2, cp);
498	KASSERT(cp->acr || cp->acw,
499	    ("g_dev_strategy raced with g_dev_close and lost"));
500
501}
502
503/*
504 * g_dev_orphan()
505 *
506 * Called from below when the provider orphaned us.
507 * - Clear any dump settings.
508 * - Destroy the struct cdev to prevent any more request from coming in.  The
509 *   provider is already marked with an error, so anything which comes in
510 *   in the interrim will be returned immediately.
511 * - Wait for any outstanding I/O to finish.
512 * - Set our access counts to zero, whatever they were.
513 * - Detach and self-destruct.
514 */
515
516static void
517g_dev_orphan(struct g_consumer *cp)
518{
519	struct g_geom *gp;
520	struct cdev *dev;
521
522	g_topology_assert();
523	gp = cp->geom;
524	dev = gp->softc;
525	g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, gp->name);
526
527	/* Reset any dump-area set on this device */
528	if (dev->si_flags & SI_DUMPDEV)
529		set_dumper(NULL, NULL);
530
531	/* Destroy the struct cdev *so we get no more requests */
532	destroy_dev(dev);
533
534	/* Wait for the cows to come home */
535	while (cp->nstart != cp->nend)
536		pause("gdevorphan", hz / 10);
537
538	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
539		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
540
541	g_detach(cp);
542	g_destroy_consumer(cp);
543	g_destroy_geom(gp);
544}
545
/*
 * Declare g_dev_class as a GEOM class under the name g_dev.  The macro is
 * defined outside this file; presumably it also takes care of registering
 * the class -- confirm against its definition.
 */
DECLARE_GEOM_CLASS(g_dev_class, g_dev);
547