geom_disk.c revision 321291
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: stable/10/sys/geom/geom_disk.c 321291 2017-07-20 11:37:02Z mav $");
38
39#include "opt_geom.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/bio.h>
46#include <sys/bus.h>
47#include <sys/ctype.h>
48#include <sys/fcntl.h>
49#include <sys/malloc.h>
50#include <sys/sbuf.h>
51#include <sys/devicestat.h>
52#include <machine/md_var.h>
53
54#include <sys/lock.h>
55#include <sys/mutex.h>
56#include <geom/geom.h>
57#include <geom/geom_disk.h>
58#include <geom/geom_int.h>
59
60#include <dev/led/led.h>
61
62#include <machine/bus.h>
63
/*
 * Per-disk GEOM state.  Allocated in g_disk_create(), stored both as the
 * geom's softc and as the provider's private pointer, and released in
 * g_disk_providergone().
 */
struct g_disk_softc {
	struct mtx		 done_mtx;	/* taken in g_disk_done() */
	struct disk		*dp;		/* disk; set to NULL by g_disk_destroy() */
	struct sysctl_ctx_list	sysctl_ctx;	/* context owning the per-disk sysctls */
	struct sysctl_oid	*sysctl_tree;	/* node under kern.geom.disk */
	char			led[64];	/* name passed to led_set(); "" if unset */
	uint32_t		state;		/* G_STATE_* from "GEOM::setstate" */
	struct mtx		 start_mtx;	/* taken around devstat start */
};
73
74static g_access_t g_disk_access;
75static g_start_t g_disk_start;
76static g_ioctl_t g_disk_ioctl;
77static g_dumpconf_t g_disk_dumpconf;
78static g_provgone_t g_disk_providergone;
79
80static struct g_class g_disk_class = {
81	.name = G_DISK_CLASS_NAME,
82	.version = G_VERSION,
83	.start = g_disk_start,
84	.access = g_disk_access,
85	.ioctl = g_disk_ioctl,
86	.providergone = g_disk_providergone,
87	.dumpconf = g_disk_dumpconf,
88};
89
/* Parent node kern.geom is defined elsewhere; kern.geom.disk lives here. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0,
    "GEOM_DISK stuff");

/* Register the class with GEOM. */
DECLARE_GEOM_CLASS(g_disk_class, g_disk);
95
96static void __inline
97g_disk_lock_giant(struct disk *dp)
98{
99
100	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
101		mtx_lock(&Giant);
102}
103
104static void __inline
105g_disk_unlock_giant(struct disk *dp)
106{
107
108	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
109		mtx_unlock(&Giant);
110}
111
112static int
113g_disk_access(struct g_provider *pp, int r, int w, int e)
114{
115	struct disk *dp;
116	struct g_disk_softc *sc;
117	int error;
118
119	g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
120	    pp->name, r, w, e);
121	g_topology_assert();
122	sc = pp->private;
123	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
124		/*
125		 * Allow decreasing access count even if disk is not
126		 * avaliable anymore.
127		 */
128		if (r <= 0 && w <= 0 && e <= 0)
129			return (0);
130		return (ENXIO);
131	}
132	r += pp->acr;
133	w += pp->acw;
134	e += pp->ace;
135	error = 0;
136	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
137		if (dp->d_open != NULL) {
138			g_disk_lock_giant(dp);
139			error = dp->d_open(dp);
140			if (bootverbose && error != 0)
141				printf("Opened disk %s -> %d\n",
142				    pp->name, error);
143			g_disk_unlock_giant(dp);
144			if (error != 0)
145				return (error);
146		}
147		pp->mediasize = dp->d_mediasize;
148		pp->sectorsize = dp->d_sectorsize;
149		if (dp->d_maxsize == 0) {
150			printf("WARNING: Disk drive %s%d has no d_maxsize\n",
151			    dp->d_name, dp->d_unit);
152			dp->d_maxsize = DFLTPHYS;
153		}
154		if (dp->d_delmaxsize == 0) {
155			if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) {
156				printf("WARNING: Disk drive %s%d has no "
157				    "d_delmaxsize\n", dp->d_name, dp->d_unit);
158			}
159			dp->d_delmaxsize = dp->d_maxsize;
160		}
161		pp->stripeoffset = dp->d_stripeoffset;
162		pp->stripesize = dp->d_stripesize;
163		dp->d_flags |= DISKFLAG_OPEN;
164	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
165		if (dp->d_close != NULL) {
166			g_disk_lock_giant(dp);
167			error = dp->d_close(dp);
168			if (error != 0)
169				printf("Closed disk %s -> %d\n",
170				    pp->name, error);
171			g_disk_unlock_giant(dp);
172		}
173		sc->state = G_STATE_ACTIVE;
174		if (sc->led[0] != 0)
175			led_set(sc->led, "0");
176		dp->d_flags &= ~DISKFLAG_OPEN;
177	}
178	return (error);
179}
180
181static void
182g_disk_kerneldump(struct bio *bp, struct disk *dp)
183{
184	struct g_kerneldump *gkd;
185	struct g_geom *gp;
186
187	gkd = (struct g_kerneldump*)bp->bio_data;
188	gp = bp->bio_to->geom;
189	g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)",
190		gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
191	if (dp->d_dump == NULL) {
192		g_io_deliver(bp, ENODEV);
193		return;
194	}
195	gkd->di.dumper = dp->d_dump;
196	gkd->di.priv = dp;
197	gkd->di.blocksize = dp->d_sectorsize;
198	gkd->di.maxiosize = dp->d_maxsize;
199	gkd->di.mediaoffset = gkd->offset;
200	if ((gkd->offset + gkd->length) > dp->d_mediasize)
201		gkd->length = dp->d_mediasize - gkd->offset;
202	gkd->di.mediasize = gkd->length;
203	g_io_deliver(bp, 0);
204}
205
206static void
207g_disk_setstate(struct bio *bp, struct g_disk_softc *sc)
208{
209	const char *cmd;
210
211	memcpy(&sc->state, bp->bio_data, sizeof(sc->state));
212	if (sc->led[0] != 0) {
213		switch (sc->state) {
214		case G_STATE_FAILED:
215			cmd = "1";
216			break;
217		case G_STATE_REBUILD:
218			cmd = "f5";
219			break;
220		case G_STATE_RESYNC:
221			cmd = "f1";
222			break;
223		default:
224			cmd = "0";
225			break;
226		}
227		led_set(sc->led, cmd);
228	}
229	g_io_deliver(bp, 0);
230}
231
232static void
233g_disk_done(struct bio *bp)
234{
235	struct bintime now;
236	struct bio *bp2;
237	struct g_disk_softc *sc;
238
239	/* See "notes" for why we need a mutex here */
240	/* XXX: will witness accept a mix of Giant/unGiant drivers here ? */
241	bp2 = bp->bio_parent;
242	sc = bp2->bio_to->private;
243	bp->bio_completed = bp->bio_length - bp->bio_resid;
244	binuptime(&now);
245	mtx_lock(&sc->done_mtx);
246	if (bp2->bio_error == 0)
247		bp2->bio_error = bp->bio_error;
248	bp2->bio_completed += bp->bio_completed;
249	if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE|BIO_FLUSH)) != 0)
250		devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now);
251	bp2->bio_inbed++;
252	if (bp2->bio_children == bp2->bio_inbed) {
253		mtx_unlock(&sc->done_mtx);
254		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
255		g_io_deliver(bp2, bp2->bio_error);
256	} else
257		mtx_unlock(&sc->done_mtx);
258	g_destroy_bio(bp);
259}
260
261static int
262g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
263{
264	struct disk *dp;
265	struct g_disk_softc *sc;
266	int error;
267
268	sc = pp->private;
269	dp = sc->dp;
270
271	if (dp->d_ioctl == NULL)
272		return (ENOIOCTL);
273	g_disk_lock_giant(dp);
274	error = dp->d_ioctl(dp, cmd, data, fflag, td);
275	g_disk_unlock_giant(dp);
276	return (error);
277}
278
279static off_t
280g_disk_maxsize(struct disk *dp, struct bio *bp)
281{
282	if (bp->bio_cmd == BIO_DELETE)
283		return (dp->d_delmaxsize);
284	return (dp->d_maxsize);
285}
286
287static int
288g_disk_maxsegs(struct disk *dp, struct bio *bp)
289{
290	return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1);
291}
292
293static void
294g_disk_advance(struct disk *dp, struct bio *bp, off_t off)
295{
296
297	bp->bio_offset += off;
298	bp->bio_length -= off;
299
300	if ((bp->bio_flags & BIO_VLIST) != 0) {
301		bus_dma_segment_t *seg, *end;
302
303		seg = (bus_dma_segment_t *)bp->bio_data;
304		end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
305		off += bp->bio_ma_offset;
306		while (off >= seg->ds_len) {
307			KASSERT((seg != end),
308			    ("vlist request runs off the end"));
309			off -= seg->ds_len;
310			seg++;
311		}
312		bp->bio_ma_offset = off;
313		bp->bio_ma_n = end - seg;
314		bp->bio_data = (void *)seg;
315	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
316		bp->bio_ma += off / PAGE_SIZE;
317		bp->bio_ma_offset += off;
318		bp->bio_ma_offset %= PAGE_SIZE;
319		bp->bio_ma_n -= off / PAGE_SIZE;
320	} else {
321		bp->bio_data += off;
322	}
323}
324
325static void
326g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset,
327    off_t *plength, int *ppages)
328{
329	uintptr_t seg_page_base;
330	uintptr_t seg_page_end;
331	off_t offset;
332	off_t length;
333	int seg_pages;
334
335	offset = *poffset;
336	length = *plength;
337
338	if (length > seg->ds_len - offset)
339		length = seg->ds_len - offset;
340
341	seg_page_base = trunc_page(seg->ds_addr + offset);
342	seg_page_end  = round_page(seg->ds_addr + offset + length);
343	seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT;
344
345	if (seg_pages > *ppages) {
346		seg_pages = *ppages;
347		length = (seg_page_base + (seg_pages << PAGE_SHIFT)) -
348		    (seg->ds_addr + offset);
349	}
350
351	*poffset = 0;
352	*plength -= length;
353	*ppages -= seg_pages;
354}
355
356static off_t
357g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg)
358{
359	bus_dma_segment_t *seg, *end;
360	off_t residual;
361	off_t offset;
362	int pages;
363
364	seg = (bus_dma_segment_t *)bp->bio_data;
365	end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
366	residual = bp->bio_length;
367	offset = bp->bio_ma_offset;
368	pages = g_disk_maxsegs(dp, bp);
369	while (residual != 0 && pages != 0) {
370		KASSERT((seg != end),
371		    ("vlist limit runs off the end"));
372		g_disk_seg_limit(seg, &offset, &residual, &pages);
373		seg++;
374	}
375	if (pendseg != NULL)
376		*pendseg = seg;
377	return (residual);
378}
379
380static bool
381g_disk_limit(struct disk *dp, struct bio *bp)
382{
383	bool limited = false;
384	off_t maxsz;
385
386	maxsz = g_disk_maxsize(dp, bp);
387
388	/*
389	 * XXX: If we have a stripesize we should really use it here.
390	 *      Care should be taken in the delete case if this is done
391	 *      as deletes can be very sensitive to size given how they
392	 *      are processed.
393	 */
394	if (bp->bio_length > maxsz) {
395		bp->bio_length = maxsz;
396		limited = true;
397	}
398
399	if ((bp->bio_flags & BIO_VLIST) != 0) {
400		bus_dma_segment_t *firstseg, *endseg;
401		off_t residual;
402
403		firstseg = (bus_dma_segment_t*)bp->bio_data;
404		residual = g_disk_vlist_limit(dp, bp, &endseg);
405		if (residual != 0) {
406			bp->bio_ma_n = endseg - firstseg;
407			bp->bio_length -= residual;
408			limited = true;
409		}
410	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
411		bp->bio_ma_n =
412		    howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE);
413	}
414
415	return (limited);
416}
417
418static void
419g_disk_start(struct bio *bp)
420{
421	struct bio *bp2, *bp3;
422	struct disk *dp;
423	struct g_disk_softc *sc;
424	int error;
425	off_t off;
426
427	sc = bp->bio_to->private;
428	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
429		g_io_deliver(bp, ENXIO);
430		return;
431	}
432	error = EJUSTRETURN;
433	switch(bp->bio_cmd) {
434	case BIO_DELETE:
435		if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
436			error = EOPNOTSUPP;
437			break;
438		}
439		/* fall-through */
440	case BIO_READ:
441	case BIO_WRITE:
442		KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 ||
443		    (bp->bio_flags & BIO_UNMAPPED) == 0,
444		    ("unmapped bio not supported by disk %s", dp->d_name));
445		off = 0;
446		bp3 = NULL;
447		bp2 = g_clone_bio(bp);
448		if (bp2 == NULL) {
449			error = ENOMEM;
450			break;
451		}
452		for (;;) {
453			if (g_disk_limit(dp, bp2)) {
454				off += bp2->bio_length;
455
456				/*
457				 * To avoid a race, we need to grab the next bio
458				 * before we schedule this one.  See "notes".
459				 */
460				bp3 = g_clone_bio(bp);
461				if (bp3 == NULL)
462					bp->bio_error = ENOMEM;
463			}
464			bp2->bio_done = g_disk_done;
465			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
466			bp2->bio_bcount = bp2->bio_length;
467			bp2->bio_disk = dp;
468			mtx_lock(&sc->start_mtx);
469			devstat_start_transaction_bio(dp->d_devstat, bp2);
470			mtx_unlock(&sc->start_mtx);
471			g_disk_lock_giant(dp);
472			dp->d_strategy(bp2);
473			g_disk_unlock_giant(dp);
474
475			if (bp3 == NULL)
476				break;
477
478			bp2 = bp3;
479			bp3 = NULL;
480			g_disk_advance(dp, bp2, off);
481		}
482		break;
483	case BIO_GETATTR:
484		/* Give the driver a chance to override */
485		if (dp->d_getattr != NULL) {
486			if (bp->bio_disk == NULL)
487				bp->bio_disk = dp;
488			error = dp->d_getattr(bp);
489			if (error != -1)
490				break;
491			error = EJUSTRETURN;
492		}
493		if (g_handleattr_int(bp, "GEOM::candelete",
494		    (dp->d_flags & DISKFLAG_CANDELETE) != 0))
495			break;
496		else if (g_handleattr_int(bp, "GEOM::fwsectors",
497		    dp->d_fwsectors))
498			break;
499		else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
500			break;
501		else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0))
502			break;
503		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
504			break;
505		else if (g_handleattr_str(bp, "GEOM::descr", dp->d_descr))
506			break;
507		else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor",
508		    dp->d_hba_vendor))
509			break;
510		else if (g_handleattr_uint16_t(bp, "GEOM::hba_device",
511		    dp->d_hba_device))
512			break;
513		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor",
514		    dp->d_hba_subvendor))
515			break;
516		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice",
517		    dp->d_hba_subdevice))
518			break;
519		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
520			g_disk_kerneldump(bp, dp);
521		else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
522			g_disk_setstate(bp, sc);
523		else if (!strcmp(bp->bio_attribute, "GEOM::rotation_rate")) {
524			uint64_t v;
525
526			if ((dp->d_flags & DISKFLAG_LACKS_ROTRATE) == 0)
527				v = dp->d_rotation_rate;
528			else
529				v = 0; /* rate unknown */
530			g_handleattr_uint16_t(bp, "GEOM::rotation_rate", v);
531			break;
532		} else
533			error = ENOIOCTL;
534		break;
535	case BIO_FLUSH:
536		g_trace(G_T_BIO, "g_disk_flushcache(%s)",
537		    bp->bio_to->name);
538		if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
539			error = EOPNOTSUPP;
540			break;
541		}
542		bp2 = g_clone_bio(bp);
543		if (bp2 == NULL) {
544			g_io_deliver(bp, ENOMEM);
545			return;
546		}
547		bp2->bio_done = g_disk_done;
548		bp2->bio_disk = dp;
549		mtx_lock(&sc->start_mtx);
550		devstat_start_transaction_bio(dp->d_devstat, bp2);
551		mtx_unlock(&sc->start_mtx);
552		g_disk_lock_giant(dp);
553		dp->d_strategy(bp2);
554		g_disk_unlock_giant(dp);
555		break;
556	default:
557		error = EOPNOTSUPP;
558		break;
559	}
560	if (error != EJUSTRETURN)
561		g_io_deliver(bp, error);
562	return;
563}
564
565static void
566g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
567{
568	struct bio *bp;
569	struct disk *dp;
570	struct g_disk_softc *sc;
571	char *buf;
572	int res = 0;
573
574	sc = gp->softc;
575	if (sc == NULL || (dp = sc->dp) == NULL)
576		return;
577	if (indent == NULL) {
578		sbuf_printf(sb, " hd %u", dp->d_fwheads);
579		sbuf_printf(sb, " sc %u", dp->d_fwsectors);
580		return;
581	}
582	if (pp != NULL) {
583		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
584		    indent, dp->d_fwheads);
585		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
586		    indent, dp->d_fwsectors);
587
588		/*
589		 * "rotationrate" is a little complicated, because the value
590		 * returned by the drive might not be the RPM; 0 and 1 are
591		 * special cases, and there's also a valid range.
592		 */
593		sbuf_printf(sb, "%s<rotationrate>", indent);
594		if (dp->d_rotation_rate == DISK_RR_UNKNOWN) /* Old drives */
595			sbuf_printf(sb, "unknown");	/* don't report RPM. */
596		else if (dp->d_rotation_rate == DISK_RR_NON_ROTATING)
597			sbuf_printf(sb, "0");
598		else if ((dp->d_rotation_rate >= DISK_RR_MIN) &&
599		    (dp->d_rotation_rate <= DISK_RR_MAX))
600			sbuf_printf(sb, "%u", dp->d_rotation_rate);
601		else
602			sbuf_printf(sb, "invalid");
603		sbuf_printf(sb, "</rotationrate>\n");
604		if (dp->d_getattr != NULL) {
605			buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK);
606			bp = g_alloc_bio();
607			bp->bio_disk = dp;
608			bp->bio_attribute = "GEOM::ident";
609			bp->bio_length = DISK_IDENT_SIZE;
610			bp->bio_data = buf;
611			res = dp->d_getattr(bp);
612			sbuf_printf(sb, "%s<ident>", indent);
613			g_conf_printf_escaped(sb, "%s",
614			    res == 0 ? buf: dp->d_ident);
615			sbuf_printf(sb, "</ident>\n");
616			bp->bio_attribute = "GEOM::lunid";
617			bp->bio_length = DISK_IDENT_SIZE;
618			bp->bio_data = buf;
619			if (dp->d_getattr(bp) == 0) {
620				sbuf_printf(sb, "%s<lunid>", indent);
621				g_conf_printf_escaped(sb, "%s", buf);
622				sbuf_printf(sb, "</lunid>\n");
623			}
624			bp->bio_attribute = "GEOM::lunname";
625			bp->bio_length = DISK_IDENT_SIZE;
626			bp->bio_data = buf;
627			if (dp->d_getattr(bp) == 0) {
628				sbuf_printf(sb, "%s<lunname>", indent);
629				g_conf_printf_escaped(sb, "%s", buf);
630				sbuf_printf(sb, "</lunname>\n");
631			}
632			g_destroy_bio(bp);
633			g_free(buf);
634		} else {
635			sbuf_printf(sb, "%s<ident>", indent);
636			g_conf_printf_escaped(sb, "%s", dp->d_ident);
637			sbuf_printf(sb, "</ident>\n");
638		}
639		sbuf_printf(sb, "%s<descr>", indent);
640		g_conf_printf_escaped(sb, "%s", dp->d_descr);
641		sbuf_printf(sb, "</descr>\n");
642	}
643}
644
645static void
646g_disk_resize(void *ptr, int flag)
647{
648	struct disk *dp;
649	struct g_geom *gp;
650	struct g_provider *pp;
651
652	if (flag == EV_CANCEL)
653		return;
654	g_topology_assert();
655
656	dp = ptr;
657	gp = dp->d_geom;
658
659	if (dp->d_destroyed || gp == NULL)
660		return;
661
662	LIST_FOREACH(pp, &gp->provider, provider) {
663		if (pp->sectorsize != 0 &&
664		    pp->sectorsize != dp->d_sectorsize)
665			g_wither_provider(pp, ENXIO);
666		else
667			g_resize_provider(pp, dp->d_mediasize);
668	}
669}
670
671static void
672g_disk_create(void *arg, int flag)
673{
674	struct g_geom *gp;
675	struct g_provider *pp;
676	struct disk *dp;
677	struct g_disk_softc *sc;
678	char tmpstr[80];
679
680	if (flag == EV_CANCEL)
681		return;
682	g_topology_assert();
683	dp = arg;
684	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
685	mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF);
686	mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF);
687	sc->dp = dp;
688	gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
689	gp->softc = sc;
690	pp = g_new_providerf(gp, "%s", gp->name);
691	devstat_remove_entry(pp->stat);
692	pp->stat = NULL;
693	dp->d_devstat->id = pp;
694	pp->mediasize = dp->d_mediasize;
695	pp->sectorsize = dp->d_sectorsize;
696	pp->stripeoffset = dp->d_stripeoffset;
697	pp->stripesize = dp->d_stripesize;
698	if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0)
699		pp->flags |= G_PF_ACCEPT_UNMAPPED;
700	if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0)
701		pp->flags |= G_PF_DIRECT_SEND;
702	pp->flags |= G_PF_DIRECT_RECEIVE;
703	if (bootverbose)
704		printf("GEOM: new disk %s\n", gp->name);
705	sysctl_ctx_init(&sc->sysctl_ctx);
706	snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name);
707	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
708		SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name,
709		CTLFLAG_RD, 0, tmpstr);
710	if (sc->sysctl_tree != NULL) {
711		snprintf(tmpstr, sizeof(tmpstr),
712		    "kern.geom.disk.%s.led", gp->name);
713		TUNABLE_STR_FETCH(tmpstr, sc->led, sizeof(sc->led));
714		SYSCTL_ADD_STRING(&sc->sysctl_ctx,
715		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led",
716		    CTLFLAG_RW | CTLFLAG_TUN, sc->led, sizeof(sc->led),
717		    "LED name");
718	}
719	pp->private = sc;
720	dp->d_geom = gp;
721	g_error_provider(pp, 0);
722}
723
724/*
725 * We get this callback after all of the consumers have gone away, and just
726 * before the provider is freed.  If the disk driver provided a d_gone
727 * callback, let them know that it is okay to free resources -- they won't
728 * be getting any more accesses from GEOM.
729 */
730static void
731g_disk_providergone(struct g_provider *pp)
732{
733	struct disk *dp;
734	struct g_disk_softc *sc;
735
736	sc = (struct g_disk_softc *)pp->private;
737	dp = sc->dp;
738	if (dp != NULL && dp->d_gone != NULL)
739		dp->d_gone(dp);
740	if (sc->sysctl_tree != NULL) {
741		sysctl_ctx_free(&sc->sysctl_ctx);
742		sc->sysctl_tree = NULL;
743	}
744	if (sc->led[0] != 0) {
745		led_set(sc->led, "0");
746		sc->led[0] = 0;
747	}
748	pp->private = NULL;
749	pp->geom->softc = NULL;
750	mtx_destroy(&sc->done_mtx);
751	mtx_destroy(&sc->start_mtx);
752	g_free(sc);
753}
754
755static void
756g_disk_destroy(void *ptr, int flag)
757{
758	struct disk *dp;
759	struct g_geom *gp;
760	struct g_disk_softc *sc;
761
762	g_topology_assert();
763	dp = ptr;
764	gp = dp->d_geom;
765	if (gp != NULL) {
766		sc = gp->softc;
767		if (sc != NULL)
768			sc->dp = NULL;
769		dp->d_geom = NULL;
770		g_wither_geom(gp, ENXIO);
771	}
772	g_free(dp);
773}
774
775/*
776 * We only allow printable characters in disk ident,
777 * the rest is converted to 'x<HH>'.
778 */
779static void
780g_disk_ident_adjust(char *ident, size_t size)
781{
782	char *p, tmp[4], newid[DISK_IDENT_SIZE];
783
784	newid[0] = '\0';
785	for (p = ident; *p != '\0'; p++) {
786		if (isprint(*p)) {
787			tmp[0] = *p;
788			tmp[1] = '\0';
789		} else {
790			snprintf(tmp, sizeof(tmp), "x%02hhx",
791			    *(unsigned char *)p);
792		}
793		if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid))
794			break;
795	}
796	bzero(ident, size);
797	strlcpy(ident, newid, size);
798}
799
800struct disk *
801disk_alloc(void)
802{
803
804	return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO));
805}
806
807void
808disk_create(struct disk *dp, int version)
809{
810
811	if (version != DISK_VERSION) {
812		printf("WARNING: Attempt to add disk %s%d %s",
813		    dp->d_name, dp->d_unit,
814		    " using incompatible ABI version of disk(9)\n");
815		printf("WARNING: Ignoring disk %s%d\n",
816		    dp->d_name, dp->d_unit);
817		return;
818	}
819	if (version < DISK_VERSION_04)
820		dp->d_flags |= DISKFLAG_LACKS_ROTRATE;
821	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
822	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
823	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
824	KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));
825	if (dp->d_devstat == NULL)
826		dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
827		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
828		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
829	dp->d_geom = NULL;
830	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
831	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
832}
833
834void
835disk_destroy(struct disk *dp)
836{
837
838	g_cancel_event(dp);
839	dp->d_destroyed = 1;
840	if (dp->d_devstat != NULL)
841		devstat_remove_entry(dp->d_devstat);
842	g_post_event(g_disk_destroy, dp, M_WAITOK, NULL);
843}
844
845void
846disk_gone(struct disk *dp)
847{
848	struct g_geom *gp;
849	struct g_provider *pp;
850
851	gp = dp->d_geom;
852	if (gp != NULL) {
853		pp = LIST_FIRST(&gp->provider);
854		if (pp != NULL) {
855			KASSERT(LIST_NEXT(pp, provider) == NULL,
856			    ("geom %p has more than one provider", gp));
857			g_wither_provider(pp, ENXIO);
858		}
859	}
860}
861
862void
863disk_attr_changed(struct disk *dp, const char *attr, int flag)
864{
865	struct g_geom *gp;
866	struct g_provider *pp;
867	char devnamebuf[128];
868
869	gp = dp->d_geom;
870	if (gp != NULL)
871		LIST_FOREACH(pp, &gp->provider, provider)
872			(void)g_attr_changed(pp, attr, flag);
873	snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name,
874	    dp->d_unit);
875	devctl_notify("GEOM", "disk", attr, devnamebuf);
876}
877
878void
879disk_media_changed(struct disk *dp, int flag)
880{
881	struct g_geom *gp;
882	struct g_provider *pp;
883
884	gp = dp->d_geom;
885	if (gp != NULL) {
886		pp = LIST_FIRST(&gp->provider);
887		if (pp != NULL) {
888			KASSERT(LIST_NEXT(pp, provider) == NULL,
889			    ("geom %p has more than one provider", gp));
890			g_media_changed(pp, flag);
891		}
892	}
893}
894
895void
896disk_media_gone(struct disk *dp, int flag)
897{
898	struct g_geom *gp;
899	struct g_provider *pp;
900
901	gp = dp->d_geom;
902	if (gp != NULL) {
903		pp = LIST_FIRST(&gp->provider);
904		if (pp != NULL) {
905			KASSERT(LIST_NEXT(pp, provider) == NULL,
906			    ("geom %p has more than one provider", gp));
907			g_media_gone(pp, flag);
908		}
909	}
910}
911
912int
913disk_resize(struct disk *dp, int flag)
914{
915
916	if (dp->d_destroyed || dp->d_geom == NULL)
917		return (0);
918
919	return (g_post_event(g_disk_resize, dp, flag, NULL));
920}
921
922static void
923g_kern_disks(void *p, int flag __unused)
924{
925	struct sbuf *sb;
926	struct g_geom *gp;
927	char *sp;
928
929	sb = p;
930	sp = "";
931	g_topology_assert();
932	LIST_FOREACH(gp, &g_disk_class.geom, geom) {
933		sbuf_printf(sb, "%s%s", sp, gp->name);
934		sp = " ";
935	}
936	sbuf_finish(sb);
937}
938
939static int
940sysctl_disks(SYSCTL_HANDLER_ARGS)
941{
942	int error;
943	struct sbuf *sb;
944
945	sb = sbuf_new_auto();
946	g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
947	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
948	sbuf_delete(sb);
949	return error;
950}
951
/* kern.disks: read-only string produced by sysctl_disks(). */
SYSCTL_PROC(_kern, OID_AUTO, disks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_disks, "A", "names of available disks");
955