geom_disk.c revision 300214
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: stable/10/sys/geom/geom_disk.c 300214 2016-05-19 16:04:20Z asomers $");
38
39#include "opt_geom.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/bio.h>
46#include <sys/bus.h>
47#include <sys/ctype.h>
48#include <sys/fcntl.h>
49#include <sys/malloc.h>
50#include <sys/sbuf.h>
51#include <sys/devicestat.h>
52#include <machine/md_var.h>
53
54#include <sys/lock.h>
55#include <sys/mutex.h>
56#include <geom/geom.h>
57#include <geom/geom_disk.h>
58#include <geom/geom_int.h>
59
60#include <dev/led/led.h>
61
62#include <machine/bus.h>
63
64struct g_disk_softc {
65	struct mtx		 done_mtx;
66	struct disk		*dp;
67	struct sysctl_ctx_list	sysctl_ctx;
68	struct sysctl_oid	*sysctl_tree;
69	char			led[64];
70	uint32_t		state;
71	struct mtx		 start_mtx;
72};
73
74static g_access_t g_disk_access;
75static g_start_t g_disk_start;
76static g_ioctl_t g_disk_ioctl;
77static g_dumpconf_t g_disk_dumpconf;
78static g_provgone_t g_disk_providergone;
79
80static struct g_class g_disk_class = {
81	.name = G_DISK_CLASS_NAME,
82	.version = G_VERSION,
83	.start = g_disk_start,
84	.access = g_disk_access,
85	.ioctl = g_disk_ioctl,
86	.providergone = g_disk_providergone,
87	.dumpconf = g_disk_dumpconf,
88};
89
90SYSCTL_DECL(_kern_geom);
91static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0,
92    "GEOM_DISK stuff");
93
94DECLARE_GEOM_CLASS(g_disk_class, g_disk);
95
96static void __inline
97g_disk_lock_giant(struct disk *dp)
98{
99
100	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
101		mtx_lock(&Giant);
102}
103
104static void __inline
105g_disk_unlock_giant(struct disk *dp)
106{
107
108	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
109		mtx_unlock(&Giant);
110}
111
112static int
113g_disk_access(struct g_provider *pp, int r, int w, int e)
114{
115	struct disk *dp;
116	struct g_disk_softc *sc;
117	int error;
118
119	g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
120	    pp->name, r, w, e);
121	g_topology_assert();
122	sc = pp->private;
123	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
124		/*
125		 * Allow decreasing access count even if disk is not
126		 * avaliable anymore.
127		 */
128		if (r <= 0 && w <= 0 && e <= 0)
129			return (0);
130		return (ENXIO);
131	}
132	r += pp->acr;
133	w += pp->acw;
134	e += pp->ace;
135	error = 0;
136	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
137		if (dp->d_open != NULL) {
138			g_disk_lock_giant(dp);
139			error = dp->d_open(dp);
140			if (bootverbose && error != 0)
141				printf("Opened disk %s -> %d\n",
142				    pp->name, error);
143			g_disk_unlock_giant(dp);
144			if (error != 0)
145				return (error);
146		}
147		pp->mediasize = dp->d_mediasize;
148		pp->sectorsize = dp->d_sectorsize;
149		if (dp->d_maxsize == 0) {
150			printf("WARNING: Disk drive %s%d has no d_maxsize\n",
151			    dp->d_name, dp->d_unit);
152			dp->d_maxsize = DFLTPHYS;
153		}
154		if (dp->d_delmaxsize == 0) {
155			if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) {
156				printf("WARNING: Disk drive %s%d has no "
157				    "d_delmaxsize\n", dp->d_name, dp->d_unit);
158			}
159			dp->d_delmaxsize = dp->d_maxsize;
160		}
161		pp->stripeoffset = dp->d_stripeoffset;
162		pp->stripesize = dp->d_stripesize;
163		dp->d_flags |= DISKFLAG_OPEN;
164	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
165		if (dp->d_close != NULL) {
166			g_disk_lock_giant(dp);
167			error = dp->d_close(dp);
168			if (error != 0)
169				printf("Closed disk %s -> %d\n",
170				    pp->name, error);
171			g_disk_unlock_giant(dp);
172		}
173		sc->state = G_STATE_ACTIVE;
174		if (sc->led[0] != 0)
175			led_set(sc->led, "0");
176		dp->d_flags &= ~DISKFLAG_OPEN;
177	}
178	return (error);
179}
180
181static void
182g_disk_kerneldump(struct bio *bp, struct disk *dp)
183{
184	struct g_kerneldump *gkd;
185	struct g_geom *gp;
186
187	gkd = (struct g_kerneldump*)bp->bio_data;
188	gp = bp->bio_to->geom;
189	g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)",
190		gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
191	if (dp->d_dump == NULL) {
192		g_io_deliver(bp, ENODEV);
193		return;
194	}
195	gkd->di.dumper = dp->d_dump;
196	gkd->di.priv = dp;
197	gkd->di.blocksize = dp->d_sectorsize;
198	gkd->di.maxiosize = dp->d_maxsize;
199	gkd->di.mediaoffset = gkd->offset;
200	if ((gkd->offset + gkd->length) > dp->d_mediasize)
201		gkd->length = dp->d_mediasize - gkd->offset;
202	gkd->di.mediasize = gkd->length;
203	g_io_deliver(bp, 0);
204}
205
206static void
207g_disk_setstate(struct bio *bp, struct g_disk_softc *sc)
208{
209	const char *cmd;
210
211	memcpy(&sc->state, bp->bio_data, sizeof(sc->state));
212	if (sc->led[0] != 0) {
213		switch (sc->state) {
214		case G_STATE_FAILED:
215			cmd = "1";
216			break;
217		case G_STATE_REBUILD:
218			cmd = "f5";
219			break;
220		case G_STATE_RESYNC:
221			cmd = "f1";
222			break;
223		default:
224			cmd = "0";
225			break;
226		}
227		led_set(sc->led, cmd);
228	}
229	g_io_deliver(bp, 0);
230}
231
232static void
233g_disk_done(struct bio *bp)
234{
235	struct bintime now;
236	struct bio *bp2;
237	struct g_disk_softc *sc;
238
239	/* See "notes" for why we need a mutex here */
240	/* XXX: will witness accept a mix of Giant/unGiant drivers here ? */
241	bp2 = bp->bio_parent;
242	sc = bp2->bio_to->private;
243	bp->bio_completed = bp->bio_length - bp->bio_resid;
244	binuptime(&now);
245	mtx_lock(&sc->done_mtx);
246	if (bp2->bio_error == 0)
247		bp2->bio_error = bp->bio_error;
248	bp2->bio_completed += bp->bio_completed;
249	if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE|BIO_FLUSH)) != 0)
250		devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now);
251	bp2->bio_inbed++;
252	if (bp2->bio_children == bp2->bio_inbed) {
253		mtx_unlock(&sc->done_mtx);
254		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
255		g_io_deliver(bp2, bp2->bio_error);
256	} else
257		mtx_unlock(&sc->done_mtx);
258	g_destroy_bio(bp);
259}
260
261static int
262g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
263{
264	struct disk *dp;
265	struct g_disk_softc *sc;
266	int error;
267
268	sc = pp->private;
269	dp = sc->dp;
270
271	if (dp->d_ioctl == NULL)
272		return (ENOIOCTL);
273	g_disk_lock_giant(dp);
274	error = dp->d_ioctl(dp, cmd, data, fflag, td);
275	g_disk_unlock_giant(dp);
276	return (error);
277}
278
279static off_t
280g_disk_maxsize(struct disk *dp, struct bio *bp)
281{
282	if (bp->bio_cmd == BIO_DELETE)
283		return (dp->d_delmaxsize);
284	return (dp->d_maxsize);
285}
286
287static int
288g_disk_maxsegs(struct disk *dp, struct bio *bp)
289{
290	return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1);
291}
292
293static void
294g_disk_advance(struct disk *dp, struct bio *bp, off_t off)
295{
296
297	bp->bio_offset += off;
298	bp->bio_length -= off;
299
300	if ((bp->bio_flags & BIO_VLIST) != 0) {
301		bus_dma_segment_t *seg, *end;
302
303		seg = (bus_dma_segment_t *)bp->bio_data;
304		end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
305		off += bp->bio_ma_offset;
306		while (off >= seg->ds_len) {
307			KASSERT((seg != end),
308			    ("vlist request runs off the end"));
309			off -= seg->ds_len;
310			seg++;
311		}
312		bp->bio_ma_offset = off;
313		bp->bio_ma_n = end - seg;
314		bp->bio_data = (void *)seg;
315	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
316		bp->bio_ma += off / PAGE_SIZE;
317		bp->bio_ma_offset += off;
318		bp->bio_ma_offset %= PAGE_SIZE;
319		bp->bio_ma_n -= off / PAGE_SIZE;
320	} else {
321		bp->bio_data += off;
322	}
323}
324
325static void
326g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset,
327    off_t *plength, int *ppages)
328{
329	uintptr_t seg_page_base;
330	uintptr_t seg_page_end;
331	off_t offset;
332	off_t length;
333	int seg_pages;
334
335	offset = *poffset;
336	length = *plength;
337
338	if (length > seg->ds_len - offset)
339		length = seg->ds_len - offset;
340
341	seg_page_base = trunc_page(seg->ds_addr + offset);
342	seg_page_end  = round_page(seg->ds_addr + offset + length);
343	seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT;
344
345	if (seg_pages > *ppages) {
346		seg_pages = *ppages;
347		length = (seg_page_base + (seg_pages << PAGE_SHIFT)) -
348		    (seg->ds_addr + offset);
349	}
350
351	*poffset = 0;
352	*plength -= length;
353	*ppages -= seg_pages;
354}
355
356static off_t
357g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg)
358{
359	bus_dma_segment_t *seg, *end;
360	off_t residual;
361	off_t offset;
362	int pages;
363
364	seg = (bus_dma_segment_t *)bp->bio_data;
365	end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
366	residual = bp->bio_length;
367	offset = bp->bio_ma_offset;
368	pages = g_disk_maxsegs(dp, bp);
369	while (residual != 0 && pages != 0) {
370		KASSERT((seg != end),
371		    ("vlist limit runs off the end"));
372		g_disk_seg_limit(seg, &offset, &residual, &pages);
373		seg++;
374	}
375	if (pendseg != NULL)
376		*pendseg = seg;
377	return (residual);
378}
379
380static bool
381g_disk_limit(struct disk *dp, struct bio *bp)
382{
383	bool limited = false;
384	off_t maxsz;
385
386	maxsz = g_disk_maxsize(dp, bp);
387
388	/*
389	 * XXX: If we have a stripesize we should really use it here.
390	 *      Care should be taken in the delete case if this is done
391	 *      as deletes can be very sensitive to size given how they
392	 *      are processed.
393	 */
394	if (bp->bio_length > maxsz) {
395		bp->bio_length = maxsz;
396		limited = true;
397	}
398
399	if ((bp->bio_flags & BIO_VLIST) != 0) {
400		bus_dma_segment_t *firstseg, *endseg;
401		off_t residual;
402
403		firstseg = (bus_dma_segment_t*)bp->bio_data;
404		residual = g_disk_vlist_limit(dp, bp, &endseg);
405		if (residual != 0) {
406			bp->bio_ma_n = endseg - firstseg;
407			bp->bio_length -= residual;
408			limited = true;
409		}
410	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
411		bp->bio_ma_n =
412		    howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE);
413	}
414
415	return (limited);
416}
417
418static void
419g_disk_start(struct bio *bp)
420{
421	struct bio *bp2, *bp3;
422	struct disk *dp;
423	struct g_disk_softc *sc;
424	int error;
425	off_t off;
426
427	sc = bp->bio_to->private;
428	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
429		g_io_deliver(bp, ENXIO);
430		return;
431	}
432	error = EJUSTRETURN;
433	switch(bp->bio_cmd) {
434	case BIO_DELETE:
435		if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
436			error = EOPNOTSUPP;
437			break;
438		}
439		/* fall-through */
440	case BIO_READ:
441	case BIO_WRITE:
442		KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 ||
443		    (bp->bio_flags & BIO_UNMAPPED) == 0,
444		    ("unmapped bio not supported by disk %s", dp->d_name));
445		off = 0;
446		bp3 = NULL;
447		bp2 = g_clone_bio(bp);
448		if (bp2 == NULL) {
449			error = ENOMEM;
450			break;
451		}
452		for (;;) {
453			if (g_disk_limit(dp, bp2)) {
454				off += bp2->bio_length;
455
456				/*
457				 * To avoid a race, we need to grab the next bio
458				 * before we schedule this one.  See "notes".
459				 */
460				bp3 = g_clone_bio(bp);
461				if (bp3 == NULL)
462					bp->bio_error = ENOMEM;
463			}
464			bp2->bio_done = g_disk_done;
465			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
466			bp2->bio_bcount = bp2->bio_length;
467			bp2->bio_disk = dp;
468			mtx_lock(&sc->start_mtx);
469			devstat_start_transaction_bio(dp->d_devstat, bp2);
470			mtx_unlock(&sc->start_mtx);
471			g_disk_lock_giant(dp);
472			dp->d_strategy(bp2);
473			g_disk_unlock_giant(dp);
474
475			if (bp3 == NULL)
476				break;
477
478			bp2 = bp3;
479			bp3 = NULL;
480			g_disk_advance(dp, bp2, off);
481		}
482		break;
483	case BIO_GETATTR:
484		/* Give the driver a chance to override */
485		if (dp->d_getattr != NULL) {
486			if (bp->bio_disk == NULL)
487				bp->bio_disk = dp;
488			error = dp->d_getattr(bp);
489			if (error != -1)
490				break;
491			error = EJUSTRETURN;
492		}
493		if (g_handleattr_int(bp, "GEOM::candelete",
494		    (dp->d_flags & DISKFLAG_CANDELETE) != 0))
495			break;
496		else if (g_handleattr_int(bp, "GEOM::fwsectors",
497		    dp->d_fwsectors))
498			break;
499		else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
500			break;
501		else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0))
502			break;
503		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
504			break;
505		else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor",
506		    dp->d_hba_vendor))
507			break;
508		else if (g_handleattr_uint16_t(bp, "GEOM::hba_device",
509		    dp->d_hba_device))
510			break;
511		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor",
512		    dp->d_hba_subvendor))
513			break;
514		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice",
515		    dp->d_hba_subdevice))
516			break;
517		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
518			g_disk_kerneldump(bp, dp);
519		else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
520			g_disk_setstate(bp, sc);
521		else if (!strcmp(bp->bio_attribute, "GEOM::rotation_rate")) {
522			uint64_t v;
523
524			if ((dp->d_flags & DISKFLAG_LACKS_ROTRATE) == 0)
525				v = dp->d_rotation_rate;
526			else
527				v = 0; /* rate unknown */
528			g_handleattr_uint16_t(bp, "GEOM::rotation_rate", v);
529			break;
530		} else
531			error = ENOIOCTL;
532		break;
533	case BIO_FLUSH:
534		g_trace(G_T_BIO, "g_disk_flushcache(%s)",
535		    bp->bio_to->name);
536		if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
537			error = EOPNOTSUPP;
538			break;
539		}
540		bp2 = g_clone_bio(bp);
541		if (bp2 == NULL) {
542			g_io_deliver(bp, ENOMEM);
543			return;
544		}
545		bp2->bio_done = g_disk_done;
546		bp2->bio_disk = dp;
547		mtx_lock(&sc->start_mtx);
548		devstat_start_transaction_bio(dp->d_devstat, bp2);
549		mtx_unlock(&sc->start_mtx);
550		g_disk_lock_giant(dp);
551		dp->d_strategy(bp2);
552		g_disk_unlock_giant(dp);
553		break;
554	default:
555		error = EOPNOTSUPP;
556		break;
557	}
558	if (error != EJUSTRETURN)
559		g_io_deliver(bp, error);
560	return;
561}
562
563static void
564g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
565{
566	struct bio *bp;
567	struct disk *dp;
568	struct g_disk_softc *sc;
569	char *buf;
570	int res = 0;
571
572	sc = gp->softc;
573	if (sc == NULL || (dp = sc->dp) == NULL)
574		return;
575	if (indent == NULL) {
576		sbuf_printf(sb, " hd %u", dp->d_fwheads);
577		sbuf_printf(sb, " sc %u", dp->d_fwsectors);
578		return;
579	}
580	if (pp != NULL) {
581		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
582		    indent, dp->d_fwheads);
583		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
584		    indent, dp->d_fwsectors);
585
586		/*
587		 * "rotationrate" is a little complicated, because the value
588		 * returned by the drive might not be the RPM; 0 and 1 are
589		 * special cases, and there's also a valid range.
590		 */
591		sbuf_printf(sb, "%s<rotationrate>", indent);
592		if (dp->d_rotation_rate == 0)		/* Old drives don't */
593			sbuf_printf(sb, "unknown");	/* report RPM. */
594		else if (dp->d_rotation_rate == 1)	/* Since 0 is used */
595			sbuf_printf(sb, "0");		/* above, SSDs use 1. */
596		else if ((dp->d_rotation_rate >= 0x041) &&
597		    (dp->d_rotation_rate <= 0xfffe))
598			sbuf_printf(sb, "%u", dp->d_rotation_rate);
599		else
600			sbuf_printf(sb, "invalid");
601		sbuf_printf(sb, "</rotationrate>\n");
602		if (dp->d_getattr != NULL) {
603			buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK);
604			bp = g_alloc_bio();
605			bp->bio_disk = dp;
606			bp->bio_attribute = "GEOM::ident";
607			bp->bio_length = DISK_IDENT_SIZE;
608			bp->bio_data = buf;
609			res = dp->d_getattr(bp);
610			sbuf_printf(sb, "%s<ident>", indent);
611			g_conf_printf_escaped(sb, "%s",
612			    res == 0 ? buf: dp->d_ident);
613			sbuf_printf(sb, "</ident>\n");
614			bp->bio_attribute = "GEOM::lunid";
615			bp->bio_length = DISK_IDENT_SIZE;
616			bp->bio_data = buf;
617			if (dp->d_getattr(bp) == 0) {
618				sbuf_printf(sb, "%s<lunid>", indent);
619				g_conf_printf_escaped(sb, "%s", buf);
620				sbuf_printf(sb, "</lunid>\n");
621			}
622			bp->bio_attribute = "GEOM::lunname";
623			bp->bio_length = DISK_IDENT_SIZE;
624			bp->bio_data = buf;
625			if (dp->d_getattr(bp) == 0) {
626				sbuf_printf(sb, "%s<lunname>", indent);
627				g_conf_printf_escaped(sb, "%s", buf);
628				sbuf_printf(sb, "</lunname>\n");
629			}
630			g_destroy_bio(bp);
631			g_free(buf);
632		} else {
633			sbuf_printf(sb, "%s<ident>", indent);
634			g_conf_printf_escaped(sb, "%s", dp->d_ident);
635			sbuf_printf(sb, "</ident>\n");
636		}
637		sbuf_printf(sb, "%s<descr>", indent);
638		g_conf_printf_escaped(sb, "%s", dp->d_descr);
639		sbuf_printf(sb, "</descr>\n");
640	}
641}
642
643static void
644g_disk_resize(void *ptr, int flag)
645{
646	struct disk *dp;
647	struct g_geom *gp;
648	struct g_provider *pp;
649
650	if (flag == EV_CANCEL)
651		return;
652	g_topology_assert();
653
654	dp = ptr;
655	gp = dp->d_geom;
656
657	if (dp->d_destroyed || gp == NULL)
658		return;
659
660	LIST_FOREACH(pp, &gp->provider, provider) {
661		if (pp->sectorsize != 0 &&
662		    pp->sectorsize != dp->d_sectorsize)
663			g_wither_provider(pp, ENXIO);
664		else
665			g_resize_provider(pp, dp->d_mediasize);
666	}
667}
668
669static void
670g_disk_create(void *arg, int flag)
671{
672	struct g_geom *gp;
673	struct g_provider *pp;
674	struct disk *dp;
675	struct g_disk_softc *sc;
676	char tmpstr[80];
677
678	if (flag == EV_CANCEL)
679		return;
680	g_topology_assert();
681	dp = arg;
682	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
683	mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF);
684	mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF);
685	sc->dp = dp;
686	gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
687	gp->softc = sc;
688	pp = g_new_providerf(gp, "%s", gp->name);
689	devstat_remove_entry(pp->stat);
690	pp->stat = NULL;
691	dp->d_devstat->id = pp;
692	pp->mediasize = dp->d_mediasize;
693	pp->sectorsize = dp->d_sectorsize;
694	pp->stripeoffset = dp->d_stripeoffset;
695	pp->stripesize = dp->d_stripesize;
696	if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0)
697		pp->flags |= G_PF_ACCEPT_UNMAPPED;
698	if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0)
699		pp->flags |= G_PF_DIRECT_SEND;
700	pp->flags |= G_PF_DIRECT_RECEIVE;
701	if (bootverbose)
702		printf("GEOM: new disk %s\n", gp->name);
703	sysctl_ctx_init(&sc->sysctl_ctx);
704	snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name);
705	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
706		SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name,
707		CTLFLAG_RD, 0, tmpstr);
708	if (sc->sysctl_tree != NULL) {
709		snprintf(tmpstr, sizeof(tmpstr),
710		    "kern.geom.disk.%s.led", gp->name);
711		TUNABLE_STR_FETCH(tmpstr, sc->led, sizeof(sc->led));
712		SYSCTL_ADD_STRING(&sc->sysctl_ctx,
713		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led",
714		    CTLFLAG_RW | CTLFLAG_TUN, sc->led, sizeof(sc->led),
715		    "LED name");
716	}
717	pp->private = sc;
718	dp->d_geom = gp;
719	g_error_provider(pp, 0);
720}
721
722/*
723 * We get this callback after all of the consumers have gone away, and just
724 * before the provider is freed.  If the disk driver provided a d_gone
725 * callback, let them know that it is okay to free resources -- they won't
726 * be getting any more accesses from GEOM.
727 */
728static void
729g_disk_providergone(struct g_provider *pp)
730{
731	struct disk *dp;
732	struct g_disk_softc *sc;
733
734	sc = (struct g_disk_softc *)pp->private;
735	dp = sc->dp;
736	if (dp != NULL && dp->d_gone != NULL)
737		dp->d_gone(dp);
738	if (sc->sysctl_tree != NULL) {
739		sysctl_ctx_free(&sc->sysctl_ctx);
740		sc->sysctl_tree = NULL;
741	}
742	if (sc->led[0] != 0) {
743		led_set(sc->led, "0");
744		sc->led[0] = 0;
745	}
746	pp->private = NULL;
747	pp->geom->softc = NULL;
748	mtx_destroy(&sc->done_mtx);
749	mtx_destroy(&sc->start_mtx);
750	g_free(sc);
751}
752
753static void
754g_disk_destroy(void *ptr, int flag)
755{
756	struct disk *dp;
757	struct g_geom *gp;
758	struct g_disk_softc *sc;
759
760	g_topology_assert();
761	dp = ptr;
762	gp = dp->d_geom;
763	if (gp != NULL) {
764		sc = gp->softc;
765		if (sc != NULL)
766			sc->dp = NULL;
767		dp->d_geom = NULL;
768		g_wither_geom(gp, ENXIO);
769	}
770	g_free(dp);
771}
772
773/*
774 * We only allow printable characters in disk ident,
775 * the rest is converted to 'x<HH>'.
776 */
777static void
778g_disk_ident_adjust(char *ident, size_t size)
779{
780	char *p, tmp[4], newid[DISK_IDENT_SIZE];
781
782	newid[0] = '\0';
783	for (p = ident; *p != '\0'; p++) {
784		if (isprint(*p)) {
785			tmp[0] = *p;
786			tmp[1] = '\0';
787		} else {
788			snprintf(tmp, sizeof(tmp), "x%02hhx",
789			    *(unsigned char *)p);
790		}
791		if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid))
792			break;
793	}
794	bzero(ident, size);
795	strlcpy(ident, newid, size);
796}
797
798struct disk *
799disk_alloc(void)
800{
801
802	return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO));
803}
804
805void
806disk_create(struct disk *dp, int version)
807{
808
809	if (version != DISK_VERSION) {
810		printf("WARNING: Attempt to add disk %s%d %s",
811		    dp->d_name, dp->d_unit,
812		    " using incompatible ABI version of disk(9)\n");
813		printf("WARNING: Ignoring disk %s%d\n",
814		    dp->d_name, dp->d_unit);
815		return;
816	}
817	if (version < DISK_VERSION_04)
818		dp->d_flags |= DISKFLAG_LACKS_ROTRATE;
819	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
820	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
821	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
822	KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));
823	if (dp->d_devstat == NULL)
824		dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
825		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
826		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
827	dp->d_geom = NULL;
828	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
829	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
830}
831
832void
833disk_destroy(struct disk *dp)
834{
835
836	g_cancel_event(dp);
837	dp->d_destroyed = 1;
838	if (dp->d_devstat != NULL)
839		devstat_remove_entry(dp->d_devstat);
840	g_post_event(g_disk_destroy, dp, M_WAITOK, NULL);
841}
842
843void
844disk_gone(struct disk *dp)
845{
846	struct g_geom *gp;
847	struct g_provider *pp;
848
849	gp = dp->d_geom;
850	if (gp != NULL) {
851		pp = LIST_FIRST(&gp->provider);
852		if (pp != NULL) {
853			KASSERT(LIST_NEXT(pp, provider) == NULL,
854			    ("geom %p has more than one provider", gp));
855			g_wither_provider(pp, ENXIO);
856		}
857	}
858}
859
860void
861disk_attr_changed(struct disk *dp, const char *attr, int flag)
862{
863	struct g_geom *gp;
864	struct g_provider *pp;
865	char devnamebuf[128];
866
867	gp = dp->d_geom;
868	if (gp != NULL)
869		LIST_FOREACH(pp, &gp->provider, provider)
870			(void)g_attr_changed(pp, attr, flag);
871	snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name,
872	    dp->d_unit);
873	devctl_notify("GEOM", "disk", attr, devnamebuf);
874}
875
876void
877disk_media_changed(struct disk *dp, int flag)
878{
879	struct g_geom *gp;
880	struct g_provider *pp;
881
882	gp = dp->d_geom;
883	if (gp != NULL) {
884		pp = LIST_FIRST(&gp->provider);
885		if (pp != NULL) {
886			KASSERT(LIST_NEXT(pp, provider) == NULL,
887			    ("geom %p has more than one provider", gp));
888			g_media_changed(pp, flag);
889		}
890	}
891}
892
893void
894disk_media_gone(struct disk *dp, int flag)
895{
896	struct g_geom *gp;
897	struct g_provider *pp;
898
899	gp = dp->d_geom;
900	if (gp != NULL) {
901		pp = LIST_FIRST(&gp->provider);
902		if (pp != NULL) {
903			KASSERT(LIST_NEXT(pp, provider) == NULL,
904			    ("geom %p has more than one provider", gp));
905			g_media_gone(pp, flag);
906		}
907	}
908}
909
910int
911disk_resize(struct disk *dp, int flag)
912{
913
914	if (dp->d_destroyed || dp->d_geom == NULL)
915		return (0);
916
917	return (g_post_event(g_disk_resize, dp, flag, NULL));
918}
919
920static void
921g_kern_disks(void *p, int flag __unused)
922{
923	struct sbuf *sb;
924	struct g_geom *gp;
925	char *sp;
926
927	sb = p;
928	sp = "";
929	g_topology_assert();
930	LIST_FOREACH(gp, &g_disk_class.geom, geom) {
931		sbuf_printf(sb, "%s%s", sp, gp->name);
932		sp = " ";
933	}
934	sbuf_finish(sb);
935}
936
937static int
938sysctl_disks(SYSCTL_HANDLER_ARGS)
939{
940	int error;
941	struct sbuf *sb;
942
943	sb = sbuf_new_auto();
944	g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
945	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
946	sbuf_delete(sb);
947	return error;
948}
949
950SYSCTL_PROC(_kern, OID_AUTO, disks,
951    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
952    sysctl_disks, "A", "names of available disks");
953