/*-
 * Copyright (c) 2003 Poul-Henning Kamp.
 * Copyright (c) 1995 Jason R. Thorpe.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project
 *	by Jason R. Thorpe.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Dynamic configuration and disklabel support by:
 *	Jason R. Thorpe <thorpej@nas.nasa.gov>
 *	Numerical Aerodynamic Simulation Facility
 *	Mail Stop 258-6
 *	NASA Ames Research Center
 *	Moffett Field, CA 94035
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 *	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
#include <geom/geom.h>

/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif
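
/*
 * For illustration: with the default CCD_OFFSET of 16 and DEV_BSIZE (512
 * byte) blocks this leaves the first 8 kB of each component untouched,
 * unless the CCDF_NO_OFFSET or CCDF_LINUX flags below suppress the offset.
 */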

/* sc_flags */
#define CCDF_UNIFORM	0x02	/* use smallest size for uniform interleave */
#define CCDF_MIRROR	0x04	/* use mirroring */
#define CCDF_NO_OFFSET	0x08	/* do not leave space in front */
#define CCDF_LINUX	0x10	/* use Linux compatibility mode */

/* Mask of user-settable ccd flags. */
#define CCDF_USERMASK	(CCDF_UNIFORM|CCDF_MIRROR)

/*
 * Interleave description table.
 * Computed at boot time to speed irregular-interleave lookups.
 * The idea is that we interleave in "groups".  First we interleave
 * evenly over all component disks up to the size of the smallest
 * component (the first group), then we interleave evenly over all
 * remaining disks up to the size of the next-smallest (second group),
 * and so on.
 *
 * Each table entry describes the interleave characteristics of one
 * of these groups.  For example if a concatenated disk consisted of
 * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
 * DEV_BSIZE (1), the table would have three entries:
 *
 *	ndisk	startblk	startoff	dev
 *	3	0		0		0, 1, 2
 *	2	9		3		0, 2
 *	1	13		5		2
 *	0	-		-		-
 *
 * which says that the first nine blocks (0-8) are interleaved over
 * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
 * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
 * at component block 3, and the remaining blocks (13-14) are on disk
 * 2 starting at offset 5.
 */
struct ccdiinfo {
	int	ii_ndisk;	/* # of disks range is interleaved over */
	daddr_t	ii_startblk;	/* starting scaled block # for range */
	daddr_t	ii_startoff;	/* starting component offset (block #) */
	int	*ii_index;	/* ordered list of components in range */
};

/*
 * Component info table.
 * Describes a single component of a concatenated disk.
 */
struct ccdcinfo {
	daddr_t		ci_size; 		/* size */
	struct g_provider *ci_provider;		/* provider */
	struct g_consumer *ci_consumer;		/* consumer */
};

/*
 * A concatenated disk is described by this structure.
 */

struct ccd_s {
	LIST_ENTRY(ccd_s) list;

	int		 sc_unit;		/* logical unit number */
	int		 sc_flags;		/* flags */
	daddr_t		 sc_size;		/* size of ccd */
	int		 sc_ileave;		/* interleave */
	u_int		 sc_ndisks;		/* number of components */
	struct ccdcinfo	 *sc_cinfo;		/* component info */
	struct ccdiinfo	 *sc_itable;		/* interleave table */
	u_int32_t	 sc_secsize;		/* # bytes per sector */
	int		 sc_pick;		/* side of mirror picked */
	daddr_t		 sc_blk[2];		/* mirror localization */
	u_int32_t	 sc_offset;		/* actual offset used */
};

static g_start_t g_ccd_start;
static void ccdiodone(struct bio *bp);
static void ccdinterleave(struct ccd_s *);
static int ccdinit(struct gctl_req *req, struct ccd_s *);
static int ccdbuffer(struct bio **ret, struct ccd_s *,
		      struct bio *, daddr_t, caddr_t, long);

static void
g_ccd_orphan(struct g_consumer *cp)
{
	/*
	 * XXX: We don't do anything here.  It is not obvious
	 * XXX: what DTRT would be, so we do what the previous
	 * XXX: code did: ignore it and let the user cope.
	 */
}

static int
g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp1, *cp2;
	int error;

	de += dr;
	de += dw;

	gp = pp->geom;
	error = ENXIO;
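	/*
	 * Propagate the access request to all component consumers; if one
	 * of them refuses, take back what the earlier ones already granted.
	 */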
	LIST_FOREACH(cp1, &gp->consumer, consumer) {
		error = g_access(cp1, dr, dw, de);
		if (error) {
			LIST_FOREACH(cp2, &gp->consumer, consumer) {
				if (cp1 == cp2)
					break;
				g_access(cp2, -dr, -dw, -de);
			}
			break;
		}
	}
	return (error);
}

/*
 * Free the softc and its substructures.
 */
static void
g_ccd_freesc(struct ccd_s *sc)
{
	struct ccdiinfo *ii;

	g_free(sc->sc_cinfo);
	if (sc->sc_itable != NULL) {
		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
			if (ii->ii_index != NULL)
				g_free(ii->ii_index);
		g_free(sc->sc_itable);
	}
	g_free(sc);
}

static int
ccdinit(struct gctl_req *req, struct ccd_s *cs)
{
	struct ccdcinfo *ci;
	daddr_t size;
	int ix;
	daddr_t minsize;
	int maxsecsize;
	off_t mediasize;
	u_int sectorsize;

	cs->sc_size = 0;

	maxsecsize = 0;
	minsize = 0;

	if (cs->sc_flags & CCDF_LINUX) {
		cs->sc_offset = 0;
		cs->sc_ileave *= 2;
		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
			gctl_error(req, "Mirror mode for Linux raids is "
			                "only supported with 2 devices");
	} else {
		if (cs->sc_flags & CCDF_NO_OFFSET)
			cs->sc_offset = 0;
		else
			cs->sc_offset = CCD_OFFSET;
	}
	for (ix = 0; ix < cs->sc_ndisks; ix++) {
		ci = &cs->sc_cinfo[ix];

		mediasize = ci->ci_provider->mediasize;
		sectorsize = ci->ci_provider->sectorsize;
		if (sectorsize > maxsecsize)
			maxsecsize = sectorsize;
		size = mediasize / DEV_BSIZE - cs->sc_offset;

		/* Truncate to interleave boundary */

		if (cs->sc_ileave > 1)
			size -= size % cs->sc_ileave;

		if (size == 0) {
			gctl_error(req, "Component %s has effective size zero",
			    ci->ci_provider->name);
			return(ENODEV);
		}

		if (minsize == 0 || size < minsize)
			minsize = size;
		ci->ci_size = size;
		cs->sc_size += size;
	}

	/*
	 * Don't allow the interleave to be smaller than
	 * the biggest component sector.
	 */
	if ((cs->sc_ileave > 0) &&
	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
		gctl_error(req, "Interleave too small for sector size");
		return(EINVAL);
	}

	/*
	 * If uniform interleave is desired set all sizes to that of
	 * the smallest component.  This will guarantee that a single
	 * interleave table is generated.
	 *
	 * Lost space must be taken into account when calculating the
	 * overall size.  Half the space is lost when CCDF_MIRROR is
	 * specified.
	 */
	if (cs->sc_flags & CCDF_UNIFORM) {
		for (ix = 0; ix < cs->sc_ndisks; ix++) {
			ci = &cs->sc_cinfo[ix];
			ci->ci_size = minsize;
		}
		cs->sc_size = cs->sc_ndisks * minsize;
	}

	if (cs->sc_flags & CCDF_MIRROR) {
		/*
		 * Check to see if an even number of components
		 * have been specified.  The interleave must also
		 * be non-zero in order for us to be able to
		 * guarantee the topology.
		 */
		if (cs->sc_ndisks % 2) {
			gctl_error(req,
			      "Mirroring requires an even number of disks");
			return(EINVAL);
		}
		if (cs->sc_ileave == 0) {
			gctl_error(req,
			     "An interleave must be specified when mirroring");
			return(EINVAL);
		}
		cs->sc_size = (cs->sc_ndisks/2) * minsize;
	}
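
	/*
	 * For illustration (hypothetical component sizes): four components
	 * of 1000, 1100, 1200 and 1300 blocks with the default CCD_OFFSET
	 * of 16 and an interleave of 32 are truncated to 960, 1056, 1184
	 * and 1280 usable blocks, so minsize is 960.  With CCDF_UNIFORM the
	 * ccd is 4 * 960 = 3840 blocks; with CCDF_MIRROR as well it is
	 * (4 / 2) * 960 = 1920 blocks.
	 */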

	/*
	 * Construct the interleave table.
	 */
	ccdinterleave(cs);

	/*
	 * Use the largest component sector size as the sector size of
	 * the ccd.
	 */
	cs->sc_secsize = maxsecsize;

	return (0);
}

static void
ccdinterleave(struct ccd_s *cs)
{
	struct ccdcinfo *ci, *smallci;
	struct ccdiinfo *ii;
	daddr_t bn, lbn;
	int ix;
	daddr_t size;

	/*
	 * Allocate an interleave table.  The worst case occurs when each
	 * of N disks is of a different size, resulting in N interleave
	 * tables.
	 *
	 * Chances are this is too big, but we don't care.
	 */
	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);

	/*
	 * Trivial case: no interleave (actually interleave of disk size).
	 * Each table entry represents a single component in its entirety.
	 *
	 * An interleave of 0 may not be used with a mirror setup.
	 */
	if (cs->sc_ileave == 0) {
		bn = 0;
		ii = cs->sc_itable;

		for (ix = 0; ix < cs->sc_ndisks; ix++) {
			/* Allocate space for ii_index. */
			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
			ii->ii_ndisk = 1;
			ii->ii_startblk = bn;
			ii->ii_startoff = 0;
			ii->ii_index[0] = ix;
			bn += cs->sc_cinfo[ix].ci_size;
			ii++;
		}
		ii->ii_ndisk = 0;
		return;
	}

	/*
	 * The following isn't fast or pretty; it doesn't have to be.
	 */
	size = 0;
	bn = lbn = 0;
	for (ii = cs->sc_itable; ; ii++) {
		/*
		 * Allocate space for ii_index.  We might allocate more than
		 * we use.
		 */
		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
		    M_WAITOK);

		/*
		 * Locate the smallest of the remaining components.
		 */
		smallci = NULL;
		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
		    ci++) {
			if (ci->ci_size > size &&
			    (smallci == NULL ||
			     ci->ci_size < smallci->ci_size)) {
				smallci = ci;
			}
		}

		/*
		 * Nobody left, all done.
		 */
		if (smallci == NULL) {
			ii->ii_ndisk = 0;
			g_free(ii->ii_index);
			ii->ii_index = NULL;
			break;
		}

		/*
		 * Record starting logical block using an sc_ileave blocksize.
		 */
		ii->ii_startblk = bn / cs->sc_ileave;

		/*
		 * Record starting component block using an sc_ileave
		 * blocksize.  This value is relative to the beginning of
		 * a component disk.
		 */
		ii->ii_startoff = lbn;

		/*
		 * Determine how many disks take part in this interleave
		 * and record their indices.
		 */
		ix = 0;
		for (ci = cs->sc_cinfo;
		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
			if (ci->ci_size >= smallci->ci_size) {
				ii->ii_index[ix++] = ci - cs->sc_cinfo;
			}
		}
		ii->ii_ndisk = ix;
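		/*
		 * Advance bn past this group: each of the ii_ndisk disks in
		 * it contributes (smallci->ci_size - size) blocks beyond the
		 * size covered by the previous group.
		 */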
		bn += ix * (smallci->ci_size - size);
		lbn = smallci->ci_size / cs->sc_ileave;
		size = smallci->ci_size;
	}
}

static void
g_ccd_start(struct bio *bp)
{
	long bcount, rcount;
	struct bio *cbp[2];
	caddr_t addr;
	daddr_t bn;
	int err;
	struct ccd_s *cs;

	cs = bp->bio_to->geom->softc;

	/*
	 * Block all GETATTR requests, we wouldn't know which of our
	 * subdevices we should ship it off to.
	 * XXX: this may not be the right policy.
	 */
	if (bp->bio_cmd == BIO_GETATTR) {
		g_io_deliver(bp, EINVAL);
		return;
	}

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	bn = bp->bio_offset / cs->sc_secsize;

	/*
	 * Allocate component buffers and fire off the requests
	 */
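	/*
	 * Each iteration below maps at most one contiguous chunk on a single
	 * component (up to the next interleave or component boundary), so a
	 * large request may be split into several child bios and rcount may
	 * be smaller than bcount.
	 */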
	addr = bp->bio_data;
	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
		if (err) {
			bp->bio_completed += bcount;
			if (bp->bio_error == 0)
				bp->bio_error = err;
			if (bp->bio_completed == bp->bio_length)
				g_io_deliver(bp, bp->bio_error);
			return;
		}
		rcount = cbp[0]->bio_length;

		if (cs->sc_flags & CCDF_MIRROR) {
			/*
			 * Mirroring.  Writes go to both disks, reads are
			 * taken from whichever disk seems most appropriate.
			 *
			 * We attempt to localize reads to the disk whose arm
			 * is nearest the read request.  We ignore seeks due
			 * to writes when making this determination and we
			 * also try to avoid hogging.
			 */
			if (cbp[0]->bio_cmd != BIO_READ) {
				g_io_request(cbp[0], cbp[0]->bio_from);
				g_io_request(cbp[1], cbp[1]->bio_from);
			} else {
				int pick = cs->sc_pick;
				daddr_t range = cs->sc_size / 16;

				if (bn < cs->sc_blk[pick] - range ||
				    bn > cs->sc_blk[pick] + range) {
					cs->sc_pick = pick = 1 - pick;
				}
				cs->sc_blk[pick] = bn + btodb(rcount);
				g_io_request(cbp[pick], cbp[pick]->bio_from);
			}
		} else {
			/*
			 * Not mirroring
			 */
			g_io_request(cbp[0], cbp[0]->bio_from);
		}
		bn += btodb(rcount);
		addr += rcount;
	}
}

/*
 * Build a component buffer header.
 */
static int
ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn,
    caddr_t addr, long bcount)
{
	struct ccdcinfo *ci, *ci2 = NULL;
	struct bio *cbp;
	daddr_t cbn, cboff;
	off_t cbc;

	/*
	 * Determine which component bn falls in.
	 */
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t sblk;

		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_flags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}
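
	/*
	 * Worked example using the table from the comment at the top of the
	 * file (components of 5, 3 and 7 blocks, interleave 1): a request
	 * for ccd block 10 gives cboff = 0, cbn = 10, which falls in the
	 * second group (startblk 9, 2 disks, startoff 3).  off = 10 - 9 = 1,
	 * so ccdisk = ii_index[1 % 2] = 2 and cbn = 3 + 1 / 2 = 3; the I/O
	 * therefore goes to component 2 at block 3 (plus sc_offset).
	 */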

	/*
	 * Fill in the component buf structure.
	 */
	cbp = g_clone_bio(bp);
	if (cbp == NULL)
		return (ENOMEM);
	cbp->bio_done = g_std_done;
	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
	cbp->bio_data = addr;
	if (cs->sc_ileave == 0)
		cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->bio_length = (cbc < bcount) ? cbc : bcount;

	cbp->bio_from = ci->ci_consumer;
	cb[0] = cbp;

	if (cs->sc_flags & CCDF_MIRROR) {
		cbp = g_clone_bio(bp);
		if (cbp == NULL)
			return (ENOMEM);
		cbp->bio_done = cb[0]->bio_done = ccdiodone;
		cbp->bio_offset = cb[0]->bio_offset;
		cbp->bio_data = cb[0]->bio_data;
		cbp->bio_length = cb[0]->bio_length;
		cbp->bio_from = ci2->ci_consumer;
		cbp->bio_caller1 = cb[0];
		cb[0]->bio_caller1 = cbp;
		cb[1] = cbp;
	}
	return (0);
}

/*
 * Called only for mirrored operations.
 */
static void
ccdiodone(struct bio *cbp)
{
	struct bio *mbp, *pbp;

	mbp = cbp->bio_caller1;
	pbp = cbp->bio_parent;
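	/*
	 * mbp is the partner bio on the other half of the mirror (the two
	 * halves were cross-linked through bio_caller1 in ccdbuffer()) and
	 * pbp is the original request that was cloned.
	 */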

	if (pbp->bio_cmd == BIO_READ) {
		if (cbp->bio_error == 0) {
			/* We will not be needing the partner bio */
			if (mbp != NULL) {
				pbp->bio_inbed++;
				g_destroy_bio(mbp);
			}
			g_std_done(cbp);
			return;
		}
		if (mbp != NULL) {
			/* Try the partner bio instead */
			mbp->bio_caller1 = NULL;
			pbp->bio_inbed++;
			g_destroy_bio(cbp);
			g_io_request(mbp, mbp->bio_from);
			/*
			 * XXX: If this comes back OK, we should actually
			 * try to write the good data on the failed mirror
			 */
			return;
		}
		g_std_done(cbp);
		return;
	}
	if (mbp != NULL) {
		mbp->bio_caller1 = NULL;
		pbp->bio_inbed++;
		if (cbp->bio_error != 0 && pbp->bio_error == 0)
			pbp->bio_error = cbp->bio_error;
		g_destroy_bio(cbp);
		return;
	}
	g_std_done(cbp);
}

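/*
 * Handle the "create geom" verb.  The request carries an integer "unit",
 * an integer "ileave" (interleave in DEV_BSIZE blocks, 0 for plain
 * concatenation), an integer "nprovider" and one "provider%d" name per
 * component, plus optional boolean flags ("uniform", "mirror",
 * "no_offset", "linux").  Such requests are normally generated by
 * ccdconfig(8); a hypothetical invocation concatenating two disks at a
 * 64 block interleave might look like
 *
 *	ccdconfig ccd0 64 none /dev/da0 /dev/da1
 *
 * (the device names, and the exact ccdconfig syntax, are illustrative
 * only).
 */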
static void
g_ccd_create(struct gctl_req *req, struct g_class *mp)
{
	int *unit, *ileave, *nprovider;
	struct g_geom *gp;
	struct g_consumer *cp;
	struct g_provider *pp;
	struct ccd_s *sc;
	struct sbuf *sb;
	char buf[20];
	int i, error;

	g_topology_assert();
	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
	if (unit == NULL) {
		gctl_error(req, "unit parameter not given");
		return;
	}
	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
	if (ileave == NULL) {
		gctl_error(req, "ileave parameter not given");
		return;
	}
	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
	if (nprovider == NULL) {
		gctl_error(req, "nprovider parameter not given");
		return;
	}

	/* Check for duplicate unit */
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc != NULL && sc->sc_unit == *unit) {
			gctl_error(req, "Unit %d already configured", *unit);
			return;
		}
	}

	if (*nprovider <= 0) {
		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
		return;
	}

	/* Check that all providers are valid */
	for (i = 0; i < *nprovider; i++) {
		sprintf(buf, "provider%d", i);
		pp = gctl_get_provider(req, buf);
		if (pp == NULL)
			return;
	}

	gp = g_new_geomf(mp, "ccd%d", *unit);
	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
	gp->softc = sc;
	sc->sc_ndisks = *nprovider;

	/* Allocate space for the component info. */
	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
	    M_WAITOK | M_ZERO);

	/* Create consumers and attach to all providers */
	for (i = 0; i < *nprovider; i++) {
		sprintf(buf, "provider%d", i);
		pp = gctl_get_provider(req, buf);
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		KASSERT(error == 0, ("attach to %s failed", pp->name));
		sc->sc_cinfo[i].ci_consumer = cp;
		sc->sc_cinfo[i].ci_provider = pp;
	}

	sc->sc_unit = *unit;
	sc->sc_ileave = *ileave;

	if (gctl_get_param(req, "no_offset", NULL))
		sc->sc_flags |= CCDF_NO_OFFSET;
	if (gctl_get_param(req, "linux", NULL))
		sc->sc_flags |= CCDF_LINUX;

	if (gctl_get_param(req, "uniform", NULL))
		sc->sc_flags |= CCDF_UNIFORM;
	if (gctl_get_param(req, "mirror", NULL))
		sc->sc_flags |= CCDF_MIRROR;

	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
		printf("%s: disabling mirror, interleave is 0\n", gp->name);
		sc->sc_flags &= ~(CCDF_MIRROR);
	}

	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
		printf("%s: mirror/parity forces uniform flag\n", gp->name);
		sc->sc_flags |= CCDF_UNIFORM;
	}

	error = ccdinit(req, sc);
	if (error != 0) {
		g_ccd_freesc(sc);
		gp->softc = NULL;
		g_wither_geom(gp, ENXIO);
		return;
	}

	pp = g_new_providerf(gp, "%s", gp->name);
	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
	pp->sectorsize = sc->sc_secsize;
	g_error_provider(pp, 0);

	sb = sbuf_new_auto();
	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
	for (i = 0; i < *nprovider; i++) {
		sbuf_printf(sb, "%s%s",
		    i == 0 ? "(" : ", ",
		    sc->sc_cinfo[i].ci_provider->name);
	}
	sbuf_printf(sb, "), %jd blocks ",
	    (intmax_t)(pp->mediasize / DEV_BSIZE));
	if (sc->sc_ileave != 0)
		sbuf_printf(sb, "interleaved at %d blocks\n",
			sc->sc_ileave);
	else
		sbuf_printf(sb, "concatenated\n");
	sbuf_finish(sb);
	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
}

static int
g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
{
	struct g_provider *pp;
	struct ccd_s *sc;

	g_topology_assert();
	sc = gp->softc;
	pp = LIST_FIRST(&gp->provider);
	if (sc == NULL || pp == NULL)
		return (EBUSY);
	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
		    pp->acr, pp->acw, pp->ace);
		return (EBUSY);
	}
	g_ccd_freesc(sc);
	gp->softc = NULL;
	g_wither_geom(gp, ENXIO);
	return (0);
}

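/*
 * Handle the "list" verb: dump the configuration of one unit, or of all
 * units when the "unit" argument is negative, in the tab-separated format
 * used by ccdconfig(8) configuration files: unit name, interleave,
 * user-settable flags and the component devices.  A hypothetical line of
 * output could look like
 *
 *	ccd0		32	0	/dev/da0 /dev/da1
 */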
static void
g_ccd_list(struct gctl_req *req, struct g_class *mp)
{
	struct sbuf *sb;
	struct ccd_s *cs;
	struct g_geom *gp;
	int i, unit, *up;

	up = gctl_get_paraml(req, "unit", sizeof (*up));
	if (up == NULL) {
		gctl_error(req, "unit parameter not given");
		return;
	}
	unit = *up;
	sb = sbuf_new_auto();
	LIST_FOREACH(gp, &mp->geom, geom) {
		cs = gp->softc;
		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
			continue;
		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);

		for (i = 0; i < cs->sc_ndisks; ++i) {
			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
			    cs->sc_cinfo[i].ci_provider->name);
		}
		sbuf_printf(sb, "\n");
	}
	sbuf_finish(sb);
	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
}

static void
g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
{
	struct g_geom *gp;

	g_topology_assert();
	if (!strcmp(verb, "create geom")) {
		g_ccd_create(req, mp);
	} else if (!strcmp(verb, "destroy geom")) {
		gp = gctl_get_geom(req, mp, "geom");
		if (gp != NULL)
			g_ccd_destroy_geom(req, mp, gp);
	} else if (!strcmp(verb, "list")) {
		g_ccd_list(req, mp);
	} else {
		gctl_error(req, "unknown verb");
	}
}

static struct g_class g_ccd_class = {
	.name = "CCD",
	.version = G_VERSION,
	.ctlreq = g_ccd_config,
	.destroy_geom = g_ccd_destroy_geom,
	.start = g_ccd_start,
	.orphan = g_ccd_orphan,
	.access = g_ccd_access,
};

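/*
 * Register the class with GEOM so the control verbs above become available
 * once the module is loaded (see ccd(4) and ccdconfig(8) for the userland
 * side).
 */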
DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);