1/*-
2 * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
3 *
4 * Copyright (c) 2003 Poul-Henning Kamp.
5 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
33 */
34
35/*-
36 * Copyright (c) 1988 University of Utah.
37 * Copyright (c) 1990, 1993
38 *	The Regents of the University of California.  All rights reserved.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * the Systems Programming Group of the University of Utah Computer
42 * Science Department.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 *    notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 *    notice, this list of conditions and the following disclaimer in the
51 *    documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 *    may be used to endorse or promote products derived from this software
54 *    without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * from: Utah $Hdr: cd.c 1.6 90/11/28$
69 */
70
71/*
72 * Dynamic configuration and disklabel support by:
73 *	Jason R. Thorpe <thorpej@nas.nasa.gov>
74 *	Numerical Aerodynamic Simulation Facility
75 *	Mail Stop 258-6
76 *	NASA Ames Research Center
77 *	Moffett Field, CA 94035
78 */
79
80#include <sys/param.h>
81#include <sys/systm.h>
82#include <sys/kernel.h>
83#include <sys/module.h>
84#include <sys/bio.h>
85#include <sys/malloc.h>
86#include <sys/sbuf.h>
87#include <geom/geom.h>
88
/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
 *
 * The value is subtracted from each component's size expressed in
 * DEV_BSIZE blocks and added to every component block number before it
 * is converted back to a byte offset.  It can be overridden by defining
 * CCD_OFFSET before this point.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif

/* sc_flags */
#define CCDF_UNIFORM	0x02	/* treat every component as the size of the smallest */
#define CCDF_MIRROR	0x04	/* use mirroring */
#define CCDF_NO_OFFSET	0x08	/* do not leave space in front */
#define CCDF_LINUX	0x10	/* use Linux compatibility mode */

/* Mask of user-settable ccd flags; the only flag bits reported by "list". */
#define CCDF_USERMASK	(CCDF_UNIFORM|CCDF_MIRROR)
106
107/*
108 * Interleave description table.
109 * Computed at boot time to speed irregular-interleave lookups.
110 * The idea is that we interleave in "groups".  First we interleave
111 * evenly over all component disks up to the size of the smallest
112 * component (the first group), then we interleave evenly over all
113 * remaining disks up to the size of the next-smallest (second group),
114 * and so on.
115 *
116 * Each table entry describes the interleave characteristics of one
117 * of these groups.  For example if a concatenated disk consisted of
118 * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
119 * DEV_BSIZE (1), the table would have three entries:
120 *
121 *	ndisk	startblk	startoff	dev
122 *	3	0		0		0, 1, 2
123 *	2	9		3		0, 2
124 *	1	13		5		2
125 *	0	-		-		-
126 *
127 * which says that the first nine blocks (0-8) are interleaved over
128 * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
129 * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
130 * at component block 3, and the remaining blocks (13-14) are on disk
131 * 2 starting at offset 5.
132 */
struct ccdiinfo {
	/* # of disks range is interleaved over; 0 marks the end of the table */
	int	ii_ndisk;
	/*
	 * Starting block # of the range within the ccd.  Counted in
	 * sc_ileave-sized units when interleaving, in plain blocks when
	 * sc_ileave is 0.
	 */
	daddr_t	ii_startblk;
	/*
	 * Starting offset of the range within each component, in
	 * sc_ileave-sized units (always 0 when sc_ileave is 0).
	 */
	daddr_t	ii_startoff;
	/* ordered list of sc_cinfo[] indices taking part in the range */
	int	*ii_index;
};
139
140/*
141 * Component info table.
142 * Describes a single component of a concatenated disk.
143 */
struct ccdcinfo {
	/*
	 * Usable size in DEV_BSIZE blocks: the provider size minus the
	 * reserved offset, truncated to an interleave boundary (and clamped
	 * to the smallest component when CCDF_UNIFORM is set).
	 */
	daddr_t		ci_size; 		/* size */
	struct g_provider *ci_provider;		/* provider */
	struct g_consumer *ci_consumer;		/* consumer attached to ci_provider */
};
149
150/*
151 * A concatenated disk is described by this structure.
152 */
153
struct ccd_s {
	/* list linkage; not referenced by the code in this part of the file */
	LIST_ENTRY(ccd_s) list;

	int		 sc_unit;		/* logical unit number */
	int		 sc_flags;		/* flags (CCDF_*) */
	daddr_t		 sc_size;		/* size of ccd, in blocks */
	int		 sc_ileave;		/* interleave in blocks; 0 = concatenate */
	u_int		 sc_ndisks;		/* number of components */
	struct ccdcinfo	 *sc_cinfo;		/* component info, sc_ndisks entries */
	struct ccdiinfo	 *sc_itable;		/* interleave table, ii_ndisk == 0 terminated */
	uint32_t	 sc_secsize;		/* # bytes per sector (largest of the components) */
	int		 sc_pick;		/* side of mirror picked for the last read */
	daddr_t		 sc_blk[2];		/* mirror localization: block after last read, per side */
	uint32_t	 sc_offset;		/* actual offset used (blocks skipped per component) */
};
169
/*
 * Forward declarations for the file-local routines that are referenced
 * before their definitions.
 */
static g_start_t g_ccd_start;
static void ccdiodone(struct bio *bp);
static void ccdinterleave(struct ccd_s *);
static int ccdinit(struct gctl_req *req, struct ccd_s *);
static int ccdbuffer(struct bio **ret, struct ccd_s *,
		      struct bio *, daddr_t, caddr_t, long);
176
/*
 * Orphan method of the CCD class (installed as .orphan in g_ccd_class).
 * Deliberately a no-op: the consumer passed in is neither detached nor
 * destroyed here.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{
	/*
	 * XXX: We don't do anything here.  It is not obvious
	 * XXX: what DTRT would be, so we do what the previous
	 * XXX: code did: ignore it and let the user cope.
	 */
}
186
187static int
188g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
189{
190	struct g_geom *gp;
191	struct g_consumer *cp1, *cp2;
192	int error;
193
194	de += dr;
195	de += dw;
196
197	gp = pp->geom;
198	error = ENXIO;
199	LIST_FOREACH(cp1, &gp->consumer, consumer) {
200		error = g_access(cp1, dr, dw, de);
201		if (error) {
202			LIST_FOREACH(cp2, &gp->consumer, consumer) {
203				if (cp1 == cp2)
204					break;
205				g_access(cp2, -dr, -dw, -de);
206			}
207			break;
208		}
209	}
210	return (error);
211}
212
213/*
214 * Free the softc and its substructures.
215 */
216static void
217g_ccd_freesc(struct ccd_s *sc)
218{
219	struct ccdiinfo *ii;
220
221	g_free(sc->sc_cinfo);
222	if (sc->sc_itable != NULL) {
223		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
224			g_free(ii->ii_index);
225		g_free(sc->sc_itable);
226	}
227	g_free(sc);
228}
229
230static int
231ccdinit(struct gctl_req *req, struct ccd_s *cs)
232{
233	struct ccdcinfo *ci;
234	daddr_t size;
235	int ix;
236	daddr_t minsize;
237	int maxsecsize;
238	off_t mediasize;
239	u_int sectorsize;
240
241	cs->sc_size = 0;
242
243	maxsecsize = 0;
244	minsize = 0;
245
246	if (cs->sc_flags & CCDF_LINUX) {
247		cs->sc_offset = 0;
248		cs->sc_ileave *= 2;
249		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
250			gctl_error(req, "Mirror mode for Linux raids is "
251			                "only supported with 2 devices");
252	} else {
253		if (cs->sc_flags & CCDF_NO_OFFSET)
254			cs->sc_offset = 0;
255		else
256			cs->sc_offset = CCD_OFFSET;
257	}
258	for (ix = 0; ix < cs->sc_ndisks; ix++) {
259		ci = &cs->sc_cinfo[ix];
260
261		mediasize = ci->ci_provider->mediasize;
262		sectorsize = ci->ci_provider->sectorsize;
263		if (sectorsize > maxsecsize)
264			maxsecsize = sectorsize;
265		size = mediasize / DEV_BSIZE - cs->sc_offset;
266
267		/* Truncate to interleave boundary */
268
269		if (cs->sc_ileave > 1)
270			size -= size % cs->sc_ileave;
271
272		if (size == 0) {
273			gctl_error(req, "Component %s has effective size zero",
274			    ci->ci_provider->name);
275			return(ENODEV);
276		}
277
278		if (minsize == 0 || size < minsize)
279			minsize = size;
280		ci->ci_size = size;
281		cs->sc_size += size;
282	}
283
284	/*
285	 * Don't allow the interleave to be smaller than
286	 * the biggest component sector.
287	 */
288	if ((cs->sc_ileave > 0) &&
289	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
290		gctl_error(req, "Interleave to small for sector size");
291		return(EINVAL);
292	}
293
294	/*
295	 * If uniform interleave is desired set all sizes to that of
296	 * the smallest component.  This will guarantee that a single
297	 * interleave table is generated.
298	 *
299	 * Lost space must be taken into account when calculating the
300	 * overall size.  Half the space is lost when CCDF_MIRROR is
301	 * specified.
302	 */
303	if (cs->sc_flags & CCDF_UNIFORM) {
304		for (ix = 0; ix < cs->sc_ndisks; ix++) {
305			ci = &cs->sc_cinfo[ix];
306			ci->ci_size = minsize;
307		}
308		cs->sc_size = cs->sc_ndisks * minsize;
309	}
310
311	if (cs->sc_flags & CCDF_MIRROR) {
312		/*
313		 * Check to see if an even number of components
314		 * have been specified.  The interleave must also
315		 * be non-zero in order for us to be able to
316		 * guarantee the topology.
317		 */
318		if (cs->sc_ndisks % 2) {
319			gctl_error(req,
320			      "Mirroring requires an even number of disks");
321			return(EINVAL);
322		}
323		if (cs->sc_ileave == 0) {
324			gctl_error(req,
325			     "An interleave must be specified when mirroring");
326			return(EINVAL);
327		}
328		cs->sc_size = (cs->sc_ndisks/2) * minsize;
329	}
330
331	/*
332	 * Construct the interleave table.
333	 */
334	ccdinterleave(cs);
335
336	/*
337	 * Create pseudo-geometry based on 1MB cylinders.  It's
338	 * pretty close.
339	 */
340	cs->sc_secsize = maxsecsize;
341
342	return (0);
343}
344
345static void
346ccdinterleave(struct ccd_s *cs)
347{
348	struct ccdcinfo *ci, *smallci;
349	struct ccdiinfo *ii;
350	daddr_t bn, lbn;
351	int ix;
352	daddr_t size;
353
354	/*
355	 * Allocate an interleave table.  The worst case occurs when each
356	 * of N disks is of a different size, resulting in N interleave
357	 * tables.
358	 *
359	 * Chances are this is too big, but we don't care.
360	 */
361	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
362	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
363
364	/*
365	 * Trivial case: no interleave (actually interleave of disk size).
366	 * Each table entry represents a single component in its entirety.
367	 *
368	 * An interleave of 0 may not be used with a mirror setup.
369	 */
370	if (cs->sc_ileave == 0) {
371		bn = 0;
372		ii = cs->sc_itable;
373
374		for (ix = 0; ix < cs->sc_ndisks; ix++) {
375			/* Allocate space for ii_index. */
376			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
377			ii->ii_ndisk = 1;
378			ii->ii_startblk = bn;
379			ii->ii_startoff = 0;
380			ii->ii_index[0] = ix;
381			bn += cs->sc_cinfo[ix].ci_size;
382			ii++;
383		}
384		ii->ii_ndisk = 0;
385		return;
386	}
387
388	/*
389	 * The following isn't fast or pretty; it doesn't have to be.
390	 */
391	size = 0;
392	bn = lbn = 0;
393	for (ii = cs->sc_itable; ; ii++) {
394		/*
395		 * Allocate space for ii_index.  We might allocate more then
396		 * we use.
397		 */
398		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
399		    M_WAITOK);
400
401		/*
402		 * Locate the smallest of the remaining components
403		 */
404		smallci = NULL;
405		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
406		    ci++) {
407			if (ci->ci_size > size &&
408			    (smallci == NULL ||
409			     ci->ci_size < smallci->ci_size)) {
410				smallci = ci;
411			}
412		}
413
414		/*
415		 * Nobody left, all done
416		 */
417		if (smallci == NULL) {
418			ii->ii_ndisk = 0;
419			g_free(ii->ii_index);
420			ii->ii_index = NULL;
421			break;
422		}
423
424		/*
425		 * Record starting logical block using an sc_ileave blocksize.
426		 */
427		ii->ii_startblk = bn / cs->sc_ileave;
428
429		/*
430		 * Record starting component block using an sc_ileave
431		 * blocksize.  This value is relative to the beginning of
432		 * a component disk.
433		 */
434		ii->ii_startoff = lbn;
435
436		/*
437		 * Determine how many disks take part in this interleave
438		 * and record their indices.
439		 */
440		ix = 0;
441		for (ci = cs->sc_cinfo;
442		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
443			if (ci->ci_size >= smallci->ci_size) {
444				ii->ii_index[ix++] = ci - cs->sc_cinfo;
445			}
446		}
447		ii->ii_ndisk = ix;
448		bn += ix * (smallci->ci_size - size);
449		lbn = smallci->ci_size / cs->sc_ileave;
450		size = smallci->ci_size;
451	}
452}
453
/*
 * Start method: split the incoming request into per-component pieces
 * and send them down.
 *
 * BIO_GETATTR is refused with EINVAL.  For anything else the request is
 * walked from front to back; each iteration asks ccdbuffer() for a
 * clone (two when mirroring) covering as much of the remaining range as
 * fits, dispatches it and advances by the clone's length.
 */
static void
g_ccd_start(struct bio *bp)
{
	long bcount, rcount;
	struct bio *cbp[2];
	caddr_t addr;
	daddr_t bn;
	int err;
	struct ccd_s *cs;

	cs = bp->bio_to->geom->softc;

	/*
	 * Block all GETATTR requests, we wouldn't know which of our
	 * subdevices we should ship it off to.
	 * XXX: this may not be the right policy.
	 */
	if(bp->bio_cmd == BIO_GETATTR) {
		g_io_deliver(bp, EINVAL);
		return;
	}

	/*
	 * Translate the partition-relative block number to an absolute.
	 *
	 * NOTE(review): bn is subsequently advanced with btodb() and turned
	 * back into bytes with dbtob() in ccdbuffer(), i.e. it is treated as
	 * a DEV_BSIZE block number.  That looks like it assumes
	 * sc_secsize == DEV_BSIZE -- confirm.
	 */
	bn = bp->bio_offset / cs->sc_secsize;

	/*
	 * Allocate component buffers and fire off the requests
	 */
	addr = bp->bio_data;
	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
		if (err) {
			/*
			 * Count the whole unprocessed remainder as done and
			 * keep the first error.  The request is delivered
			 * from here only when that remainder is the entire
			 * request, i.e. nothing has been dispatched yet;
			 * otherwise delivery is presumably left to the
			 * completion of the pieces already in flight --
			 * TODO confirm.
			 */
			bp->bio_completed += bcount;
			if (bp->bio_error == 0)
				bp->bio_error = err;
			if (bp->bio_completed == bp->bio_length)
				g_io_deliver(bp, bp->bio_error);
			return;
		}
		/* Number of bytes this piece covers. */
		rcount = cbp[0]->bio_length;

		if (cs->sc_flags & CCDF_MIRROR) {
			/*
			 * Mirroring.  Writes go to both disks, reads are
			 * taken from whichever disk seems most appropriate.
			 *
			 * We attempt to localize reads to the disk whose arm
			 * is nearest the read request.  We ignore seeks due
			 * to writes when making this determination and we
			 * also try to avoid hogging.
			 */
			if (cbp[0]->bio_cmd != BIO_READ) {
				g_io_request(cbp[0], cbp[0]->bio_from);
				g_io_request(cbp[1], cbp[1]->bio_from);
			} else {
				int pick = cs->sc_pick;
				daddr_t range = cs->sc_size / 16;

				/*
				 * Stay on the current side while the read is
				 * within `range' blocks of where that side
				 * last finished reading; otherwise switch.
				 */
				if (bn < cs->sc_blk[pick] - range ||
				    bn > cs->sc_blk[pick] + range
				) {
					cs->sc_pick = pick = 1 - pick;
				}
				cs->sc_blk[pick] = bn + btodb(rcount);
				/*
				 * Only the chosen clone is sent; the other
				 * one is dealt with by ccdiodone().
				 */
				g_io_request(cbp[pick], cbp[pick]->bio_from);
			}
		} else {
			/*
			 * Not mirroring
			 */
			g_io_request(cbp[0], cbp[0]->bio_from);
		}
		bn += btodb(rcount);
		addr += rcount;
	}
}
532
533/*
534 * Build a component buffer header.
535 */
536static int
537ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
538{
539	struct ccdcinfo *ci, *ci2 = NULL;
540	struct bio *cbp;
541	daddr_t cbn, cboff;
542	off_t cbc;
543
544	/*
545	 * Determine which component bn falls in.
546	 */
547	cbn = bn;
548	cboff = 0;
549
550	if (cs->sc_ileave == 0) {
551		/*
552		 * Serially concatenated and neither a mirror nor a parity
553		 * config.  This is a special case.
554		 */
555		daddr_t sblk;
556
557		sblk = 0;
558		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
559			sblk += ci->ci_size;
560		cbn -= sblk;
561	} else {
562		struct ccdiinfo *ii;
563		int ccdisk, off;
564
565		/*
566		 * Calculate cbn, the logical superblock (sc_ileave chunks),
567		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
568		 * to cbn.
569		 */
570		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
571		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
572
573		/*
574		 * Figure out which interleave table to use.
575		 */
576		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
577			if (ii->ii_startblk > cbn)
578				break;
579		}
580		ii--;
581
582		/*
583		 * off is the logical superblock relative to the beginning
584		 * of this interleave block.
585		 */
586		off = cbn - ii->ii_startblk;
587
588		/*
589		 * We must calculate which disk component to use (ccdisk),
590		 * and recalculate cbn to be the superblock relative to
591		 * the beginning of the component.  This is typically done by
592		 * adding 'off' and ii->ii_startoff together.  However, 'off'
593		 * must typically be divided by the number of components in
594		 * this interleave array to be properly convert it from a
595		 * CCD-relative logical superblock number to a
596		 * component-relative superblock number.
597		 */
598		if (ii->ii_ndisk == 1) {
599			/*
600			 * When we have just one disk, it can't be a mirror
601			 * or a parity config.
602			 */
603			ccdisk = ii->ii_index[0];
604			cbn = ii->ii_startoff + off;
605		} else {
606			if (cs->sc_flags & CCDF_MIRROR) {
607				/*
608				 * We have forced a uniform mapping, resulting
609				 * in a single interleave array.  We double
610				 * up on the first half of the available
611				 * components and our mirror is in the second
612				 * half.  This only works with a single
613				 * interleave array because doubling up
614				 * doubles the number of sectors, so there
615				 * cannot be another interleave array because
616				 * the next interleave array's calculations
617				 * would be off.
618				 */
619				int ndisk2 = ii->ii_ndisk / 2;
620				ccdisk = ii->ii_index[off % ndisk2];
621				cbn = ii->ii_startoff + off / ndisk2;
622				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
623			} else {
624				ccdisk = ii->ii_index[off % ii->ii_ndisk];
625				cbn = ii->ii_startoff + off / ii->ii_ndisk;
626			}
627		}
628
629		ci = &cs->sc_cinfo[ccdisk];
630
631		/*
632		 * Convert cbn from a superblock to a normal block so it
633		 * can be used to calculate (along with cboff) the normal
634		 * block index into this particular disk.
635		 */
636		cbn *= cs->sc_ileave;
637	}
638
639	/*
640	 * Fill in the component buf structure.
641	 */
642	cbp = g_clone_bio(bp);
643	if (cbp == NULL)
644		return (ENOMEM);
645	cbp->bio_done = g_std_done;
646	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
647	cbp->bio_data = addr;
648	if (cs->sc_ileave == 0)
649              cbc = dbtob((off_t)(ci->ci_size - cbn));
650	else
651              cbc = dbtob((off_t)(cs->sc_ileave - cboff));
652	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
653
654	cbp->bio_from = ci->ci_consumer;
655	cb[0] = cbp;
656
657	if (cs->sc_flags & CCDF_MIRROR) {
658		cbp = g_clone_bio(bp);
659		if (cbp == NULL)
660			return (ENOMEM);
661		cbp->bio_done = cb[0]->bio_done = ccdiodone;
662		cbp->bio_offset = cb[0]->bio_offset;
663		cbp->bio_data = cb[0]->bio_data;
664		cbp->bio_length = cb[0]->bio_length;
665		cbp->bio_from = ci2->ci_consumer;
666		cbp->bio_caller1 = cb[0];
667		cb[0]->bio_caller1 = cbp;
668		cb[1] = cbp;
669	}
670	return (0);
671}
672
673/*
674 * Called only for mirrored operations.
675 */
676static void
677ccdiodone(struct bio *cbp)
678{
679	struct bio *mbp, *pbp;
680
681	mbp = cbp->bio_caller1;
682	pbp = cbp->bio_parent;
683
684	if (pbp->bio_cmd == BIO_READ) {
685		if (cbp->bio_error == 0) {
686			/* We will not be needing the partner bio */
687			if (mbp != NULL) {
688				pbp->bio_inbed++;
689				g_destroy_bio(mbp);
690			}
691			g_std_done(cbp);
692			return;
693		}
694		if (mbp != NULL) {
695			/* Try partner the bio instead */
696			mbp->bio_caller1 = NULL;
697			pbp->bio_inbed++;
698			g_destroy_bio(cbp);
699			g_io_request(mbp, mbp->bio_from);
700			/*
701			 * XXX: If this comes back OK, we should actually
702			 * try to write the good data on the failed mirror
703			 */
704			return;
705		}
706		g_std_done(cbp);
707		return;
708	}
709	if (mbp != NULL) {
710		mbp->bio_caller1 = NULL;
711		pbp->bio_inbed++;
712		if (cbp->bio_error != 0 && pbp->bio_error == 0)
713			pbp->bio_error = cbp->bio_error;
714		g_destroy_bio(cbp);
715		return;
716	}
717	g_std_done(cbp);
718}
719
720static void
721g_ccd_create(struct gctl_req *req, struct g_class *mp)
722{
723	int *unit, *ileave, *nprovider;
724	struct g_geom *gp;
725	struct g_consumer *cp;
726	struct g_provider *pp;
727	struct ccd_s *sc;
728	struct sbuf *sb;
729	char buf[20];
730	int i, error;
731
732	g_topology_assert();
733	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
734	if (unit == NULL) {
735		gctl_error(req, "unit parameter not given");
736		return;
737	}
738	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
739	if (ileave == NULL) {
740		gctl_error(req, "ileave parameter not given");
741		return;
742	}
743	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
744	if (nprovider == NULL) {
745		gctl_error(req, "nprovider parameter not given");
746		return;
747	}
748
749	/* Check for duplicate unit */
750	LIST_FOREACH(gp, &mp->geom, geom) {
751		sc = gp->softc;
752		if (sc != NULL && sc->sc_unit == *unit) {
753			gctl_error(req, "Unit %d already configured", *unit);
754			return;
755		}
756	}
757
758	if (*nprovider <= 0) {
759		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
760		return;
761	}
762
763	/* Check all providers are valid */
764	for (i = 0; i < *nprovider; i++) {
765		snprintf(buf, sizeof(buf), "provider%d", i);
766		pp = gctl_get_provider(req, buf);
767		if (pp == NULL)
768			return;
769	}
770
771	gp = g_new_geomf(mp, "ccd%d", *unit);
772	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
773	gp->softc = sc;
774	sc->sc_ndisks = *nprovider;
775
776	/* Allocate space for the component info. */
777	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
778	    M_WAITOK | M_ZERO);
779
780	/* Create consumers and attach to all providers */
781	for (i = 0; i < *nprovider; i++) {
782		snprintf(buf, sizeof(buf), "provider%d", i);
783		pp = gctl_get_provider(req, buf);
784		cp = g_new_consumer(gp);
785		error = g_attach(cp, pp);
786		KASSERT(error == 0, ("attach to %s failed", pp->name));
787		sc->sc_cinfo[i].ci_consumer = cp;
788		sc->sc_cinfo[i].ci_provider = pp;
789	}
790
791	sc->sc_unit = *unit;
792	sc->sc_ileave = *ileave;
793
794	if (gctl_get_param(req, "no_offset", NULL))
795		sc->sc_flags |= CCDF_NO_OFFSET;
796	if (gctl_get_param(req, "linux", NULL))
797		sc->sc_flags |= CCDF_LINUX;
798
799	if (gctl_get_param(req, "uniform", NULL))
800		sc->sc_flags |= CCDF_UNIFORM;
801	if (gctl_get_param(req, "mirror", NULL))
802		sc->sc_flags |= CCDF_MIRROR;
803
804	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
805		printf("%s: disabling mirror, interleave is 0\n", gp->name);
806		sc->sc_flags &= ~(CCDF_MIRROR);
807	}
808
809	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
810		printf("%s: mirror/parity forces uniform flag\n", gp->name);
811		sc->sc_flags |= CCDF_UNIFORM;
812	}
813
814	error = ccdinit(req, sc);
815	if (error != 0) {
816		g_ccd_freesc(sc);
817		gp->softc = NULL;
818		g_wither_geom(gp, ENXIO);
819		return;
820	}
821
822	pp = g_new_providerf(gp, "%s", gp->name);
823	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
824	pp->sectorsize = sc->sc_secsize;
825	g_error_provider(pp, 0);
826
827	sb = sbuf_new_auto();
828	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
829	for (i = 0; i < *nprovider; i++) {
830		sbuf_printf(sb, "%s%s",
831		    i == 0 ? "(" : ", ",
832		    sc->sc_cinfo[i].ci_provider->name);
833	}
834	sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
835	if (sc->sc_ileave != 0)
836		sbuf_printf(sb, "interleaved at %d blocks\n",
837			sc->sc_ileave);
838	else
839		sbuf_printf(sb, "concatenated\n");
840	sbuf_finish(sb);
841	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
842	sbuf_delete(sb);
843}
844
845static int
846g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
847{
848	struct g_provider *pp;
849	struct ccd_s *sc;
850
851	g_topology_assert();
852	sc = gp->softc;
853	pp = LIST_FIRST(&gp->provider);
854	if (sc == NULL || pp == NULL)
855		return (EBUSY);
856	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
857		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
858		    pp->acr, pp->acw, pp->ace);
859		return (EBUSY);
860	}
861	g_ccd_freesc(sc);
862	gp->softc = NULL;
863	g_wither_geom(gp, ENXIO);
864	return (0);
865}
866
867static void
868g_ccd_list(struct gctl_req *req, struct g_class *mp)
869{
870	struct sbuf *sb;
871	struct ccd_s *cs;
872	struct g_geom *gp;
873	int i, unit, *up;
874
875	up = gctl_get_paraml(req, "unit", sizeof (*up));
876	if (up == NULL) {
877		gctl_error(req, "unit parameter not given");
878		return;
879	}
880	unit = *up;
881	sb = sbuf_new_auto();
882	LIST_FOREACH(gp, &mp->geom, geom) {
883		cs = gp->softc;
884		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
885			continue;
886		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
887		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
888
889		for (i = 0; i < cs->sc_ndisks; ++i) {
890			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
891			    cs->sc_cinfo[i].ci_provider->name);
892		}
893		sbuf_printf(sb, "\n");
894	}
895	sbuf_finish(sb);
896	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
897	sbuf_delete(sb);
898}
899
900static void
901g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
902{
903	struct g_geom *gp;
904
905	g_topology_assert();
906	if (!strcmp(verb, "create geom")) {
907		g_ccd_create(req, mp);
908	} else if (!strcmp(verb, "destroy geom")) {
909		gp = gctl_get_geom(req, mp, "geom");
910		if (gp != NULL)
911			g_ccd_destroy_geom(req, mp, gp);
912	} else if (!strcmp(verb, "list")) {
913		g_ccd_list(req, mp);
914	} else {
915		gctl_error(req, "unknown verb");
916	}
917}
918
/*
 * Class descriptor: names the class "CCD" and wires the routines defined
 * above into the GEOM method slots.
 */
static struct g_class g_ccd_class = {
	.name = "CCD",
	.version = G_VERSION,
	.ctlreq = g_ccd_config,
	.destroy_geom = g_ccd_destroy_geom,
	.start = g_ccd_start,
	.orphan = g_ccd_orphan,
	.access = g_ccd_access,
};

/* Register the class under the module name g_ccd, version 0 as geom_ccd. */
DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
MODULE_VERSION(geom_ccd, 0);
931