1139778Simp/*-
2109471Sphk * Copyright (c) 2003 Poul-Henning Kamp.
313041Sasami * Copyright (c) 1995 Jason R. Thorpe.
4109471Sphk * Copyright (c) 1990, 1993
5109471Sphk *	The Regents of the University of California.  All rights reserved.
613041Sasami * All rights reserved.
7109471Sphk * Copyright (c) 1988 University of Utah.
813041Sasami *
9109471Sphk * This code is derived from software contributed to Berkeley by
10109471Sphk * the Systems Programming Group of the University of Utah Computer
11109471Sphk * Science Department.
12109471Sphk *
1313041Sasami * Redistribution and use in source and binary forms, with or without
1413041Sasami * modification, are permitted provided that the following conditions
1513041Sasami * are met:
1613041Sasami * 1. Redistributions of source code must retain the above copyright
1713041Sasami *    notice, this list of conditions and the following disclaimer.
1813041Sasami * 2. Redistributions in binary form must reproduce the above copyright
1913041Sasami *    notice, this list of conditions and the following disclaimer in the
2013041Sasami *    documentation and/or other materials provided with the distribution.
2113041Sasami * 3. All advertising materials mentioning features or use of this software
2213041Sasami *    must display the following acknowledgement:
2313041Sasami *	This product includes software developed for the NetBSD Project
2413041Sasami *	by Jason R. Thorpe.
25109471Sphk * 4. The names of the authors may not be used to endorse or promote products
2613041Sasami *    derived from this software without specific prior written permission.
2713041Sasami *
2813041Sasami * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
2913041Sasami * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
3013041Sasami * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
3113041Sasami * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
3213041Sasami * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
3313041Sasami * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
3413041Sasami * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
3513041Sasami * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
3613041Sasami * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3713041Sasami * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3813041Sasami * SUCH DAMAGE.
3913041Sasami *
4013041Sasami * Dynamic configuration and disklabel support by:
4113041Sasami *	Jason R. Thorpe <thorpej@nas.nasa.gov>
4213041Sasami *	Numerical Aerodynamic Simulation Facility
4313041Sasami *	Mail Stop 258-6
4413041Sasami *	NASA Ames Research Center
4513041Sasami *	Moffett Field, CA 94035
46109471Sphk *
47109471Sphk * from: Utah $Hdr: cd.c 1.6 90/11/28$
48109471Sphk *	@(#)cd.c	8.2 (Berkeley) 11/16/93
49109471Sphk *	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
5013041Sasami */
5113041Sasami
52116196Sobrien#include <sys/cdefs.h>
53116196Sobrien__FBSDID("$FreeBSD$");
54116196Sobrien
5513041Sasami#include <sys/param.h>
5613041Sasami#include <sys/systm.h>
5714730Sasami#include <sys/kernel.h>
5843076Speter#include <sys/module.h>
5960041Sphk#include <sys/bio.h>
6013041Sasami#include <sys/malloc.h>
61223921Sae#include <sys/sbuf.h>
62115729Sphk#include <geom/geom.h>
6313041Sasami
64115731Sphk/*
 * Number of blocks to leave untouched in front of a component partition.
66115849Sphk * This is to avoid violating its disklabel area when it starts at the
67115849Sphk * beginning of the slice.
68115731Sphk */
/* Default front offset, in DEV_BSIZE blocks; may be overridden at build time. */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif

/* sc_flags */
#define CCDF_UNIFORM	0x02	/* size every component as the smallest one */
#define CCDF_MIRROR	0x04	/* use mirroring */
#define CCDF_NO_OFFSET	0x08	/* do not leave space in front */
#define CCDF_LINUX	0x10	/* use Linux compatibility mode */

/* Mask of user-settable ccd flags; the "list" verb reports only these. */
#define CCDF_USERMASK	(CCDF_UNIFORM|CCDF_MIRROR)
81115849Sphk
82115731Sphk/*
83115731Sphk * Interleave description table.
84115731Sphk * Computed at boot time to speed irregular-interleave lookups.
85115731Sphk * The idea is that we interleave in "groups".  First we interleave
86115731Sphk * evenly over all component disks up to the size of the smallest
87115731Sphk * component (the first group), then we interleave evenly over all
88115731Sphk * remaining disks up to the size of the next-smallest (second group),
89115731Sphk * and so on.
90115731Sphk *
91115731Sphk * Each table entry describes the interleave characteristics of one
92115731Sphk * of these groups.  For example if a concatenated disk consisted of
93115731Sphk * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
94115731Sphk * DEV_BSIZE (1), the table would have three entries:
95115731Sphk *
96115731Sphk *	ndisk	startblk	startoff	dev
97115731Sphk *	3	0		0		0, 1, 2
98115731Sphk *	2	9		3		0, 2
99115731Sphk *	1	13		5		2
100115731Sphk *	0	-		-		-
101115731Sphk *
102115731Sphk * which says that the first nine blocks (0-8) are interleaved over
103115731Sphk * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
104115731Sphk * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
105115731Sphk * at component block 3, and the remaining blocks (13-14) are on disk
106115731Sphk * 2 starting at offset 5.
107115731Sphk */
/*
 * One entry of the interleave table.  The table is terminated by an
 * entry whose ii_ndisk is zero.
 */
struct ccdiinfo {
	int	ii_ndisk;	/* # of disks range is interleaved over */
	daddr_t	ii_startblk;	/* starting scaled block # for range */
	daddr_t	ii_startoff;	/* starting component offset (block #) */
	int	*ii_index;	/* ordered list of components in range */
};
114115731Sphk
115115731Sphk/*
116115849Sphk * Component info table.
117115849Sphk * Describes a single component of a concatenated disk.
118115731Sphk */
struct ccdcinfo {
	daddr_t		ci_size; 		/* usable size, DEV_BSIZE blocks */
	struct g_provider *ci_provider;		/* provider backing the component */
	struct g_consumer *ci_consumer;		/* our consumer attached to it */
};
124115731Sphk
125115731Sphk/*
126115731Sphk * A concatenated disk is described by this structure.
127115731Sphk */
128115849Sphk
/* Per-geom softc; allocated in g_ccd_create(), released by g_ccd_freesc(). */
struct ccd_s {
	LIST_ENTRY(ccd_s) list;

	int		 sc_unit;		/* logical unit number */
	int		 sc_flags;		/* CCDF_* flags */
	daddr_t		 sc_size;		/* size of ccd, DEV_BSIZE blocks */
	int		 sc_ileave;		/* interleave; 0 = concatenate */
	u_int		 sc_ndisks;		/* number of components */
	struct ccdcinfo	 *sc_cinfo;		/* component info, sc_ndisks long */
	struct ccdiinfo	 *sc_itable;		/* interleave table */
	u_int32_t	 sc_secsize;		/* # bytes per sector */
	int		 sc_pick;		/* side of mirror picked */
	daddr_t		 sc_blk[2];		/* end of last read, per side */
	u_int32_t	 sc_offset;		/* blocks skipped on each component */
};
144115731Sphk
/* Forward declarations for functions referenced before their definition. */
static g_start_t g_ccd_start;
static void ccdiodone(struct bio *bp);
static void ccdinterleave(struct ccd_s *);
static int ccdinit(struct gctl_req *req, struct ccd_s *);
static int ccdbuffer(struct bio **ret, struct ccd_s *,
		      struct bio *, daddr_t, caddr_t, long);
15113041Sasami
/*
 * Orphan method registered in g_ccd_class.  Intentionally a no-op: the
 * consumer passed in is neither detached nor destroyed here.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{
	/*
	 * XXX: We don't do anything here.  It is not obvious
	 * XXX: what DTRT would be, so we do what the previous
	 * XXX: code did: ignore it and let the user cope.
	 */
}
16182937Sphk
162115849Sphkstatic int
163115849Sphkg_ccd_access(struct g_provider *pp, int dr, int dw, int de)
16482937Sphk{
165115849Sphk	struct g_geom *gp;
166115849Sphk	struct g_consumer *cp1, *cp2;
167115849Sphk	int error;
16882937Sphk
169115849Sphk	de += dr;
170115849Sphk	de += dw;
17182937Sphk
172115849Sphk	gp = pp->geom;
173115849Sphk	error = ENXIO;
174115849Sphk	LIST_FOREACH(cp1, &gp->consumer, consumer) {
175125755Sphk		error = g_access(cp1, dr, dw, de);
176115849Sphk		if (error) {
177115849Sphk			LIST_FOREACH(cp2, &gp->consumer, consumer) {
178115849Sphk				if (cp1 == cp2)
179115849Sphk					break;
180125755Sphk				g_access(cp2, -dr, -dw, -de);
181115849Sphk			}
182115849Sphk			break;
183115849Sphk		}
184115849Sphk	}
185115849Sphk	return (error);
18682937Sphk}
18782937Sphk
18815763Sasami/*
189115849Sphk * Free the softc and its substructures.
19013041Sasami */
19130688Sphkstatic void
192115849Sphkg_ccd_freesc(struct ccd_s *sc)
19313041Sasami{
194115849Sphk	struct ccdiinfo *ii;
19513041Sasami
196115849Sphk	g_free(sc->sc_cinfo);
197115849Sphk	if (sc->sc_itable != NULL) {
198115849Sphk		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
199115849Sphk			if (ii->ii_index != NULL)
200115849Sphk				g_free(ii->ii_index);
201115849Sphk		g_free(sc->sc_itable);
20214730Sasami	}
203115849Sphk	g_free(sc);
20413041Sasami}
20513041Sasami
20643076Speter
20713041Sasamistatic int
208115849Sphkccdinit(struct gctl_req *req, struct ccd_s *cs)
20913041Sasami{
210115849Sphk	struct ccdcinfo *ci;
211119300Sps	daddr_t size;
21251601Sdillon	int ix;
213119300Sps	daddr_t minsize;
21413041Sasami	int maxsecsize;
21593653Sphk	off_t mediasize;
21693653Sphk	u_int sectorsize;
21713041Sasami
21813041Sasami	cs->sc_size = 0;
21913041Sasami
22013041Sasami	maxsecsize = 0;
22113041Sasami	minsize = 0;
222157740Scracauer
223157740Scracauer	if (cs->sc_flags & CCDF_LINUX) {
224157740Scracauer		cs->sc_offset = 0;
225157740Scracauer		cs->sc_ileave *= 2;
226157740Scracauer		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
227157740Scracauer			gctl_error(req, "Mirror mode for Linux raids is "
228157740Scracauer			                "only supported with 2 devices");
229157740Scracauer	} else {
230157740Scracauer		if (cs->sc_flags & CCDF_NO_OFFSET)
231157740Scracauer			cs->sc_offset = 0;
232157740Scracauer		else
233157740Scracauer			cs->sc_offset = CCD_OFFSET;
234157740Scracauer
235157740Scracauer	}
236115849Sphk	for (ix = 0; ix < cs->sc_ndisks; ix++) {
23713041Sasami		ci = &cs->sc_cinfo[ix];
23813041Sasami
239115849Sphk		mediasize = ci->ci_provider->mediasize;
240115849Sphk		sectorsize = ci->ci_provider->sectorsize;
24193653Sphk		if (sectorsize > maxsecsize)
24293653Sphk			maxsecsize = sectorsize;
243157740Scracauer		size = mediasize / DEV_BSIZE - cs->sc_offset;
24413041Sasami
245115849Sphk		/* Truncate to interleave boundary */
24613041Sasami
24713041Sasami		if (cs->sc_ileave > 1)
24813041Sasami			size -= size % cs->sc_ileave;
24913041Sasami
25013041Sasami		if (size == 0) {
251115849Sphk			gctl_error(req, "Component %s has effective size zero",
252115849Sphk			    ci->ci_provider->name);
253115849Sphk			return(ENODEV);
25413041Sasami		}
25513041Sasami
25613041Sasami		if (minsize == 0 || size < minsize)
25713041Sasami			minsize = size;
25813041Sasami		ci->ci_size = size;
25913041Sasami		cs->sc_size += size;
26013041Sasami	}
26113041Sasami
26213041Sasami	/*
26313041Sasami	 * Don't allow the interleave to be smaller than
26413041Sasami	 * the biggest component sector.
26513041Sasami	 */
26613041Sasami	if ((cs->sc_ileave > 0) &&
26713041Sasami	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
268115849Sphk		gctl_error(req, "Interleave to small for sector size");
269115849Sphk		return(EINVAL);
27013041Sasami	}
27113041Sasami
27213041Sasami	/*
27313041Sasami	 * If uniform interleave is desired set all sizes to that of
27451601Sdillon	 * the smallest component.  This will guarentee that a single
27551601Sdillon	 * interleave table is generated.
27651601Sdillon	 *
27751601Sdillon	 * Lost space must be taken into account when calculating the
27851601Sdillon	 * overall size.  Half the space is lost when CCDF_MIRROR is
279109421Sphk	 * specified.
28013041Sasami	 */
28182937Sphk	if (cs->sc_flags & CCDF_UNIFORM) {
282115849Sphk		for (ix = 0; ix < cs->sc_ndisks; ix++) {
283115849Sphk			ci = &cs->sc_cinfo[ix];
28413041Sasami			ci->ci_size = minsize;
28551601Sdillon		}
286115849Sphk		cs->sc_size = cs->sc_ndisks * minsize;
28713041Sasami	}
28813041Sasami
289115849Sphk	if (cs->sc_flags & CCDF_MIRROR) {
290115849Sphk		/*
291115849Sphk		 * Check to see if an even number of components
292115849Sphk		 * have been specified.  The interleave must also
293115849Sphk		 * be non-zero in order for us to be able to
294115849Sphk		 * guarentee the topology.
295115849Sphk		 */
296115849Sphk		if (cs->sc_ndisks % 2) {
297115849Sphk			gctl_error(req,
298115849Sphk			      "Mirroring requires an even number of disks");
299115849Sphk			return(EINVAL);
300115849Sphk		}
301115849Sphk		if (cs->sc_ileave == 0) {
302115849Sphk			gctl_error(req,
303115849Sphk			     "An interleave must be specified when mirroring");
304115849Sphk			return(EINVAL);
305115849Sphk		}
306115849Sphk		cs->sc_size = (cs->sc_ndisks/2) * minsize;
307115849Sphk	}
308115849Sphk
30913041Sasami	/*
31013041Sasami	 * Construct the interleave table.
31113041Sasami	 */
312115849Sphk	ccdinterleave(cs);
31313041Sasami
31413041Sasami	/*
31513041Sasami	 * Create pseudo-geometry based on 1MB cylinders.  It's
31613041Sasami	 * pretty close.
31713041Sasami	 */
318115849Sphk	cs->sc_secsize = maxsecsize;
31913041Sasami
32013041Sasami	return (0);
32113041Sasami}
32213041Sasami
32313041Sasamistatic void
324115849Sphkccdinterleave(struct ccd_s *cs)
32513041Sasami{
32651601Sdillon	struct ccdcinfo *ci, *smallci;
32751601Sdillon	struct ccdiinfo *ii;
32851601Sdillon	daddr_t bn, lbn;
32951601Sdillon	int ix;
330119300Sps	daddr_t size;
33113041Sasami
33251601Sdillon
33313041Sasami	/*
33451601Sdillon	 * Allocate an interleave table.  The worst case occurs when each
33551601Sdillon	 * of N disks is of a different size, resulting in N interleave
33651601Sdillon	 * tables.
33751601Sdillon	 *
33813041Sasami	 * Chances are this is too big, but we don't care.
33913041Sasami	 */
340115849Sphk	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
341115849Sphk	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
34213041Sasami
34313041Sasami	/*
34413041Sasami	 * Trivial case: no interleave (actually interleave of disk size).
34513041Sasami	 * Each table entry represents a single component in its entirety.
34651601Sdillon	 *
347109421Sphk	 * An interleave of 0 may not be used with a mirror setup.
34813041Sasami	 */
34913041Sasami	if (cs->sc_ileave == 0) {
35013041Sasami		bn = 0;
35113041Sasami		ii = cs->sc_itable;
35213041Sasami
353115849Sphk		for (ix = 0; ix < cs->sc_ndisks; ix++) {
35413041Sasami			/* Allocate space for ii_index. */
355115849Sphk			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
35613041Sasami			ii->ii_ndisk = 1;
35713041Sasami			ii->ii_startblk = bn;
35813041Sasami			ii->ii_startoff = 0;
35913041Sasami			ii->ii_index[0] = ix;
36013041Sasami			bn += cs->sc_cinfo[ix].ci_size;
36113041Sasami			ii++;
36213041Sasami		}
36313041Sasami		ii->ii_ndisk = 0;
36413041Sasami		return;
36513041Sasami	}
36613041Sasami
36713041Sasami	/*
36813041Sasami	 * The following isn't fast or pretty; it doesn't have to be.
36913041Sasami	 */
37013041Sasami	size = 0;
37113041Sasami	bn = lbn = 0;
37213041Sasami	for (ii = cs->sc_itable; ; ii++) {
37351601Sdillon		/*
37451601Sdillon		 * Allocate space for ii_index.  We might allocate more then
37551601Sdillon		 * we use.
37651601Sdillon		 */
377115849Sphk		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
378115849Sphk		    M_WAITOK);
37913041Sasami
38013041Sasami		/*
38113041Sasami		 * Locate the smallest of the remaining components
38213041Sasami		 */
38313041Sasami		smallci = NULL;
384115849Sphk		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
38551601Sdillon		    ci++) {
38613041Sasami			if (ci->ci_size > size &&
38713041Sasami			    (smallci == NULL ||
38851601Sdillon			     ci->ci_size < smallci->ci_size)) {
38913041Sasami				smallci = ci;
39051601Sdillon			}
39151601Sdillon		}
39213041Sasami
39313041Sasami		/*
39413041Sasami		 * Nobody left, all done
39513041Sasami		 */
39613041Sasami		if (smallci == NULL) {
39713041Sasami			ii->ii_ndisk = 0;
398115849Sphk			g_free(ii->ii_index);
399115849Sphk			ii->ii_index = NULL;
40013041Sasami			break;
40113041Sasami		}
40213041Sasami
40313041Sasami		/*
40451601Sdillon		 * Record starting logical block using an sc_ileave blocksize.
40513041Sasami		 */
40613041Sasami		ii->ii_startblk = bn / cs->sc_ileave;
40751601Sdillon
40851601Sdillon		/*
409115849Sphk		 * Record starting component block using an sc_ileave
41051601Sdillon		 * blocksize.  This value is relative to the beginning of
41151601Sdillon		 * a component disk.
41251601Sdillon		 */
41313041Sasami		ii->ii_startoff = lbn;
41413041Sasami
41513041Sasami		/*
41613041Sasami		 * Determine how many disks take part in this interleave
41713041Sasami		 * and record their indices.
41813041Sasami		 */
41913041Sasami		ix = 0;
42051601Sdillon		for (ci = cs->sc_cinfo;
421115849Sphk		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
42251601Sdillon			if (ci->ci_size >= smallci->ci_size) {
42313041Sasami				ii->ii_index[ix++] = ci - cs->sc_cinfo;
42451601Sdillon			}
42551601Sdillon		}
42613041Sasami		ii->ii_ndisk = ix;
42713041Sasami		bn += ix * (smallci->ci_size - size);
42813041Sasami		lbn = smallci->ci_size / cs->sc_ileave;
42913041Sasami		size = smallci->ci_size;
43013041Sasami	}
43113041Sasami}
43213041Sasami
43330688Sphkstatic void
434115849Sphkg_ccd_start(struct bio *bp)
43513041Sasami{
43651601Sdillon	long bcount, rcount;
437115849Sphk	struct bio *cbp[2];
43813041Sasami	caddr_t addr;
43913041Sasami	daddr_t bn;
440109535Sphk	int err;
441115849Sphk	struct ccd_s *cs;
44213041Sasami
443115849Sphk	cs = bp->bio_to->geom->softc;
444115849Sphk
44513041Sasami	/*
446119296Sphk	 * Block all GETATTR requests, we wouldn't know which of our
447119296Sphk	 * subdevices we should ship it off to.
448119296Sphk	 * XXX: this may not be the right policy.
449119296Sphk	 */
450119296Sphk	if(bp->bio_cmd == BIO_GETATTR) {
451119296Sphk		g_io_deliver(bp, EINVAL);
452119296Sphk		return;
453119296Sphk	}
454119296Sphk
455119296Sphk	/*
45613041Sasami	 * Translate the partition-relative block number to an absolute.
45713041Sasami	 */
458115849Sphk	bn = bp->bio_offset / cs->sc_secsize;
45913041Sasami
46013041Sasami	/*
46113041Sasami	 * Allocate component buffers and fire off the requests
46213041Sasami	 */
46359841Sphk	addr = bp->bio_data;
464115849Sphk	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
465109535Sphk		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
466109535Sphk		if (err) {
467116107Sphk			bp->bio_completed += bcount;
468122550Sphk			if (bp->bio_error == 0)
469113464Sphk				bp->bio_error = err;
470116107Sphk			if (bp->bio_completed == bp->bio_length)
471116107Sphk				g_io_deliver(bp, bp->bio_error);
472109535Sphk			return;
473109535Sphk		}
474115849Sphk		rcount = cbp[0]->bio_length;
47551601Sdillon
476115849Sphk		if (cs->sc_flags & CCDF_MIRROR) {
47751601Sdillon			/*
47851601Sdillon			 * Mirroring.  Writes go to both disks, reads are
47951601Sdillon			 * taken from whichever disk seems most appropriate.
48051601Sdillon			 *
48151601Sdillon			 * We attempt to localize reads to the disk whos arm
48251601Sdillon			 * is nearest the read request.  We ignore seeks due
48351601Sdillon			 * to writes when making this determination and we
48451601Sdillon			 * also try to avoid hogging.
48551601Sdillon			 */
486115849Sphk			if (cbp[0]->bio_cmd != BIO_READ) {
487115849Sphk				g_io_request(cbp[0], cbp[0]->bio_from);
488115849Sphk				g_io_request(cbp[1], cbp[1]->bio_from);
48951601Sdillon			} else {
49051601Sdillon				int pick = cs->sc_pick;
49151601Sdillon				daddr_t range = cs->sc_size / 16;
49251601Sdillon
49351601Sdillon				if (bn < cs->sc_blk[pick] - range ||
49451601Sdillon				    bn > cs->sc_blk[pick] + range
49551601Sdillon				) {
49651601Sdillon					cs->sc_pick = pick = 1 - pick;
49751601Sdillon				}
49851601Sdillon				cs->sc_blk[pick] = bn + btodb(rcount);
499115849Sphk				g_io_request(cbp[pick], cbp[pick]->bio_from);
50051601Sdillon			}
50151601Sdillon		} else {
50251601Sdillon			/*
50351601Sdillon			 * Not mirroring
50451601Sdillon			 */
505115849Sphk			g_io_request(cbp[0], cbp[0]->bio_from);
50613775Sasami		}
50713041Sasami		bn += btodb(rcount);
50813041Sasami		addr += rcount;
50913041Sasami	}
51013041Sasami}
51113041Sasami
51213041Sasami/*
51313041Sasami * Build a component buffer header.
51413041Sasami */
515109535Sphkstatic int
516115849Sphkccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
51713041Sasami{
518116107Sphk	struct ccdcinfo *ci, *ci2 = NULL;
519115849Sphk	struct bio *cbp;
52051601Sdillon	daddr_t cbn, cboff;
52151601Sdillon	off_t cbc;
52213041Sasami
52313041Sasami	/*
52413041Sasami	 * Determine which component bn falls in.
52513041Sasami	 */
52613041Sasami	cbn = bn;
52713041Sasami	cboff = 0;
52813041Sasami
52913041Sasami	if (cs->sc_ileave == 0) {
53051601Sdillon		/*
53151601Sdillon		 * Serially concatenated and neither a mirror nor a parity
53251601Sdillon		 * config.  This is a special case.
53351601Sdillon		 */
53451601Sdillon		daddr_t sblk;
53513041Sasami
53613041Sasami		sblk = 0;
53713041Sasami		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
53813041Sasami			sblk += ci->ci_size;
53913041Sasami		cbn -= sblk;
54051601Sdillon	} else {
54151601Sdillon		struct ccdiinfo *ii;
54213041Sasami		int ccdisk, off;
54313041Sasami
54451601Sdillon		/*
54551601Sdillon		 * Calculate cbn, the logical superblock (sc_ileave chunks),
54651601Sdillon		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
54751601Sdillon		 * to cbn.
54851601Sdillon		 */
54951601Sdillon		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
55051601Sdillon		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
55151601Sdillon
55251601Sdillon		/*
55351601Sdillon		 * Figure out which interleave table to use.
55451601Sdillon		 */
55551601Sdillon		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
55613041Sasami			if (ii->ii_startblk > cbn)
55713041Sasami				break;
55851601Sdillon		}
55913041Sasami		ii--;
56051601Sdillon
56151601Sdillon		/*
56251601Sdillon		 * off is the logical superblock relative to the beginning
56351601Sdillon		 * of this interleave block.
56451601Sdillon		 */
56513041Sasami		off = cbn - ii->ii_startblk;
56651601Sdillon
56751601Sdillon		/*
56851601Sdillon		 * We must calculate which disk component to use (ccdisk),
56951601Sdillon		 * and recalculate cbn to be the superblock relative to
57051601Sdillon		 * the beginning of the component.  This is typically done by
57151601Sdillon		 * adding 'off' and ii->ii_startoff together.  However, 'off'
57251601Sdillon		 * must typically be divided by the number of components in
57351601Sdillon		 * this interleave array to be properly convert it from a
57451601Sdillon		 * CCD-relative logical superblock number to a
57551601Sdillon		 * component-relative superblock number.
57651601Sdillon		 */
57713041Sasami		if (ii->ii_ndisk == 1) {
57851601Sdillon			/*
57951601Sdillon			 * When we have just one disk, it can't be a mirror
58051601Sdillon			 * or a parity config.
58151601Sdillon			 */
58213041Sasami			ccdisk = ii->ii_index[0];
58313041Sasami			cbn = ii->ii_startoff + off;
58413041Sasami		} else {
585115849Sphk			if (cs->sc_flags & CCDF_MIRROR) {
58651601Sdillon				/*
58751601Sdillon				 * We have forced a uniform mapping, resulting
58851601Sdillon				 * in a single interleave array.  We double
58951601Sdillon				 * up on the first half of the available
59051601Sdillon				 * components and our mirror is in the second
59151601Sdillon				 * half.  This only works with a single
59251601Sdillon				 * interleave array because doubling up
59351601Sdillon				 * doubles the number of sectors, so there
59451601Sdillon				 * cannot be another interleave array because
59551601Sdillon				 * the next interleave array's calculations
59651601Sdillon				 * would be off.
59751601Sdillon				 */
59851601Sdillon				int ndisk2 = ii->ii_ndisk / 2;
59951601Sdillon				ccdisk = ii->ii_index[off % ndisk2];
60051601Sdillon				cbn = ii->ii_startoff + off / ndisk2;
60151601Sdillon				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
60251601Sdillon			} else {
60313173Sasami				ccdisk = ii->ii_index[off % ii->ii_ndisk];
60413173Sasami				cbn = ii->ii_startoff + off / ii->ii_ndisk;
60513173Sasami			}
60613041Sasami		}
60751601Sdillon
60851601Sdillon		ci = &cs->sc_cinfo[ccdisk];
60951601Sdillon
61051601Sdillon		/*
61151601Sdillon		 * Convert cbn from a superblock to a normal block so it
61251601Sdillon		 * can be used to calculate (along with cboff) the normal
61351601Sdillon		 * block index into this particular disk.
61451601Sdillon		 */
61513041Sasami		cbn *= cs->sc_ileave;
61613041Sasami	}
61713041Sasami
61813041Sasami	/*
61913041Sasami	 * Fill in the component buf structure.
62013041Sasami	 */
621115849Sphk	cbp = g_clone_bio(bp);
622116107Sphk	if (cbp == NULL)
623116107Sphk		return (ENOMEM);
624115849Sphk	cbp->bio_done = g_std_done;
625157740Scracauer	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
626115849Sphk	cbp->bio_data = addr;
62713041Sasami	if (cs->sc_ileave == 0)
62844671Sdg              cbc = dbtob((off_t)(ci->ci_size - cbn));
62913041Sasami	else
63044671Sdg              cbc = dbtob((off_t)(cs->sc_ileave - cboff));
631115849Sphk	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
63221470Sdyson
633115849Sphk	cbp->bio_from = ci->ci_consumer;
63413775Sasami	cb[0] = cbp;
63551601Sdillon
636115849Sphk	if (cs->sc_flags & CCDF_MIRROR) {
637115849Sphk		cbp = g_clone_bio(bp);
638116107Sphk		if (cbp == NULL)
639116107Sphk			return (ENOMEM);
640115849Sphk		cbp->bio_done = cb[0]->bio_done = ccdiodone;
641115849Sphk		cbp->bio_offset = cb[0]->bio_offset;
642115849Sphk		cbp->bio_data = cb[0]->bio_data;
643115849Sphk		cbp->bio_length = cb[0]->bio_length;
644115849Sphk		cbp->bio_from = ci2->ci_consumer;
645115849Sphk		cbp->bio_caller1 = cb[0];
646115849Sphk		cb[0]->bio_caller1 = cbp;
64713775Sasami		cb[1] = cbp;
64813775Sasami	}
649109535Sphk	return (0);
65013041Sasami}
65113041Sasami
65213041Sasami/*
653115953Sphk * Called only for mirrored operations.
65413041Sasami */
65530688Sphkstatic void
656115849Sphkccdiodone(struct bio *cbp)
65713041Sasami{
658115849Sphk	struct bio *mbp, *pbp;
65913041Sasami
660115849Sphk	mbp = cbp->bio_caller1;
661115849Sphk	pbp = cbp->bio_parent;
66213041Sasami
663115849Sphk	if (pbp->bio_cmd == BIO_READ) {
664115849Sphk		if (cbp->bio_error == 0) {
665115953Sphk			/* We will not be needing the partner bio */
666115953Sphk			if (mbp != NULL) {
667115953Sphk				pbp->bio_inbed++;
668115953Sphk				g_destroy_bio(mbp);
669115953Sphk			}
670115849Sphk			g_std_done(cbp);
671115849Sphk			return;
67214821Sasami		}
673115849Sphk		if (mbp != NULL) {
674115953Sphk			/* Try partner the bio instead */
675115849Sphk			mbp->bio_caller1 = NULL;
676115849Sphk			pbp->bio_inbed++;
677115849Sphk			g_destroy_bio(cbp);
678115849Sphk			g_io_request(mbp, mbp->bio_from);
679115953Sphk			/*
680115953Sphk			 * XXX: If this comes back OK, we should actually
681115953Sphk			 * try to write the good data on the failed mirror
682115953Sphk			 */
683115849Sphk			return;
684115849Sphk		}
685115849Sphk		g_std_done(cbp);
686118182Sphk		return;
68751601Sdillon	}
688115849Sphk	if (mbp != NULL) {
689115849Sphk		mbp->bio_caller1 = NULL;
690115849Sphk		pbp->bio_inbed++;
691115953Sphk		if (cbp->bio_error != 0 && pbp->bio_error == 0)
692115849Sphk			pbp->bio_error = cbp->bio_error;
693118182Sphk		g_destroy_bio(cbp);
694115849Sphk		return;
695109473Sphk	}
696115849Sphk	g_std_done(cbp);
69713041Sasami}
69813041Sasami
699115849Sphkstatic void
700115849Sphkg_ccd_create(struct gctl_req *req, struct g_class *mp)
70113041Sasami{
702115849Sphk	int *unit, *ileave, *nprovider;
703115849Sphk	struct g_geom *gp;
704115849Sphk	struct g_consumer *cp;
705115849Sphk	struct g_provider *pp;
706115849Sphk	struct ccd_s *sc;
707115849Sphk	struct sbuf *sb;
708115849Sphk	char buf[20];
709115849Sphk	int i, error;
710109421Sphk
711115849Sphk	g_topology_assert();
712115849Sphk	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
713185318Slulf	if (unit == NULL) {
714185318Slulf		gctl_error(req, "unit parameter not given");
715185318Slulf		return;
716185318Slulf	}
717115849Sphk	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
718185318Slulf	if (ileave == NULL) {
719185318Slulf		gctl_error(req, "ileave parameter not given");
720185318Slulf		return;
721185318Slulf	}
722115849Sphk	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
723185318Slulf	if (nprovider == NULL) {
724185318Slulf		gctl_error(req, "nprovider parameter not given");
725185318Slulf		return;
726185318Slulf	}
727109421Sphk
728115849Sphk	/* Check for duplicate unit */
729115849Sphk	LIST_FOREACH(gp, &mp->geom, geom) {
730115849Sphk		sc = gp->softc;
731119295Sphk		if (sc != NULL && sc->sc_unit == *unit) {
732115849Sphk			gctl_error(req, "Unit %d already configured", *unit);
733115849Sphk			return;
73413784Sasami		}
735115849Sphk	}
73613041Sasami
737115849Sphk	if (*nprovider <= 0) {
738115849Sphk		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
739115849Sphk		return;
740115849Sphk	}
74113041Sasami
742115849Sphk	/* Check all providers are valid */
743115849Sphk	for (i = 0; i < *nprovider; i++) {
744115849Sphk		sprintf(buf, "provider%d", i);
745115849Sphk		pp = gctl_get_provider(req, buf);
746115849Sphk		if (pp == NULL)
747115849Sphk			return;
748115849Sphk	}
74913041Sasami
750115849Sphk	gp = g_new_geomf(mp, "ccd%d", *unit);
751115849Sphk	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
752115849Sphk	gp->softc = sc;
753115849Sphk	sc->sc_ndisks = *nprovider;
75413041Sasami
755115849Sphk	/* Allocate space for the component info. */
756115849Sphk	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
757115849Sphk	    M_WAITOK | M_ZERO);
75813041Sasami
759115849Sphk	/* Create consumers and attach to all providers */
760115849Sphk	for (i = 0; i < *nprovider; i++) {
761115849Sphk		sprintf(buf, "provider%d", i);
762115849Sphk		pp = gctl_get_provider(req, buf);
763115849Sphk		cp = g_new_consumer(gp);
764115849Sphk		error = g_attach(cp, pp);
765115849Sphk		KASSERT(error == 0, ("attach to %s failed", pp->name));
766115849Sphk		sc->sc_cinfo[i].ci_consumer = cp;
767115849Sphk		sc->sc_cinfo[i].ci_provider = pp;
768115849Sphk	}
76913041Sasami
770115849Sphk	sc->sc_unit = *unit;
771115849Sphk	sc->sc_ileave = *ileave;
77213041Sasami
773157740Scracauer	if (gctl_get_param(req, "no_offset", NULL))
774157740Scracauer		sc->sc_flags |= CCDF_NO_OFFSET;
775157740Scracauer	if (gctl_get_param(req, "linux", NULL))
776157740Scracauer		sc->sc_flags |= CCDF_LINUX;
777157740Scracauer
778115849Sphk	if (gctl_get_param(req, "uniform", NULL))
779115849Sphk		sc->sc_flags |= CCDF_UNIFORM;
780115849Sphk	if (gctl_get_param(req, "mirror", NULL))
781115849Sphk		sc->sc_flags |= CCDF_MIRROR;
78213041Sasami
783115849Sphk	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
784115849Sphk		printf("%s: disabling mirror, interleave is 0\n", gp->name);
785115849Sphk		sc->sc_flags &= ~(CCDF_MIRROR);
78613041Sasami	}
78713041Sasami
788115849Sphk	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
789115849Sphk		printf("%s: mirror/parity forces uniform flag\n", gp->name);
790115849Sphk		sc->sc_flags |= CCDF_UNIFORM;
79113041Sasami	}
79213041Sasami
793115849Sphk	error = ccdinit(req, sc);
794115849Sphk	if (error != 0) {
795115849Sphk		g_ccd_freesc(sc);
796115849Sphk		gp->softc = NULL;
797115849Sphk		g_wither_geom(gp, ENXIO);
798115849Sphk		return;
79913041Sasami	}
80013041Sasami
801115849Sphk	pp = g_new_providerf(gp, "%s", gp->name);
802115849Sphk	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
803115849Sphk	pp->sectorsize = sc->sc_secsize;
804115849Sphk	g_error_provider(pp, 0);
80513041Sasami
806181463Sdes	sb = sbuf_new_auto();
807115849Sphk	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
808115849Sphk	for (i = 0; i < *nprovider; i++) {
809115849Sphk		sbuf_printf(sb, "%s%s",
810115849Sphk		    i == 0 ? "(" : ", ",
811115849Sphk		    sc->sc_cinfo[i].ci_provider->name);
81213041Sasami	}
813115849Sphk	sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
814115849Sphk	if (sc->sc_ileave != 0)
815115849Sphk		sbuf_printf(sb, "interleaved at %d blocks\n",
816115849Sphk			sc->sc_ileave);
817115849Sphk	else
818115849Sphk		sbuf_printf(sb, "concatenated\n");
819115849Sphk	sbuf_finish(sb);
820157581Smarcel	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
821115849Sphk	sbuf_delete(sb);
82213041Sasami}
82313041Sasami
824119299Sphkstatic int
825119299Sphkg_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
82613041Sasami{
827115849Sphk	struct g_provider *pp;
828115849Sphk	struct ccd_s *sc;
82913041Sasami
830115849Sphk	g_topology_assert();
831115849Sphk	sc = gp->softc;
832115849Sphk	pp = LIST_FIRST(&gp->provider);
833119299Sphk	if (sc == NULL || pp == NULL)
834119299Sphk		return (EBUSY);
835115849Sphk	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
836115849Sphk		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
837115849Sphk		    pp->acr, pp->acw, pp->ace);
838119299Sphk		return (EBUSY);
83913041Sasami	}
840115849Sphk	g_ccd_freesc(sc);
841115849Sphk	gp->softc = NULL;
842115849Sphk	g_wither_geom(gp, ENXIO);
843119299Sphk	return (0);
84413041Sasami}
845115729Sphk
846115849Sphkstatic void
847115849Sphkg_ccd_list(struct gctl_req *req, struct g_class *mp)
848115729Sphk{
849115729Sphk	struct sbuf *sb;
850115729Sphk	struct ccd_s *cs;
851115849Sphk	struct g_geom *gp;
852115849Sphk	int i, unit, *up;
853115729Sphk
854185318Slulf	up = gctl_get_paraml(req, "unit", sizeof (*up));
855185318Slulf	if (up == NULL) {
856185318Slulf		gctl_error(req, "unit parameter not given");
857185318Slulf		return;
858185318Slulf	}
859115849Sphk	unit = *up;
860181463Sdes	sb = sbuf_new_auto();
861115849Sphk	LIST_FOREACH(gp, &mp->geom, geom) {
862115849Sphk		cs = gp->softc;
863119295Sphk		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
864115731Sphk			continue;
865115729Sphk		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
866115849Sphk		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
867115729Sphk
868115849Sphk		for (i = 0; i < cs->sc_ndisks; ++i) {
869115849Sphk			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
870115849Sphk			    cs->sc_cinfo[i].ci_provider->name);
871115729Sphk		}
872115729Sphk		sbuf_printf(sb, "\n");
873115729Sphk	}
874115729Sphk	sbuf_finish(sb);
875157581Smarcel	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
876115849Sphk	sbuf_delete(sb);
877115729Sphk}
878115729Sphk
879115729Sphkstatic void
880115729Sphkg_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
881115729Sphk{
882119299Sphk	struct g_geom *gp;
883115729Sphk
884115729Sphk	g_topology_assert();
885115729Sphk	if (!strcmp(verb, "create geom")) {
886115849Sphk		g_ccd_create(req, mp);
887115729Sphk	} else if (!strcmp(verb, "destroy geom")) {
888119299Sphk		gp = gctl_get_geom(req, mp, "geom");
889119299Sphk		if (gp != NULL)
890119299Sphk		g_ccd_destroy_geom(req, mp, gp);
891115729Sphk	} else if (!strcmp(verb, "list")) {
892115849Sphk		g_ccd_list(req, mp);
893115729Sphk	} else {
894115729Sphk		gctl_error(req, "unknown verb");
895115729Sphk	}
896115729Sphk}
897115729Sphk
/*
 * Class descriptor: ties the methods defined in this file to the "CCD"
 * class name.
 */
static struct g_class g_ccd_class = {
	.name = "CCD",
	.version = G_VERSION,
	.ctlreq = g_ccd_config,
	.destroy_geom = g_ccd_destroy_geom,
	.start = g_ccd_start,
	.orphan = g_ccd_orphan,
	.access = g_ccd_access,
};

DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
909