1139778Simp/*- 2109471Sphk * Copyright (c) 2003 Poul-Henning Kamp. 313041Sasami * Copyright (c) 1995 Jason R. Thorpe. 4109471Sphk * Copyright (c) 1990, 1993 5109471Sphk * The Regents of the University of California. All rights reserved. 613041Sasami * All rights reserved. 7109471Sphk * Copyright (c) 1988 University of Utah. 813041Sasami * 9109471Sphk * This code is derived from software contributed to Berkeley by 10109471Sphk * the Systems Programming Group of the University of Utah Computer 11109471Sphk * Science Department. 12109471Sphk * 1313041Sasami * Redistribution and use in source and binary forms, with or without 1413041Sasami * modification, are permitted provided that the following conditions 1513041Sasami * are met: 1613041Sasami * 1. Redistributions of source code must retain the above copyright 1713041Sasami * notice, this list of conditions and the following disclaimer. 1813041Sasami * 2. Redistributions in binary form must reproduce the above copyright 1913041Sasami * notice, this list of conditions and the following disclaimer in the 2013041Sasami * documentation and/or other materials provided with the distribution. 2113041Sasami * 3. All advertising materials mentioning features or use of this software 2213041Sasami * must display the following acknowledgement: 2313041Sasami * This product includes software developed for the NetBSD Project 2413041Sasami * by Jason R. Thorpe. 25109471Sphk * 4. The names of the authors may not be used to endorse or promote products 2613041Sasami * derived from this software without specific prior written permission. 2713041Sasami * 2813041Sasami * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 2913041Sasami * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 3013041Sasami * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 3113041Sasami * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 3213041Sasami * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 3313041Sasami * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 3413041Sasami * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3513041Sasami * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3613041Sasami * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3713041Sasami * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3813041Sasami * SUCH DAMAGE. 3913041Sasami * 4013041Sasami * Dynamic configuration and disklabel support by: 4113041Sasami * Jason R. Thorpe <thorpej@nas.nasa.gov> 4213041Sasami * Numerical Aerodynamic Simulation Facility 4313041Sasami * Mail Stop 258-6 4413041Sasami * NASA Ames Research Center 4513041Sasami * Moffett Field, CA 94035 46109471Sphk * 47109471Sphk * from: Utah $Hdr: cd.c 1.6 90/11/28$ 48109471Sphk * @(#)cd.c 8.2 (Berkeley) 11/16/93 49109471Sphk * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 5013041Sasami */ 5113041Sasami 52116196Sobrien#include <sys/cdefs.h> 53116196Sobrien__FBSDID("$FreeBSD$"); 54116196Sobrien 5513041Sasami#include <sys/param.h> 5613041Sasami#include <sys/systm.h> 5714730Sasami#include <sys/kernel.h> 5843076Speter#include <sys/module.h> 5960041Sphk#include <sys/bio.h> 6013041Sasami#include <sys/malloc.h> 61223921Sae#include <sys/sbuf.h> 62115729Sphk#include <geom/geom.h> 6313041Sasami 64115731Sphk/* 65115849Sphk * Number of blocks to untouched in front of a component partition. 66115849Sphk * This is to avoid violating its disklabel area when it starts at the 67115849Sphk * beginning of the slice. 68115731Sphk */ 69115849Sphk#if !defined(CCD_OFFSET) 70115849Sphk#define CCD_OFFSET 16 71115849Sphk#endif 72115731Sphk 73115849Sphk/* sc_flags */ 74115849Sphk#define CCDF_UNIFORM 0x02 /* use LCCD of sizes for uniform interleave */ 75115849Sphk#define CCDF_MIRROR 0x04 /* use mirroring */ 76157740Scracauer#define CCDF_NO_OFFSET 0x08 /* do not leave space in front */ 77157740Scracauer#define CCDF_LINUX 0x10 /* use Linux compatibility mode */ 78115849Sphk 79115849Sphk/* Mask of user-settable ccd flags. */ 80115849Sphk#define CCDF_USERMASK (CCDF_UNIFORM|CCDF_MIRROR) 81115849Sphk 82115731Sphk/* 83115731Sphk * Interleave description table. 84115731Sphk * Computed at boot time to speed irregular-interleave lookups. 85115731Sphk * The idea is that we interleave in "groups". First we interleave 86115731Sphk * evenly over all component disks up to the size of the smallest 87115731Sphk * component (the first group), then we interleave evenly over all 88115731Sphk * remaining disks up to the size of the next-smallest (second group), 89115731Sphk * and so on. 90115731Sphk * 91115731Sphk * Each table entry describes the interleave characteristics of one 92115731Sphk * of these groups. For example if a concatenated disk consisted of 93115731Sphk * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at 94115731Sphk * DEV_BSIZE (1), the table would have three entries: 95115731Sphk * 96115731Sphk * ndisk startblk startoff dev 97115731Sphk * 3 0 0 0, 1, 2 98115731Sphk * 2 9 3 0, 2 99115731Sphk * 1 13 5 2 100115731Sphk * 0 - - - 101115731Sphk * 102115731Sphk * which says that the first nine blocks (0-8) are interleaved over 103115731Sphk * 3 disks (0, 1, 2) starting at block offset 0 on any component disk, 104115731Sphk * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting 105115731Sphk * at component block 3, and the remaining blocks (13-14) are on disk 106115731Sphk * 2 starting at offset 5. 107115731Sphk */ 108115731Sphkstruct ccdiinfo { 109115731Sphk int ii_ndisk; /* # of disks range is interleaved over */ 110115731Sphk daddr_t ii_startblk; /* starting scaled block # for range */ 111115731Sphk daddr_t ii_startoff; /* starting component offset (block #) */ 112115731Sphk int *ii_index; /* ordered list of components in range */ 113115731Sphk}; 114115731Sphk 115115731Sphk/* 116115849Sphk * Component info table. 117115849Sphk * Describes a single component of a concatenated disk. 118115731Sphk */ 119115849Sphkstruct ccdcinfo { 120119300Sps daddr_t ci_size; /* size */ 121115849Sphk struct g_provider *ci_provider; /* provider */ 122115849Sphk struct g_consumer *ci_consumer; /* consumer */ 123115731Sphk}; 124115731Sphk 125115731Sphk/* 126115731Sphk * A concatenated disk is described by this structure. 127115731Sphk */ 128115849Sphk 129115731Sphkstruct ccd_s { 130115731Sphk LIST_ENTRY(ccd_s) list; 131115731Sphk 132115731Sphk int sc_unit; /* logical unit number */ 133115731Sphk int sc_flags; /* flags */ 134119300Sps daddr_t sc_size; /* size of ccd */ 135115731Sphk int sc_ileave; /* interleave */ 136115849Sphk u_int sc_ndisks; /* number of components */ 137115731Sphk struct ccdcinfo *sc_cinfo; /* component info */ 138115731Sphk struct ccdiinfo *sc_itable; /* interleave table */ 139115849Sphk u_int32_t sc_secsize; /* # bytes per sector */ 140115731Sphk int sc_pick; /* side of mirror picked */ 141115731Sphk daddr_t sc_blk[2]; /* mirror localization */ 142157740Scracauer u_int32_t sc_offset; /* actual offset used */ 143115731Sphk}; 144115731Sphk 145115849Sphkstatic g_start_t g_ccd_start; 14682937Sphkstatic void ccdiodone(struct bio *bp); 147115849Sphkstatic void ccdinterleave(struct ccd_s *); 148115849Sphkstatic int ccdinit(struct gctl_req *req, struct ccd_s *); 149115849Sphkstatic int ccdbuffer(struct bio **ret, struct ccd_s *, 15082937Sphk struct bio *, daddr_t, caddr_t, long); 15113041Sasami 152115849Sphkstatic void 153115849Sphkg_ccd_orphan(struct g_consumer *cp) 15482937Sphk{ 155115953Sphk /* 156115953Sphk * XXX: We don't do anything here. It is not obvious 157115953Sphk * XXX: what DTRT would be, so we do what the previous 158115953Sphk * XXX: code did: ignore it and let the user cope. 159115953Sphk */ 16082937Sphk} 16182937Sphk 162115849Sphkstatic int 163115849Sphkg_ccd_access(struct g_provider *pp, int dr, int dw, int de) 16482937Sphk{ 165115849Sphk struct g_geom *gp; 166115849Sphk struct g_consumer *cp1, *cp2; 167115849Sphk int error; 16882937Sphk 169115849Sphk de += dr; 170115849Sphk de += dw; 17182937Sphk 172115849Sphk gp = pp->geom; 173115849Sphk error = ENXIO; 174115849Sphk LIST_FOREACH(cp1, &gp->consumer, consumer) { 175125755Sphk error = g_access(cp1, dr, dw, de); 176115849Sphk if (error) { 177115849Sphk LIST_FOREACH(cp2, &gp->consumer, consumer) { 178115849Sphk if (cp1 == cp2) 179115849Sphk break; 180125755Sphk g_access(cp2, -dr, -dw, -de); 181115849Sphk } 182115849Sphk break; 183115849Sphk } 184115849Sphk } 185115849Sphk return (error); 18682937Sphk} 18782937Sphk 18815763Sasami/* 189115849Sphk * Free the softc and its substructures. 19013041Sasami */ 19130688Sphkstatic void 192115849Sphkg_ccd_freesc(struct ccd_s *sc) 19313041Sasami{ 194115849Sphk struct ccdiinfo *ii; 19513041Sasami 196115849Sphk g_free(sc->sc_cinfo); 197115849Sphk if (sc->sc_itable != NULL) { 198115849Sphk for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++) 199115849Sphk if (ii->ii_index != NULL) 200115849Sphk g_free(ii->ii_index); 201115849Sphk g_free(sc->sc_itable); 20214730Sasami } 203115849Sphk g_free(sc); 20413041Sasami} 20513041Sasami 20643076Speter 20713041Sasamistatic int 208115849Sphkccdinit(struct gctl_req *req, struct ccd_s *cs) 20913041Sasami{ 210115849Sphk struct ccdcinfo *ci; 211119300Sps daddr_t size; 21251601Sdillon int ix; 213119300Sps daddr_t minsize; 21413041Sasami int maxsecsize; 21593653Sphk off_t mediasize; 21693653Sphk u_int sectorsize; 21713041Sasami 21813041Sasami cs->sc_size = 0; 21913041Sasami 22013041Sasami maxsecsize = 0; 22113041Sasami minsize = 0; 222157740Scracauer 223157740Scracauer if (cs->sc_flags & CCDF_LINUX) { 224157740Scracauer cs->sc_offset = 0; 225157740Scracauer cs->sc_ileave *= 2; 226157740Scracauer if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2) 227157740Scracauer gctl_error(req, "Mirror mode for Linux raids is " 228157740Scracauer "only supported with 2 devices"); 229157740Scracauer } else { 230157740Scracauer if (cs->sc_flags & CCDF_NO_OFFSET) 231157740Scracauer cs->sc_offset = 0; 232157740Scracauer else 233157740Scracauer cs->sc_offset = CCD_OFFSET; 234157740Scracauer 235157740Scracauer } 236115849Sphk for (ix = 0; ix < cs->sc_ndisks; ix++) { 23713041Sasami ci = &cs->sc_cinfo[ix]; 23813041Sasami 239115849Sphk mediasize = ci->ci_provider->mediasize; 240115849Sphk sectorsize = ci->ci_provider->sectorsize; 24193653Sphk if (sectorsize > maxsecsize) 24293653Sphk maxsecsize = sectorsize; 243157740Scracauer size = mediasize / DEV_BSIZE - cs->sc_offset; 24413041Sasami 245115849Sphk /* Truncate to interleave boundary */ 24613041Sasami 24713041Sasami if (cs->sc_ileave > 1) 24813041Sasami size -= size % cs->sc_ileave; 24913041Sasami 25013041Sasami if (size == 0) { 251115849Sphk gctl_error(req, "Component %s has effective size zero", 252115849Sphk ci->ci_provider->name); 253115849Sphk return(ENODEV); 25413041Sasami } 25513041Sasami 25613041Sasami if (minsize == 0 || size < minsize) 25713041Sasami minsize = size; 25813041Sasami ci->ci_size = size; 25913041Sasami cs->sc_size += size; 26013041Sasami } 26113041Sasami 26213041Sasami /* 26313041Sasami * Don't allow the interleave to be smaller than 26413041Sasami * the biggest component sector. 26513041Sasami */ 26613041Sasami if ((cs->sc_ileave > 0) && 26713041Sasami (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 268115849Sphk gctl_error(req, "Interleave to small for sector size"); 269115849Sphk return(EINVAL); 27013041Sasami } 27113041Sasami 27213041Sasami /* 27313041Sasami * If uniform interleave is desired set all sizes to that of 27451601Sdillon * the smallest component. This will guarentee that a single 27551601Sdillon * interleave table is generated. 27651601Sdillon * 27751601Sdillon * Lost space must be taken into account when calculating the 27851601Sdillon * overall size. Half the space is lost when CCDF_MIRROR is 279109421Sphk * specified. 28013041Sasami */ 28182937Sphk if (cs->sc_flags & CCDF_UNIFORM) { 282115849Sphk for (ix = 0; ix < cs->sc_ndisks; ix++) { 283115849Sphk ci = &cs->sc_cinfo[ix]; 28413041Sasami ci->ci_size = minsize; 28551601Sdillon } 286115849Sphk cs->sc_size = cs->sc_ndisks * minsize; 28713041Sasami } 28813041Sasami 289115849Sphk if (cs->sc_flags & CCDF_MIRROR) { 290115849Sphk /* 291115849Sphk * Check to see if an even number of components 292115849Sphk * have been specified. The interleave must also 293115849Sphk * be non-zero in order for us to be able to 294115849Sphk * guarentee the topology. 295115849Sphk */ 296115849Sphk if (cs->sc_ndisks % 2) { 297115849Sphk gctl_error(req, 298115849Sphk "Mirroring requires an even number of disks"); 299115849Sphk return(EINVAL); 300115849Sphk } 301115849Sphk if (cs->sc_ileave == 0) { 302115849Sphk gctl_error(req, 303115849Sphk "An interleave must be specified when mirroring"); 304115849Sphk return(EINVAL); 305115849Sphk } 306115849Sphk cs->sc_size = (cs->sc_ndisks/2) * minsize; 307115849Sphk } 308115849Sphk 30913041Sasami /* 31013041Sasami * Construct the interleave table. 31113041Sasami */ 312115849Sphk ccdinterleave(cs); 31313041Sasami 31413041Sasami /* 31513041Sasami * Create pseudo-geometry based on 1MB cylinders. It's 31613041Sasami * pretty close. 31713041Sasami */ 318115849Sphk cs->sc_secsize = maxsecsize; 31913041Sasami 32013041Sasami return (0); 32113041Sasami} 32213041Sasami 32313041Sasamistatic void 324115849Sphkccdinterleave(struct ccd_s *cs) 32513041Sasami{ 32651601Sdillon struct ccdcinfo *ci, *smallci; 32751601Sdillon struct ccdiinfo *ii; 32851601Sdillon daddr_t bn, lbn; 32951601Sdillon int ix; 330119300Sps daddr_t size; 33113041Sasami 33251601Sdillon 33313041Sasami /* 33451601Sdillon * Allocate an interleave table. The worst case occurs when each 33551601Sdillon * of N disks is of a different size, resulting in N interleave 33651601Sdillon * tables. 33751601Sdillon * 33813041Sasami * Chances are this is too big, but we don't care. 33913041Sasami */ 340115849Sphk size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo); 341115849Sphk cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO); 34213041Sasami 34313041Sasami /* 34413041Sasami * Trivial case: no interleave (actually interleave of disk size). 34513041Sasami * Each table entry represents a single component in its entirety. 34651601Sdillon * 347109421Sphk * An interleave of 0 may not be used with a mirror setup. 34813041Sasami */ 34913041Sasami if (cs->sc_ileave == 0) { 35013041Sasami bn = 0; 35113041Sasami ii = cs->sc_itable; 35213041Sasami 353115849Sphk for (ix = 0; ix < cs->sc_ndisks; ix++) { 35413041Sasami /* Allocate space for ii_index. */ 355115849Sphk ii->ii_index = g_malloc(sizeof(int), M_WAITOK); 35613041Sasami ii->ii_ndisk = 1; 35713041Sasami ii->ii_startblk = bn; 35813041Sasami ii->ii_startoff = 0; 35913041Sasami ii->ii_index[0] = ix; 36013041Sasami bn += cs->sc_cinfo[ix].ci_size; 36113041Sasami ii++; 36213041Sasami } 36313041Sasami ii->ii_ndisk = 0; 36413041Sasami return; 36513041Sasami } 36613041Sasami 36713041Sasami /* 36813041Sasami * The following isn't fast or pretty; it doesn't have to be. 36913041Sasami */ 37013041Sasami size = 0; 37113041Sasami bn = lbn = 0; 37213041Sasami for (ii = cs->sc_itable; ; ii++) { 37351601Sdillon /* 37451601Sdillon * Allocate space for ii_index. We might allocate more then 37551601Sdillon * we use. 37651601Sdillon */ 377115849Sphk ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks), 378115849Sphk M_WAITOK); 37913041Sasami 38013041Sasami /* 38113041Sasami * Locate the smallest of the remaining components 38213041Sasami */ 38313041Sasami smallci = NULL; 384115849Sphk for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; 38551601Sdillon ci++) { 38613041Sasami if (ci->ci_size > size && 38713041Sasami (smallci == NULL || 38851601Sdillon ci->ci_size < smallci->ci_size)) { 38913041Sasami smallci = ci; 39051601Sdillon } 39151601Sdillon } 39213041Sasami 39313041Sasami /* 39413041Sasami * Nobody left, all done 39513041Sasami */ 39613041Sasami if (smallci == NULL) { 39713041Sasami ii->ii_ndisk = 0; 398115849Sphk g_free(ii->ii_index); 399115849Sphk ii->ii_index = NULL; 40013041Sasami break; 40113041Sasami } 40213041Sasami 40313041Sasami /* 40451601Sdillon * Record starting logical block using an sc_ileave blocksize. 40513041Sasami */ 40613041Sasami ii->ii_startblk = bn / cs->sc_ileave; 40751601Sdillon 40851601Sdillon /* 409115849Sphk * Record starting component block using an sc_ileave 41051601Sdillon * blocksize. This value is relative to the beginning of 41151601Sdillon * a component disk. 41251601Sdillon */ 41313041Sasami ii->ii_startoff = lbn; 41413041Sasami 41513041Sasami /* 41613041Sasami * Determine how many disks take part in this interleave 41713041Sasami * and record their indices. 41813041Sasami */ 41913041Sasami ix = 0; 42051601Sdillon for (ci = cs->sc_cinfo; 421115849Sphk ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) { 42251601Sdillon if (ci->ci_size >= smallci->ci_size) { 42313041Sasami ii->ii_index[ix++] = ci - cs->sc_cinfo; 42451601Sdillon } 42551601Sdillon } 42613041Sasami ii->ii_ndisk = ix; 42713041Sasami bn += ix * (smallci->ci_size - size); 42813041Sasami lbn = smallci->ci_size / cs->sc_ileave; 42913041Sasami size = smallci->ci_size; 43013041Sasami } 43113041Sasami} 43213041Sasami 43330688Sphkstatic void 434115849Sphkg_ccd_start(struct bio *bp) 43513041Sasami{ 43651601Sdillon long bcount, rcount; 437115849Sphk struct bio *cbp[2]; 43813041Sasami caddr_t addr; 43913041Sasami daddr_t bn; 440109535Sphk int err; 441115849Sphk struct ccd_s *cs; 44213041Sasami 443115849Sphk cs = bp->bio_to->geom->softc; 444115849Sphk 44513041Sasami /* 446119296Sphk * Block all GETATTR requests, we wouldn't know which of our 447119296Sphk * subdevices we should ship it off to. 448119296Sphk * XXX: this may not be the right policy. 449119296Sphk */ 450119296Sphk if(bp->bio_cmd == BIO_GETATTR) { 451119296Sphk g_io_deliver(bp, EINVAL); 452119296Sphk return; 453119296Sphk } 454119296Sphk 455119296Sphk /* 45613041Sasami * Translate the partition-relative block number to an absolute. 45713041Sasami */ 458115849Sphk bn = bp->bio_offset / cs->sc_secsize; 45913041Sasami 46013041Sasami /* 46113041Sasami * Allocate component buffers and fire off the requests 46213041Sasami */ 46359841Sphk addr = bp->bio_data; 464115849Sphk for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) { 465109535Sphk err = ccdbuffer(cbp, cs, bp, bn, addr, bcount); 466109535Sphk if (err) { 467116107Sphk bp->bio_completed += bcount; 468122550Sphk if (bp->bio_error == 0) 469113464Sphk bp->bio_error = err; 470116107Sphk if (bp->bio_completed == bp->bio_length) 471116107Sphk g_io_deliver(bp, bp->bio_error); 472109535Sphk return; 473109535Sphk } 474115849Sphk rcount = cbp[0]->bio_length; 47551601Sdillon 476115849Sphk if (cs->sc_flags & CCDF_MIRROR) { 47751601Sdillon /* 47851601Sdillon * Mirroring. Writes go to both disks, reads are 47951601Sdillon * taken from whichever disk seems most appropriate. 48051601Sdillon * 48151601Sdillon * We attempt to localize reads to the disk whos arm 48251601Sdillon * is nearest the read request. We ignore seeks due 48351601Sdillon * to writes when making this determination and we 48451601Sdillon * also try to avoid hogging. 48551601Sdillon */ 486115849Sphk if (cbp[0]->bio_cmd != BIO_READ) { 487115849Sphk g_io_request(cbp[0], cbp[0]->bio_from); 488115849Sphk g_io_request(cbp[1], cbp[1]->bio_from); 48951601Sdillon } else { 49051601Sdillon int pick = cs->sc_pick; 49151601Sdillon daddr_t range = cs->sc_size / 16; 49251601Sdillon 49351601Sdillon if (bn < cs->sc_blk[pick] - range || 49451601Sdillon bn > cs->sc_blk[pick] + range 49551601Sdillon ) { 49651601Sdillon cs->sc_pick = pick = 1 - pick; 49751601Sdillon } 49851601Sdillon cs->sc_blk[pick] = bn + btodb(rcount); 499115849Sphk g_io_request(cbp[pick], cbp[pick]->bio_from); 50051601Sdillon } 50151601Sdillon } else { 50251601Sdillon /* 50351601Sdillon * Not mirroring 50451601Sdillon */ 505115849Sphk g_io_request(cbp[0], cbp[0]->bio_from); 50613775Sasami } 50713041Sasami bn += btodb(rcount); 50813041Sasami addr += rcount; 50913041Sasami } 51013041Sasami} 51113041Sasami 51213041Sasami/* 51313041Sasami * Build a component buffer header. 51413041Sasami */ 515109535Sphkstatic int 516115849Sphkccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 51713041Sasami{ 518116107Sphk struct ccdcinfo *ci, *ci2 = NULL; 519115849Sphk struct bio *cbp; 52051601Sdillon daddr_t cbn, cboff; 52151601Sdillon off_t cbc; 52213041Sasami 52313041Sasami /* 52413041Sasami * Determine which component bn falls in. 52513041Sasami */ 52613041Sasami cbn = bn; 52713041Sasami cboff = 0; 52813041Sasami 52913041Sasami if (cs->sc_ileave == 0) { 53051601Sdillon /* 53151601Sdillon * Serially concatenated and neither a mirror nor a parity 53251601Sdillon * config. This is a special case. 53351601Sdillon */ 53451601Sdillon daddr_t sblk; 53513041Sasami 53613041Sasami sblk = 0; 53713041Sasami for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 53813041Sasami sblk += ci->ci_size; 53913041Sasami cbn -= sblk; 54051601Sdillon } else { 54151601Sdillon struct ccdiinfo *ii; 54213041Sasami int ccdisk, off; 54313041Sasami 54451601Sdillon /* 54551601Sdillon * Calculate cbn, the logical superblock (sc_ileave chunks), 54651601Sdillon * and cboff, a normal block offset (DEV_BSIZE chunks) relative 54751601Sdillon * to cbn. 54851601Sdillon */ 54951601Sdillon cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 55051601Sdillon cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 55151601Sdillon 55251601Sdillon /* 55351601Sdillon * Figure out which interleave table to use. 55451601Sdillon */ 55551601Sdillon for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 55613041Sasami if (ii->ii_startblk > cbn) 55713041Sasami break; 55851601Sdillon } 55913041Sasami ii--; 56051601Sdillon 56151601Sdillon /* 56251601Sdillon * off is the logical superblock relative to the beginning 56351601Sdillon * of this interleave block. 56451601Sdillon */ 56513041Sasami off = cbn - ii->ii_startblk; 56651601Sdillon 56751601Sdillon /* 56851601Sdillon * We must calculate which disk component to use (ccdisk), 56951601Sdillon * and recalculate cbn to be the superblock relative to 57051601Sdillon * the beginning of the component. This is typically done by 57151601Sdillon * adding 'off' and ii->ii_startoff together. However, 'off' 57251601Sdillon * must typically be divided by the number of components in 57351601Sdillon * this interleave array to be properly convert it from a 57451601Sdillon * CCD-relative logical superblock number to a 57551601Sdillon * component-relative superblock number. 57651601Sdillon */ 57713041Sasami if (ii->ii_ndisk == 1) { 57851601Sdillon /* 57951601Sdillon * When we have just one disk, it can't be a mirror 58051601Sdillon * or a parity config. 58151601Sdillon */ 58213041Sasami ccdisk = ii->ii_index[0]; 58313041Sasami cbn = ii->ii_startoff + off; 58413041Sasami } else { 585115849Sphk if (cs->sc_flags & CCDF_MIRROR) { 58651601Sdillon /* 58751601Sdillon * We have forced a uniform mapping, resulting 58851601Sdillon * in a single interleave array. We double 58951601Sdillon * up on the first half of the available 59051601Sdillon * components and our mirror is in the second 59151601Sdillon * half. This only works with a single 59251601Sdillon * interleave array because doubling up 59351601Sdillon * doubles the number of sectors, so there 59451601Sdillon * cannot be another interleave array because 59551601Sdillon * the next interleave array's calculations 59651601Sdillon * would be off. 59751601Sdillon */ 59851601Sdillon int ndisk2 = ii->ii_ndisk / 2; 59951601Sdillon ccdisk = ii->ii_index[off % ndisk2]; 60051601Sdillon cbn = ii->ii_startoff + off / ndisk2; 60151601Sdillon ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 60251601Sdillon } else { 60313173Sasami ccdisk = ii->ii_index[off % ii->ii_ndisk]; 60413173Sasami cbn = ii->ii_startoff + off / ii->ii_ndisk; 60513173Sasami } 60613041Sasami } 60751601Sdillon 60851601Sdillon ci = &cs->sc_cinfo[ccdisk]; 60951601Sdillon 61051601Sdillon /* 61151601Sdillon * Convert cbn from a superblock to a normal block so it 61251601Sdillon * can be used to calculate (along with cboff) the normal 61351601Sdillon * block index into this particular disk. 61451601Sdillon */ 61513041Sasami cbn *= cs->sc_ileave; 61613041Sasami } 61713041Sasami 61813041Sasami /* 61913041Sasami * Fill in the component buf structure. 62013041Sasami */ 621115849Sphk cbp = g_clone_bio(bp); 622116107Sphk if (cbp == NULL) 623116107Sphk return (ENOMEM); 624115849Sphk cbp->bio_done = g_std_done; 625157740Scracauer cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset); 626115849Sphk cbp->bio_data = addr; 62713041Sasami if (cs->sc_ileave == 0) 62844671Sdg cbc = dbtob((off_t)(ci->ci_size - cbn)); 62913041Sasami else 63044671Sdg cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 631115849Sphk cbp->bio_length = (cbc < bcount) ? cbc : bcount; 63221470Sdyson 633115849Sphk cbp->bio_from = ci->ci_consumer; 63413775Sasami cb[0] = cbp; 63551601Sdillon 636115849Sphk if (cs->sc_flags & CCDF_MIRROR) { 637115849Sphk cbp = g_clone_bio(bp); 638116107Sphk if (cbp == NULL) 639116107Sphk return (ENOMEM); 640115849Sphk cbp->bio_done = cb[0]->bio_done = ccdiodone; 641115849Sphk cbp->bio_offset = cb[0]->bio_offset; 642115849Sphk cbp->bio_data = cb[0]->bio_data; 643115849Sphk cbp->bio_length = cb[0]->bio_length; 644115849Sphk cbp->bio_from = ci2->ci_consumer; 645115849Sphk cbp->bio_caller1 = cb[0]; 646115849Sphk cb[0]->bio_caller1 = cbp; 64713775Sasami cb[1] = cbp; 64813775Sasami } 649109535Sphk return (0); 65013041Sasami} 65113041Sasami 65213041Sasami/* 653115953Sphk * Called only for mirrored operations. 65413041Sasami */ 65530688Sphkstatic void 656115849Sphkccdiodone(struct bio *cbp) 65713041Sasami{ 658115849Sphk struct bio *mbp, *pbp; 65913041Sasami 660115849Sphk mbp = cbp->bio_caller1; 661115849Sphk pbp = cbp->bio_parent; 66213041Sasami 663115849Sphk if (pbp->bio_cmd == BIO_READ) { 664115849Sphk if (cbp->bio_error == 0) { 665115953Sphk /* We will not be needing the partner bio */ 666115953Sphk if (mbp != NULL) { 667115953Sphk pbp->bio_inbed++; 668115953Sphk g_destroy_bio(mbp); 669115953Sphk } 670115849Sphk g_std_done(cbp); 671115849Sphk return; 67214821Sasami } 673115849Sphk if (mbp != NULL) { 674115953Sphk /* Try partner the bio instead */ 675115849Sphk mbp->bio_caller1 = NULL; 676115849Sphk pbp->bio_inbed++; 677115849Sphk g_destroy_bio(cbp); 678115849Sphk g_io_request(mbp, mbp->bio_from); 679115953Sphk /* 680115953Sphk * XXX: If this comes back OK, we should actually 681115953Sphk * try to write the good data on the failed mirror 682115953Sphk */ 683115849Sphk return; 684115849Sphk } 685115849Sphk g_std_done(cbp); 686118182Sphk return; 68751601Sdillon } 688115849Sphk if (mbp != NULL) { 689115849Sphk mbp->bio_caller1 = NULL; 690115849Sphk pbp->bio_inbed++; 691115953Sphk if (cbp->bio_error != 0 && pbp->bio_error == 0) 692115849Sphk pbp->bio_error = cbp->bio_error; 693118182Sphk g_destroy_bio(cbp); 694115849Sphk return; 695109473Sphk } 696115849Sphk g_std_done(cbp); 69713041Sasami} 69813041Sasami 699115849Sphkstatic void 700115849Sphkg_ccd_create(struct gctl_req *req, struct g_class *mp) 70113041Sasami{ 702115849Sphk int *unit, *ileave, *nprovider; 703115849Sphk struct g_geom *gp; 704115849Sphk struct g_consumer *cp; 705115849Sphk struct g_provider *pp; 706115849Sphk struct ccd_s *sc; 707115849Sphk struct sbuf *sb; 708115849Sphk char buf[20]; 709115849Sphk int i, error; 710109421Sphk 711115849Sphk g_topology_assert(); 712115849Sphk unit = gctl_get_paraml(req, "unit", sizeof (*unit)); 713185318Slulf if (unit == NULL) { 714185318Slulf gctl_error(req, "unit parameter not given"); 715185318Slulf return; 716185318Slulf } 717115849Sphk ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave)); 718185318Slulf if (ileave == NULL) { 719185318Slulf gctl_error(req, "ileave parameter not given"); 720185318Slulf return; 721185318Slulf } 722115849Sphk nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider)); 723185318Slulf if (nprovider == NULL) { 724185318Slulf gctl_error(req, "nprovider parameter not given"); 725185318Slulf return; 726185318Slulf } 727109421Sphk 728115849Sphk /* Check for duplicate unit */ 729115849Sphk LIST_FOREACH(gp, &mp->geom, geom) { 730115849Sphk sc = gp->softc; 731119295Sphk if (sc != NULL && sc->sc_unit == *unit) { 732115849Sphk gctl_error(req, "Unit %d already configured", *unit); 733115849Sphk return; 73413784Sasami } 735115849Sphk } 73613041Sasami 737115849Sphk if (*nprovider <= 0) { 738115849Sphk gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider); 739115849Sphk return; 740115849Sphk } 74113041Sasami 742115849Sphk /* Check all providers are valid */ 743115849Sphk for (i = 0; i < *nprovider; i++) { 744115849Sphk sprintf(buf, "provider%d", i); 745115849Sphk pp = gctl_get_provider(req, buf); 746115849Sphk if (pp == NULL) 747115849Sphk return; 748115849Sphk } 74913041Sasami 750115849Sphk gp = g_new_geomf(mp, "ccd%d", *unit); 751115849Sphk sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO); 752115849Sphk gp->softc = sc; 753115849Sphk sc->sc_ndisks = *nprovider; 75413041Sasami 755115849Sphk /* Allocate space for the component info. */ 756115849Sphk sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo), 757115849Sphk M_WAITOK | M_ZERO); 75813041Sasami 759115849Sphk /* Create consumers and attach to all providers */ 760115849Sphk for (i = 0; i < *nprovider; i++) { 761115849Sphk sprintf(buf, "provider%d", i); 762115849Sphk pp = gctl_get_provider(req, buf); 763115849Sphk cp = g_new_consumer(gp); 764115849Sphk error = g_attach(cp, pp); 765115849Sphk KASSERT(error == 0, ("attach to %s failed", pp->name)); 766115849Sphk sc->sc_cinfo[i].ci_consumer = cp; 767115849Sphk sc->sc_cinfo[i].ci_provider = pp; 768115849Sphk } 76913041Sasami 770115849Sphk sc->sc_unit = *unit; 771115849Sphk sc->sc_ileave = *ileave; 77213041Sasami 773157740Scracauer if (gctl_get_param(req, "no_offset", NULL)) 774157740Scracauer sc->sc_flags |= CCDF_NO_OFFSET; 775157740Scracauer if (gctl_get_param(req, "linux", NULL)) 776157740Scracauer sc->sc_flags |= CCDF_LINUX; 777157740Scracauer 778115849Sphk if (gctl_get_param(req, "uniform", NULL)) 779115849Sphk sc->sc_flags |= CCDF_UNIFORM; 780115849Sphk if (gctl_get_param(req, "mirror", NULL)) 781115849Sphk sc->sc_flags |= CCDF_MIRROR; 78213041Sasami 783115849Sphk if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) { 784115849Sphk printf("%s: disabling mirror, interleave is 0\n", gp->name); 785115849Sphk sc->sc_flags &= ~(CCDF_MIRROR); 78613041Sasami } 78713041Sasami 788115849Sphk if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) { 789115849Sphk printf("%s: mirror/parity forces uniform flag\n", gp->name); 790115849Sphk sc->sc_flags |= CCDF_UNIFORM; 79113041Sasami } 79213041Sasami 793115849Sphk error = ccdinit(req, sc); 794115849Sphk if (error != 0) { 795115849Sphk g_ccd_freesc(sc); 796115849Sphk gp->softc = NULL; 797115849Sphk g_wither_geom(gp, ENXIO); 798115849Sphk return; 79913041Sasami } 80013041Sasami 801115849Sphk pp = g_new_providerf(gp, "%s", gp->name); 802115849Sphk pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize; 803115849Sphk pp->sectorsize = sc->sc_secsize; 804115849Sphk g_error_provider(pp, 0); 80513041Sasami 806181463Sdes sb = sbuf_new_auto(); 807115849Sphk sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider); 808115849Sphk for (i = 0; i < *nprovider; i++) { 809115849Sphk sbuf_printf(sb, "%s%s", 810115849Sphk i == 0 ? "(" : ", ", 811115849Sphk sc->sc_cinfo[i].ci_provider->name); 81213041Sasami } 813115849Sphk sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE); 814115849Sphk if (sc->sc_ileave != 0) 815115849Sphk sbuf_printf(sb, "interleaved at %d blocks\n", 816115849Sphk sc->sc_ileave); 817115849Sphk else 818115849Sphk sbuf_printf(sb, "concatenated\n"); 819115849Sphk sbuf_finish(sb); 820157581Smarcel gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); 821115849Sphk sbuf_delete(sb); 82213041Sasami} 82313041Sasami 824119299Sphkstatic int 825119299Sphkg_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) 82613041Sasami{ 827115849Sphk struct g_provider *pp; 828115849Sphk struct ccd_s *sc; 82913041Sasami 830115849Sphk g_topology_assert(); 831115849Sphk sc = gp->softc; 832115849Sphk pp = LIST_FIRST(&gp->provider); 833119299Sphk if (sc == NULL || pp == NULL) 834119299Sphk return (EBUSY); 835115849Sphk if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { 836115849Sphk gctl_error(req, "%s is open(r%dw%de%d)", gp->name, 837115849Sphk pp->acr, pp->acw, pp->ace); 838119299Sphk return (EBUSY); 83913041Sasami } 840115849Sphk g_ccd_freesc(sc); 841115849Sphk gp->softc = NULL; 842115849Sphk g_wither_geom(gp, ENXIO); 843119299Sphk return (0); 84413041Sasami} 845115729Sphk 846115849Sphkstatic void 847115849Sphkg_ccd_list(struct gctl_req *req, struct g_class *mp) 848115729Sphk{ 849115729Sphk struct sbuf *sb; 850115729Sphk struct ccd_s *cs; 851115849Sphk struct g_geom *gp; 852115849Sphk int i, unit, *up; 853115729Sphk 854185318Slulf up = gctl_get_paraml(req, "unit", sizeof (*up)); 855185318Slulf if (up == NULL) { 856185318Slulf gctl_error(req, "unit parameter not given"); 857185318Slulf return; 858185318Slulf } 859115849Sphk unit = *up; 860181463Sdes sb = sbuf_new_auto(); 861115849Sphk LIST_FOREACH(gp, &mp->geom, geom) { 862115849Sphk cs = gp->softc; 863119295Sphk if (cs == NULL || (unit >= 0 && unit != cs->sc_unit)) 864115731Sphk continue; 865115729Sphk sbuf_printf(sb, "ccd%d\t\t%d\t%d\t", 866115849Sphk cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK); 867115729Sphk 868115849Sphk for (i = 0; i < cs->sc_ndisks; ++i) { 869115849Sphk sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ", 870115849Sphk cs->sc_cinfo[i].ci_provider->name); 871115729Sphk } 872115729Sphk sbuf_printf(sb, "\n"); 873115729Sphk } 874115729Sphk sbuf_finish(sb); 875157581Smarcel gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); 876115849Sphk sbuf_delete(sb); 877115729Sphk} 878115729Sphk 879115729Sphkstatic void 880115729Sphkg_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb) 881115729Sphk{ 882119299Sphk struct g_geom *gp; 883115729Sphk 884115729Sphk g_topology_assert(); 885115729Sphk if (!strcmp(verb, "create geom")) { 886115849Sphk g_ccd_create(req, mp); 887115729Sphk } else if (!strcmp(verb, "destroy geom")) { 888119299Sphk gp = gctl_get_geom(req, mp, "geom"); 889119299Sphk if (gp != NULL) 890119299Sphk g_ccd_destroy_geom(req, mp, gp); 891115729Sphk } else if (!strcmp(verb, "list")) { 892115849Sphk g_ccd_list(req, mp); 893115729Sphk } else { 894115729Sphk gctl_error(req, "unknown verb"); 895115729Sphk } 896115729Sphk} 897115729Sphk 898115729Sphkstatic struct g_class g_ccd_class = { 899115729Sphk .name = "CCD", 900133318Sphk .version = G_VERSION, 901115729Sphk .ctlreq = g_ccd_config, 902119299Sphk .destroy_geom = g_ccd_destroy_geom, 903133314Sphk .start = g_ccd_start, 904133314Sphk .orphan = g_ccd_orphan, 905133314Sphk .access = g_ccd_access, 906115729Sphk}; 907115729Sphk 908115729SphkDECLARE_GEOM_CLASS(g_ccd_class, g_ccd); 909