1219974Smav/*- 2219974Smav * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3219974Smav * All rights reserved. 4219974Smav * 5219974Smav * Redistribution and use in source and binary forms, with or without 6219974Smav * modification, are permitted provided that the following conditions 7219974Smav * are met: 8219974Smav * 1. Redistributions of source code must retain the above copyright 9219974Smav * notice, this list of conditions and the following disclaimer. 10219974Smav * 2. Redistributions in binary form must reproduce the above copyright 11219974Smav * notice, this list of conditions and the following disclaimer in the 12219974Smav * documentation and/or other materials provided with the distribution. 13219974Smav * 14219974Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15219974Smav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219974Smav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219974Smav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18219974Smav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219974Smav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219974Smav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219974Smav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219974Smav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219974Smav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219974Smav * SUCH DAMAGE. 25219974Smav */ 26219974Smav 27219974Smav#include <sys/cdefs.h> 28219974Smav__FBSDID("$FreeBSD$"); 29219974Smav 30219974Smav#include <sys/param.h> 31219974Smav#include <sys/bio.h> 32219974Smav#include <sys/endian.h> 33219974Smav#include <sys/kernel.h> 34219974Smav#include <sys/kobj.h> 35219974Smav#include <sys/lock.h> 36219974Smav#include <sys/malloc.h> 37219974Smav#include <sys/mutex.h> 38219974Smav#include <sys/systm.h> 39219974Smav#include <geom/geom.h> 40219974Smav#include "geom/raid/g_raid.h" 41219974Smav#include "g_raid_tr_if.h" 42219974Smav 43219974Smavstatic MALLOC_DEFINE(M_TR_RAID0, "tr_raid0_data", "GEOM_RAID RAID0 data"); 44219974Smav 45219974Smavstruct g_raid_tr_raid0_object { 46219974Smav struct g_raid_tr_object trso_base; 47219974Smav int trso_starting; 48219974Smav int trso_stopped; 49219974Smav}; 50219974Smav 51219974Smavstatic g_raid_tr_taste_t g_raid_tr_taste_raid0; 52219974Smavstatic g_raid_tr_event_t g_raid_tr_event_raid0; 53219974Smavstatic g_raid_tr_start_t g_raid_tr_start_raid0; 54219974Smavstatic g_raid_tr_stop_t g_raid_tr_stop_raid0; 55219974Smavstatic g_raid_tr_iostart_t g_raid_tr_iostart_raid0; 56219974Smavstatic g_raid_tr_iodone_t g_raid_tr_iodone_raid0; 57219974Smavstatic g_raid_tr_kerneldump_t g_raid_tr_kerneldump_raid0; 58219974Smavstatic g_raid_tr_free_t g_raid_tr_free_raid0; 59219974Smav 60219974Smavstatic kobj_method_t g_raid_tr_raid0_methods[] = { 61219974Smav KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_raid0), 62219974Smav KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_raid0), 63219974Smav KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_raid0), 64219974Smav KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_raid0), 65219974Smav KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_raid0), 66219974Smav KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_raid0), 67219974Smav KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_raid0), 68219974Smav KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_raid0), 69219974Smav { 0, 0 } 70219974Smav}; 71219974Smav 72219974Smavstatic struct g_raid_tr_class g_raid_tr_raid0_class = { 73219974Smav "RAID0", 74219974Smav g_raid_tr_raid0_methods, 75219974Smav sizeof(struct g_raid_tr_raid0_object), 76240465Smav .trc_enable = 1, 77260385Sscottl .trc_priority = 100, 78260385Sscottl .trc_accept_unmapped = 1 79219974Smav}; 80219974Smav 81219974Smavstatic int 82219974Smavg_raid_tr_taste_raid0(struct g_raid_tr_object *tr, struct g_raid_volume *volume) 83219974Smav{ 84219974Smav struct g_raid_tr_raid0_object *trs; 85219974Smav 86219974Smav trs = (struct g_raid_tr_raid0_object *)tr; 87219974Smav if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_RAID0 || 88219974Smav tr->tro_volume->v_raid_level_qualifier != G_RAID_VOLUME_RLQ_NONE) 89219974Smav return (G_RAID_TR_TASTE_FAIL); 90219974Smav trs->trso_starting = 1; 91219974Smav return (G_RAID_TR_TASTE_SUCCEED); 92219974Smav} 93219974Smav 94219974Smavstatic int 95219974Smavg_raid_tr_update_state_raid0(struct g_raid_volume *vol) 96219974Smav{ 97219974Smav struct g_raid_tr_raid0_object *trs; 98219974Smav struct g_raid_softc *sc; 99219974Smav u_int s; 100219974Smav int n, f; 101219974Smav 102219974Smav sc = vol->v_softc; 103219974Smav trs = (struct g_raid_tr_raid0_object *)vol->v_tr; 104219974Smav if (trs->trso_stopped) 105219974Smav s = G_RAID_VOLUME_S_STOPPED; 106219974Smav else if (trs->trso_starting) 107219974Smav s = G_RAID_VOLUME_S_STARTING; 108219974Smav else { 109219974Smav n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); 110219974Smav f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); 111219974Smav if (n + f == vol->v_disks_count) { 112219974Smav if (f == 0) 113219974Smav s = G_RAID_VOLUME_S_OPTIMAL; 114219974Smav else 115219974Smav s = G_RAID_VOLUME_S_SUBOPTIMAL; 116219974Smav } else 117219974Smav s = G_RAID_VOLUME_S_BROKEN; 118219974Smav } 119219974Smav if (s != vol->v_state) { 120219974Smav g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 121219974Smav G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, 122219974Smav G_RAID_EVENT_VOLUME); 123219974Smav g_raid_change_volume_state(vol, s); 124219974Smav if (!trs->trso_starting && !trs->trso_stopped) 125219974Smav g_raid_write_metadata(sc, vol, NULL, NULL); 126219974Smav } 127219974Smav return (0); 128219974Smav} 129219974Smav 130219974Smavstatic int 131219974Smavg_raid_tr_event_raid0(struct g_raid_tr_object *tr, 132219974Smav struct g_raid_subdisk *sd, u_int event) 133219974Smav{ 134219974Smav struct g_raid_tr_raid0_object *trs; 135219974Smav struct g_raid_softc *sc; 136219974Smav struct g_raid_volume *vol; 137219974Smav int state; 138219974Smav 139219974Smav trs = (struct g_raid_tr_raid0_object *)tr; 140219974Smav vol = tr->tro_volume; 141219974Smav sc = vol->v_softc; 142219974Smav 143219974Smav state = sd->sd_state; 144219974Smav if (state != G_RAID_SUBDISK_S_NONE && 145219974Smav state != G_RAID_SUBDISK_S_FAILED && 146219974Smav state != G_RAID_SUBDISK_S_ACTIVE) { 147219974Smav G_RAID_DEBUG1(1, sc, 148219974Smav "Promote subdisk %s:%d from %s to ACTIVE.", 149219974Smav vol->v_name, sd->sd_pos, 150219974Smav g_raid_subdisk_state2str(sd->sd_state)); 151219974Smav g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); 152219974Smav } 153219974Smav if (state != sd->sd_state && 154219974Smav !trs->trso_starting && !trs->trso_stopped) 155219974Smav g_raid_write_metadata(sc, vol, sd, NULL); 156219974Smav g_raid_tr_update_state_raid0(vol); 157219974Smav return (0); 158219974Smav} 159219974Smav 160219974Smavstatic int 161219974Smavg_raid_tr_start_raid0(struct g_raid_tr_object *tr) 162219974Smav{ 163219974Smav struct g_raid_tr_raid0_object *trs; 164219974Smav struct g_raid_volume *vol; 165219974Smav 166219974Smav trs = (struct g_raid_tr_raid0_object *)tr; 167219974Smav vol = tr->tro_volume; 168219974Smav trs->trso_starting = 0; 169219974Smav g_raid_tr_update_state_raid0(vol); 170219974Smav return (0); 171219974Smav} 172219974Smav 173219974Smavstatic int 174219974Smavg_raid_tr_stop_raid0(struct g_raid_tr_object *tr) 175219974Smav{ 176219974Smav struct g_raid_tr_raid0_object *trs; 177219974Smav struct g_raid_volume *vol; 178219974Smav 179219974Smav trs = (struct g_raid_tr_raid0_object *)tr; 180219974Smav vol = tr->tro_volume; 181219974Smav trs->trso_starting = 0; 182219974Smav trs->trso_stopped = 1; 183219974Smav g_raid_tr_update_state_raid0(vol); 184219974Smav return (0); 185219974Smav} 186219974Smav 187219974Smavstatic void 188219974Smavg_raid_tr_iostart_raid0(struct g_raid_tr_object *tr, struct bio *bp) 189219974Smav{ 190219974Smav struct g_raid_volume *vol; 191219974Smav struct g_raid_subdisk *sd; 192219974Smav struct bio_queue_head queue; 193219974Smav struct bio *cbp; 194219974Smav char *addr; 195219974Smav off_t offset, start, length, nstripe, remain; 196219974Smav u_int no, strip_size; 197219974Smav 198219974Smav vol = tr->tro_volume; 199219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && 200219974Smav vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { 201219974Smav g_raid_iodone(bp, EIO); 202219974Smav return; 203219974Smav } 204219974Smav if (bp->bio_cmd == BIO_FLUSH) { 205219974Smav g_raid_tr_flush_common(tr, bp); 206219974Smav return; 207219974Smav } 208260385Sscottl if ((bp->bio_flags & BIO_UNMAPPED) != 0) 209260385Sscottl addr = NULL; 210260385Sscottl else 211260385Sscottl addr = bp->bio_data; 212219974Smav strip_size = vol->v_strip_size; 213219974Smav 214219974Smav /* Stripe number. */ 215219974Smav nstripe = bp->bio_offset / strip_size; 216219974Smav /* Start position in stripe. */ 217219974Smav start = bp->bio_offset % strip_size; 218219974Smav /* Disk number. */ 219219974Smav no = nstripe % vol->v_disks_count; 220219974Smav /* Stripe start position in disk. */ 221219974Smav offset = (nstripe / vol->v_disks_count) * strip_size; 222219974Smav /* Length of data to operate. */ 223219974Smav remain = bp->bio_length; 224219974Smav 225219974Smav bioq_init(&queue); 226219974Smav do { 227219974Smav length = MIN(strip_size - start, remain); 228219974Smav cbp = g_clone_bio(bp); 229219974Smav if (cbp == NULL) 230219974Smav goto failure; 231219974Smav cbp->bio_offset = offset + start; 232219974Smav cbp->bio_length = length; 233260385Sscottl if ((bp->bio_flags & BIO_UNMAPPED) != 0 && 234260385Sscottl bp->bio_cmd != BIO_DELETE) { 235260385Sscottl cbp->bio_ma_offset += (uintptr_t)addr; 236260385Sscottl cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 237260385Sscottl cbp->bio_ma_offset %= PAGE_SIZE; 238260385Sscottl cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 239260385Sscottl cbp->bio_length) / PAGE_SIZE; 240260385Sscottl } else 241260385Sscottl cbp->bio_data = addr; 242219974Smav cbp->bio_caller1 = &vol->v_subdisks[no]; 243219974Smav bioq_insert_tail(&queue, cbp); 244219974Smav if (++no >= vol->v_disks_count) { 245219974Smav no = 0; 246219974Smav offset += strip_size; 247219974Smav } 248219974Smav remain -= length; 249242323Smav if (bp->bio_cmd != BIO_DELETE) 250242323Smav addr += length; 251219974Smav start = 0; 252219974Smav } while (remain > 0); 253260385Sscottl while ((cbp = bioq_takefirst(&queue)) != NULL) { 254219974Smav sd = cbp->bio_caller1; 255219974Smav cbp->bio_caller1 = NULL; 256219974Smav g_raid_subdisk_iostart(sd, cbp); 257219974Smav } 258219974Smav return; 259219974Smavfailure: 260260385Sscottl while ((cbp = bioq_takefirst(&queue)) != NULL) 261219974Smav g_destroy_bio(cbp); 262219974Smav if (bp->bio_error == 0) 263219974Smav bp->bio_error = ENOMEM; 264219974Smav g_raid_iodone(bp, bp->bio_error); 265219974Smav} 266219974Smav 267219974Smavstatic int 268219974Smavg_raid_tr_kerneldump_raid0(struct g_raid_tr_object *tr, 269219974Smav void *virtual, vm_offset_t physical, off_t boffset, size_t blength) 270219974Smav{ 271219974Smav struct g_raid_volume *vol; 272219974Smav char *addr; 273219974Smav off_t offset, start, length, nstripe, remain; 274219974Smav u_int no, strip_size; 275219974Smav int error; 276219974Smav 277219974Smav vol = tr->tro_volume; 278219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 279219974Smav return (ENXIO); 280219974Smav addr = virtual; 281219974Smav strip_size = vol->v_strip_size; 282219974Smav 283219974Smav /* Stripe number. */ 284219974Smav nstripe = boffset / strip_size; 285219974Smav /* Start position in stripe. */ 286219974Smav start = boffset % strip_size; 287219974Smav /* Disk number. */ 288219974Smav no = nstripe % vol->v_disks_count; 289219974Smav /* Stripe tart position in disk. */ 290219974Smav offset = (nstripe / vol->v_disks_count) * strip_size; 291219974Smav /* Length of data to operate. */ 292219974Smav remain = blength; 293219974Smav 294219974Smav do { 295219974Smav length = MIN(strip_size - start, remain); 296219974Smav error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], 297219974Smav addr, 0, offset + start, length); 298219974Smav if (error != 0) 299219974Smav return (error); 300219974Smav if (++no >= vol->v_disks_count) { 301219974Smav no = 0; 302219974Smav offset += strip_size; 303219974Smav } 304219974Smav remain -= length; 305219974Smav addr += length; 306219974Smav start = 0; 307219974Smav } while (remain > 0); 308219974Smav return (0); 309219974Smav} 310219974Smav 311219974Smavstatic void 312219974Smavg_raid_tr_iodone_raid0(struct g_raid_tr_object *tr, 313219974Smav struct g_raid_subdisk *sd,struct bio *bp) 314219974Smav{ 315219974Smav struct bio *pbp; 316219974Smav 317219974Smav pbp = bp->bio_parent; 318219974Smav if (pbp->bio_error == 0) 319219974Smav pbp->bio_error = bp->bio_error; 320219974Smav g_destroy_bio(bp); 321219974Smav pbp->bio_inbed++; 322219974Smav if (pbp->bio_children == pbp->bio_inbed) { 323219974Smav pbp->bio_completed = pbp->bio_length; 324219974Smav g_raid_iodone(pbp, bp->bio_error); 325219974Smav } 326219974Smav} 327219974Smav 328219974Smavstatic int 329219974Smavg_raid_tr_free_raid0(struct g_raid_tr_object *tr) 330219974Smav{ 331219974Smav 332219974Smav return (0); 333219974Smav} 334219974Smav 335240465SmavG_RAID_TR_DECLARE(raid0, "RAID0"); 336