1219974Smav/*- 2219974Smav * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3219974Smav * All rights reserved. 4219974Smav * 5219974Smav * Redistribution and use in source and binary forms, with or without 6219974Smav * modification, are permitted provided that the following conditions 7219974Smav * are met: 8219974Smav * 1. Redistributions of source code must retain the above copyright 9219974Smav * notice, this list of conditions and the following disclaimer. 10219974Smav * 2. Redistributions in binary form must reproduce the above copyright 11219974Smav * notice, this list of conditions and the following disclaimer in the 12219974Smav * documentation and/or other materials provided with the distribution. 13219974Smav * 14219974Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15219974Smav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219974Smav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219974Smav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18219974Smav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219974Smav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219974Smav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219974Smav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219974Smav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219974Smav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219974Smav * SUCH DAMAGE. 25219974Smav */ 26219974Smav 27219974Smav#include <sys/cdefs.h> 28219974Smav__FBSDID("$FreeBSD$"); 29219974Smav 30219974Smav#include <sys/param.h> 31219974Smav#include <sys/bio.h> 32219974Smav#include <sys/endian.h> 33219974Smav#include <sys/kernel.h> 34219974Smav#include <sys/kobj.h> 35219974Smav#include <sys/lock.h> 36219974Smav#include <sys/malloc.h> 37219974Smav#include <sys/mutex.h> 38219974Smav#include <sys/systm.h> 39219974Smav#include <geom/geom.h> 40219974Smav#include "geom/raid/g_raid.h" 41219974Smav#include "g_raid_tr_if.h" 42219974Smav 43219974Smavstatic MALLOC_DEFINE(M_TR_CONCAT, "tr_concat_data", "GEOM_RAID CONCAT data"); 44219974Smav 45219974Smavstruct g_raid_tr_concat_object { 46219974Smav struct g_raid_tr_object trso_base; 47219974Smav int trso_starting; 48219974Smav int trso_stopped; 49219974Smav}; 50219974Smav 51219974Smavstatic g_raid_tr_taste_t g_raid_tr_taste_concat; 52219974Smavstatic g_raid_tr_event_t g_raid_tr_event_concat; 53219974Smavstatic g_raid_tr_start_t g_raid_tr_start_concat; 54219974Smavstatic g_raid_tr_stop_t g_raid_tr_stop_concat; 55219974Smavstatic g_raid_tr_iostart_t g_raid_tr_iostart_concat; 56219974Smavstatic g_raid_tr_iodone_t g_raid_tr_iodone_concat; 57219974Smavstatic g_raid_tr_kerneldump_t g_raid_tr_kerneldump_concat; 58219974Smavstatic g_raid_tr_free_t g_raid_tr_free_concat; 59219974Smav 60219974Smavstatic kobj_method_t g_raid_tr_concat_methods[] = { 61219974Smav KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_concat), 62219974Smav KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_concat), 63219974Smav KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_concat), 64219974Smav KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_concat), 65219974Smav KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_concat), 66219974Smav KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_concat), 67219974Smav KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_concat), 68219974Smav KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_concat), 69219974Smav { 0, 0 } 70219974Smav}; 71219974Smav 72219974Smavstatic struct g_raid_tr_class g_raid_tr_concat_class = { 73219974Smav "CONCAT", 74219974Smav g_raid_tr_concat_methods, 75219974Smav sizeof(struct g_raid_tr_concat_object), 76240465Smav .trc_enable = 1, 77219974Smav .trc_priority = 50 78219974Smav}; 79219974Smav 80219974Smavstatic int 81219974Smavg_raid_tr_taste_concat(struct g_raid_tr_object *tr, struct g_raid_volume *volume) 82219974Smav{ 83219974Smav struct g_raid_tr_concat_object *trs; 84219974Smav 85219974Smav trs = (struct g_raid_tr_concat_object *)tr; 86219974Smav if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_SINGLE && 87219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 88219974Smav !(tr->tro_volume->v_disks_count == 1 && 89219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_UNKNOWN)) 90219974Smav return (G_RAID_TR_TASTE_FAIL); 91219974Smav trs->trso_starting = 1; 92219974Smav return (G_RAID_TR_TASTE_SUCCEED); 93219974Smav} 94219974Smav 95219974Smavstatic int 96219974Smavg_raid_tr_update_state_concat(struct g_raid_volume *vol) 97219974Smav{ 98219974Smav struct g_raid_tr_concat_object *trs; 99219974Smav struct g_raid_softc *sc; 100219974Smav off_t size; 101219974Smav u_int s; 102219974Smav int i, n, f; 103219974Smav 104219974Smav sc = vol->v_softc; 105219974Smav trs = (struct g_raid_tr_concat_object *)vol->v_tr; 106219974Smav if (trs->trso_stopped) 107219974Smav s = G_RAID_VOLUME_S_STOPPED; 108219974Smav else if (trs->trso_starting) 109219974Smav s = G_RAID_VOLUME_S_STARTING; 110219974Smav else { 111219974Smav n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); 112219974Smav f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); 113219974Smav if (n + f == vol->v_disks_count) { 114219974Smav if (f == 0) 115219974Smav s = G_RAID_VOLUME_S_OPTIMAL; 116219974Smav else 117219974Smav s = G_RAID_VOLUME_S_SUBOPTIMAL; 118219974Smav } else 119219974Smav s = G_RAID_VOLUME_S_BROKEN; 120219974Smav } 121219974Smav if (s != vol->v_state) { 122219974Smav 123219974Smav /* 124219974Smav * Some metadata modules may not know CONCAT volume 125219974Smav * mediasize until all disks connected. Recalculate. 126219974Smav */ 127245519Smav if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT && 128245519Smav G_RAID_VOLUME_S_ALIVE(s) && 129219974Smav !G_RAID_VOLUME_S_ALIVE(vol->v_state)) { 130219974Smav size = 0; 131219974Smav for (i = 0; i < vol->v_disks_count; i++) { 132219974Smav if (vol->v_subdisks[i].sd_state != 133219974Smav G_RAID_SUBDISK_S_NONE) 134219974Smav size += vol->v_subdisks[i].sd_size; 135219974Smav } 136219974Smav vol->v_mediasize = size; 137219974Smav } 138219974Smav 139219974Smav g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 140219974Smav G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, 141219974Smav G_RAID_EVENT_VOLUME); 142219974Smav g_raid_change_volume_state(vol, s); 143219974Smav if (!trs->trso_starting && !trs->trso_stopped) 144219974Smav g_raid_write_metadata(sc, vol, NULL, NULL); 145219974Smav } 146219974Smav return (0); 147219974Smav} 148219974Smav 149219974Smavstatic int 150219974Smavg_raid_tr_event_concat(struct g_raid_tr_object *tr, 151219974Smav struct g_raid_subdisk *sd, u_int event) 152219974Smav{ 153219974Smav struct g_raid_tr_concat_object *trs; 154219974Smav struct g_raid_softc *sc; 155219974Smav struct g_raid_volume *vol; 156219974Smav int state; 157219974Smav 158219974Smav trs = (struct g_raid_tr_concat_object *)tr; 159219974Smav vol = tr->tro_volume; 160219974Smav sc = vol->v_softc; 161219974Smav 162219974Smav state = sd->sd_state; 163219974Smav if (state != G_RAID_SUBDISK_S_NONE && 164219974Smav state != G_RAID_SUBDISK_S_FAILED && 165219974Smav state != G_RAID_SUBDISK_S_ACTIVE) { 166219974Smav G_RAID_DEBUG1(1, sc, 167219974Smav "Promote subdisk %s:%d from %s to ACTIVE.", 168219974Smav vol->v_name, sd->sd_pos, 169219974Smav g_raid_subdisk_state2str(sd->sd_state)); 170219974Smav g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); 171219974Smav } 172219974Smav if (state != sd->sd_state && 173219974Smav !trs->trso_starting && !trs->trso_stopped) 174219974Smav g_raid_write_metadata(sc, vol, sd, NULL); 175219974Smav g_raid_tr_update_state_concat(vol); 176219974Smav return (0); 177219974Smav} 178219974Smav 179219974Smavstatic int 180219974Smavg_raid_tr_start_concat(struct g_raid_tr_object *tr) 181219974Smav{ 182219974Smav struct g_raid_tr_concat_object *trs; 183219974Smav struct g_raid_volume *vol; 184219974Smav 185219974Smav trs = (struct g_raid_tr_concat_object *)tr; 186219974Smav vol = tr->tro_volume; 187219974Smav trs->trso_starting = 0; 188219974Smav g_raid_tr_update_state_concat(vol); 189219974Smav return (0); 190219974Smav} 191219974Smav 192219974Smavstatic int 193219974Smavg_raid_tr_stop_concat(struct g_raid_tr_object *tr) 194219974Smav{ 195219974Smav struct g_raid_tr_concat_object *trs; 196219974Smav struct g_raid_volume *vol; 197219974Smav 198219974Smav trs = (struct g_raid_tr_concat_object *)tr; 199219974Smav vol = tr->tro_volume; 200219974Smav trs->trso_starting = 0; 201219974Smav trs->trso_stopped = 1; 202219974Smav g_raid_tr_update_state_concat(vol); 203219974Smav return (0); 204219974Smav} 205219974Smav 206219974Smavstatic void 207219974Smavg_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) 208219974Smav{ 209219974Smav struct g_raid_volume *vol; 210219974Smav struct g_raid_subdisk *sd; 211219974Smav struct bio_queue_head queue; 212219974Smav struct bio *cbp; 213219974Smav char *addr; 214219974Smav off_t offset, length, remain; 215219974Smav u_int no; 216219974Smav 217219974Smav vol = tr->tro_volume; 218219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && 219219974Smav vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { 220219974Smav g_raid_iodone(bp, EIO); 221219974Smav return; 222219974Smav } 223219974Smav if (bp->bio_cmd == BIO_FLUSH) { 224219974Smav g_raid_tr_flush_common(tr, bp); 225219974Smav return; 226219974Smav } 227219974Smav 228219974Smav offset = bp->bio_offset; 229219974Smav remain = bp->bio_length; 230219974Smav addr = bp->bio_data; 231219974Smav no = 0; 232219974Smav while (no < vol->v_disks_count && 233219974Smav offset >= vol->v_subdisks[no].sd_size) { 234219974Smav offset -= vol->v_subdisks[no].sd_size; 235219974Smav no++; 236219974Smav } 237219974Smav KASSERT(no < vol->v_disks_count, 238219974Smav ("Request starts after volume end (%ju)", bp->bio_offset)); 239219974Smav bioq_init(&queue); 240219974Smav do { 241219974Smav sd = &vol->v_subdisks[no]; 242219974Smav length = MIN(sd->sd_size - offset, remain); 243219974Smav cbp = g_clone_bio(bp); 244219974Smav if (cbp == NULL) 245219974Smav goto failure; 246219974Smav cbp->bio_offset = offset; 247219974Smav cbp->bio_data = addr; 248219974Smav cbp->bio_length = length; 249219974Smav cbp->bio_caller1 = sd; 250219974Smav bioq_insert_tail(&queue, cbp); 251219974Smav remain -= length; 252242323Smav if (bp->bio_cmd != BIO_DELETE) 253242323Smav addr += length; 254219974Smav offset = 0; 255219974Smav no++; 256219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 257219974Smav ("Request ends after volume end (%ju, %ju)", 258219974Smav bp->bio_offset, bp->bio_length)); 259219974Smav } while (remain > 0); 260219974Smav for (cbp = bioq_first(&queue); cbp != NULL; 261219974Smav cbp = bioq_first(&queue)) { 262219974Smav bioq_remove(&queue, cbp); 263219974Smav sd = cbp->bio_caller1; 264219974Smav cbp->bio_caller1 = NULL; 265219974Smav g_raid_subdisk_iostart(sd, cbp); 266219974Smav } 267219974Smav return; 268219974Smavfailure: 269219974Smav for (cbp = bioq_first(&queue); cbp != NULL; 270219974Smav cbp = bioq_first(&queue)) { 271219974Smav bioq_remove(&queue, cbp); 272219974Smav g_destroy_bio(cbp); 273219974Smav } 274219974Smav if (bp->bio_error == 0) 275219974Smav bp->bio_error = ENOMEM; 276219974Smav g_raid_iodone(bp, bp->bio_error); 277219974Smav} 278219974Smav 279219974Smavstatic int 280219974Smavg_raid_tr_kerneldump_concat(struct g_raid_tr_object *tr, 281219974Smav void *virtual, vm_offset_t physical, off_t boffset, size_t blength) 282219974Smav{ 283219974Smav struct g_raid_volume *vol; 284219974Smav struct g_raid_subdisk *sd; 285219974Smav char *addr; 286219974Smav off_t offset, length, remain; 287219974Smav int error, no; 288219974Smav 289219974Smav vol = tr->tro_volume; 290219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 291219974Smav return (ENXIO); 292219974Smav 293219974Smav offset = boffset; 294219974Smav remain = blength; 295219974Smav addr = virtual; 296219974Smav no = 0; 297219974Smav while (no < vol->v_disks_count && 298219974Smav offset >= vol->v_subdisks[no].sd_size) { 299219974Smav offset -= vol->v_subdisks[no].sd_size; 300219974Smav no++; 301219974Smav } 302219974Smav KASSERT(no < vol->v_disks_count, 303219974Smav ("Request starts after volume end (%ju)", boffset)); 304219974Smav do { 305219974Smav sd = &vol->v_subdisks[no]; 306219974Smav length = MIN(sd->sd_size - offset, remain); 307219974Smav error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], 308219974Smav addr, 0, offset, length); 309219974Smav if (error != 0) 310219974Smav return (error); 311219974Smav remain -= length; 312219974Smav addr += length; 313219974Smav offset = 0; 314219974Smav no++; 315219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 316219974Smav ("Request ends after volume end (%ju, %zu)", 317219974Smav boffset, blength)); 318219974Smav } while (remain > 0); 319219974Smav return (0); 320219974Smav} 321219974Smav 322219974Smavstatic void 323219974Smavg_raid_tr_iodone_concat(struct g_raid_tr_object *tr, 324219974Smav struct g_raid_subdisk *sd,struct bio *bp) 325219974Smav{ 326219974Smav struct bio *pbp; 327219974Smav 328219974Smav pbp = bp->bio_parent; 329219974Smav if (pbp->bio_error == 0) 330219974Smav pbp->bio_error = bp->bio_error; 331219974Smav g_destroy_bio(bp); 332219974Smav pbp->bio_inbed++; 333219974Smav if (pbp->bio_children == pbp->bio_inbed) { 334219974Smav pbp->bio_completed = pbp->bio_length; 335219974Smav g_raid_iodone(pbp, bp->bio_error); 336219974Smav } 337219974Smav} 338219974Smav 339219974Smavstatic int 340219974Smavg_raid_tr_free_concat(struct g_raid_tr_object *tr) 341219974Smav{ 342219974Smav 343219974Smav return (0); 344219974Smav} 345219974Smav 346240465SmavG_RAID_TR_DECLARE(concat, "CONCAT"); 347