/* tr_concat.c revision 219974 */
1219974Smav/*- 2219974Smav * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3219974Smav * All rights reserved. 4219974Smav * 5219974Smav * Redistribution and use in source and binary forms, with or without 6219974Smav * modification, are permitted provided that the following conditions 7219974Smav * are met: 8219974Smav * 1. Redistributions of source code must retain the above copyright 9219974Smav * notice, this list of conditions and the following disclaimer. 10219974Smav * 2. Redistributions in binary form must reproduce the above copyright 11219974Smav * notice, this list of conditions and the following disclaimer in the 12219974Smav * documentation and/or other materials provided with the distribution. 13219974Smav * 14219974Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15219974Smav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219974Smav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219974Smav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18219974Smav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219974Smav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219974Smav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219974Smav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219974Smav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219974Smav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219974Smav * SUCH DAMAGE. 
25219974Smav */ 26219974Smav 27219974Smav#include <sys/cdefs.h> 28219974Smav__FBSDID("$FreeBSD: head/sys/geom/raid/tr_concat.c 219974 2011-03-24 21:31:32Z mav $"); 29219974Smav 30219974Smav#include <sys/param.h> 31219974Smav#include <sys/bio.h> 32219974Smav#include <sys/endian.h> 33219974Smav#include <sys/kernel.h> 34219974Smav#include <sys/kobj.h> 35219974Smav#include <sys/lock.h> 36219974Smav#include <sys/malloc.h> 37219974Smav#include <sys/mutex.h> 38219974Smav#include <sys/systm.h> 39219974Smav#include <geom/geom.h> 40219974Smav#include "geom/raid/g_raid.h" 41219974Smav#include "g_raid_tr_if.h" 42219974Smav 43219974Smavstatic MALLOC_DEFINE(M_TR_CONCAT, "tr_concat_data", "GEOM_RAID CONCAT data"); 44219974Smav 45219974Smavstruct g_raid_tr_concat_object { 46219974Smav struct g_raid_tr_object trso_base; 47219974Smav int trso_starting; 48219974Smav int trso_stopped; 49219974Smav}; 50219974Smav 51219974Smavstatic g_raid_tr_taste_t g_raid_tr_taste_concat; 52219974Smavstatic g_raid_tr_event_t g_raid_tr_event_concat; 53219974Smavstatic g_raid_tr_start_t g_raid_tr_start_concat; 54219974Smavstatic g_raid_tr_stop_t g_raid_tr_stop_concat; 55219974Smavstatic g_raid_tr_iostart_t g_raid_tr_iostart_concat; 56219974Smavstatic g_raid_tr_iodone_t g_raid_tr_iodone_concat; 57219974Smavstatic g_raid_tr_kerneldump_t g_raid_tr_kerneldump_concat; 58219974Smavstatic g_raid_tr_free_t g_raid_tr_free_concat; 59219974Smav 60219974Smavstatic kobj_method_t g_raid_tr_concat_methods[] = { 61219974Smav KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_concat), 62219974Smav KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_concat), 63219974Smav KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_concat), 64219974Smav KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_concat), 65219974Smav KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_concat), 66219974Smav KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_concat), 67219974Smav KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_concat), 68219974Smav KOBJMETHOD(g_raid_tr_free, 
g_raid_tr_free_concat), 69219974Smav { 0, 0 } 70219974Smav}; 71219974Smav 72219974Smavstatic struct g_raid_tr_class g_raid_tr_concat_class = { 73219974Smav "CONCAT", 74219974Smav g_raid_tr_concat_methods, 75219974Smav sizeof(struct g_raid_tr_concat_object), 76219974Smav .trc_priority = 50 77219974Smav}; 78219974Smav 79219974Smavstatic int 80219974Smavg_raid_tr_taste_concat(struct g_raid_tr_object *tr, struct g_raid_volume *volume) 81219974Smav{ 82219974Smav struct g_raid_tr_concat_object *trs; 83219974Smav 84219974Smav trs = (struct g_raid_tr_concat_object *)tr; 85219974Smav if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_SINGLE && 86219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 87219974Smav !(tr->tro_volume->v_disks_count == 1 && 88219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_UNKNOWN)) 89219974Smav return (G_RAID_TR_TASTE_FAIL); 90219974Smav trs->trso_starting = 1; 91219974Smav return (G_RAID_TR_TASTE_SUCCEED); 92219974Smav} 93219974Smav 94219974Smavstatic int 95219974Smavg_raid_tr_update_state_concat(struct g_raid_volume *vol) 96219974Smav{ 97219974Smav struct g_raid_tr_concat_object *trs; 98219974Smav struct g_raid_softc *sc; 99219974Smav off_t size; 100219974Smav u_int s; 101219974Smav int i, n, f; 102219974Smav 103219974Smav sc = vol->v_softc; 104219974Smav trs = (struct g_raid_tr_concat_object *)vol->v_tr; 105219974Smav if (trs->trso_stopped) 106219974Smav s = G_RAID_VOLUME_S_STOPPED; 107219974Smav else if (trs->trso_starting) 108219974Smav s = G_RAID_VOLUME_S_STARTING; 109219974Smav else { 110219974Smav n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); 111219974Smav f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); 112219974Smav if (n + f == vol->v_disks_count) { 113219974Smav if (f == 0) 114219974Smav s = G_RAID_VOLUME_S_OPTIMAL; 115219974Smav else 116219974Smav s = G_RAID_VOLUME_S_SUBOPTIMAL; 117219974Smav } else 118219974Smav s = G_RAID_VOLUME_S_BROKEN; 119219974Smav } 120219974Smav if (s != vol->v_state) { 
121219974Smav 122219974Smav /* 123219974Smav * Some metadata modules may not know CONCAT volume 124219974Smav * mediasize until all disks connected. Recalculate. 125219974Smav */ 126219974Smav if (G_RAID_VOLUME_S_ALIVE(s) && 127219974Smav !G_RAID_VOLUME_S_ALIVE(vol->v_state)) { 128219974Smav size = 0; 129219974Smav for (i = 0; i < vol->v_disks_count; i++) { 130219974Smav if (vol->v_subdisks[i].sd_state != 131219974Smav G_RAID_SUBDISK_S_NONE) 132219974Smav size += vol->v_subdisks[i].sd_size; 133219974Smav } 134219974Smav vol->v_mediasize = size; 135219974Smav } 136219974Smav 137219974Smav g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 138219974Smav G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, 139219974Smav G_RAID_EVENT_VOLUME); 140219974Smav g_raid_change_volume_state(vol, s); 141219974Smav if (!trs->trso_starting && !trs->trso_stopped) 142219974Smav g_raid_write_metadata(sc, vol, NULL, NULL); 143219974Smav } 144219974Smav return (0); 145219974Smav} 146219974Smav 147219974Smavstatic int 148219974Smavg_raid_tr_event_concat(struct g_raid_tr_object *tr, 149219974Smav struct g_raid_subdisk *sd, u_int event) 150219974Smav{ 151219974Smav struct g_raid_tr_concat_object *trs; 152219974Smav struct g_raid_softc *sc; 153219974Smav struct g_raid_volume *vol; 154219974Smav int state; 155219974Smav 156219974Smav trs = (struct g_raid_tr_concat_object *)tr; 157219974Smav vol = tr->tro_volume; 158219974Smav sc = vol->v_softc; 159219974Smav 160219974Smav state = sd->sd_state; 161219974Smav if (state != G_RAID_SUBDISK_S_NONE && 162219974Smav state != G_RAID_SUBDISK_S_FAILED && 163219974Smav state != G_RAID_SUBDISK_S_ACTIVE) { 164219974Smav G_RAID_DEBUG1(1, sc, 165219974Smav "Promote subdisk %s:%d from %s to ACTIVE.", 166219974Smav vol->v_name, sd->sd_pos, 167219974Smav g_raid_subdisk_state2str(sd->sd_state)); 168219974Smav g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); 169219974Smav } 170219974Smav if (state != sd->sd_state && 171219974Smav !trs->trso_starting && 
!trs->trso_stopped) 172219974Smav g_raid_write_metadata(sc, vol, sd, NULL); 173219974Smav g_raid_tr_update_state_concat(vol); 174219974Smav return (0); 175219974Smav} 176219974Smav 177219974Smavstatic int 178219974Smavg_raid_tr_start_concat(struct g_raid_tr_object *tr) 179219974Smav{ 180219974Smav struct g_raid_tr_concat_object *trs; 181219974Smav struct g_raid_volume *vol; 182219974Smav 183219974Smav trs = (struct g_raid_tr_concat_object *)tr; 184219974Smav vol = tr->tro_volume; 185219974Smav trs->trso_starting = 0; 186219974Smav g_raid_tr_update_state_concat(vol); 187219974Smav return (0); 188219974Smav} 189219974Smav 190219974Smavstatic int 191219974Smavg_raid_tr_stop_concat(struct g_raid_tr_object *tr) 192219974Smav{ 193219974Smav struct g_raid_tr_concat_object *trs; 194219974Smav struct g_raid_volume *vol; 195219974Smav 196219974Smav trs = (struct g_raid_tr_concat_object *)tr; 197219974Smav vol = tr->tro_volume; 198219974Smav trs->trso_starting = 0; 199219974Smav trs->trso_stopped = 1; 200219974Smav g_raid_tr_update_state_concat(vol); 201219974Smav return (0); 202219974Smav} 203219974Smav 204219974Smavstatic void 205219974Smavg_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) 206219974Smav{ 207219974Smav struct g_raid_volume *vol; 208219974Smav struct g_raid_subdisk *sd; 209219974Smav struct bio_queue_head queue; 210219974Smav struct bio *cbp; 211219974Smav char *addr; 212219974Smav off_t offset, length, remain; 213219974Smav u_int no; 214219974Smav 215219974Smav vol = tr->tro_volume; 216219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && 217219974Smav vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { 218219974Smav g_raid_iodone(bp, EIO); 219219974Smav return; 220219974Smav } 221219974Smav if (bp->bio_cmd == BIO_FLUSH) { 222219974Smav g_raid_tr_flush_common(tr, bp); 223219974Smav return; 224219974Smav } 225219974Smav 226219974Smav offset = bp->bio_offset; 227219974Smav remain = bp->bio_length; 228219974Smav addr = bp->bio_data; 
229219974Smav no = 0; 230219974Smav while (no < vol->v_disks_count && 231219974Smav offset >= vol->v_subdisks[no].sd_size) { 232219974Smav offset -= vol->v_subdisks[no].sd_size; 233219974Smav no++; 234219974Smav } 235219974Smav KASSERT(no < vol->v_disks_count, 236219974Smav ("Request starts after volume end (%ju)", bp->bio_offset)); 237219974Smav bioq_init(&queue); 238219974Smav do { 239219974Smav sd = &vol->v_subdisks[no]; 240219974Smav length = MIN(sd->sd_size - offset, remain); 241219974Smav cbp = g_clone_bio(bp); 242219974Smav if (cbp == NULL) 243219974Smav goto failure; 244219974Smav cbp->bio_offset = offset; 245219974Smav cbp->bio_data = addr; 246219974Smav cbp->bio_length = length; 247219974Smav cbp->bio_caller1 = sd; 248219974Smav bioq_insert_tail(&queue, cbp); 249219974Smav remain -= length; 250219974Smav addr += length; 251219974Smav offset = 0; 252219974Smav no++; 253219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 254219974Smav ("Request ends after volume end (%ju, %ju)", 255219974Smav bp->bio_offset, bp->bio_length)); 256219974Smav } while (remain > 0); 257219974Smav for (cbp = bioq_first(&queue); cbp != NULL; 258219974Smav cbp = bioq_first(&queue)) { 259219974Smav bioq_remove(&queue, cbp); 260219974Smav sd = cbp->bio_caller1; 261219974Smav cbp->bio_caller1 = NULL; 262219974Smav g_raid_subdisk_iostart(sd, cbp); 263219974Smav } 264219974Smav return; 265219974Smavfailure: 266219974Smav for (cbp = bioq_first(&queue); cbp != NULL; 267219974Smav cbp = bioq_first(&queue)) { 268219974Smav bioq_remove(&queue, cbp); 269219974Smav g_destroy_bio(cbp); 270219974Smav } 271219974Smav if (bp->bio_error == 0) 272219974Smav bp->bio_error = ENOMEM; 273219974Smav g_raid_iodone(bp, bp->bio_error); 274219974Smav} 275219974Smav 276219974Smavstatic int 277219974Smavg_raid_tr_kerneldump_concat(struct g_raid_tr_object *tr, 278219974Smav void *virtual, vm_offset_t physical, off_t boffset, size_t blength) 279219974Smav{ 280219974Smav struct g_raid_volume *vol; 
281219974Smav struct g_raid_subdisk *sd; 282219974Smav char *addr; 283219974Smav off_t offset, length, remain; 284219974Smav int error, no; 285219974Smav 286219974Smav vol = tr->tro_volume; 287219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 288219974Smav return (ENXIO); 289219974Smav 290219974Smav offset = boffset; 291219974Smav remain = blength; 292219974Smav addr = virtual; 293219974Smav no = 0; 294219974Smav while (no < vol->v_disks_count && 295219974Smav offset >= vol->v_subdisks[no].sd_size) { 296219974Smav offset -= vol->v_subdisks[no].sd_size; 297219974Smav no++; 298219974Smav } 299219974Smav KASSERT(no < vol->v_disks_count, 300219974Smav ("Request starts after volume end (%ju)", boffset)); 301219974Smav do { 302219974Smav sd = &vol->v_subdisks[no]; 303219974Smav length = MIN(sd->sd_size - offset, remain); 304219974Smav error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], 305219974Smav addr, 0, offset, length); 306219974Smav if (error != 0) 307219974Smav return (error); 308219974Smav remain -= length; 309219974Smav addr += length; 310219974Smav offset = 0; 311219974Smav no++; 312219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 313219974Smav ("Request ends after volume end (%ju, %zu)", 314219974Smav boffset, blength)); 315219974Smav } while (remain > 0); 316219974Smav return (0); 317219974Smav} 318219974Smav 319219974Smavstatic void 320219974Smavg_raid_tr_iodone_concat(struct g_raid_tr_object *tr, 321219974Smav struct g_raid_subdisk *sd,struct bio *bp) 322219974Smav{ 323219974Smav struct bio *pbp; 324219974Smav 325219974Smav pbp = bp->bio_parent; 326219974Smav if (pbp->bio_error == 0) 327219974Smav pbp->bio_error = bp->bio_error; 328219974Smav g_destroy_bio(bp); 329219974Smav pbp->bio_inbed++; 330219974Smav if (pbp->bio_children == pbp->bio_inbed) { 331219974Smav pbp->bio_completed = pbp->bio_length; 332219974Smav g_raid_iodone(pbp, bp->bio_error); 333219974Smav } 334219974Smav} 335219974Smav 336219974Smavstatic int 
337219974Smavg_raid_tr_free_concat(struct g_raid_tr_object *tr) 338219974Smav{ 339219974Smav 340219974Smav return (0); 341219974Smav} 342219974Smav 343219974SmavG_RAID_TR_DECLARE(g_raid_tr_concat); 344