tr_concat.c revision 240465
1219974Smav/*- 2219974Smav * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3219974Smav * All rights reserved. 4219974Smav * 5219974Smav * Redistribution and use in source and binary forms, with or without 6219974Smav * modification, are permitted provided that the following conditions 7219974Smav * are met: 8219974Smav * 1. Redistributions of source code must retain the above copyright 9219974Smav * notice, this list of conditions and the following disclaimer. 10219974Smav * 2. Redistributions in binary form must reproduce the above copyright 11219974Smav * notice, this list of conditions and the following disclaimer in the 12219974Smav * documentation and/or other materials provided with the distribution. 13219974Smav * 14219974Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15219974Smav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219974Smav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219974Smav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18219974Smav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219974Smav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219974Smav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219974Smav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219974Smav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219974Smav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219974Smav * SUCH DAMAGE. 25219974Smav */ 26219974Smav 27219974Smav#include <sys/cdefs.h> 28219974Smav__FBSDID("$FreeBSD: head/sys/geom/raid/tr_concat.c 240465 2012-09-13 13:27:09Z mav $"); 29219974Smav 30219974Smav#include <sys/param.h> 31219974Smav#include <sys/bio.h> 32219974Smav#include <sys/endian.h> 33219974Smav#include <sys/kernel.h> 34219974Smav#include <sys/kobj.h> 35219974Smav#include <sys/lock.h> 36219974Smav#include <sys/malloc.h> 37219974Smav#include <sys/mutex.h> 38219974Smav#include <sys/systm.h> 39219974Smav#include <geom/geom.h> 40219974Smav#include "geom/raid/g_raid.h" 41219974Smav#include "g_raid_tr_if.h" 42219974Smav 43219974Smavstatic MALLOC_DEFINE(M_TR_CONCAT, "tr_concat_data", "GEOM_RAID CONCAT data"); 44219974Smav 45219974Smavstruct g_raid_tr_concat_object { 46219974Smav struct g_raid_tr_object trso_base; 47219974Smav int trso_starting; 48219974Smav int trso_stopped; 49219974Smav}; 50219974Smav 51219974Smavstatic g_raid_tr_taste_t g_raid_tr_taste_concat; 52219974Smavstatic g_raid_tr_event_t g_raid_tr_event_concat; 53219974Smavstatic g_raid_tr_start_t g_raid_tr_start_concat; 54219974Smavstatic g_raid_tr_stop_t g_raid_tr_stop_concat; 55219974Smavstatic g_raid_tr_iostart_t g_raid_tr_iostart_concat; 56219974Smavstatic g_raid_tr_iodone_t g_raid_tr_iodone_concat; 57219974Smavstatic g_raid_tr_kerneldump_t g_raid_tr_kerneldump_concat; 58219974Smavstatic g_raid_tr_free_t g_raid_tr_free_concat; 59219974Smav 60219974Smavstatic kobj_method_t g_raid_tr_concat_methods[] = { 61219974Smav KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_concat), 62219974Smav KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_concat), 63219974Smav KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_concat), 64219974Smav KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_concat), 65219974Smav KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_concat), 66219974Smav KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_concat), 67219974Smav KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_concat), 68219974Smav KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_concat), 69219974Smav { 0, 0 } 70219974Smav}; 71219974Smav 72219974Smavstatic struct g_raid_tr_class g_raid_tr_concat_class = { 73219974Smav "CONCAT", 74219974Smav g_raid_tr_concat_methods, 75219974Smav sizeof(struct g_raid_tr_concat_object), 76240465Smav .trc_enable = 1, 77219974Smav .trc_priority = 50 78219974Smav}; 79219974Smav 80219974Smavstatic int 81219974Smavg_raid_tr_taste_concat(struct g_raid_tr_object *tr, struct g_raid_volume *volume) 82219974Smav{ 83219974Smav struct g_raid_tr_concat_object *trs; 84219974Smav 85219974Smav trs = (struct g_raid_tr_concat_object *)tr; 86219974Smav if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_SINGLE && 87219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 88219974Smav !(tr->tro_volume->v_disks_count == 1 && 89219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_UNKNOWN)) 90219974Smav return (G_RAID_TR_TASTE_FAIL); 91219974Smav trs->trso_starting = 1; 92219974Smav return (G_RAID_TR_TASTE_SUCCEED); 93219974Smav} 94219974Smav 95219974Smavstatic int 96219974Smavg_raid_tr_update_state_concat(struct g_raid_volume *vol) 97219974Smav{ 98219974Smav struct g_raid_tr_concat_object *trs; 99219974Smav struct g_raid_softc *sc; 100219974Smav off_t size; 101219974Smav u_int s; 102219974Smav int i, n, f; 103219974Smav 104219974Smav sc = vol->v_softc; 105219974Smav trs = (struct g_raid_tr_concat_object *)vol->v_tr; 106219974Smav if (trs->trso_stopped) 107219974Smav s = G_RAID_VOLUME_S_STOPPED; 108219974Smav else if (trs->trso_starting) 109219974Smav s = G_RAID_VOLUME_S_STARTING; 110219974Smav else { 111219974Smav n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); 112219974Smav f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); 113219974Smav if (n + f == vol->v_disks_count) { 114219974Smav if (f == 0) 115219974Smav s = G_RAID_VOLUME_S_OPTIMAL; 116219974Smav else 117219974Smav s = G_RAID_VOLUME_S_SUBOPTIMAL; 118219974Smav } else 119219974Smav s = G_RAID_VOLUME_S_BROKEN; 120219974Smav } 121219974Smav if (s != vol->v_state) { 122219974Smav 123219974Smav /* 124219974Smav * Some metadata modules may not know CONCAT volume 125219974Smav * mediasize until all disks connected. Recalculate. 126219974Smav */ 127219974Smav if (G_RAID_VOLUME_S_ALIVE(s) && 128219974Smav !G_RAID_VOLUME_S_ALIVE(vol->v_state)) { 129219974Smav size = 0; 130219974Smav for (i = 0; i < vol->v_disks_count; i++) { 131219974Smav if (vol->v_subdisks[i].sd_state != 132219974Smav G_RAID_SUBDISK_S_NONE) 133219974Smav size += vol->v_subdisks[i].sd_size; 134219974Smav } 135219974Smav vol->v_mediasize = size; 136219974Smav } 137219974Smav 138219974Smav g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 139219974Smav G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, 140219974Smav G_RAID_EVENT_VOLUME); 141219974Smav g_raid_change_volume_state(vol, s); 142219974Smav if (!trs->trso_starting && !trs->trso_stopped) 143219974Smav g_raid_write_metadata(sc, vol, NULL, NULL); 144219974Smav } 145219974Smav return (0); 146219974Smav} 147219974Smav 148219974Smavstatic int 149219974Smavg_raid_tr_event_concat(struct g_raid_tr_object *tr, 150219974Smav struct g_raid_subdisk *sd, u_int event) 151219974Smav{ 152219974Smav struct g_raid_tr_concat_object *trs; 153219974Smav struct g_raid_softc *sc; 154219974Smav struct g_raid_volume *vol; 155219974Smav int state; 156219974Smav 157219974Smav trs = (struct g_raid_tr_concat_object *)tr; 158219974Smav vol = tr->tro_volume; 159219974Smav sc = vol->v_softc; 160219974Smav 161219974Smav state = sd->sd_state; 162219974Smav if (state != G_RAID_SUBDISK_S_NONE && 163219974Smav state != G_RAID_SUBDISK_S_FAILED && 164219974Smav state != G_RAID_SUBDISK_S_ACTIVE) { 165219974Smav G_RAID_DEBUG1(1, sc, 166219974Smav "Promote subdisk %s:%d from %s to ACTIVE.", 167219974Smav vol->v_name, sd->sd_pos, 168219974Smav g_raid_subdisk_state2str(sd->sd_state)); 169219974Smav g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); 170219974Smav } 171219974Smav if (state != sd->sd_state && 172219974Smav !trs->trso_starting && !trs->trso_stopped) 173219974Smav g_raid_write_metadata(sc, vol, sd, NULL); 174219974Smav g_raid_tr_update_state_concat(vol); 175219974Smav return (0); 176219974Smav} 177219974Smav 178219974Smavstatic int 179219974Smavg_raid_tr_start_concat(struct g_raid_tr_object *tr) 180219974Smav{ 181219974Smav struct g_raid_tr_concat_object *trs; 182219974Smav struct g_raid_volume *vol; 183219974Smav 184219974Smav trs = (struct g_raid_tr_concat_object *)tr; 185219974Smav vol = tr->tro_volume; 186219974Smav trs->trso_starting = 0; 187219974Smav g_raid_tr_update_state_concat(vol); 188219974Smav return (0); 189219974Smav} 190219974Smav 191219974Smavstatic int 192219974Smavg_raid_tr_stop_concat(struct g_raid_tr_object *tr) 193219974Smav{ 194219974Smav struct g_raid_tr_concat_object *trs; 195219974Smav struct g_raid_volume *vol; 196219974Smav 197219974Smav trs = (struct g_raid_tr_concat_object *)tr; 198219974Smav vol = tr->tro_volume; 199219974Smav trs->trso_starting = 0; 200219974Smav trs->trso_stopped = 1; 201219974Smav g_raid_tr_update_state_concat(vol); 202219974Smav return (0); 203219974Smav} 204219974Smav 205219974Smavstatic void 206219974Smavg_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) 207219974Smav{ 208219974Smav struct g_raid_volume *vol; 209219974Smav struct g_raid_subdisk *sd; 210219974Smav struct bio_queue_head queue; 211219974Smav struct bio *cbp; 212219974Smav char *addr; 213219974Smav off_t offset, length, remain; 214219974Smav u_int no; 215219974Smav 216219974Smav vol = tr->tro_volume; 217219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && 218219974Smav vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { 219219974Smav g_raid_iodone(bp, EIO); 220219974Smav return; 221219974Smav } 222219974Smav if (bp->bio_cmd == BIO_FLUSH) { 223219974Smav g_raid_tr_flush_common(tr, bp); 224219974Smav return; 225219974Smav } 226219974Smav 227219974Smav offset = bp->bio_offset; 228219974Smav remain = bp->bio_length; 229219974Smav addr = bp->bio_data; 230219974Smav no = 0; 231219974Smav while (no < vol->v_disks_count && 232219974Smav offset >= vol->v_subdisks[no].sd_size) { 233219974Smav offset -= vol->v_subdisks[no].sd_size; 234219974Smav no++; 235219974Smav } 236219974Smav KASSERT(no < vol->v_disks_count, 237219974Smav ("Request starts after volume end (%ju)", bp->bio_offset)); 238219974Smav bioq_init(&queue); 239219974Smav do { 240219974Smav sd = &vol->v_subdisks[no]; 241219974Smav length = MIN(sd->sd_size - offset, remain); 242219974Smav cbp = g_clone_bio(bp); 243219974Smav if (cbp == NULL) 244219974Smav goto failure; 245219974Smav cbp->bio_offset = offset; 246219974Smav cbp->bio_data = addr; 247219974Smav cbp->bio_length = length; 248219974Smav cbp->bio_caller1 = sd; 249219974Smav bioq_insert_tail(&queue, cbp); 250219974Smav remain -= length; 251219974Smav addr += length; 252219974Smav offset = 0; 253219974Smav no++; 254219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 255219974Smav ("Request ends after volume end (%ju, %ju)", 256219974Smav bp->bio_offset, bp->bio_length)); 257219974Smav } while (remain > 0); 258219974Smav for (cbp = bioq_first(&queue); cbp != NULL; 259219974Smav cbp = bioq_first(&queue)) { 260219974Smav bioq_remove(&queue, cbp); 261219974Smav sd = cbp->bio_caller1; 262219974Smav cbp->bio_caller1 = NULL; 263219974Smav g_raid_subdisk_iostart(sd, cbp); 264219974Smav } 265219974Smav return; 266219974Smavfailure: 267219974Smav for (cbp = bioq_first(&queue); cbp != NULL; 268219974Smav cbp = bioq_first(&queue)) { 269219974Smav bioq_remove(&queue, cbp); 270219974Smav g_destroy_bio(cbp); 271219974Smav } 272219974Smav if (bp->bio_error == 0) 273219974Smav bp->bio_error = ENOMEM; 274219974Smav g_raid_iodone(bp, bp->bio_error); 275219974Smav} 276219974Smav 277219974Smavstatic int 278219974Smavg_raid_tr_kerneldump_concat(struct g_raid_tr_object *tr, 279219974Smav void *virtual, vm_offset_t physical, off_t boffset, size_t blength) 280219974Smav{ 281219974Smav struct g_raid_volume *vol; 282219974Smav struct g_raid_subdisk *sd; 283219974Smav char *addr; 284219974Smav off_t offset, length, remain; 285219974Smav int error, no; 286219974Smav 287219974Smav vol = tr->tro_volume; 288219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 289219974Smav return (ENXIO); 290219974Smav 291219974Smav offset = boffset; 292219974Smav remain = blength; 293219974Smav addr = virtual; 294219974Smav no = 0; 295219974Smav while (no < vol->v_disks_count && 296219974Smav offset >= vol->v_subdisks[no].sd_size) { 297219974Smav offset -= vol->v_subdisks[no].sd_size; 298219974Smav no++; 299219974Smav } 300219974Smav KASSERT(no < vol->v_disks_count, 301219974Smav ("Request starts after volume end (%ju)", boffset)); 302219974Smav do { 303219974Smav sd = &vol->v_subdisks[no]; 304219974Smav length = MIN(sd->sd_size - offset, remain); 305219974Smav error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], 306219974Smav addr, 0, offset, length); 307219974Smav if (error != 0) 308219974Smav return (error); 309219974Smav remain -= length; 310219974Smav addr += length; 311219974Smav offset = 0; 312219974Smav no++; 313219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 314219974Smav ("Request ends after volume end (%ju, %zu)", 315219974Smav boffset, blength)); 316219974Smav } while (remain > 0); 317219974Smav return (0); 318219974Smav} 319219974Smav 320219974Smavstatic void 321219974Smavg_raid_tr_iodone_concat(struct g_raid_tr_object *tr, 322219974Smav struct g_raid_subdisk *sd,struct bio *bp) 323219974Smav{ 324219974Smav struct bio *pbp; 325219974Smav 326219974Smav pbp = bp->bio_parent; 327219974Smav if (pbp->bio_error == 0) 328219974Smav pbp->bio_error = bp->bio_error; 329219974Smav g_destroy_bio(bp); 330219974Smav pbp->bio_inbed++; 331219974Smav if (pbp->bio_children == pbp->bio_inbed) { 332219974Smav pbp->bio_completed = pbp->bio_length; 333219974Smav g_raid_iodone(pbp, bp->bio_error); 334219974Smav } 335219974Smav} 336219974Smav 337219974Smavstatic int 338219974Smavg_raid_tr_free_concat(struct g_raid_tr_object *tr) 339219974Smav{ 340219974Smav 341219974Smav return (0); 342219974Smav} 343219974Smav 344240465SmavG_RAID_TR_DECLARE(concat, "CONCAT"); 345