1219974Smav/*- 2219974Smav * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3219974Smav * All rights reserved. 4219974Smav * 5219974Smav * Redistribution and use in source and binary forms, with or without 6219974Smav * modification, are permitted provided that the following conditions 7219974Smav * are met: 8219974Smav * 1. Redistributions of source code must retain the above copyright 9219974Smav * notice, this list of conditions and the following disclaimer. 10219974Smav * 2. Redistributions in binary form must reproduce the above copyright 11219974Smav * notice, this list of conditions and the following disclaimer in the 12219974Smav * documentation and/or other materials provided with the distribution. 13219974Smav * 14219974Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15219974Smav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219974Smav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219974Smav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18219974Smav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219974Smav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219974Smav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219974Smav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219974Smav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219974Smav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219974Smav * SUCH DAMAGE. 25219974Smav */ 26219974Smav 27219974Smav#include <sys/cdefs.h> 28219974Smav__FBSDID("$FreeBSD: stable/10/sys/geom/raid/tr_concat.c 326745 2017-12-10 13:45:41Z eugen $"); 29219974Smav 30219974Smav#include <sys/param.h> 31219974Smav#include <sys/bio.h> 32219974Smav#include <sys/endian.h> 33219974Smav#include <sys/kernel.h> 34219974Smav#include <sys/kobj.h> 35219974Smav#include <sys/lock.h> 36219974Smav#include <sys/malloc.h> 37219974Smav#include <sys/mutex.h> 38219974Smav#include <sys/systm.h> 39219974Smav#include <geom/geom.h> 40219974Smav#include "geom/raid/g_raid.h" 41219974Smav#include "g_raid_tr_if.h" 42219974Smav 43219974Smavstatic MALLOC_DEFINE(M_TR_CONCAT, "tr_concat_data", "GEOM_RAID CONCAT data"); 44219974Smav 45219974Smavstruct g_raid_tr_concat_object { 46219974Smav struct g_raid_tr_object trso_base; 47219974Smav int trso_starting; 48219974Smav int trso_stopped; 49219974Smav}; 50219974Smav 51219974Smavstatic g_raid_tr_taste_t g_raid_tr_taste_concat; 52219974Smavstatic g_raid_tr_event_t g_raid_tr_event_concat; 53219974Smavstatic g_raid_tr_start_t g_raid_tr_start_concat; 54219974Smavstatic g_raid_tr_stop_t g_raid_tr_stop_concat; 55219974Smavstatic g_raid_tr_iostart_t g_raid_tr_iostart_concat; 56219974Smavstatic g_raid_tr_iodone_t g_raid_tr_iodone_concat; 57219974Smavstatic g_raid_tr_kerneldump_t g_raid_tr_kerneldump_concat; 58219974Smavstatic g_raid_tr_free_t g_raid_tr_free_concat; 59219974Smav 60219974Smavstatic kobj_method_t g_raid_tr_concat_methods[] = { 61219974Smav KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_concat), 62219974Smav KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_concat), 63219974Smav KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_concat), 64219974Smav KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_concat), 65219974Smav KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_concat), 66219974Smav KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_concat), 67219974Smav KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_concat), 68219974Smav KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_concat), 69219974Smav { 0, 0 } 70219974Smav}; 71219974Smav 72219974Smavstatic struct g_raid_tr_class g_raid_tr_concat_class = { 73219974Smav "CONCAT", 74219974Smav g_raid_tr_concat_methods, 75219974Smav sizeof(struct g_raid_tr_concat_object), 76240465Smav .trc_enable = 1, 77260385Sscottl .trc_priority = 50, 78260385Sscottl .trc_accept_unmapped = 1 79219974Smav}; 80219974Smav 81219974Smavstatic int 82219974Smavg_raid_tr_taste_concat(struct g_raid_tr_object *tr, struct g_raid_volume *volume) 83219974Smav{ 84219974Smav struct g_raid_tr_concat_object *trs; 85219974Smav 86219974Smav trs = (struct g_raid_tr_concat_object *)tr; 87219974Smav if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_SINGLE && 88219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 89219974Smav !(tr->tro_volume->v_disks_count == 1 && 90219974Smav tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_UNKNOWN)) 91219974Smav return (G_RAID_TR_TASTE_FAIL); 92219974Smav trs->trso_starting = 1; 93219974Smav return (G_RAID_TR_TASTE_SUCCEED); 94219974Smav} 95219974Smav 96219974Smavstatic int 97219974Smavg_raid_tr_update_state_concat(struct g_raid_volume *vol) 98219974Smav{ 99219974Smav struct g_raid_tr_concat_object *trs; 100219974Smav struct g_raid_softc *sc; 101219974Smav off_t size; 102219974Smav u_int s; 103219974Smav int i, n, f; 104219974Smav 105219974Smav sc = vol->v_softc; 106219974Smav trs = (struct g_raid_tr_concat_object *)vol->v_tr; 107219974Smav if (trs->trso_stopped) 108219974Smav s = G_RAID_VOLUME_S_STOPPED; 109219974Smav else if (trs->trso_starting) 110219974Smav s = G_RAID_VOLUME_S_STARTING; 111219974Smav else { 112219974Smav n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); 113219974Smav f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); 114219974Smav if (n + f == vol->v_disks_count) { 115219974Smav if (f == 0) 116219974Smav s = G_RAID_VOLUME_S_OPTIMAL; 117219974Smav else 118219974Smav s = G_RAID_VOLUME_S_SUBOPTIMAL; 119219974Smav } else 120219974Smav s = G_RAID_VOLUME_S_BROKEN; 121219974Smav } 122219974Smav if (s != vol->v_state) { 123219974Smav 124219974Smav /* 125219974Smav * Some metadata modules may not know CONCAT volume 126219974Smav * mediasize until all disks connected. Recalculate. 127219974Smav */ 128245519Smav if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT && 129245519Smav G_RAID_VOLUME_S_ALIVE(s) && 130219974Smav !G_RAID_VOLUME_S_ALIVE(vol->v_state)) { 131219974Smav size = 0; 132219974Smav for (i = 0; i < vol->v_disks_count; i++) { 133219974Smav if (vol->v_subdisks[i].sd_state != 134219974Smav G_RAID_SUBDISK_S_NONE) 135219974Smav size += vol->v_subdisks[i].sd_size; 136219974Smav } 137219974Smav vol->v_mediasize = size; 138219974Smav } 139219974Smav 140219974Smav g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 141219974Smav G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, 142219974Smav G_RAID_EVENT_VOLUME); 143219974Smav g_raid_change_volume_state(vol, s); 144219974Smav if (!trs->trso_starting && !trs->trso_stopped) 145219974Smav g_raid_write_metadata(sc, vol, NULL, NULL); 146219974Smav } 147219974Smav return (0); 148219974Smav} 149219974Smav 150219974Smavstatic int 151219974Smavg_raid_tr_event_concat(struct g_raid_tr_object *tr, 152219974Smav struct g_raid_subdisk *sd, u_int event) 153219974Smav{ 154219974Smav struct g_raid_tr_concat_object *trs; 155219974Smav struct g_raid_softc *sc; 156219974Smav struct g_raid_volume *vol; 157219974Smav int state; 158219974Smav 159219974Smav trs = (struct g_raid_tr_concat_object *)tr; 160219974Smav vol = tr->tro_volume; 161219974Smav sc = vol->v_softc; 162219974Smav 163219974Smav state = sd->sd_state; 164219974Smav if (state != G_RAID_SUBDISK_S_NONE && 165219974Smav state != G_RAID_SUBDISK_S_FAILED && 166219974Smav state != G_RAID_SUBDISK_S_ACTIVE) { 167219974Smav G_RAID_DEBUG1(1, sc, 168219974Smav "Promote subdisk %s:%d from %s to ACTIVE.", 169219974Smav vol->v_name, sd->sd_pos, 170219974Smav g_raid_subdisk_state2str(sd->sd_state)); 171219974Smav g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); 172219974Smav } 173219974Smav if (state != sd->sd_state && 174219974Smav !trs->trso_starting && !trs->trso_stopped) 175219974Smav g_raid_write_metadata(sc, vol, sd, NULL); 176219974Smav g_raid_tr_update_state_concat(vol); 177219974Smav return (0); 178219974Smav} 179219974Smav 180219974Smavstatic int 181219974Smavg_raid_tr_start_concat(struct g_raid_tr_object *tr) 182219974Smav{ 183219974Smav struct g_raid_tr_concat_object *trs; 184219974Smav struct g_raid_volume *vol; 185219974Smav 186219974Smav trs = (struct g_raid_tr_concat_object *)tr; 187219974Smav vol = tr->tro_volume; 188219974Smav trs->trso_starting = 0; 189219974Smav g_raid_tr_update_state_concat(vol); 190219974Smav return (0); 191219974Smav} 192219974Smav 193219974Smavstatic int 194219974Smavg_raid_tr_stop_concat(struct g_raid_tr_object *tr) 195219974Smav{ 196219974Smav struct g_raid_tr_concat_object *trs; 197219974Smav struct g_raid_volume *vol; 198219974Smav 199219974Smav trs = (struct g_raid_tr_concat_object *)tr; 200219974Smav vol = tr->tro_volume; 201219974Smav trs->trso_starting = 0; 202219974Smav trs->trso_stopped = 1; 203219974Smav g_raid_tr_update_state_concat(vol); 204219974Smav return (0); 205219974Smav} 206219974Smav 207219974Smavstatic void 208219974Smavg_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) 209219974Smav{ 210219974Smav struct g_raid_volume *vol; 211219974Smav struct g_raid_subdisk *sd; 212219974Smav struct bio_queue_head queue; 213219974Smav struct bio *cbp; 214219974Smav char *addr; 215219974Smav off_t offset, length, remain; 216219974Smav u_int no; 217219974Smav 218219974Smav vol = tr->tro_volume; 219219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && 220219974Smav vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { 221219974Smav g_raid_iodone(bp, EIO); 222219974Smav return; 223219974Smav } 224219974Smav if (bp->bio_cmd == BIO_FLUSH) { 225219974Smav g_raid_tr_flush_common(tr, bp); 226219974Smav return; 227219974Smav } 228219974Smav 229219974Smav offset = bp->bio_offset; 230219974Smav remain = bp->bio_length; 231260385Sscottl if ((bp->bio_flags & BIO_UNMAPPED) != 0) 232260385Sscottl addr = NULL; 233260385Sscottl else 234260385Sscottl addr = bp->bio_data; 235219974Smav no = 0; 236219974Smav while (no < vol->v_disks_count && 237219974Smav offset >= vol->v_subdisks[no].sd_size) { 238219974Smav offset -= vol->v_subdisks[no].sd_size; 239219974Smav no++; 240219974Smav } 241219974Smav KASSERT(no < vol->v_disks_count, 242219974Smav ("Request starts after volume end (%ju)", bp->bio_offset)); 243219974Smav bioq_init(&queue); 244219974Smav do { 245219974Smav sd = &vol->v_subdisks[no]; 246219974Smav length = MIN(sd->sd_size - offset, remain); 247219974Smav cbp = g_clone_bio(bp); 248219974Smav if (cbp == NULL) 249219974Smav goto failure; 250219974Smav cbp->bio_offset = offset; 251219974Smav cbp->bio_length = length; 252260385Sscottl if ((bp->bio_flags & BIO_UNMAPPED) != 0 && 253260385Sscottl bp->bio_cmd != BIO_DELETE) { 254260385Sscottl cbp->bio_ma_offset += (uintptr_t)addr; 255260385Sscottl cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 256260385Sscottl cbp->bio_ma_offset %= PAGE_SIZE; 257260385Sscottl cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 258260385Sscottl cbp->bio_length) / PAGE_SIZE; 259260385Sscottl } else 260260385Sscottl cbp->bio_data = addr; 261219974Smav cbp->bio_caller1 = sd; 262219974Smav bioq_insert_tail(&queue, cbp); 263219974Smav remain -= length; 264242323Smav if (bp->bio_cmd != BIO_DELETE) 265242323Smav addr += length; 266219974Smav offset = 0; 267219974Smav no++; 268219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 269219974Smav ("Request ends after volume end (%ju, %ju)", 270219974Smav bp->bio_offset, bp->bio_length)); 271219974Smav } while (remain > 0); 272260385Sscottl while ((cbp = bioq_takefirst(&queue)) != NULL) { 273219974Smav sd = cbp->bio_caller1; 274219974Smav cbp->bio_caller1 = NULL; 275219974Smav g_raid_subdisk_iostart(sd, cbp); 276219974Smav } 277219974Smav return; 278219974Smavfailure: 279260385Sscottl while ((cbp = bioq_takefirst(&queue)) != NULL) 280219974Smav g_destroy_bio(cbp); 281219974Smav if (bp->bio_error == 0) 282219974Smav bp->bio_error = ENOMEM; 283219974Smav g_raid_iodone(bp, bp->bio_error); 284219974Smav} 285219974Smav 286219974Smavstatic int 287219974Smavg_raid_tr_kerneldump_concat(struct g_raid_tr_object *tr, 288219974Smav void *virtual, vm_offset_t physical, off_t boffset, size_t blength) 289219974Smav{ 290219974Smav struct g_raid_volume *vol; 291219974Smav struct g_raid_subdisk *sd; 292219974Smav char *addr; 293219974Smav off_t offset, length, remain; 294219974Smav int error, no; 295219974Smav 296219974Smav vol = tr->tro_volume; 297219974Smav if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 298219974Smav return (ENXIO); 299219974Smav 300219974Smav offset = boffset; 301219974Smav remain = blength; 302219974Smav addr = virtual; 303219974Smav no = 0; 304219974Smav while (no < vol->v_disks_count && 305219974Smav offset >= vol->v_subdisks[no].sd_size) { 306219974Smav offset -= vol->v_subdisks[no].sd_size; 307219974Smav no++; 308219974Smav } 309219974Smav KASSERT(no < vol->v_disks_count, 310219974Smav ("Request starts after volume end (%ju)", boffset)); 311219974Smav do { 312219974Smav sd = &vol->v_subdisks[no]; 313219974Smav length = MIN(sd->sd_size - offset, remain); 314219974Smav error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], 315219974Smav addr, 0, offset, length); 316219974Smav if (error != 0) 317219974Smav return (error); 318219974Smav remain -= length; 319219974Smav addr += length; 320219974Smav offset = 0; 321219974Smav no++; 322219974Smav KASSERT(no < vol->v_disks_count || remain == 0, 323219974Smav ("Request ends after volume end (%ju, %zu)", 324219974Smav boffset, blength)); 325219974Smav } while (remain > 0); 326219974Smav return (0); 327219974Smav} 328219974Smav 329219974Smavstatic void 330219974Smavg_raid_tr_iodone_concat(struct g_raid_tr_object *tr, 331219974Smav struct g_raid_subdisk *sd,struct bio *bp) 332219974Smav{ 333219974Smav struct bio *pbp; 334219974Smav 335219974Smav pbp = bp->bio_parent; 336219974Smav if (pbp->bio_error == 0) 337219974Smav pbp->bio_error = bp->bio_error; 338219974Smav g_destroy_bio(bp); 339219974Smav pbp->bio_inbed++; 340219974Smav if (pbp->bio_children == pbp->bio_inbed) { 341219974Smav pbp->bio_completed = pbp->bio_length; 342326745Seugen g_raid_iodone(pbp, pbp->bio_error); 343219974Smav } 344219974Smav} 345219974Smav 346219974Smavstatic int 347219974Smavg_raid_tr_free_concat(struct g_raid_tr_object *tr) 348219974Smav{ 349219974Smav 350219974Smav return (0); 351219974Smav} 352219974Smav 353240465SmavG_RAID_TR_DECLARE(concat, "CONCAT"); 354