ctl_backend_block.c revision 275892
1/*- 2 * Copyright (c) 2003 Silicon Graphics International Corp. 3 * Copyright (c) 2009-2011 Spectra Logic Corporation 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by Edward Tomasz Napierala 8 * under sponsorship from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions, and the following disclaimer, 15 * without modification. 16 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 17 * substantially similar to the "NO WARRANTY" disclaimer below 18 * ("Disclaimer") and any redistribution must be conditioned upon 19 * including a substantially similar Disclaimer requirement for further 20 * binary redistribution. 21 * 22 * NO WARRANTY 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGES. 34 * 35 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 36 */ 37/* 38 * CAM Target Layer driver backend for block devices. 39 * 40 * Author: Ken Merry <ken@FreeBSD.org> 41 */ 42#include <sys/cdefs.h> 43__FBSDID("$FreeBSD: stable/10/sys/cam/ctl/ctl_backend_block.c 275892 2014-12-18 08:38:07Z mav $"); 44 45#include <opt_kdtrace.h> 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/types.h> 51#include <sys/kthread.h> 52#include <sys/bio.h> 53#include <sys/fcntl.h> 54#include <sys/limits.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#include <sys/condvar.h> 58#include <sys/malloc.h> 59#include <sys/conf.h> 60#include <sys/ioccom.h> 61#include <sys/queue.h> 62#include <sys/sbuf.h> 63#include <sys/endian.h> 64#include <sys/uio.h> 65#include <sys/buf.h> 66#include <sys/taskqueue.h> 67#include <sys/vnode.h> 68#include <sys/namei.h> 69#include <sys/mount.h> 70#include <sys/disk.h> 71#include <sys/fcntl.h> 72#include <sys/filedesc.h> 73#include <sys/filio.h> 74#include <sys/proc.h> 75#include <sys/pcpu.h> 76#include <sys/module.h> 77#include <sys/sdt.h> 78#include <sys/devicestat.h> 79#include <sys/sysctl.h> 80 81#include <geom/geom.h> 82 83#include <cam/cam.h> 84#include <cam/scsi/scsi_all.h> 85#include <cam/scsi/scsi_da.h> 86#include <cam/ctl/ctl_io.h> 87#include <cam/ctl/ctl.h> 88#include <cam/ctl/ctl_backend.h> 89#include <cam/ctl/ctl_frontend_internal.h> 90#include <cam/ctl/ctl_ioctl.h> 91#include <cam/ctl/ctl_scsi_all.h> 92#include <cam/ctl/ctl_error.h> 93 94/* 95 * The idea here is that we'll allocate enough S/G space to hold a 1MB 96 * I/O. If we get an I/O larger than that, we'll split it. 97 */ 98#define CTLBLK_HALF_IO_SIZE (512 * 1024) 99#define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) 100#define CTLBLK_MAX_SEG MAXPHYS 101#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) 102#define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) 103 104#ifdef CTLBLK_DEBUG 105#define DPRINTF(fmt, args...) \ 106 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 107#else 108#define DPRINTF(fmt, args...) do {} while(0) 109#endif 110 111#define PRIV(io) \ 112 ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) 113#define ARGS(io) \ 114 ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) 115 116SDT_PROVIDER_DEFINE(cbb); 117 118typedef enum { 119 CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, 120 CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02, 121 CTL_BE_BLOCK_LUN_WAITING = 0x04, 122 CTL_BE_BLOCK_LUN_MULTI_THREAD = 0x08 123} ctl_be_block_lun_flags; 124 125typedef enum { 126 CTL_BE_BLOCK_NONE, 127 CTL_BE_BLOCK_DEV, 128 CTL_BE_BLOCK_FILE 129} ctl_be_block_type; 130 131struct ctl_be_block_devdata { 132 struct cdev *cdev; 133 struct cdevsw *csw; 134 int dev_ref; 135}; 136 137struct ctl_be_block_filedata { 138 struct ucred *cred; 139}; 140 141union ctl_be_block_bedata { 142 struct ctl_be_block_devdata dev; 143 struct ctl_be_block_filedata file; 144}; 145 146struct ctl_be_block_io; 147struct ctl_be_block_lun; 148 149typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, 150 struct ctl_be_block_io *beio); 151typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, 152 const char *attrname); 153 154/* 155 * Backend LUN structure. There is a 1:1 mapping between a block device 156 * and a backend block LUN, and between a backend block LUN and a CTL LUN. 157 */ 158struct ctl_be_block_lun { 159 struct ctl_lun_create_params params; 160 struct ctl_block_disk *disk; 161 char lunname[32]; 162 char *dev_path; 163 ctl_be_block_type dev_type; 164 struct vnode *vn; 165 union ctl_be_block_bedata backend; 166 cbb_dispatch_t dispatch; 167 cbb_dispatch_t lun_flush; 168 cbb_dispatch_t unmap; 169 cbb_dispatch_t get_lba_status; 170 cbb_getattr_t getattr; 171 uma_zone_t lun_zone; 172 uint64_t size_blocks; 173 uint64_t size_bytes; 174 uint32_t blocksize; 175 int blocksize_shift; 176 uint16_t pblockexp; 177 uint16_t pblockoff; 178 struct ctl_be_block_softc *softc; 179 struct devstat *disk_stats; 180 ctl_be_block_lun_flags flags; 181 STAILQ_ENTRY(ctl_be_block_lun) links; 182 struct ctl_be_lun ctl_be_lun; 183 struct taskqueue *io_taskqueue; 184 struct task io_task; 185 int num_threads; 186 STAILQ_HEAD(, ctl_io_hdr) input_queue; 187 STAILQ_HEAD(, ctl_io_hdr) config_read_queue; 188 STAILQ_HEAD(, ctl_io_hdr) config_write_queue; 189 STAILQ_HEAD(, ctl_io_hdr) datamove_queue; 190 struct mtx_padalign io_lock; 191 struct mtx_padalign queue_lock; 192}; 193 194/* 195 * Overall softc structure for the block backend module. 196 */ 197struct ctl_be_block_softc { 198 struct mtx lock; 199 int num_disks; 200 STAILQ_HEAD(, ctl_block_disk) disk_list; 201 int num_luns; 202 STAILQ_HEAD(, ctl_be_block_lun) lun_list; 203}; 204 205static struct ctl_be_block_softc backend_block_softc; 206 207/* 208 * Per-I/O information. 209 */ 210struct ctl_be_block_io { 211 union ctl_io *io; 212 struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; 213 struct iovec xiovecs[CTLBLK_MAX_SEGS]; 214 int bio_cmd; 215 int num_segs; 216 int num_bios_sent; 217 int num_bios_done; 218 int send_complete; 219 int num_errors; 220 struct bintime ds_t0; 221 devstat_tag_type ds_tag_type; 222 devstat_trans_flags ds_trans_type; 223 uint64_t io_len; 224 uint64_t io_offset; 225 struct ctl_be_block_softc *softc; 226 struct ctl_be_block_lun *lun; 227 void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ 228}; 229 230static int cbb_num_threads = 14; 231TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads); 232SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0, 233 "CAM Target Layer Block Backend"); 234SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW, 235 &cbb_num_threads, 0, "Number of threads per backing file"); 236 237static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); 238static void ctl_free_beio(struct ctl_be_block_io *beio); 239static void ctl_complete_beio(struct ctl_be_block_io *beio); 240static int ctl_be_block_move_done(union ctl_io *io); 241static void ctl_be_block_biodone(struct bio *bio); 242static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 243 struct ctl_be_block_io *beio); 244static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 245 struct ctl_be_block_io *beio); 246static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 247 struct ctl_be_block_io *beio); 248static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 249 struct ctl_be_block_io *beio); 250static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 251 struct ctl_be_block_io *beio); 252static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 253 struct ctl_be_block_io *beio); 254static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 255 const char *attrname); 256static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 257 union ctl_io *io); 258static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 259 union ctl_io *io); 260static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 261 union ctl_io *io); 262static void ctl_be_block_worker(void *context, int pending); 263static int ctl_be_block_submit(union ctl_io *io); 264static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 265 int flag, struct thread *td); 266static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 267 struct ctl_lun_req *req); 268static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 269 struct ctl_lun_req *req); 270static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 271static int ctl_be_block_open(struct ctl_be_block_softc *softc, 272 struct ctl_be_block_lun *be_lun, 273 struct ctl_lun_req *req); 274static int ctl_be_block_create(struct ctl_be_block_softc *softc, 275 struct ctl_lun_req *req); 276static int ctl_be_block_rm(struct ctl_be_block_softc *softc, 277 struct ctl_lun_req *req); 278static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 279 struct ctl_lun_req *req); 280static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 281 struct ctl_lun_req *req); 282static int ctl_be_block_modify(struct ctl_be_block_softc *softc, 283 struct ctl_lun_req *req); 284static void ctl_be_block_lun_shutdown(void *be_lun); 285static void ctl_be_block_lun_config_status(void *be_lun, 286 ctl_lun_config_status status); 287static int ctl_be_block_config_write(union ctl_io *io); 288static int ctl_be_block_config_read(union ctl_io *io); 289static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb); 290static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname); 291int ctl_be_block_init(void); 292 293static struct ctl_backend_driver ctl_be_block_driver = 294{ 295 .name = "block", 296 .flags = CTL_BE_FLAG_HAS_CONFIG, 297 .init = ctl_be_block_init, 298 .data_submit = ctl_be_block_submit, 299 .data_move_done = ctl_be_block_move_done, 300 .config_read = ctl_be_block_config_read, 301 .config_write = ctl_be_block_config_write, 302 .ioctl = ctl_be_block_ioctl, 303 .lun_info = ctl_be_block_lun_info, 304 .lun_attr = ctl_be_block_lun_attr 305}; 306 307MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend"); 308CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 309 310static uma_zone_t beio_zone; 311 312static struct ctl_be_block_io * 313ctl_alloc_beio(struct ctl_be_block_softc *softc) 314{ 315 struct ctl_be_block_io *beio; 316 317 beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO); 318 beio->softc = softc; 319 return (beio); 320} 321 322static void 323ctl_free_beio(struct ctl_be_block_io *beio) 324{ 325 int duplicate_free; 326 int i; 327 328 duplicate_free = 0; 329 330 for (i = 0; i < beio->num_segs; i++) { 331 if (beio->sg_segs[i].addr == NULL) 332 duplicate_free++; 333 334 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); 335 beio->sg_segs[i].addr = NULL; 336 337 /* For compare we had two equal S/G lists. */ 338 if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) { 339 uma_zfree(beio->lun->lun_zone, 340 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); 341 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL; 342 } 343 } 344 345 if (duplicate_free > 0) { 346 printf("%s: %d duplicate frees out of %d segments\n", __func__, 347 duplicate_free, beio->num_segs); 348 } 349 350 uma_zfree(beio_zone, beio); 351} 352 353static void 354ctl_complete_beio(struct ctl_be_block_io *beio) 355{ 356 union ctl_io *io = beio->io; 357 358 if (beio->beio_cont != NULL) { 359 beio->beio_cont(beio); 360 } else { 361 ctl_free_beio(beio); 362 ctl_data_submit_done(io); 363 } 364} 365 366static int 367ctl_be_block_move_done(union ctl_io *io) 368{ 369 struct ctl_be_block_io *beio; 370 struct ctl_be_block_lun *be_lun; 371 struct ctl_lba_len_flags *lbalen; 372#ifdef CTL_TIME_IO 373 struct bintime cur_bt; 374#endif 375 int i; 376 377 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 378 be_lun = beio->lun; 379 380 DPRINTF("entered\n"); 381 382#ifdef CTL_TIME_IO 383 getbintime(&cur_bt); 384 bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); 385 bintime_add(&io->io_hdr.dma_bt, &cur_bt); 386 io->io_hdr.num_dmas++; 387#endif 388 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 389 390 /* 391 * We set status at this point for read commands, and write 392 * commands with errors. 393 */ 394 if (io->io_hdr.flags & CTL_FLAG_ABORT) { 395 ; 396 } else if ((io->io_hdr.port_status == 0) && 397 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 398 lbalen = ARGS(beio->io); 399 if (lbalen->flags & CTL_LLF_READ) { 400 ctl_set_success(&io->scsiio); 401 } else if (lbalen->flags & CTL_LLF_COMPARE) { 402 /* We have two data blocks ready for comparison. */ 403 for (i = 0; i < beio->num_segs; i++) { 404 if (memcmp(beio->sg_segs[i].addr, 405 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 406 beio->sg_segs[i].len) != 0) 407 break; 408 } 409 if (i < beio->num_segs) 410 ctl_set_sense(&io->scsiio, 411 /*current_error*/ 1, 412 /*sense_key*/ SSD_KEY_MISCOMPARE, 413 /*asc*/ 0x1D, 414 /*ascq*/ 0x00, 415 SSD_ELEM_NONE); 416 else 417 ctl_set_success(&io->scsiio); 418 } 419 } else if ((io->io_hdr.port_status != 0) && 420 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || 421 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { 422 /* 423 * For hardware error sense keys, the sense key 424 * specific value is defined to be a retry count, 425 * but we use it to pass back an internal FETD 426 * error code. XXX KDM Hopefully the FETD is only 427 * using 16 bits for an error code, since that's 428 * all the space we have in the sks field. 429 */ 430 ctl_set_internal_failure(&io->scsiio, 431 /*sks_valid*/ 1, 432 /*retry_count*/ 433 io->io_hdr.port_status); 434 } 435 436 /* 437 * If this is a read, or a write with errors, it is done. 438 */ 439 if ((beio->bio_cmd == BIO_READ) 440 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 441 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 442 ctl_complete_beio(beio); 443 return (0); 444 } 445 446 /* 447 * At this point, we have a write and the DMA completed 448 * successfully. We now have to queue it to the task queue to 449 * execute the backend I/O. That is because we do blocking 450 * memory allocations, and in the file backing case, blocking I/O. 451 * This move done routine is generally called in the SIM's 452 * interrupt context, and therefore we cannot block. 453 */ 454 mtx_lock(&be_lun->queue_lock); 455 /* 456 * XXX KDM make sure that links is okay to use at this point. 457 * Otherwise, we either need to add another field to ctl_io_hdr, 458 * or deal with resource allocation here. 459 */ 460 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 461 mtx_unlock(&be_lun->queue_lock); 462 463 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 464 465 return (0); 466} 467 468static void 469ctl_be_block_biodone(struct bio *bio) 470{ 471 struct ctl_be_block_io *beio; 472 struct ctl_be_block_lun *be_lun; 473 union ctl_io *io; 474 int error; 475 476 beio = bio->bio_caller1; 477 be_lun = beio->lun; 478 io = beio->io; 479 480 DPRINTF("entered\n"); 481 482 error = bio->bio_error; 483 mtx_lock(&be_lun->io_lock); 484 if (error != 0) 485 beio->num_errors++; 486 487 beio->num_bios_done++; 488 489 /* 490 * XXX KDM will this cause WITNESS to complain? Holding a lock 491 * during the free might cause it to complain. 492 */ 493 g_destroy_bio(bio); 494 495 /* 496 * If the send complete bit isn't set, or we aren't the last I/O to 497 * complete, then we're done. 498 */ 499 if ((beio->send_complete == 0) 500 || (beio->num_bios_done < beio->num_bios_sent)) { 501 mtx_unlock(&be_lun->io_lock); 502 return; 503 } 504 505 /* 506 * At this point, we've verified that we are the last I/O to 507 * complete, so it's safe to drop the lock. 508 */ 509 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 510 beio->ds_tag_type, beio->ds_trans_type, 511 /*now*/ NULL, /*then*/&beio->ds_t0); 512 mtx_unlock(&be_lun->io_lock); 513 514 /* 515 * If there are any errors from the backing device, we fail the 516 * entire I/O with a medium error. 517 */ 518 if (beio->num_errors > 0) { 519 if (error == EOPNOTSUPP) { 520 ctl_set_invalid_opcode(&io->scsiio); 521 } else if (error == ENOSPC) { 522 ctl_set_space_alloc_fail(&io->scsiio); 523 } else if (beio->bio_cmd == BIO_FLUSH) { 524 /* XXX KDM is there is a better error here? */ 525 ctl_set_internal_failure(&io->scsiio, 526 /*sks_valid*/ 1, 527 /*retry_count*/ 0xbad2); 528 } else 529 ctl_set_medium_error(&io->scsiio); 530 ctl_complete_beio(beio); 531 return; 532 } 533 534 /* 535 * If this is a write, a flush, a delete or verify, we're all done. 536 * If this is a read, we can now send the data to the user. 537 */ 538 if ((beio->bio_cmd == BIO_WRITE) 539 || (beio->bio_cmd == BIO_FLUSH) 540 || (beio->bio_cmd == BIO_DELETE) 541 || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 542 ctl_set_success(&io->scsiio); 543 ctl_complete_beio(beio); 544 } else { 545 if ((ARGS(io)->flags & CTL_LLF_READ) && 546 beio->beio_cont == NULL) 547 ctl_set_success(&io->scsiio); 548#ifdef CTL_TIME_IO 549 getbintime(&io->io_hdr.dma_start_bt); 550#endif 551 ctl_datamove(io); 552 } 553} 554 555static void 556ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 557 struct ctl_be_block_io *beio) 558{ 559 union ctl_io *io = beio->io; 560 struct mount *mountpoint; 561 int error, lock_flags; 562 563 DPRINTF("entered\n"); 564 565 binuptime(&beio->ds_t0); 566 mtx_lock(&be_lun->io_lock); 567 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 568 mtx_unlock(&be_lun->io_lock); 569 570 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 571 572 if (MNT_SHARED_WRITES(mountpoint) 573 || ((mountpoint == NULL) 574 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 575 lock_flags = LK_SHARED; 576 else 577 lock_flags = LK_EXCLUSIVE; 578 579 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 580 581 error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread); 582 VOP_UNLOCK(be_lun->vn, 0); 583 584 vn_finished_write(mountpoint); 585 586 mtx_lock(&be_lun->io_lock); 587 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 588 beio->ds_tag_type, beio->ds_trans_type, 589 /*now*/ NULL, /*then*/&beio->ds_t0); 590 mtx_unlock(&be_lun->io_lock); 591 592 if (error == 0) 593 ctl_set_success(&io->scsiio); 594 else { 595 /* XXX KDM is there is a better error here? */ 596 ctl_set_internal_failure(&io->scsiio, 597 /*sks_valid*/ 1, 598 /*retry_count*/ 0xbad1); 599 } 600 601 ctl_complete_beio(beio); 602} 603 604SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); 605SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); 606SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t"); 607SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); 608 609static void 610ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 611 struct ctl_be_block_io *beio) 612{ 613 struct ctl_be_block_filedata *file_data; 614 union ctl_io *io; 615 struct uio xuio; 616 struct iovec *xiovec; 617 int flags; 618 int error, i; 619 620 DPRINTF("entered\n"); 621 622 file_data = &be_lun->backend.file; 623 io = beio->io; 624 flags = 0; 625 if (ARGS(io)->flags & CTL_LLF_DPO) 626 flags |= IO_DIRECT; 627 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 628 flags |= IO_SYNC; 629 630 bzero(&xuio, sizeof(xuio)); 631 if (beio->bio_cmd == BIO_READ) { 632 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 633 xuio.uio_rw = UIO_READ; 634 } else { 635 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 636 xuio.uio_rw = UIO_WRITE; 637 } 638 xuio.uio_offset = beio->io_offset; 639 xuio.uio_resid = beio->io_len; 640 xuio.uio_segflg = UIO_SYSSPACE; 641 xuio.uio_iov = beio->xiovecs; 642 xuio.uio_iovcnt = beio->num_segs; 643 xuio.uio_td = curthread; 644 645 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 646 xiovec->iov_base = beio->sg_segs[i].addr; 647 xiovec->iov_len = beio->sg_segs[i].len; 648 } 649 650 binuptime(&beio->ds_t0); 651 mtx_lock(&be_lun->io_lock); 652 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 653 mtx_unlock(&be_lun->io_lock); 654 655 if (beio->bio_cmd == BIO_READ) { 656 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 657 658 /* 659 * UFS pays attention to IO_DIRECT for reads. If the 660 * DIRECTIO option is configured into the kernel, it calls 661 * ffs_rawread(). But that only works for single-segment 662 * uios with user space addresses. In our case, with a 663 * kernel uio, it still reads into the buffer cache, but it 664 * will just try to release the buffer from the cache later 665 * on in ffs_read(). 666 * 667 * ZFS does not pay attention to IO_DIRECT for reads. 668 * 669 * UFS does not pay attention to IO_SYNC for reads. 670 * 671 * ZFS pays attention to IO_SYNC (which translates into the 672 * Solaris define FRSYNC for zfs_read()) for reads. It 673 * attempts to sync the file before reading. 674 * 675 * So, to attempt to provide some barrier semantics in the 676 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. 677 */ 678 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 679 680 VOP_UNLOCK(be_lun->vn, 0); 681 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 682 } else { 683 struct mount *mountpoint; 684 int lock_flags; 685 686 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 687 688 if (MNT_SHARED_WRITES(mountpoint) 689 || ((mountpoint == NULL) 690 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 691 lock_flags = LK_SHARED; 692 else 693 lock_flags = LK_EXCLUSIVE; 694 695 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 696 697 /* 698 * UFS pays attention to IO_DIRECT for writes. The write 699 * is done asynchronously. (Normally the write would just 700 * get put into cache. 701 * 702 * UFS pays attention to IO_SYNC for writes. It will 703 * attempt to write the buffer out synchronously if that 704 * flag is set. 705 * 706 * ZFS does not pay attention to IO_DIRECT for writes. 707 * 708 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 709 * for writes. It will flush the transaction from the 710 * cache before returning. 711 * 712 * So if we've got the BIO_ORDERED flag set, we want 713 * IO_SYNC in either the UFS or ZFS case. 714 */ 715 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 716 VOP_UNLOCK(be_lun->vn, 0); 717 718 vn_finished_write(mountpoint); 719 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 720 } 721 722 mtx_lock(&be_lun->io_lock); 723 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 724 beio->ds_tag_type, beio->ds_trans_type, 725 /*now*/ NULL, /*then*/&beio->ds_t0); 726 mtx_unlock(&be_lun->io_lock); 727 728 /* 729 * If we got an error, set the sense data to "MEDIUM ERROR" and 730 * return the I/O to the user. 731 */ 732 if (error != 0) { 733 char path_str[32]; 734 735 ctl_scsi_path_string(io, path_str, sizeof(path_str)); 736 printf("%s%s command returned errno %d\n", path_str, 737 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error); 738 if (error == ENOSPC) { 739 ctl_set_space_alloc_fail(&io->scsiio); 740 } else 741 ctl_set_medium_error(&io->scsiio); 742 ctl_complete_beio(beio); 743 return; 744 } 745 746 /* 747 * If this is a write or a verify, we're all done. 748 * If this is a read, we can now send the data to the user. 749 */ 750 if ((beio->bio_cmd == BIO_WRITE) || 751 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 752 ctl_set_success(&io->scsiio); 753 ctl_complete_beio(beio); 754 } else { 755 if ((ARGS(io)->flags & CTL_LLF_READ) && 756 beio->beio_cont == NULL) 757 ctl_set_success(&io->scsiio); 758#ifdef CTL_TIME_IO 759 getbintime(&io->io_hdr.dma_start_bt); 760#endif 761 ctl_datamove(io); 762 } 763} 764 765static void 766ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 767 struct ctl_be_block_io *beio) 768{ 769 union ctl_io *io = beio->io; 770 struct ctl_lba_len_flags *lbalen = ARGS(io); 771 struct scsi_get_lba_status_data *data; 772 off_t roff, off; 773 int error, status; 774 775 DPRINTF("entered\n"); 776 777 off = roff = ((off_t)lbalen->lba) << be_lun->blocksize_shift; 778 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 779 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 780 0, curthread->td_ucred, curthread); 781 if (error == 0 && off > roff) 782 status = 0; /* mapped up to off */ 783 else { 784 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 785 0, curthread->td_ucred, curthread); 786 if (error == 0 && off > roff) 787 status = 1; /* deallocated up to off */ 788 else { 789 status = 0; /* unknown up to the end */ 790 off = be_lun->size_bytes; 791 } 792 } 793 VOP_UNLOCK(be_lun->vn, 0); 794 795 off >>= be_lun->blocksize_shift; 796 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 797 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 798 scsi_ulto4b(MIN(UINT32_MAX, off - lbalen->lba), 799 data->descr[0].length); 800 data->descr[0].status = status; 801 802 ctl_complete_beio(beio); 803} 804 805static void 806ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 807 struct ctl_be_block_io *beio) 808{ 809 struct ctl_be_block_devdata *dev_data; 810 union ctl_io *io; 811 struct uio xuio; 812 struct iovec *xiovec; 813 int flags; 814 int error, i; 815 816 DPRINTF("entered\n"); 817 818 dev_data = &be_lun->backend.dev; 819 io = beio->io; 820 flags = 0; 821 if (ARGS(io)->flags & CTL_LLF_DPO) 822 flags |= IO_DIRECT; 823 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 824 flags |= IO_SYNC; 825 826 bzero(&xuio, sizeof(xuio)); 827 if (beio->bio_cmd == BIO_READ) { 828 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 829 xuio.uio_rw = UIO_READ; 830 } else { 831 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 832 xuio.uio_rw = UIO_WRITE; 833 } 834 xuio.uio_offset = beio->io_offset; 835 xuio.uio_resid = beio->io_len; 836 xuio.uio_segflg = UIO_SYSSPACE; 837 xuio.uio_iov = beio->xiovecs; 838 xuio.uio_iovcnt = beio->num_segs; 839 xuio.uio_td = curthread; 840 841 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 842 xiovec->iov_base = beio->sg_segs[i].addr; 843 xiovec->iov_len = beio->sg_segs[i].len; 844 } 845 846 binuptime(&beio->ds_t0); 847 mtx_lock(&be_lun->io_lock); 848 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 849 mtx_unlock(&be_lun->io_lock); 850 851 if (beio->bio_cmd == BIO_READ) { 852 error = (*dev_data->csw->d_read)(dev_data->cdev, &xuio, flags); 853 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 854 } else { 855 error = (*dev_data->csw->d_write)(dev_data->cdev, &xuio, flags); 856 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 857 } 858 859 mtx_lock(&be_lun->io_lock); 860 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 861 beio->ds_tag_type, beio->ds_trans_type, 862 /*now*/ NULL, /*then*/&beio->ds_t0); 863 mtx_unlock(&be_lun->io_lock); 864 865 /* 866 * If we got an error, set the sense data to "MEDIUM ERROR" and 867 * return the I/O to the user. 868 */ 869 if (error != 0) { 870 if (error == ENOSPC) { 871 ctl_set_space_alloc_fail(&io->scsiio); 872 } else 873 ctl_set_medium_error(&io->scsiio); 874 ctl_complete_beio(beio); 875 return; 876 } 877 878 /* 879 * If this is a write or a verify, we're all done. 880 * If this is a read, we can now send the data to the user. 881 */ 882 if ((beio->bio_cmd == BIO_WRITE) || 883 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 884 ctl_set_success(&io->scsiio); 885 ctl_complete_beio(beio); 886 } else { 887 if ((ARGS(io)->flags & CTL_LLF_READ) && 888 beio->beio_cont == NULL) 889 ctl_set_success(&io->scsiio); 890#ifdef CTL_TIME_IO 891 getbintime(&io->io_hdr.dma_start_bt); 892#endif 893 ctl_datamove(io); 894 } 895} 896 897static void 898ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 899 struct ctl_be_block_io *beio) 900{ 901 struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev; 902 union ctl_io *io = beio->io; 903 struct ctl_lba_len_flags *lbalen = ARGS(io); 904 struct scsi_get_lba_status_data *data; 905 off_t roff, off; 906 int error, status; 907 908 DPRINTF("entered\n"); 909 910 off = roff = ((off_t)lbalen->lba) << be_lun->blocksize_shift; 911 error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKHOLE, 912 (caddr_t)&off, FREAD, curthread); 913 if (error == 0 && off > roff) 914 status = 0; /* mapped up to off */ 915 else { 916 error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKDATA, 917 (caddr_t)&off, FREAD, curthread); 918 if (error == 0 && off > roff) 919 status = 1; /* deallocated up to off */ 920 else { 921 status = 0; /* unknown up to the end */ 922 off = be_lun->size_bytes; 923 } 924 } 925 926 off >>= be_lun->blocksize_shift; 927 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 928 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 929 scsi_ulto4b(MIN(UINT32_MAX, off - lbalen->lba), 930 data->descr[0].length); 931 data->descr[0].status = status; 932 933 ctl_complete_beio(beio); 934} 935 936static void 937ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 938 struct ctl_be_block_io *beio) 939{ 940 struct bio *bio; 941 union ctl_io *io; 942 struct ctl_be_block_devdata *dev_data; 943 944 dev_data = &be_lun->backend.dev; 945 io = beio->io; 946 947 DPRINTF("entered\n"); 948 949 /* This can't fail, it's a blocking allocation. */ 950 bio = g_alloc_bio(); 951 952 bio->bio_cmd = BIO_FLUSH; 953 bio->bio_flags |= BIO_ORDERED; 954 bio->bio_dev = dev_data->cdev; 955 bio->bio_offset = 0; 956 bio->bio_data = 0; 957 bio->bio_done = ctl_be_block_biodone; 958 bio->bio_caller1 = beio; 959 bio->bio_pblkno = 0; 960 961 /* 962 * We don't need to acquire the LUN lock here, because we are only 963 * sending one bio, and so there is no other context to synchronize 964 * with. 965 */ 966 beio->num_bios_sent = 1; 967 beio->send_complete = 1; 968 969 binuptime(&beio->ds_t0); 970 mtx_lock(&be_lun->io_lock); 971 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 972 mtx_unlock(&be_lun->io_lock); 973 974 (*dev_data->csw->d_strategy)(bio); 975} 976 977static void 978ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 979 struct ctl_be_block_io *beio, 980 uint64_t off, uint64_t len, int last) 981{ 982 struct bio *bio; 983 struct ctl_be_block_devdata *dev_data; 984 uint64_t maxlen; 985 986 dev_data = &be_lun->backend.dev; 987 maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize); 988 while (len > 0) { 989 bio = g_alloc_bio(); 990 bio->bio_cmd = BIO_DELETE; 991 bio->bio_dev = dev_data->cdev; 992 bio->bio_offset = off; 993 bio->bio_length = MIN(len, maxlen); 994 bio->bio_data = 0; 995 bio->bio_done = ctl_be_block_biodone; 996 bio->bio_caller1 = beio; 997 bio->bio_pblkno = off / be_lun->blocksize; 998 999 off += bio->bio_length; 1000 len -= bio->bio_length; 1001 1002 mtx_lock(&be_lun->io_lock); 1003 beio->num_bios_sent++; 1004 if (last && len == 0) 1005 beio->send_complete = 1; 1006 mtx_unlock(&be_lun->io_lock); 1007 1008 (*dev_data->csw->d_strategy)(bio); 1009 } 1010} 1011 1012static void 1013ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1014 struct ctl_be_block_io *beio) 1015{ 1016 union ctl_io *io; 1017 struct ctl_be_block_devdata *dev_data; 1018 struct ctl_ptr_len_flags *ptrlen; 1019 struct scsi_unmap_desc *buf, *end; 1020 uint64_t len; 1021 1022 dev_data = &be_lun->backend.dev; 1023 io = beio->io; 1024 1025 DPRINTF("entered\n"); 1026 1027 binuptime(&beio->ds_t0); 1028 mtx_lock(&be_lun->io_lock); 1029 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1030 mtx_unlock(&be_lun->io_lock); 1031 1032 if (beio->io_offset == -1) { 1033 beio->io_len = 0; 1034 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1035 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1036 end = buf + ptrlen->len / sizeof(*buf); 1037 for (; buf < end; buf++) { 1038 len = (uint64_t)scsi_4btoul(buf->length) * 1039 be_lun->blocksize; 1040 beio->io_len += len; 1041 ctl_be_block_unmap_dev_range(be_lun, beio, 1042 scsi_8btou64(buf->lba) * be_lun->blocksize, len, 1043 (end - buf < 2) ? TRUE : FALSE); 1044 } 1045 } else 1046 ctl_be_block_unmap_dev_range(be_lun, beio, 1047 beio->io_offset, beio->io_len, TRUE); 1048} 1049 1050static void 1051ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1052 struct ctl_be_block_io *beio) 1053{ 1054 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1055 int i; 1056 struct bio *bio; 1057 struct ctl_be_block_devdata *dev_data; 1058 off_t cur_offset; 1059 int max_iosize; 1060 1061 DPRINTF("entered\n"); 1062 1063 dev_data = &be_lun->backend.dev; 1064 1065 /* 1066 * We have to limit our I/O size to the maximum supported by the 1067 * backend device. Hopefully it is MAXPHYS. If the driver doesn't 1068 * set it properly, use DFLTPHYS. 1069 */ 1070 max_iosize = dev_data->cdev->si_iosize_max; 1071 if (max_iosize < PAGE_SIZE) 1072 max_iosize = DFLTPHYS; 1073 1074 cur_offset = beio->io_offset; 1075 for (i = 0; i < beio->num_segs; i++) { 1076 size_t cur_size; 1077 uint8_t *cur_ptr; 1078 1079 cur_size = beio->sg_segs[i].len; 1080 cur_ptr = beio->sg_segs[i].addr; 1081 1082 while (cur_size > 0) { 1083 /* This can't fail, it's a blocking allocation. */ 1084 bio = g_alloc_bio(); 1085 1086 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1087 1088 bio->bio_cmd = beio->bio_cmd; 1089 bio->bio_dev = dev_data->cdev; 1090 bio->bio_caller1 = beio; 1091 bio->bio_length = min(cur_size, max_iosize); 1092 bio->bio_offset = cur_offset; 1093 bio->bio_data = cur_ptr; 1094 bio->bio_done = ctl_be_block_biodone; 1095 bio->bio_pblkno = cur_offset / be_lun->blocksize; 1096 1097 cur_offset += bio->bio_length; 1098 cur_ptr += bio->bio_length; 1099 cur_size -= bio->bio_length; 1100 1101 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1102 beio->num_bios_sent++; 1103 } 1104 } 1105 binuptime(&beio->ds_t0); 1106 mtx_lock(&be_lun->io_lock); 1107 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1108 beio->send_complete = 1; 1109 mtx_unlock(&be_lun->io_lock); 1110 1111 /* 1112 * Fire off all allocated requests! 1113 */ 1114 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1115 TAILQ_REMOVE(&queue, bio, bio_queue); 1116 (*dev_data->csw->d_strategy)(bio); 1117 } 1118} 1119 1120static uint64_t 1121ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1122{ 1123 struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev; 1124 struct diocgattr_arg arg; 1125 int error; 1126 1127 if (dev_data->csw == NULL || dev_data->csw->d_ioctl == NULL) 1128 return (UINT64_MAX); 1129 strlcpy(arg.name, attrname, sizeof(arg.name)); 1130 arg.len = sizeof(arg.value.off); 1131 error = dev_data->csw->d_ioctl(dev_data->cdev, 1132 DIOCGATTR, (caddr_t)&arg, FREAD, curthread); 1133 if (error != 0) 1134 return (UINT64_MAX); 1135 return (arg.value.off); 1136} 1137 1138static void 1139ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1140{ 1141 union ctl_io *io; 1142 1143 io = beio->io; 1144 ctl_free_beio(beio); 1145 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1146 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1147 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1148 ctl_config_write_done(io); 1149 return; 1150 } 1151 1152 ctl_be_block_config_write(io); 1153} 1154 1155static void 1156ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1157 union ctl_io *io) 1158{ 1159 struct ctl_be_block_io *beio; 1160 struct ctl_be_block_softc *softc; 1161 struct ctl_lba_len_flags *lbalen; 1162 uint64_t len_left, lba; 1163 int i, seglen; 1164 uint8_t *buf, *end; 1165 1166 DPRINTF("entered\n"); 1167 1168 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1169 softc = be_lun->softc; 1170 lbalen = ARGS(beio->io); 1171 1172 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1173 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1174 ctl_free_beio(beio); 1175 ctl_set_invalid_field(&io->scsiio, 1176 /*sks_valid*/ 1, 1177 /*command*/ 1, 1178 /*field*/ 1, 1179 /*bit_valid*/ 0, 1180 /*bit*/ 0); 1181 ctl_config_write_done(io); 1182 return; 1183 } 1184 1185 switch (io->scsiio.tag_type) { 1186 case CTL_TAG_ORDERED: 1187 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1188 break; 1189 case CTL_TAG_HEAD_OF_QUEUE: 1190 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1191 break; 1192 case CTL_TAG_UNTAGGED: 1193 case CTL_TAG_SIMPLE: 1194 case CTL_TAG_ACA: 1195 default: 1196 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1197 break; 1198 } 1199 1200 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1201 beio->io_offset = lbalen->lba * be_lun->blocksize; 1202 beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize; 1203 beio->bio_cmd = BIO_DELETE; 1204 beio->ds_trans_type = DEVSTAT_FREE; 1205 1206 be_lun->unmap(be_lun, beio); 1207 return; 1208 } 1209 1210 beio->bio_cmd = BIO_WRITE; 1211 beio->ds_trans_type = DEVSTAT_WRITE; 1212 1213 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1214 (uintmax_t)lbalen->lba, lbalen->len); 1215 1216 len_left = (uint64_t)lbalen->len * be_lun->blocksize; 1217 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1218 1219 /* 1220 * Setup the S/G entry for this chunk. 1221 */ 1222 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1223 seglen -= seglen % be_lun->blocksize; 1224 beio->sg_segs[i].len = seglen; 1225 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1226 1227 DPRINTF("segment %d addr %p len %zd\n", i, 1228 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1229 1230 beio->num_segs++; 1231 len_left -= seglen; 1232 1233 buf = beio->sg_segs[i].addr; 1234 end = buf + seglen; 1235 for (; buf < end; buf += be_lun->blocksize) { 1236 memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize); 1237 if (lbalen->flags & SWS_LBDATA) 1238 scsi_ulto4b(lbalen->lba + lba, buf); 1239 lba++; 1240 } 1241 } 1242 1243 beio->io_offset = lbalen->lba * be_lun->blocksize; 1244 beio->io_len = lba * be_lun->blocksize; 1245 1246 /* We can not do all in one run. Correct and schedule rerun. */ 1247 if (len_left > 0) { 1248 lbalen->lba += lba; 1249 lbalen->len -= lba; 1250 beio->beio_cont = ctl_be_block_cw_done_ws; 1251 } 1252 1253 be_lun->dispatch(be_lun, beio); 1254} 1255 1256static void 1257ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1258 union ctl_io *io) 1259{ 1260 struct ctl_be_block_io *beio; 1261 struct ctl_be_block_softc *softc; 1262 struct ctl_ptr_len_flags *ptrlen; 1263 1264 DPRINTF("entered\n"); 1265 1266 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1267 softc = be_lun->softc; 1268 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1269 1270 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1271 ctl_free_beio(beio); 1272 ctl_set_invalid_field(&io->scsiio, 1273 /*sks_valid*/ 0, 1274 /*command*/ 1, 1275 /*field*/ 0, 1276 /*bit_valid*/ 0, 1277 /*bit*/ 0); 1278 ctl_config_write_done(io); 1279 return; 1280 } 1281 1282 switch (io->scsiio.tag_type) { 1283 case CTL_TAG_ORDERED: 1284 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1285 break; 1286 case CTL_TAG_HEAD_OF_QUEUE: 1287 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1288 break; 1289 case CTL_TAG_UNTAGGED: 1290 case CTL_TAG_SIMPLE: 1291 case CTL_TAG_ACA: 1292 default: 1293 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1294 break; 1295 } 1296 1297 beio->io_len = 0; 1298 beio->io_offset = -1; 1299 1300 beio->bio_cmd = BIO_DELETE; 1301 beio->ds_trans_type = DEVSTAT_FREE; 1302 1303 DPRINTF("UNMAP\n"); 1304 1305 be_lun->unmap(be_lun, beio); 1306} 1307 1308static void 1309ctl_be_block_cr_done(struct ctl_be_block_io *beio) 1310{ 1311 union ctl_io *io; 1312 1313 io = beio->io; 1314 ctl_free_beio(beio); 1315 ctl_config_read_done(io); 1316} 1317 1318static void 1319ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1320 union ctl_io *io) 1321{ 1322 struct ctl_be_block_io *beio; 1323 struct ctl_be_block_softc *softc; 1324 1325 DPRINTF("entered\n"); 1326 1327 softc = be_lun->softc; 1328 beio = ctl_alloc_beio(softc); 1329 beio->io = io; 1330 beio->lun = be_lun; 1331 beio->beio_cont = ctl_be_block_cr_done; 1332 PRIV(io)->ptr = (void *)beio; 1333 1334 switch (io->scsiio.cdb[0]) { 1335 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1336 beio->bio_cmd = -1; 1337 beio->ds_trans_type = DEVSTAT_NO_DATA; 1338 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1339 beio->io_len = 0; 1340 if (be_lun->get_lba_status) 1341 be_lun->get_lba_status(be_lun, beio); 1342 else 1343 ctl_be_block_cr_done(beio); 1344 break; 1345 default: 1346 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1347 break; 1348 } 1349} 1350 1351static void 1352ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1353{ 1354 union ctl_io *io; 1355 1356 io = beio->io; 1357 ctl_free_beio(beio); 1358 ctl_config_write_done(io); 1359} 1360 1361static void 1362ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1363 union ctl_io *io) 1364{ 1365 struct ctl_be_block_io *beio; 1366 struct ctl_be_block_softc *softc; 1367 1368 DPRINTF("entered\n"); 1369 1370 softc = be_lun->softc; 1371 beio = ctl_alloc_beio(softc); 1372 beio->io = io; 1373 beio->lun = be_lun; 1374 beio->beio_cont = ctl_be_block_cw_done; 1375 PRIV(io)->ptr = (void *)beio; 1376 1377 switch (io->scsiio.cdb[0]) { 1378 case SYNCHRONIZE_CACHE: 1379 case SYNCHRONIZE_CACHE_16: 1380 beio->bio_cmd = BIO_FLUSH; 1381 beio->ds_trans_type = DEVSTAT_NO_DATA; 1382 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1383 beio->io_len = 0; 1384 be_lun->lun_flush(be_lun, beio); 1385 break; 1386 case WRITE_SAME_10: 1387 case WRITE_SAME_16: 1388 ctl_be_block_cw_dispatch_ws(be_lun, io); 1389 break; 1390 case UNMAP: 1391 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1392 break; 1393 default: 1394 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1395 break; 1396 } 1397} 1398 1399SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); 1400SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); 1401SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); 1402SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); 1403 1404static void 1405ctl_be_block_next(struct ctl_be_block_io *beio) 1406{ 1407 struct ctl_be_block_lun *be_lun; 1408 union ctl_io *io; 1409 1410 io = beio->io; 1411 be_lun = beio->lun; 1412 ctl_free_beio(beio); 1413 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1414 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1415 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1416 ctl_data_submit_done(io); 1417 return; 1418 } 1419 1420 io->io_hdr.status &= ~CTL_STATUS_MASK; 1421 io->io_hdr.status |= CTL_STATUS_NONE; 1422 1423 mtx_lock(&be_lun->queue_lock); 1424 /* 1425 * XXX KDM make sure that links is okay to use at this point. 1426 * Otherwise, we either need to add another field to ctl_io_hdr, 1427 * or deal with resource allocation here. 1428 */ 1429 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1430 mtx_unlock(&be_lun->queue_lock); 1431 1432 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1433} 1434 1435static void 1436ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1437 union ctl_io *io) 1438{ 1439 struct ctl_be_block_io *beio; 1440 struct ctl_be_block_softc *softc; 1441 struct ctl_lba_len_flags *lbalen; 1442 struct ctl_ptr_len_flags *bptrlen; 1443 uint64_t len_left, lbas; 1444 int i; 1445 1446 softc = be_lun->softc; 1447 1448 DPRINTF("entered\n"); 1449 1450 lbalen = ARGS(io); 1451 if (lbalen->flags & CTL_LLF_WRITE) { 1452 SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0); 1453 } else { 1454 SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0); 1455 } 1456 1457 beio = ctl_alloc_beio(softc); 1458 beio->io = io; 1459 beio->lun = be_lun; 1460 bptrlen = PRIV(io); 1461 bptrlen->ptr = (void *)beio; 1462 1463 switch (io->scsiio.tag_type) { 1464 case CTL_TAG_ORDERED: 1465 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1466 break; 1467 case CTL_TAG_HEAD_OF_QUEUE: 1468 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1469 break; 1470 case CTL_TAG_UNTAGGED: 1471 case CTL_TAG_SIMPLE: 1472 case CTL_TAG_ACA: 1473 default: 1474 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1475 break; 1476 } 1477 1478 if (lbalen->flags & CTL_LLF_WRITE) { 1479 beio->bio_cmd = BIO_WRITE; 1480 beio->ds_trans_type = DEVSTAT_WRITE; 1481 } else { 1482 beio->bio_cmd = BIO_READ; 1483 beio->ds_trans_type = DEVSTAT_READ; 1484 } 1485 1486 DPRINTF("%s at LBA %jx len %u @%ju\n", 1487 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", 1488 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1489 if (lbalen->flags & CTL_LLF_COMPARE) 1490 lbas = CTLBLK_HALF_IO_SIZE; 1491 else 1492 lbas = CTLBLK_MAX_IO_SIZE; 1493 lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize); 1494 beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize; 1495 beio->io_len = lbas * be_lun->blocksize; 1496 bptrlen->len += lbas; 1497 1498 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1499 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1500 i, CTLBLK_MAX_SEGS)); 1501 1502 /* 1503 * Setup the S/G entry for this chunk. 1504 */ 1505 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); 1506 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1507 1508 DPRINTF("segment %d addr %p len %zd\n", i, 1509 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1510 1511 /* Set up second segment for compare operation. */ 1512 if (lbalen->flags & CTL_LLF_COMPARE) { 1513 beio->sg_segs[i + CTLBLK_HALF_SEGS].len = 1514 beio->sg_segs[i].len; 1515 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = 1516 uma_zalloc(be_lun->lun_zone, M_WAITOK); 1517 } 1518 1519 beio->num_segs++; 1520 len_left -= beio->sg_segs[i].len; 1521 } 1522 if (bptrlen->len < lbalen->len) 1523 beio->beio_cont = ctl_be_block_next; 1524 io->scsiio.be_move_done = ctl_be_block_move_done; 1525 /* For compare we have separate S/G lists for read and datamove. */ 1526 if (lbalen->flags & CTL_LLF_COMPARE) 1527 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1528 else 1529 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1530 io->scsiio.kern_data_len = beio->io_len; 1531 io->scsiio.kern_data_resid = 0; 1532 io->scsiio.kern_sg_entries = beio->num_segs; 1533 io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST; 1534 1535 /* 1536 * For the read case, we need to read the data into our buffers and 1537 * then we can send it back to the user. For the write case, we 1538 * need to get the data from the user first. 1539 */ 1540 if (beio->bio_cmd == BIO_READ) { 1541 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); 1542 be_lun->dispatch(be_lun, beio); 1543 } else { 1544 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); 1545#ifdef CTL_TIME_IO 1546 getbintime(&io->io_hdr.dma_start_bt); 1547#endif 1548 ctl_datamove(io); 1549 } 1550} 1551 1552static void 1553ctl_be_block_worker(void *context, int pending) 1554{ 1555 struct ctl_be_block_lun *be_lun; 1556 struct ctl_be_block_softc *softc; 1557 union ctl_io *io; 1558 1559 be_lun = (struct ctl_be_block_lun *)context; 1560 softc = be_lun->softc; 1561 1562 DPRINTF("entered\n"); 1563 1564 mtx_lock(&be_lun->queue_lock); 1565 for (;;) { 1566 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1567 if (io != NULL) { 1568 struct ctl_be_block_io *beio; 1569 1570 DPRINTF("datamove queue\n"); 1571 1572 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, 1573 ctl_io_hdr, links); 1574 1575 mtx_unlock(&be_lun->queue_lock); 1576 1577 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1578 1579 be_lun->dispatch(be_lun, beio); 1580 1581 mtx_lock(&be_lun->queue_lock); 1582 continue; 1583 } 1584 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1585 if (io != NULL) { 1586 DPRINTF("config write queue\n"); 1587 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, 1588 ctl_io_hdr, links); 1589 mtx_unlock(&be_lun->queue_lock); 1590 ctl_be_block_cw_dispatch(be_lun, io); 1591 mtx_lock(&be_lun->queue_lock); 1592 continue; 1593 } 1594 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1595 if (io != NULL) { 1596 DPRINTF("config read queue\n"); 1597 STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, 1598 ctl_io_hdr, links); 1599 mtx_unlock(&be_lun->queue_lock); 1600 ctl_be_block_cr_dispatch(be_lun, io); 1601 mtx_lock(&be_lun->queue_lock); 1602 continue; 1603 } 1604 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1605 if (io != NULL) { 1606 DPRINTF("input queue\n"); 1607 1608 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, 1609 ctl_io_hdr, links); 1610 mtx_unlock(&be_lun->queue_lock); 1611 1612 /* 1613 * We must drop the lock, since this routine and 1614 * its children may sleep. 1615 */ 1616 ctl_be_block_dispatch(be_lun, io); 1617 1618 mtx_lock(&be_lun->queue_lock); 1619 continue; 1620 } 1621 1622 /* 1623 * If we get here, there is no work left in the queues, so 1624 * just break out and let the task queue go to sleep. 1625 */ 1626 break; 1627 } 1628 mtx_unlock(&be_lun->queue_lock); 1629} 1630 1631/* 1632 * Entry point from CTL to the backend for I/O. We queue everything to a 1633 * work thread, so this just puts the I/O on a queue and wakes up the 1634 * thread. 1635 */ 1636static int 1637ctl_be_block_submit(union ctl_io *io) 1638{ 1639 struct ctl_be_block_lun *be_lun; 1640 struct ctl_be_lun *ctl_be_lun; 1641 1642 DPRINTF("entered\n"); 1643 1644 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 1645 CTL_PRIV_BACKEND_LUN].ptr; 1646 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 1647 1648 /* 1649 * Make sure we only get SCSI I/O. 1650 */ 1651 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " 1652 "%#x) encountered", io->io_hdr.io_type)); 1653 1654 PRIV(io)->len = 0; 1655 1656 mtx_lock(&be_lun->queue_lock); 1657 /* 1658 * XXX KDM make sure that links is okay to use at this point. 1659 * Otherwise, we either need to add another field to ctl_io_hdr, 1660 * or deal with resource allocation here. 1661 */ 1662 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1663 mtx_unlock(&be_lun->queue_lock); 1664 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1665 1666 return (CTL_RETVAL_COMPLETE); 1667} 1668 1669static int 1670ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1671 int flag, struct thread *td) 1672{ 1673 struct ctl_be_block_softc *softc; 1674 int error; 1675 1676 softc = &backend_block_softc; 1677 1678 error = 0; 1679 1680 switch (cmd) { 1681 case CTL_LUN_REQ: { 1682 struct ctl_lun_req *lun_req; 1683 1684 lun_req = (struct ctl_lun_req *)addr; 1685 1686 switch (lun_req->reqtype) { 1687 case CTL_LUNREQ_CREATE: 1688 error = ctl_be_block_create(softc, lun_req); 1689 break; 1690 case CTL_LUNREQ_RM: 1691 error = ctl_be_block_rm(softc, lun_req); 1692 break; 1693 case CTL_LUNREQ_MODIFY: 1694 error = ctl_be_block_modify(softc, lun_req); 1695 break; 1696 default: 1697 lun_req->status = CTL_LUN_ERROR; 1698 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1699 "invalid LUN request type %d", 1700 lun_req->reqtype); 1701 break; 1702 } 1703 break; 1704 } 1705 default: 1706 error = ENOTTY; 1707 break; 1708 } 1709 1710 return (error); 1711} 1712 1713static int 1714ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1715{ 1716 struct ctl_be_block_filedata *file_data; 1717 struct ctl_lun_create_params *params; 1718 struct vattr vattr; 1719 off_t pss; 1720 int error; 1721 1722 error = 0; 1723 file_data = &be_lun->backend.file; 1724 params = &be_lun->params; 1725 1726 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1727 be_lun->dispatch = ctl_be_block_dispatch_file; 1728 be_lun->lun_flush = ctl_be_block_flush_file; 1729 be_lun->get_lba_status = ctl_be_block_gls_file; 1730 1731 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1732 if (error != 0) { 1733 snprintf(req->error_str, sizeof(req->error_str), 1734 "error calling VOP_GETATTR() for file %s", 1735 be_lun->dev_path); 1736 return (error); 1737 } 1738 1739 /* 1740 * Verify that we have the ability to upgrade to exclusive 1741 * access on this file so we can trap errors at open instead 1742 * of reporting them during first access. 1743 */ 1744 if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) { 1745 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY); 1746 if (be_lun->vn->v_iflag & VI_DOOMED) { 1747 error = EBADF; 1748 snprintf(req->error_str, sizeof(req->error_str), 1749 "error locking file %s", be_lun->dev_path); 1750 return (error); 1751 } 1752 } 1753 1754 1755 file_data->cred = crhold(curthread->td_ucred); 1756 if (params->lun_size_bytes != 0) 1757 be_lun->size_bytes = params->lun_size_bytes; 1758 else 1759 be_lun->size_bytes = vattr.va_size; 1760 /* 1761 * We set the multi thread flag for file operations because all 1762 * filesystems (in theory) are capable of allowing multiple readers 1763 * of a file at once. So we want to get the maximum possible 1764 * concurrency. 1765 */ 1766 be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD; 1767 1768 /* 1769 * For files we can use any logical block size. Prefer 512 bytes 1770 * for compatibility reasons. If file's vattr.va_blocksize 1771 * (preferred I/O block size) is bigger and multiple to chosen 1772 * logical block size -- report it as physical block size. 1773 */ 1774 if (params->blocksize_bytes != 0) 1775 be_lun->blocksize = params->blocksize_bytes; 1776 else 1777 be_lun->blocksize = 512; 1778 pss = vattr.va_blocksize / be_lun->blocksize; 1779 if ((pss > 0) && (pss * be_lun->blocksize == vattr.va_blocksize) && 1780 ((pss & (pss - 1)) == 0)) { 1781 be_lun->pblockexp = fls(pss) - 1; 1782 be_lun->pblockoff = 0; 1783 } 1784 1785 /* 1786 * Sanity check. The media size has to be at least one 1787 * sector long. 1788 */ 1789 if (be_lun->size_bytes < be_lun->blocksize) { 1790 error = EINVAL; 1791 snprintf(req->error_str, sizeof(req->error_str), 1792 "file %s size %ju < block size %u", be_lun->dev_path, 1793 (uintmax_t)be_lun->size_bytes, be_lun->blocksize); 1794 } 1795 return (error); 1796} 1797 1798static int 1799ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1800{ 1801 struct ctl_lun_create_params *params; 1802 struct vattr vattr; 1803 struct cdev *dev; 1804 struct cdevsw *devsw; 1805 int error; 1806 off_t ps, pss, po, pos; 1807 1808 params = &be_lun->params; 1809 1810 be_lun->dev_type = CTL_BE_BLOCK_DEV; 1811 be_lun->backend.dev.cdev = be_lun->vn->v_rdev; 1812 be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev, 1813 &be_lun->backend.dev.dev_ref); 1814 if (be_lun->backend.dev.csw == NULL) 1815 panic("Unable to retrieve device switch"); 1816 if (strcmp(be_lun->backend.dev.csw->d_name, "zvol") == 0) { 1817 be_lun->dispatch = ctl_be_block_dispatch_zvol; 1818 be_lun->get_lba_status = ctl_be_block_gls_zvol; 1819 } else 1820 be_lun->dispatch = ctl_be_block_dispatch_dev; 1821 be_lun->lun_flush = ctl_be_block_flush_dev; 1822 be_lun->unmap = ctl_be_block_unmap_dev; 1823 be_lun->getattr = ctl_be_block_getattr_dev; 1824 1825 error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED); 1826 if (error) { 1827 snprintf(req->error_str, sizeof(req->error_str), 1828 "error getting vnode attributes for device %s", 1829 be_lun->dev_path); 1830 return (error); 1831 } 1832 1833 dev = be_lun->vn->v_rdev; 1834 devsw = dev->si_devsw; 1835 if (!devsw->d_ioctl) { 1836 snprintf(req->error_str, sizeof(req->error_str), 1837 "no d_ioctl for device %s!", 1838 be_lun->dev_path); 1839 return (ENODEV); 1840 } 1841 1842 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, 1843 (caddr_t)&be_lun->blocksize, FREAD, 1844 curthread); 1845 if (error) { 1846 snprintf(req->error_str, sizeof(req->error_str), 1847 "error %d returned for DIOCGSECTORSIZE ioctl " 1848 "on %s!", error, be_lun->dev_path); 1849 return (error); 1850 } 1851 1852 /* 1853 * If the user has asked for a blocksize that is greater than the 1854 * backing device's blocksize, we can do it only if the blocksize 1855 * the user is asking for is an even multiple of the underlying 1856 * device's blocksize. 1857 */ 1858 if ((params->blocksize_bytes != 0) 1859 && (params->blocksize_bytes > be_lun->blocksize)) { 1860 uint32_t bs_multiple, tmp_blocksize; 1861 1862 bs_multiple = params->blocksize_bytes / be_lun->blocksize; 1863 1864 tmp_blocksize = bs_multiple * be_lun->blocksize; 1865 1866 if (tmp_blocksize == params->blocksize_bytes) { 1867 be_lun->blocksize = params->blocksize_bytes; 1868 } else { 1869 snprintf(req->error_str, sizeof(req->error_str), 1870 "requested blocksize %u is not an even " 1871 "multiple of backing device blocksize %u", 1872 params->blocksize_bytes, 1873 be_lun->blocksize); 1874 return (EINVAL); 1875 1876 } 1877 } else if ((params->blocksize_bytes != 0) 1878 && (params->blocksize_bytes != be_lun->blocksize)) { 1879 snprintf(req->error_str, sizeof(req->error_str), 1880 "requested blocksize %u < backing device " 1881 "blocksize %u", params->blocksize_bytes, 1882 be_lun->blocksize); 1883 return (EINVAL); 1884 } 1885 1886 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, 1887 (caddr_t)&be_lun->size_bytes, FREAD, 1888 curthread); 1889 if (error) { 1890 snprintf(req->error_str, sizeof(req->error_str), 1891 "error %d returned for DIOCGMEDIASIZE " 1892 " ioctl on %s!", error, 1893 be_lun->dev_path); 1894 return (error); 1895 } 1896 1897 if (params->lun_size_bytes != 0) { 1898 if (params->lun_size_bytes > be_lun->size_bytes) { 1899 snprintf(req->error_str, sizeof(req->error_str), 1900 "requested LUN size %ju > backing device " 1901 "size %ju", 1902 (uintmax_t)params->lun_size_bytes, 1903 (uintmax_t)be_lun->size_bytes); 1904 return (EINVAL); 1905 } 1906 1907 be_lun->size_bytes = params->lun_size_bytes; 1908 } 1909 1910 error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE, 1911 (caddr_t)&ps, FREAD, curthread); 1912 if (error) 1913 ps = po = 0; 1914 else { 1915 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET, 1916 (caddr_t)&po, FREAD, curthread); 1917 if (error) 1918 po = 0; 1919 } 1920 pss = ps / be_lun->blocksize; 1921 pos = po / be_lun->blocksize; 1922 if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) && 1923 ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) { 1924 be_lun->pblockexp = fls(pss) - 1; 1925 be_lun->pblockoff = (pss - pos) % pss; 1926 } 1927 1928 return (0); 1929} 1930 1931static int 1932ctl_be_block_close(struct ctl_be_block_lun *be_lun) 1933{ 1934 DROP_GIANT(); 1935 if (be_lun->vn) { 1936 int flags = FREAD | FWRITE; 1937 1938 switch (be_lun->dev_type) { 1939 case CTL_BE_BLOCK_DEV: 1940 if (be_lun->backend.dev.csw) { 1941 dev_relthread(be_lun->backend.dev.cdev, 1942 be_lun->backend.dev.dev_ref); 1943 be_lun->backend.dev.csw = NULL; 1944 be_lun->backend.dev.cdev = NULL; 1945 } 1946 break; 1947 case CTL_BE_BLOCK_FILE: 1948 break; 1949 case CTL_BE_BLOCK_NONE: 1950 break; 1951 default: 1952 panic("Unexpected backend type."); 1953 break; 1954 } 1955 1956 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 1957 be_lun->vn = NULL; 1958 1959 switch (be_lun->dev_type) { 1960 case CTL_BE_BLOCK_DEV: 1961 break; 1962 case CTL_BE_BLOCK_FILE: 1963 if (be_lun->backend.file.cred != NULL) { 1964 crfree(be_lun->backend.file.cred); 1965 be_lun->backend.file.cred = NULL; 1966 } 1967 break; 1968 case CTL_BE_BLOCK_NONE: 1969 break; 1970 default: 1971 panic("Unexpected backend type."); 1972 break; 1973 } 1974 be_lun->dev_type = CTL_BE_BLOCK_NONE; 1975 } 1976 PICKUP_GIANT(); 1977 1978 return (0); 1979} 1980 1981static int 1982ctl_be_block_open(struct ctl_be_block_softc *softc, 1983 struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1984{ 1985 struct nameidata nd; 1986 int flags; 1987 int error; 1988 1989 /* 1990 * XXX KDM allow a read-only option? 1991 */ 1992 flags = FREAD | FWRITE; 1993 error = 0; 1994 1995 if (rootvnode == NULL) { 1996 snprintf(req->error_str, sizeof(req->error_str), 1997 "Root filesystem is not mounted"); 1998 return (1); 1999 } 2000 2001 if (!curthread->td_proc->p_fd->fd_cdir) { 2002 curthread->td_proc->p_fd->fd_cdir = rootvnode; 2003 VREF(rootvnode); 2004 } 2005 if (!curthread->td_proc->p_fd->fd_rdir) { 2006 curthread->td_proc->p_fd->fd_rdir = rootvnode; 2007 VREF(rootvnode); 2008 } 2009 if (!curthread->td_proc->p_fd->fd_jdir) { 2010 curthread->td_proc->p_fd->fd_jdir = rootvnode; 2011 VREF(rootvnode); 2012 } 2013 2014 again: 2015 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 2016 error = vn_open(&nd, &flags, 0, NULL); 2017 if (error) { 2018 /* 2019 * This is the only reasonable guess we can make as far as 2020 * path if the user doesn't give us a fully qualified path. 2021 * If they want to specify a file, they need to specify the 2022 * full path. 2023 */ 2024 if (be_lun->dev_path[0] != '/') { 2025 char *dev_path = "/dev/"; 2026 char *dev_name; 2027 2028 /* Try adding device path at beginning of name */ 2029 dev_name = malloc(strlen(be_lun->dev_path) 2030 + strlen(dev_path) + 1, 2031 M_CTLBLK, M_WAITOK); 2032 if (dev_name) { 2033 sprintf(dev_name, "%s%s", dev_path, 2034 be_lun->dev_path); 2035 free(be_lun->dev_path, M_CTLBLK); 2036 be_lun->dev_path = dev_name; 2037 goto again; 2038 } 2039 } 2040 snprintf(req->error_str, sizeof(req->error_str), 2041 "error opening %s: %d", be_lun->dev_path, error); 2042 return (error); 2043 } 2044 2045 NDFREE(&nd, NDF_ONLY_PNBUF); 2046 2047 be_lun->vn = nd.ni_vp; 2048 2049 /* We only support disks and files. */ 2050 if (vn_isdisk(be_lun->vn, &error)) { 2051 error = ctl_be_block_open_dev(be_lun, req); 2052 } else if (be_lun->vn->v_type == VREG) { 2053 error = ctl_be_block_open_file(be_lun, req); 2054 } else { 2055 error = EINVAL; 2056 snprintf(req->error_str, sizeof(req->error_str), 2057 "%s is not a disk or plain file", be_lun->dev_path); 2058 } 2059 VOP_UNLOCK(be_lun->vn, 0); 2060 2061 if (error != 0) { 2062 ctl_be_block_close(be_lun); 2063 return (error); 2064 } 2065 2066 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1; 2067 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift; 2068 2069 return (0); 2070} 2071 2072static int 2073ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2074{ 2075 struct ctl_be_block_lun *be_lun; 2076 struct ctl_lun_create_params *params; 2077 char num_thread_str[16]; 2078 char tmpstr[32]; 2079 char *value; 2080 int retval, num_threads, unmap; 2081 int tmp_num_threads; 2082 2083 params = &req->reqdata.create; 2084 retval = 0; 2085 req->status = CTL_LUN_OK; 2086 2087 num_threads = cbb_num_threads; 2088 2089 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2090 2091 be_lun->params = req->reqdata.create; 2092 be_lun->softc = softc; 2093 STAILQ_INIT(&be_lun->input_queue); 2094 STAILQ_INIT(&be_lun->config_read_queue); 2095 STAILQ_INIT(&be_lun->config_write_queue); 2096 STAILQ_INIT(&be_lun->datamove_queue); 2097 sprintf(be_lun->lunname, "cblk%d", softc->num_luns); 2098 mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF); 2099 mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF); 2100 ctl_init_opts(&be_lun->ctl_be_lun.options, 2101 req->num_be_args, req->kern_be_args); 2102 2103 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, 2104 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2105 2106 if (be_lun->lun_zone == NULL) { 2107 snprintf(req->error_str, sizeof(req->error_str), 2108 "error allocating UMA zone"); 2109 goto bailout_error; 2110 } 2111 2112 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2113 be_lun->ctl_be_lun.lun_type = params->device_type; 2114 else 2115 be_lun->ctl_be_lun.lun_type = T_DIRECT; 2116 2117 if (be_lun->ctl_be_lun.lun_type == T_DIRECT) { 2118 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "file"); 2119 if (value == NULL) { 2120 snprintf(req->error_str, sizeof(req->error_str), 2121 "no file argument specified"); 2122 goto bailout_error; 2123 } 2124 be_lun->dev_path = strdup(value, M_CTLBLK); 2125 be_lun->blocksize = 512; 2126 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1; 2127 2128 retval = ctl_be_block_open(softc, be_lun, req); 2129 if (retval != 0) { 2130 retval = 0; 2131 req->status = CTL_LUN_WARNING; 2132 } 2133 } else { 2134 /* 2135 * For processor devices, we don't have any size. 2136 */ 2137 be_lun->blocksize = 0; 2138 be_lun->pblockexp = 0; 2139 be_lun->pblockoff = 0; 2140 be_lun->size_blocks = 0; 2141 be_lun->size_bytes = 0; 2142 be_lun->ctl_be_lun.maxlba = 0; 2143 2144 /* 2145 * Default to just 1 thread for processor devices. 2146 */ 2147 num_threads = 1; 2148 } 2149 2150 /* 2151 * XXX This searching loop might be refactored to be combined with 2152 * the loop above, 2153 */ 2154 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "num_threads"); 2155 if (value != NULL) { 2156 tmp_num_threads = strtol(value, NULL, 0); 2157 2158 /* 2159 * We don't let the user specify less than one 2160 * thread, but hope he's clueful enough not to 2161 * specify 1000 threads. 2162 */ 2163 if (tmp_num_threads < 1) { 2164 snprintf(req->error_str, sizeof(req->error_str), 2165 "invalid number of threads %s", 2166 num_thread_str); 2167 goto bailout_error; 2168 } 2169 num_threads = tmp_num_threads; 2170 } 2171 unmap = (be_lun->dispatch == ctl_be_block_dispatch_zvol); 2172 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap"); 2173 if (value != NULL) 2174 unmap = (strcmp(value, "on") == 0); 2175 2176 be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; 2177 be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY; 2178 if (be_lun->vn == NULL) 2179 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_OFFLINE; 2180 if (unmap) 2181 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP; 2182 be_lun->ctl_be_lun.be_lun = be_lun; 2183 be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ? 2184 0 : (be_lun->size_blocks - 1); 2185 be_lun->ctl_be_lun.blocksize = be_lun->blocksize; 2186 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; 2187 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; 2188 if (be_lun->dispatch == ctl_be_block_dispatch_zvol && 2189 be_lun->blocksize != 0) 2190 be_lun->ctl_be_lun.atomicblock = CTLBLK_MAX_IO_SIZE / 2191 be_lun->blocksize; 2192 /* Tell the user the blocksize we ended up using */ 2193 params->lun_size_bytes = be_lun->size_bytes; 2194 params->blocksize_bytes = be_lun->blocksize; 2195 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2196 be_lun->ctl_be_lun.req_lun_id = params->req_lun_id; 2197 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ; 2198 } else 2199 be_lun->ctl_be_lun.req_lun_id = 0; 2200 2201 be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown; 2202 be_lun->ctl_be_lun.lun_config_status = 2203 ctl_be_block_lun_config_status; 2204 be_lun->ctl_be_lun.be = &ctl_be_block_driver; 2205 2206 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2207 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", 2208 softc->num_luns); 2209 strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr, 2210 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num), 2211 sizeof(tmpstr))); 2212 2213 /* Tell the user what we used for a serial number */ 2214 strncpy((char *)params->serial_num, tmpstr, 2215 ctl_min(sizeof(params->serial_num), sizeof(tmpstr))); 2216 } else { 2217 strncpy((char *)be_lun->ctl_be_lun.serial_num, 2218 params->serial_num, 2219 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num), 2220 sizeof(params->serial_num))); 2221 } 2222 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2223 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); 2224 strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr, 2225 ctl_min(sizeof(be_lun->ctl_be_lun.device_id), 2226 sizeof(tmpstr))); 2227 2228 /* Tell the user what we used for a device ID */ 2229 strncpy((char *)params->device_id, tmpstr, 2230 ctl_min(sizeof(params->device_id), sizeof(tmpstr))); 2231 } else { 2232 strncpy((char *)be_lun->ctl_be_lun.device_id, 2233 params->device_id, 2234 ctl_min(sizeof(be_lun->ctl_be_lun.device_id), 2235 sizeof(params->device_id))); 2236 } 2237 2238 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2239 2240 be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, 2241 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2242 2243 if (be_lun->io_taskqueue == NULL) { 2244 snprintf(req->error_str, sizeof(req->error_str), 2245 "unable to create taskqueue"); 2246 goto bailout_error; 2247 } 2248 2249 /* 2250 * Note that we start the same number of threads by default for 2251 * both the file case and the block device case. For the file 2252 * case, we need multiple threads to allow concurrency, because the 2253 * vnode interface is designed to be a blocking interface. For the 2254 * block device case, ZFS zvols at least will block the caller's 2255 * context in many instances, and so we need multiple threads to 2256 * overcome that problem. Other block devices don't need as many 2257 * threads, but they shouldn't cause too many problems. 2258 * 2259 * If the user wants to just have a single thread for a block 2260 * device, he can specify that when the LUN is created, or change 2261 * the tunable/sysctl to alter the default number of threads. 2262 */ 2263 retval = taskqueue_start_threads(&be_lun->io_taskqueue, 2264 /*num threads*/num_threads, 2265 /*priority*/PWAIT, 2266 /*thread name*/ 2267 "%s taskq", be_lun->lunname); 2268 2269 if (retval != 0) 2270 goto bailout_error; 2271 2272 be_lun->num_threads = num_threads; 2273 2274 mtx_lock(&softc->lock); 2275 softc->num_luns++; 2276 STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); 2277 2278 mtx_unlock(&softc->lock); 2279 2280 retval = ctl_add_lun(&be_lun->ctl_be_lun); 2281 if (retval != 0) { 2282 mtx_lock(&softc->lock); 2283 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2284 links); 2285 softc->num_luns--; 2286 mtx_unlock(&softc->lock); 2287 snprintf(req->error_str, sizeof(req->error_str), 2288 "ctl_add_lun() returned error %d, see dmesg for " 2289 "details", retval); 2290 retval = 0; 2291 goto bailout_error; 2292 } 2293 2294 mtx_lock(&softc->lock); 2295 2296 /* 2297 * Tell the config_status routine that we're waiting so it won't 2298 * clean up the LUN in the event of an error. 2299 */ 2300 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2301 2302 while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2303 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2304 if (retval == EINTR) 2305 break; 2306 } 2307 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2308 2309 if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) { 2310 snprintf(req->error_str, sizeof(req->error_str), 2311 "LUN configuration error, see dmesg for details"); 2312 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2313 links); 2314 softc->num_luns--; 2315 mtx_unlock(&softc->lock); 2316 goto bailout_error; 2317 } else { 2318 params->req_lun_id = be_lun->ctl_be_lun.lun_id; 2319 } 2320 2321 mtx_unlock(&softc->lock); 2322 2323 be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id, 2324 be_lun->blocksize, 2325 DEVSTAT_ALL_SUPPORTED, 2326 be_lun->ctl_be_lun.lun_type 2327 | DEVSTAT_TYPE_IF_OTHER, 2328 DEVSTAT_PRIORITY_OTHER); 2329 2330 return (retval); 2331 2332bailout_error: 2333 req->status = CTL_LUN_ERROR; 2334 2335 if (be_lun->io_taskqueue != NULL) 2336 taskqueue_free(be_lun->io_taskqueue); 2337 ctl_be_block_close(be_lun); 2338 if (be_lun->dev_path != NULL) 2339 free(be_lun->dev_path, M_CTLBLK); 2340 if (be_lun->lun_zone != NULL) 2341 uma_zdestroy(be_lun->lun_zone); 2342 ctl_free_opts(&be_lun->ctl_be_lun.options); 2343 mtx_destroy(&be_lun->queue_lock); 2344 mtx_destroy(&be_lun->io_lock); 2345 free(be_lun, M_CTLBLK); 2346 2347 return (retval); 2348} 2349 2350static int 2351ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2352{ 2353 struct ctl_lun_rm_params *params; 2354 struct ctl_be_block_lun *be_lun; 2355 int retval; 2356 2357 params = &req->reqdata.rm; 2358 2359 mtx_lock(&softc->lock); 2360 2361 be_lun = NULL; 2362 2363 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2364 if (be_lun->ctl_be_lun.lun_id == params->lun_id) 2365 break; 2366 } 2367 mtx_unlock(&softc->lock); 2368 2369 if (be_lun == NULL) { 2370 snprintf(req->error_str, sizeof(req->error_str), 2371 "LUN %u is not managed by the block backend", 2372 params->lun_id); 2373 goto bailout_error; 2374 } 2375 2376 retval = ctl_disable_lun(&be_lun->ctl_be_lun); 2377 2378 if (retval != 0) { 2379 snprintf(req->error_str, sizeof(req->error_str), 2380 "error %d returned from ctl_disable_lun() for " 2381 "LUN %d", retval, params->lun_id); 2382 goto bailout_error; 2383 2384 } 2385 2386 retval = ctl_invalidate_lun(&be_lun->ctl_be_lun); 2387 if (retval != 0) { 2388 snprintf(req->error_str, sizeof(req->error_str), 2389 "error %d returned from ctl_invalidate_lun() for " 2390 "LUN %d", retval, params->lun_id); 2391 goto bailout_error; 2392 } 2393 2394 mtx_lock(&softc->lock); 2395 2396 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2397 2398 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2399 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2400 if (retval == EINTR) 2401 break; 2402 } 2403 2404 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2405 2406 if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2407 snprintf(req->error_str, sizeof(req->error_str), 2408 "interrupted waiting for LUN to be freed"); 2409 mtx_unlock(&softc->lock); 2410 goto bailout_error; 2411 } 2412 2413 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); 2414 2415 softc->num_luns--; 2416 mtx_unlock(&softc->lock); 2417 2418 taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task); 2419 2420 taskqueue_free(be_lun->io_taskqueue); 2421 2422 ctl_be_block_close(be_lun); 2423 2424 if (be_lun->disk_stats != NULL) 2425 devstat_remove_entry(be_lun->disk_stats); 2426 2427 uma_zdestroy(be_lun->lun_zone); 2428 2429 ctl_free_opts(&be_lun->ctl_be_lun.options); 2430 free(be_lun->dev_path, M_CTLBLK); 2431 mtx_destroy(&be_lun->queue_lock); 2432 mtx_destroy(&be_lun->io_lock); 2433 free(be_lun, M_CTLBLK); 2434 2435 req->status = CTL_LUN_OK; 2436 2437 return (0); 2438 2439bailout_error: 2440 2441 req->status = CTL_LUN_ERROR; 2442 2443 return (0); 2444} 2445 2446static int 2447ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 2448 struct ctl_lun_req *req) 2449{ 2450 struct vattr vattr; 2451 int error; 2452 struct ctl_lun_create_params *params = &be_lun->params; 2453 2454 if (params->lun_size_bytes != 0) { 2455 be_lun->size_bytes = params->lun_size_bytes; 2456 } else { 2457 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2458 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 2459 VOP_UNLOCK(be_lun->vn, 0); 2460 if (error != 0) { 2461 snprintf(req->error_str, sizeof(req->error_str), 2462 "error calling VOP_GETATTR() for file %s", 2463 be_lun->dev_path); 2464 return (error); 2465 } 2466 2467 be_lun->size_bytes = vattr.va_size; 2468 } 2469 2470 return (0); 2471} 2472 2473static int 2474ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 2475 struct ctl_lun_req *req) 2476{ 2477 struct ctl_be_block_devdata *dev_data; 2478 int error; 2479 struct ctl_lun_create_params *params = &be_lun->params; 2480 uint64_t size_bytes; 2481 2482 dev_data = &be_lun->backend.dev; 2483 if (!dev_data->csw->d_ioctl) { 2484 snprintf(req->error_str, sizeof(req->error_str), 2485 "no d_ioctl for device %s!", be_lun->dev_path); 2486 return (ENODEV); 2487 } 2488 2489 error = dev_data->csw->d_ioctl(dev_data->cdev, DIOCGMEDIASIZE, 2490 (caddr_t)&size_bytes, FREAD, 2491 curthread); 2492 if (error) { 2493 snprintf(req->error_str, sizeof(req->error_str), 2494 "error %d returned for DIOCGMEDIASIZE ioctl " 2495 "on %s!", error, be_lun->dev_path); 2496 return (error); 2497 } 2498 2499 if (params->lun_size_bytes != 0) { 2500 if (params->lun_size_bytes > size_bytes) { 2501 snprintf(req->error_str, sizeof(req->error_str), 2502 "requested LUN size %ju > backing device " 2503 "size %ju", 2504 (uintmax_t)params->lun_size_bytes, 2505 (uintmax_t)size_bytes); 2506 return (EINVAL); 2507 } 2508 2509 be_lun->size_bytes = params->lun_size_bytes; 2510 } else { 2511 be_lun->size_bytes = size_bytes; 2512 } 2513 2514 return (0); 2515} 2516 2517static int 2518ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2519{ 2520 struct ctl_lun_modify_params *params; 2521 struct ctl_be_block_lun *be_lun; 2522 uint64_t oldsize; 2523 int error; 2524 2525 params = &req->reqdata.modify; 2526 2527 mtx_lock(&softc->lock); 2528 be_lun = NULL; 2529 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2530 if (be_lun->ctl_be_lun.lun_id == params->lun_id) 2531 break; 2532 } 2533 mtx_unlock(&softc->lock); 2534 2535 if (be_lun == NULL) { 2536 snprintf(req->error_str, sizeof(req->error_str), 2537 "LUN %u is not managed by the block backend", 2538 params->lun_id); 2539 goto bailout_error; 2540 } 2541 2542 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2543 2544 oldsize = be_lun->size_bytes; 2545 if (be_lun->vn == NULL) 2546 error = ctl_be_block_open(softc, be_lun, req); 2547 else if (be_lun->vn->v_type == VREG) 2548 error = ctl_be_block_modify_file(be_lun, req); 2549 else 2550 error = ctl_be_block_modify_dev(be_lun, req); 2551 2552 if (error == 0 && be_lun->size_bytes != oldsize) { 2553 be_lun->size_blocks = be_lun->size_bytes >> 2554 be_lun->blocksize_shift; 2555 2556 /* 2557 * The maximum LBA is the size - 1. 2558 * 2559 * XXX: Note that this field is being updated without locking, 2560 * which might cause problems on 32-bit architectures. 2561 */ 2562 be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ? 2563 0 : (be_lun->size_blocks - 1); 2564 be_lun->ctl_be_lun.blocksize = be_lun->blocksize; 2565 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; 2566 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; 2567 if (be_lun->dispatch == ctl_be_block_dispatch_zvol && 2568 be_lun->blocksize != 0) 2569 be_lun->ctl_be_lun.atomicblock = CTLBLK_MAX_IO_SIZE / 2570 be_lun->blocksize; 2571 ctl_lun_capacity_changed(&be_lun->ctl_be_lun); 2572 if (oldsize == 0 && be_lun->size_blocks != 0) 2573 ctl_lun_online(&be_lun->ctl_be_lun); 2574 } 2575 2576 /* Tell the user the exact size we ended up using */ 2577 params->lun_size_bytes = be_lun->size_bytes; 2578 2579 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2580 2581 return (0); 2582 2583bailout_error: 2584 req->status = CTL_LUN_ERROR; 2585 2586 return (0); 2587} 2588 2589static void 2590ctl_be_block_lun_shutdown(void *be_lun) 2591{ 2592 struct ctl_be_block_lun *lun; 2593 struct ctl_be_block_softc *softc; 2594 2595 lun = (struct ctl_be_block_lun *)be_lun; 2596 2597 softc = lun->softc; 2598 2599 mtx_lock(&softc->lock); 2600 lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2601 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2602 wakeup(lun); 2603 mtx_unlock(&softc->lock); 2604 2605} 2606 2607static void 2608ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status) 2609{ 2610 struct ctl_be_block_lun *lun; 2611 struct ctl_be_block_softc *softc; 2612 2613 lun = (struct ctl_be_block_lun *)be_lun; 2614 softc = lun->softc; 2615 2616 if (status == CTL_LUN_CONFIG_OK) { 2617 mtx_lock(&softc->lock); 2618 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2619 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2620 wakeup(lun); 2621 mtx_unlock(&softc->lock); 2622 2623 /* 2624 * We successfully added the LUN, attempt to enable it. 2625 */ 2626 if (ctl_enable_lun(&lun->ctl_be_lun) != 0) { 2627 printf("%s: ctl_enable_lun() failed!\n", __func__); 2628 if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) { 2629 printf("%s: ctl_invalidate_lun() failed!\n", 2630 __func__); 2631 } 2632 } 2633 2634 return; 2635 } 2636 2637 2638 mtx_lock(&softc->lock); 2639 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2640 lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR; 2641 wakeup(lun); 2642 mtx_unlock(&softc->lock); 2643} 2644 2645 2646static int 2647ctl_be_block_config_write(union ctl_io *io) 2648{ 2649 struct ctl_be_block_lun *be_lun; 2650 struct ctl_be_lun *ctl_be_lun; 2651 int retval; 2652 2653 retval = 0; 2654 2655 DPRINTF("entered\n"); 2656 2657 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2658 CTL_PRIV_BACKEND_LUN].ptr; 2659 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 2660 2661 switch (io->scsiio.cdb[0]) { 2662 case SYNCHRONIZE_CACHE: 2663 case SYNCHRONIZE_CACHE_16: 2664 case WRITE_SAME_10: 2665 case WRITE_SAME_16: 2666 case UNMAP: 2667 /* 2668 * The upper level CTL code will filter out any CDBs with 2669 * the immediate bit set and return the proper error. 2670 * 2671 * We don't really need to worry about what LBA range the 2672 * user asked to be synced out. When they issue a sync 2673 * cache command, we'll sync out the whole thing. 2674 */ 2675 mtx_lock(&be_lun->queue_lock); 2676 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2677 links); 2678 mtx_unlock(&be_lun->queue_lock); 2679 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2680 break; 2681 case START_STOP_UNIT: { 2682 struct scsi_start_stop_unit *cdb; 2683 2684 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2685 2686 if (cdb->how & SSS_START) 2687 retval = ctl_start_lun(ctl_be_lun); 2688 else { 2689 retval = ctl_stop_lun(ctl_be_lun); 2690 /* 2691 * XXX KDM Copan-specific offline behavior. 2692 * Figure out a reasonable way to port this? 2693 */ 2694#ifdef NEEDTOPORT 2695 if ((retval == 0) 2696 && (cdb->byte2 & SSS_ONOFFLINE)) 2697 retval = ctl_lun_offline(ctl_be_lun); 2698#endif 2699 } 2700 2701 /* 2702 * In general, the above routines should not fail. They 2703 * just set state for the LUN. So we've got something 2704 * pretty wrong here if we can't start or stop the LUN. 2705 */ 2706 if (retval != 0) { 2707 ctl_set_internal_failure(&io->scsiio, 2708 /*sks_valid*/ 1, 2709 /*retry_count*/ 0xf051); 2710 retval = CTL_RETVAL_COMPLETE; 2711 } else { 2712 ctl_set_success(&io->scsiio); 2713 } 2714 ctl_config_write_done(io); 2715 break; 2716 } 2717 default: 2718 ctl_set_invalid_opcode(&io->scsiio); 2719 ctl_config_write_done(io); 2720 retval = CTL_RETVAL_COMPLETE; 2721 break; 2722 } 2723 2724 return (retval); 2725} 2726 2727static int 2728ctl_be_block_config_read(union ctl_io *io) 2729{ 2730 struct ctl_be_block_lun *be_lun; 2731 struct ctl_be_lun *ctl_be_lun; 2732 int retval = 0; 2733 2734 DPRINTF("entered\n"); 2735 2736 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2737 CTL_PRIV_BACKEND_LUN].ptr; 2738 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 2739 2740 switch (io->scsiio.cdb[0]) { 2741 case SERVICE_ACTION_IN: 2742 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2743 mtx_lock(&be_lun->queue_lock); 2744 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2745 &io->io_hdr, links); 2746 mtx_unlock(&be_lun->queue_lock); 2747 taskqueue_enqueue(be_lun->io_taskqueue, 2748 &be_lun->io_task); 2749 retval = CTL_RETVAL_QUEUED; 2750 break; 2751 } 2752 ctl_set_invalid_field(&io->scsiio, 2753 /*sks_valid*/ 1, 2754 /*command*/ 1, 2755 /*field*/ 1, 2756 /*bit_valid*/ 1, 2757 /*bit*/ 4); 2758 ctl_config_read_done(io); 2759 retval = CTL_RETVAL_COMPLETE; 2760 break; 2761 default: 2762 ctl_set_invalid_opcode(&io->scsiio); 2763 ctl_config_read_done(io); 2764 retval = CTL_RETVAL_COMPLETE; 2765 break; 2766 } 2767 2768 return (retval); 2769} 2770 2771static int 2772ctl_be_block_lun_info(void *be_lun, struct sbuf *sb) 2773{ 2774 struct ctl_be_block_lun *lun; 2775 int retval; 2776 2777 lun = (struct ctl_be_block_lun *)be_lun; 2778 retval = 0; 2779 2780 retval = sbuf_printf(sb, "\t<num_threads>"); 2781 2782 if (retval != 0) 2783 goto bailout; 2784 2785 retval = sbuf_printf(sb, "%d", lun->num_threads); 2786 2787 if (retval != 0) 2788 goto bailout; 2789 2790 retval = sbuf_printf(sb, "</num_threads>\n"); 2791 2792bailout: 2793 2794 return (retval); 2795} 2796 2797static uint64_t 2798ctl_be_block_lun_attr(void *be_lun, const char *attrname) 2799{ 2800 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun; 2801 2802 if (lun->getattr == NULL) 2803 return (UINT64_MAX); 2804 return (lun->getattr(lun, attrname)); 2805} 2806 2807int 2808ctl_be_block_init(void) 2809{ 2810 struct ctl_be_block_softc *softc; 2811 int retval; 2812 2813 softc = &backend_block_softc; 2814 retval = 0; 2815 2816 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 2817 beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2818 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2819 STAILQ_INIT(&softc->disk_list); 2820 STAILQ_INIT(&softc->lun_list); 2821 2822 return (retval); 2823} 2824