ctl_backend_block.c revision 268149
/*-
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <ken@FreeBSD.org>
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/cam/ctl/ctl_backend_block.c 268149 2014-07-02 10:42:43Z mav $");

#include <opt_kdtrace.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/kthread.h>
#include <sys/bio.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/endian.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/module.h>
#include <sys/sdt.h>
#include <sys/devicestat.h>
#include <sys/sysctl.h>

#include <geom/geom.h>

#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 */
#define	CTLBLK_MAX_IO_SIZE	(1024 * 1024)
#define	CTLBLK_MAX_SEG		MAXPHYS
#define	CTLBLK_MAX_SEGS		MAX(CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG, 1)

#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
	CTL_BE_BLOCK_LUN_MULTI_THREAD	= 0x08
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_devdata {
	struct cdev *cdev;
	struct cdevsw *csw;
	int dev_ref;
};

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_devdata dev;
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_block_disk *disk;
	char lunname[32];
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	struct mtx lock;
	uma_zone_t lun_zone;
	uint64_t size_blocks;
	uint64_t size_bytes;
	uint32_t blocksize;
	int blocksize_shift;
	uint16_t pblockexp;
	uint16_t pblockoff;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	STAILQ_ENTRY(ctl_be_block_lun) links;
	struct ctl_be_lun ctl_be_lun;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct mtx			 lock;
	int				 num_disks;
	STAILQ_HEAD(, ctl_block_disk)	 disk_list;
	int				 num_luns;
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				bio_cmd;
	int				bio_flags;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				num_errors;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

static int cbb_num_threads = 14;
TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW,
	   &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			      int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_softc *softc,
			     struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_lun_req *req);
static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(void *be_lun);
static void ctl_be_block_lun_config_status(void *be_lun,
					   ctl_lun_config_status status);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
int ctl_be_block_init(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name
= "block", 275 .flags = CTL_BE_FLAG_HAS_CONFIG, 276 .init = ctl_be_block_init, 277 .data_submit = ctl_be_block_submit, 278 .data_move_done = ctl_be_block_move_done, 279 .config_read = ctl_be_block_config_read, 280 .config_write = ctl_be_block_config_write, 281 .ioctl = ctl_be_block_ioctl, 282 .lun_info = ctl_be_block_lun_info 283}; 284 285MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend"); 286CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 287 288static uma_zone_t beio_zone; 289 290static struct ctl_be_block_io * 291ctl_alloc_beio(struct ctl_be_block_softc *softc) 292{ 293 struct ctl_be_block_io *beio; 294 295 beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO); 296 beio->softc = softc; 297 return (beio); 298} 299 300static void 301ctl_free_beio(struct ctl_be_block_io *beio) 302{ 303 int duplicate_free; 304 int i; 305 306 duplicate_free = 0; 307 308 for (i = 0; i < beio->num_segs; i++) { 309 if (beio->sg_segs[i].addr == NULL) 310 duplicate_free++; 311 312 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); 313 beio->sg_segs[i].addr = NULL; 314 } 315 316 if (duplicate_free > 0) { 317 printf("%s: %d duplicate frees out of %d segments\n", __func__, 318 duplicate_free, beio->num_segs); 319 } 320 321 uma_zfree(beio_zone, beio); 322} 323 324static void 325ctl_complete_beio(struct ctl_be_block_io *beio) 326{ 327 union ctl_io *io; 328 int io_len; 329 330 io = beio->io; 331 332 if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) 333 io_len = beio->io_len; 334 else 335 io_len = 0; 336 337 devstat_end_transaction(beio->lun->disk_stats, 338 /*bytes*/ io_len, 339 beio->ds_tag_type, 340 beio->ds_trans_type, 341 /*now*/ NULL, 342 /*then*/&beio->ds_t0); 343 344 if (beio->beio_cont != NULL) { 345 beio->beio_cont(beio); 346 } else { 347 ctl_free_beio(beio); 348 ctl_done(io); 349 } 350} 351 352static int 353ctl_be_block_move_done(union ctl_io *io) 354{ 355 struct ctl_be_block_io *beio; 356 struct ctl_be_block_lun *be_lun; 357#ifdef CTL_TIME_IO 358 struct bintime cur_bt; 359#endif 360 361 beio = (struct ctl_be_block_io *) 362 io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr; 363 364 be_lun = beio->lun; 365 366 DPRINTF("entered\n"); 367 368#ifdef CTL_TIME_IO 369 getbintime(&cur_bt); 370 bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); 371 bintime_add(&io->io_hdr.dma_bt, &cur_bt); 372 io->io_hdr.num_dmas++; 373#endif 374 375 /* 376 * We set status at this point for read commands, and write 377 * commands with errors. 378 */ 379 if ((beio->bio_cmd == BIO_READ) 380 && (io->io_hdr.port_status == 0) 381 && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) 382 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) 383 ctl_set_success(&io->scsiio); 384 else if ((io->io_hdr.port_status != 0) 385 && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) 386 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 387 /* 388 * For hardware error sense keys, the sense key 389 * specific value is defined to be a retry count, 390 * but we use it to pass back an internal FETD 391 * error code. XXX KDM Hopefully the FETD is only 392 * using 16 bits for an error code, since that's 393 * all the space we have in the sks field. 394 */ 395 ctl_set_internal_failure(&io->scsiio, 396 /*sks_valid*/ 1, 397 /*retry_count*/ 398 io->io_hdr.port_status); 399 } 400 401 /* 402 * If this is a read, or a write with errors, it is done. 
403 */ 404 if ((beio->bio_cmd == BIO_READ) 405 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 406 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 407 ctl_complete_beio(beio); 408 return (0); 409 } 410 411 /* 412 * At this point, we have a write and the DMA completed 413 * successfully. We now have to queue it to the task queue to 414 * execute the backend I/O. That is because we do blocking 415 * memory allocations, and in the file backing case, blocking I/O. 416 * This move done routine is generally called in the SIM's 417 * interrupt context, and therefore we cannot block. 418 */ 419 mtx_lock(&be_lun->lock); 420 /* 421 * XXX KDM make sure that links is okay to use at this point. 422 * Otherwise, we either need to add another field to ctl_io_hdr, 423 * or deal with resource allocation here. 424 */ 425 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 426 mtx_unlock(&be_lun->lock); 427 428 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 429 430 return (0); 431} 432 433static void 434ctl_be_block_biodone(struct bio *bio) 435{ 436 struct ctl_be_block_io *beio; 437 struct ctl_be_block_lun *be_lun; 438 union ctl_io *io; 439 int error; 440 441 beio = bio->bio_caller1; 442 be_lun = beio->lun; 443 io = beio->io; 444 445 DPRINTF("entered\n"); 446 447 error = bio->bio_error; 448 mtx_lock(&be_lun->lock); 449 if (error != 0) 450 beio->num_errors++; 451 452 beio->num_bios_done++; 453 454 /* 455 * XXX KDM will this cause WITNESS to complain? Holding a lock 456 * during the free might cause it to complain. 457 */ 458 g_destroy_bio(bio); 459 460 /* 461 * If the send complete bit isn't set, or we aren't the last I/O to 462 * complete, then we're done. 463 */ 464 if ((beio->send_complete == 0) 465 || (beio->num_bios_done < beio->num_bios_sent)) { 466 mtx_unlock(&be_lun->lock); 467 return; 468 } 469 470 /* 471 * At this point, we've verified that we are the last I/O to 472 * complete, so it's safe to drop the lock. 473 */ 474 mtx_unlock(&be_lun->lock); 475 476 /* 477 * If there are any errors from the backing device, we fail the 478 * entire I/O with a medium error. 479 */ 480 if (beio->num_errors > 0) { 481 if (error == EOPNOTSUPP) { 482 ctl_set_invalid_opcode(&io->scsiio); 483 } else if (beio->bio_cmd == BIO_FLUSH) { 484 /* XXX KDM is there is a better error here? */ 485 ctl_set_internal_failure(&io->scsiio, 486 /*sks_valid*/ 1, 487 /*retry_count*/ 0xbad2); 488 } else 489 ctl_set_medium_error(&io->scsiio); 490 ctl_complete_beio(beio); 491 return; 492 } 493 494 /* 495 * If this is a write, a flush or a delete, we're all done. 496 * If this is a read, we can now send the data to the user. 
497 */ 498 if ((beio->bio_cmd == BIO_WRITE) 499 || (beio->bio_cmd == BIO_FLUSH) 500 || (beio->bio_cmd == BIO_DELETE)) { 501 ctl_set_success(&io->scsiio); 502 ctl_complete_beio(beio); 503 } else { 504#ifdef CTL_TIME_IO 505 getbintime(&io->io_hdr.dma_start_bt); 506#endif 507 ctl_datamove(io); 508 } 509} 510 511static void 512ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 513 struct ctl_be_block_io *beio) 514{ 515 union ctl_io *io; 516 struct mount *mountpoint; 517 int error, lock_flags; 518 519 DPRINTF("entered\n"); 520 521 io = beio->io; 522 523 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 524 525 if (MNT_SHARED_WRITES(mountpoint) 526 || ((mountpoint == NULL) 527 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 528 lock_flags = LK_SHARED; 529 else 530 lock_flags = LK_EXCLUSIVE; 531 532 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 533 534 binuptime(&beio->ds_t0); 535 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 536 537 error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread); 538 VOP_UNLOCK(be_lun->vn, 0); 539 540 vn_finished_write(mountpoint); 541 542 if (error == 0) 543 ctl_set_success(&io->scsiio); 544 else { 545 /* XXX KDM is there is a better error here? */ 546 ctl_set_internal_failure(&io->scsiio, 547 /*sks_valid*/ 1, 548 /*retry_count*/ 0xbad1); 549 } 550 551 ctl_complete_beio(beio); 552} 553 554SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); 555SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); 556SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t"); 557SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); 558 559static void 560ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 561 struct ctl_be_block_io *beio) 562{ 563 struct ctl_be_block_filedata *file_data; 564 union ctl_io *io; 565 struct uio xuio; 566 struct iovec *xiovec; 567 int flags; 568 int error, i; 569 570 DPRINTF("entered\n"); 571 572 file_data = &be_lun->backend.file; 573 io = beio->io; 574 flags = beio->bio_flags; 575 576 if (beio->bio_cmd == BIO_READ) { 577 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 578 } else { 579 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 580 } 581 582 bzero(&xuio, sizeof(xuio)); 583 if (beio->bio_cmd == BIO_READ) 584 xuio.uio_rw = UIO_READ; 585 else 586 xuio.uio_rw = UIO_WRITE; 587 588 xuio.uio_offset = beio->io_offset; 589 xuio.uio_resid = beio->io_len; 590 xuio.uio_segflg = UIO_SYSSPACE; 591 xuio.uio_iov = beio->xiovecs; 592 xuio.uio_iovcnt = beio->num_segs; 593 xuio.uio_td = curthread; 594 595 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 596 xiovec->iov_base = beio->sg_segs[i].addr; 597 xiovec->iov_len = beio->sg_segs[i].len; 598 } 599 600 if (beio->bio_cmd == BIO_READ) { 601 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 602 603 binuptime(&beio->ds_t0); 604 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 605 606 /* 607 * UFS pays attention to IO_DIRECT for reads. If the 608 * DIRECTIO option is configured into the kernel, it calls 609 * ffs_rawread(). But that only works for single-segment 610 * uios with user space addresses. In our case, with a 611 * kernel uio, it still reads into the buffer cache, but it 612 * will just try to release the buffer from the cache later 613 * on in ffs_read(). 614 * 615 * ZFS does not pay attention to IO_DIRECT for reads. 616 * 617 * UFS does not pay attention to IO_SYNC for reads. 618 * 619 * ZFS pays attention to IO_SYNC (which translates into the 620 * Solaris define FRSYNC for zfs_read()) for reads. 
It 621 * attempts to sync the file before reading. 622 * 623 * So, to attempt to provide some barrier semantics in the 624 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. 625 */ 626 error = VOP_READ(be_lun->vn, &xuio, (flags & BIO_ORDERED) ? 627 (IO_DIRECT|IO_SYNC) : 0, file_data->cred); 628 629 VOP_UNLOCK(be_lun->vn, 0); 630 } else { 631 struct mount *mountpoint; 632 int lock_flags; 633 634 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 635 636 if (MNT_SHARED_WRITES(mountpoint) 637 || ((mountpoint == NULL) 638 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 639 lock_flags = LK_SHARED; 640 else 641 lock_flags = LK_EXCLUSIVE; 642 643 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 644 645 binuptime(&beio->ds_t0); 646 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 647 648 /* 649 * UFS pays attention to IO_DIRECT for writes. The write 650 * is done asynchronously. (Normally the write would just 651 * get put into cache. 652 * 653 * UFS pays attention to IO_SYNC for writes. It will 654 * attempt to write the buffer out synchronously if that 655 * flag is set. 656 * 657 * ZFS does not pay attention to IO_DIRECT for writes. 658 * 659 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 660 * for writes. It will flush the transaction from the 661 * cache before returning. 662 * 663 * So if we've got the BIO_ORDERED flag set, we want 664 * IO_SYNC in either the UFS or ZFS case. 665 */ 666 error = VOP_WRITE(be_lun->vn, &xuio, (flags & BIO_ORDERED) ? 667 IO_SYNC : 0, file_data->cred); 668 VOP_UNLOCK(be_lun->vn, 0); 669 670 vn_finished_write(mountpoint); 671 } 672 673 /* 674 * If we got an error, set the sense data to "MEDIUM ERROR" and 675 * return the I/O to the user. 676 */ 677 if (error != 0) { 678 char path_str[32]; 679 680 ctl_scsi_path_string(io, path_str, sizeof(path_str)); 681 /* 682 * XXX KDM ZFS returns ENOSPC when the underlying 683 * filesystem fills up. What kind of SCSI error should we 684 * return for that? 685 */ 686 printf("%s%s command returned errno %d\n", path_str, 687 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error); 688 ctl_set_medium_error(&io->scsiio); 689 ctl_complete_beio(beio); 690 return; 691 } 692 693 /* 694 * If this is a write, we're all done. 695 * If this is a read, we can now send the data to the user. 696 */ 697 if (beio->bio_cmd == BIO_WRITE) { 698 ctl_set_success(&io->scsiio); 699 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 700 ctl_complete_beio(beio); 701 } else { 702 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 703#ifdef CTL_TIME_IO 704 getbintime(&io->io_hdr.dma_start_bt); 705#endif 706 ctl_datamove(io); 707 } 708} 709 710static void 711ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 712 struct ctl_be_block_io *beio) 713{ 714 struct bio *bio; 715 union ctl_io *io; 716 struct ctl_be_block_devdata *dev_data; 717 718 dev_data = &be_lun->backend.dev; 719 io = beio->io; 720 721 DPRINTF("entered\n"); 722 723 /* This can't fail, it's a blocking allocation. */ 724 bio = g_alloc_bio(); 725 726 bio->bio_cmd = BIO_FLUSH; 727 bio->bio_flags |= BIO_ORDERED; 728 bio->bio_dev = dev_data->cdev; 729 bio->bio_offset = 0; 730 bio->bio_data = 0; 731 bio->bio_done = ctl_be_block_biodone; 732 bio->bio_caller1 = beio; 733 bio->bio_pblkno = 0; 734 735 /* 736 * We don't need to acquire the LUN lock here, because we are only 737 * sending one bio, and so there is no other context to synchronize 738 * with. 
739 */ 740 beio->num_bios_sent = 1; 741 beio->send_complete = 1; 742 743 binuptime(&beio->ds_t0); 744 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 745 746 (*dev_data->csw->d_strategy)(bio); 747} 748 749static void 750ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 751 struct ctl_be_block_io *beio, 752 uint64_t off, uint64_t len, int last) 753{ 754 struct bio *bio; 755 struct ctl_be_block_devdata *dev_data; 756 uint64_t maxlen; 757 758 dev_data = &be_lun->backend.dev; 759 maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize); 760 while (len > 0) { 761 bio = g_alloc_bio(); 762 bio->bio_cmd = BIO_DELETE; 763 bio->bio_flags |= beio->bio_flags; 764 bio->bio_dev = dev_data->cdev; 765 bio->bio_offset = off; 766 bio->bio_length = MIN(len, maxlen); 767 bio->bio_data = 0; 768 bio->bio_done = ctl_be_block_biodone; 769 bio->bio_caller1 = beio; 770 bio->bio_pblkno = off / be_lun->blocksize; 771 772 off += bio->bio_length; 773 len -= bio->bio_length; 774 775 mtx_lock(&be_lun->lock); 776 beio->num_bios_sent++; 777 if (last && len == 0) 778 beio->send_complete = 1; 779 mtx_unlock(&be_lun->lock); 780 781 (*dev_data->csw->d_strategy)(bio); 782 } 783} 784 785static void 786ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 787 struct ctl_be_block_io *beio) 788{ 789 union ctl_io *io; 790 struct ctl_be_block_devdata *dev_data; 791 struct ctl_ptr_len_flags *ptrlen; 792 struct scsi_unmap_desc *buf, *end; 793 uint64_t len; 794 795 dev_data = &be_lun->backend.dev; 796 io = beio->io; 797 798 DPRINTF("entered\n"); 799 800 binuptime(&beio->ds_t0); 801 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 802 803 if (beio->io_offset == -1) { 804 beio->io_len = 0; 805 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 806 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 807 end = buf + ptrlen->len / sizeof(*buf); 808 for (; buf < end; buf++) { 809 len = (uint64_t)scsi_4btoul(buf->length) * 810 be_lun->blocksize; 811 beio->io_len += len; 812 ctl_be_block_unmap_dev_range(be_lun, beio, 813 scsi_8btou64(buf->lba) * be_lun->blocksize, len, 814 (end - buf < 2) ? TRUE : FALSE); 815 } 816 } else 817 ctl_be_block_unmap_dev_range(be_lun, beio, 818 beio->io_offset, beio->io_len, TRUE); 819} 820 821static void 822ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 823 struct ctl_be_block_io *beio) 824{ 825 int i; 826 struct bio *bio; 827 struct ctl_be_block_devdata *dev_data; 828 off_t cur_offset; 829 int max_iosize; 830 831 DPRINTF("entered\n"); 832 833 dev_data = &be_lun->backend.dev; 834 835 /* 836 * We have to limit our I/O size to the maximum supported by the 837 * backend device. Hopefully it is MAXPHYS. If the driver doesn't 838 * set it properly, use DFLTPHYS. 839 */ 840 max_iosize = dev_data->cdev->si_iosize_max; 841 if (max_iosize < PAGE_SIZE) 842 max_iosize = DFLTPHYS; 843 844 cur_offset = beio->io_offset; 845 846 /* 847 * XXX KDM need to accurately reflect the number of I/Os outstanding 848 * to a device. 849 */ 850 binuptime(&beio->ds_t0); 851 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 852 853 for (i = 0; i < beio->num_segs; i++) { 854 size_t cur_size; 855 uint8_t *cur_ptr; 856 857 cur_size = beio->sg_segs[i].len; 858 cur_ptr = beio->sg_segs[i].addr; 859 860 while (cur_size > 0) { 861 /* This can't fail, it's a blocking allocation. 
*/ 862 bio = g_alloc_bio(); 863 864 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 865 866 bio->bio_cmd = beio->bio_cmd; 867 bio->bio_flags |= beio->bio_flags; 868 bio->bio_dev = dev_data->cdev; 869 bio->bio_caller1 = beio; 870 bio->bio_length = min(cur_size, max_iosize); 871 bio->bio_offset = cur_offset; 872 bio->bio_data = cur_ptr; 873 bio->bio_done = ctl_be_block_biodone; 874 bio->bio_pblkno = cur_offset / be_lun->blocksize; 875 876 cur_offset += bio->bio_length; 877 cur_ptr += bio->bio_length; 878 cur_size -= bio->bio_length; 879 880 /* 881 * Make sure we set the complete bit just before we 882 * issue the last bio so we don't wind up with a 883 * race. 884 * 885 * Use the LUN mutex here instead of a combination 886 * of atomic variables for simplicity. 887 * 888 * XXX KDM we could have a per-IO lock, but that 889 * would cause additional per-IO setup and teardown 890 * overhead. Hopefully there won't be too much 891 * contention on the LUN lock. 892 */ 893 mtx_lock(&be_lun->lock); 894 895 beio->num_bios_sent++; 896 897 if ((i == beio->num_segs - 1) 898 && (cur_size == 0)) 899 beio->send_complete = 1; 900 901 mtx_unlock(&be_lun->lock); 902 903 (*dev_data->csw->d_strategy)(bio); 904 } 905 } 906} 907 908static void 909ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 910{ 911 union ctl_io *io; 912 913 io = beio->io; 914 ctl_free_beio(beio); 915 if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE) 916 && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 917 ctl_config_write_done(io); 918 return; 919 } 920 921 ctl_be_block_config_write(io); 922} 923 924static void 925ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 926 union ctl_io *io) 927{ 928 struct ctl_be_block_io *beio; 929 struct ctl_be_block_softc *softc; 930 struct ctl_lba_len_flags *lbalen; 931 uint64_t len_left, lba; 932 int i, seglen; 933 uint8_t *buf, *end; 934 935 DPRINTF("entered\n"); 936 937 beio = io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr; 938 softc = be_lun->softc; 939 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 940 941 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP) || 942 (lbalen->flags & SWS_UNMAP && be_lun->unmap == NULL)) { 943 ctl_free_beio(beio); 944 ctl_set_invalid_field(&io->scsiio, 945 /*sks_valid*/ 1, 946 /*command*/ 1, 947 /*field*/ 1, 948 /*bit_valid*/ 0, 949 /*bit*/ 0); 950 ctl_config_write_done(io); 951 return; 952 } 953 954 /* 955 * If the I/O came down with an ordered or head of queue tag, set 956 * the BIO_ORDERED attribute. For head of queue tags, that's 957 * pretty much the best we can do. 
958 */ 959 if ((io->scsiio.tag_type == CTL_TAG_ORDERED) 960 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE)) 961 beio->bio_flags = BIO_ORDERED; 962 963 switch (io->scsiio.tag_type) { 964 case CTL_TAG_ORDERED: 965 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 966 break; 967 case CTL_TAG_HEAD_OF_QUEUE: 968 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 969 break; 970 case CTL_TAG_UNTAGGED: 971 case CTL_TAG_SIMPLE: 972 case CTL_TAG_ACA: 973 default: 974 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 975 break; 976 } 977 978 if (lbalen->flags & SWS_UNMAP) { 979 beio->io_offset = lbalen->lba * be_lun->blocksize; 980 beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize; 981 beio->bio_cmd = BIO_DELETE; 982 beio->ds_trans_type = DEVSTAT_FREE; 983 984 be_lun->unmap(be_lun, beio); 985 return; 986 } 987 988 beio->bio_cmd = BIO_WRITE; 989 beio->ds_trans_type = DEVSTAT_WRITE; 990 991 DPRINTF("WRITE SAME at LBA %jx len %u\n", 992 (uintmax_t)lbalen->lba, lbalen->len); 993 994 len_left = (uint64_t)lbalen->len * be_lun->blocksize; 995 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 996 997 /* 998 * Setup the S/G entry for this chunk. 999 */ 1000 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1001 seglen -= seglen % be_lun->blocksize; 1002 beio->sg_segs[i].len = seglen; 1003 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1004 1005 DPRINTF("segment %d addr %p len %zd\n", i, 1006 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1007 1008 beio->num_segs++; 1009 len_left -= seglen; 1010 1011 buf = beio->sg_segs[i].addr; 1012 end = buf + seglen; 1013 for (; buf < end; buf += be_lun->blocksize) { 1014 memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize); 1015 if (lbalen->flags & SWS_LBDATA) 1016 scsi_ulto4b(lbalen->lba + lba, buf); 1017 lba++; 1018 } 1019 } 1020 1021 beio->io_offset = lbalen->lba * be_lun->blocksize; 1022 beio->io_len = lba * be_lun->blocksize; 1023 1024 /* We can not do all in one run. Correct and schedule rerun. */ 1025 if (len_left > 0) { 1026 lbalen->lba += lba; 1027 lbalen->len -= lba; 1028 beio->beio_cont = ctl_be_block_cw_done_ws; 1029 } 1030 1031 be_lun->dispatch(be_lun, beio); 1032} 1033 1034static void 1035ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1036 union ctl_io *io) 1037{ 1038 struct ctl_be_block_io *beio; 1039 struct ctl_be_block_softc *softc; 1040 struct ctl_ptr_len_flags *ptrlen; 1041 1042 DPRINTF("entered\n"); 1043 1044 beio = io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr; 1045 softc = be_lun->softc; 1046 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1047 1048 if (ptrlen->flags != 0 || be_lun->unmap == NULL) { 1049 ctl_free_beio(beio); 1050 ctl_set_invalid_field(&io->scsiio, 1051 /*sks_valid*/ 0, 1052 /*command*/ 1, 1053 /*field*/ 0, 1054 /*bit_valid*/ 0, 1055 /*bit*/ 0); 1056 ctl_config_write_done(io); 1057 return; 1058 } 1059 1060 /* 1061 * If the I/O came down with an ordered or head of queue tag, set 1062 * the BIO_ORDERED attribute. For head of queue tags, that's 1063 * pretty much the best we can do. 
1064 */ 1065 if ((io->scsiio.tag_type == CTL_TAG_ORDERED) 1066 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE)) 1067 beio->bio_flags = BIO_ORDERED; 1068 1069 switch (io->scsiio.tag_type) { 1070 case CTL_TAG_ORDERED: 1071 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1072 break; 1073 case CTL_TAG_HEAD_OF_QUEUE: 1074 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1075 break; 1076 case CTL_TAG_UNTAGGED: 1077 case CTL_TAG_SIMPLE: 1078 case CTL_TAG_ACA: 1079 default: 1080 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1081 break; 1082 } 1083 1084 beio->io_len = 0; 1085 beio->io_offset = -1; 1086 1087 beio->bio_cmd = BIO_DELETE; 1088 beio->ds_trans_type = DEVSTAT_FREE; 1089 1090 DPRINTF("UNMAP\n"); 1091 1092 be_lun->unmap(be_lun, beio); 1093} 1094 1095static void 1096ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1097{ 1098 union ctl_io *io; 1099 1100 io = beio->io; 1101 ctl_free_beio(beio); 1102 ctl_config_write_done(io); 1103} 1104 1105static void 1106ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1107 union ctl_io *io) 1108{ 1109 struct ctl_be_block_io *beio; 1110 struct ctl_be_block_softc *softc; 1111 1112 DPRINTF("entered\n"); 1113 1114 softc = be_lun->softc; 1115 beio = ctl_alloc_beio(softc); 1116 beio->io = io; 1117 beio->lun = be_lun; 1118 beio->beio_cont = ctl_be_block_cw_done; 1119 io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr = beio; 1120 1121 switch (io->scsiio.cdb[0]) { 1122 case SYNCHRONIZE_CACHE: 1123 case SYNCHRONIZE_CACHE_16: 1124 beio->bio_cmd = BIO_FLUSH; 1125 beio->ds_trans_type = DEVSTAT_NO_DATA; 1126 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1127 beio->io_len = 0; 1128 be_lun->lun_flush(be_lun, beio); 1129 break; 1130 case WRITE_SAME_10: 1131 case WRITE_SAME_16: 1132 ctl_be_block_cw_dispatch_ws(be_lun, io); 1133 break; 1134 case UNMAP: 1135 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1136 break; 1137 default: 1138 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1139 break; 1140 } 1141} 1142 1143SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); 1144SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); 1145SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); 1146SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); 1147 1148static void 1149ctl_be_block_next(struct ctl_be_block_io *beio) 1150{ 1151 struct ctl_be_block_lun *be_lun; 1152 union ctl_io *io; 1153 1154 io = beio->io; 1155 be_lun = beio->lun; 1156 ctl_free_beio(beio); 1157 if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE) 1158 && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1159 ctl_done(io); 1160 return; 1161 } 1162 1163 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 1164 io->io_hdr.status &= ~CTL_STATUS_MASK; 1165 io->io_hdr.status |= CTL_STATUS_NONE; 1166 1167 mtx_lock(&be_lun->lock); 1168 /* 1169 * XXX KDM make sure that links is okay to use at this point. 1170 * Otherwise, we either need to add another field to ctl_io_hdr, 1171 * or deal with resource allocation here. 
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
		      union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len *lbalen;
	uint64_t len_left, lbaoff;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr = beio;

	/*
	 * If the I/O came down with an ordered or head of queue tag, set
	 * the BIO_ORDERED attribute.  For head of queue tags, that's
	 * pretty much the best we can do.
	 *
	 * XXX KDM we don't have a great way to easily know about the FUA
	 * bit right now (it is decoded in ctl_read_write(), but we don't
	 * pass that knowledge to the backend), and in any case we would
	 * need to determine how to handle it.
	 */
	if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
	 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
		beio->bio_flags = BIO_ORDERED;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	/*
	 * This path handles read and write only.  The config write path
	 * handles flush operations.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	} else {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	}

	lbalen = (struct ctl_lba_len *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
	/* Compute the relative LBA offset before it is used in the debug printf. */
	lbaoff = io->scsiio.kern_rel_offset / be_lun->blocksize;
	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, lbaoff);
	beio->io_offset = (lbalen->lba + lbaoff) * be_lun->blocksize;
	beio->io_len = MIN((lbalen->len - lbaoff) * be_lun->blocksize,
	    CTLBLK_MAX_IO_SIZE);
	beio->io_len -= beio->io_len % be_lun->blocksize;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
			i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
1261 */ 1262 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); 1263 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1264 1265 DPRINTF("segment %d addr %p len %zd\n", i, 1266 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1267 1268 beio->num_segs++; 1269 len_left -= beio->sg_segs[i].len; 1270 } 1271 if (io->scsiio.kern_rel_offset + beio->io_len < 1272 io->scsiio.kern_total_len) 1273 beio->beio_cont = ctl_be_block_next; 1274 io->scsiio.be_move_done = ctl_be_block_move_done; 1275 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1276 io->scsiio.kern_data_len = beio->io_len; 1277 io->scsiio.kern_data_resid = 0; 1278 io->scsiio.kern_sg_entries = beio->num_segs; 1279 io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST; 1280 1281 /* 1282 * For the read case, we need to read the data into our buffers and 1283 * then we can send it back to the user. For the write case, we 1284 * need to get the data from the user first. 1285 */ 1286 if (beio->bio_cmd == BIO_READ) { 1287 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); 1288 be_lun->dispatch(be_lun, beio); 1289 } else { 1290 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); 1291#ifdef CTL_TIME_IO 1292 getbintime(&io->io_hdr.dma_start_bt); 1293#endif 1294 ctl_datamove(io); 1295 } 1296} 1297 1298static void 1299ctl_be_block_worker(void *context, int pending) 1300{ 1301 struct ctl_be_block_lun *be_lun; 1302 struct ctl_be_block_softc *softc; 1303 union ctl_io *io; 1304 1305 be_lun = (struct ctl_be_block_lun *)context; 1306 softc = be_lun->softc; 1307 1308 DPRINTF("entered\n"); 1309 1310 mtx_lock(&be_lun->lock); 1311 for (;;) { 1312 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1313 if (io != NULL) { 1314 struct ctl_be_block_io *beio; 1315 1316 DPRINTF("datamove queue\n"); 1317 1318 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, 1319 ctl_io_hdr, links); 1320 1321 mtx_unlock(&be_lun->lock); 1322 1323 beio = (struct ctl_be_block_io *) 1324 io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr; 1325 1326 be_lun->dispatch(be_lun, beio); 1327 1328 mtx_lock(&be_lun->lock); 1329 continue; 1330 } 1331 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1332 if (io != NULL) { 1333 1334 DPRINTF("config write queue\n"); 1335 1336 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, 1337 ctl_io_hdr, links); 1338 1339 mtx_unlock(&be_lun->lock); 1340 1341 ctl_be_block_cw_dispatch(be_lun, io); 1342 1343 mtx_lock(&be_lun->lock); 1344 continue; 1345 } 1346 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1347 if (io != NULL) { 1348 DPRINTF("input queue\n"); 1349 1350 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, 1351 ctl_io_hdr, links); 1352 mtx_unlock(&be_lun->lock); 1353 1354 /* 1355 * We must drop the lock, since this routine and 1356 * its children may sleep. 1357 */ 1358 ctl_be_block_dispatch(be_lun, io); 1359 1360 mtx_lock(&be_lun->lock); 1361 continue; 1362 } 1363 1364 /* 1365 * If we get here, there is no work left in the queues, so 1366 * just break out and let the task queue go to sleep. 1367 */ 1368 break; 1369 } 1370 mtx_unlock(&be_lun->lock); 1371} 1372 1373/* 1374 * Entry point from CTL to the backend for I/O. We queue everything to a 1375 * work thread, so this just puts the I/O on a queue and wakes up the 1376 * thread. 
1377 */ 1378static int 1379ctl_be_block_submit(union ctl_io *io) 1380{ 1381 struct ctl_be_block_lun *be_lun; 1382 struct ctl_be_lun *ctl_be_lun; 1383 1384 DPRINTF("entered\n"); 1385 1386 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 1387 CTL_PRIV_BACKEND_LUN].ptr; 1388 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 1389 1390 /* 1391 * Make sure we only get SCSI I/O. 1392 */ 1393 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " 1394 "%#x) encountered", io->io_hdr.io_type)); 1395 1396 mtx_lock(&be_lun->lock); 1397 /* 1398 * XXX KDM make sure that links is okay to use at this point. 1399 * Otherwise, we either need to add another field to ctl_io_hdr, 1400 * or deal with resource allocation here. 1401 */ 1402 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1403 mtx_unlock(&be_lun->lock); 1404 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1405 1406 return (CTL_RETVAL_COMPLETE); 1407} 1408 1409static int 1410ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1411 int flag, struct thread *td) 1412{ 1413 struct ctl_be_block_softc *softc; 1414 int error; 1415 1416 softc = &backend_block_softc; 1417 1418 error = 0; 1419 1420 switch (cmd) { 1421 case CTL_LUN_REQ: { 1422 struct ctl_lun_req *lun_req; 1423 1424 lun_req = (struct ctl_lun_req *)addr; 1425 1426 switch (lun_req->reqtype) { 1427 case CTL_LUNREQ_CREATE: 1428 error = ctl_be_block_create(softc, lun_req); 1429 break; 1430 case CTL_LUNREQ_RM: 1431 error = ctl_be_block_rm(softc, lun_req); 1432 break; 1433 case CTL_LUNREQ_MODIFY: 1434 error = ctl_be_block_modify(softc, lun_req); 1435 break; 1436 default: 1437 lun_req->status = CTL_LUN_ERROR; 1438 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1439 "%s: invalid LUN request type %d", __func__, 1440 lun_req->reqtype); 1441 break; 1442 } 1443 break; 1444 } 1445 default: 1446 error = ENOTTY; 1447 break; 1448 } 1449 1450 return (error); 1451} 1452 1453static int 1454ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1455{ 1456 struct ctl_be_block_filedata *file_data; 1457 struct ctl_lun_create_params *params; 1458 struct vattr vattr; 1459 int error; 1460 1461 error = 0; 1462 file_data = &be_lun->backend.file; 1463 params = &req->reqdata.create; 1464 1465 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1466 be_lun->dispatch = ctl_be_block_dispatch_file; 1467 be_lun->lun_flush = ctl_be_block_flush_file; 1468 1469 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1470 if (error != 0) { 1471 snprintf(req->error_str, sizeof(req->error_str), 1472 "error calling VOP_GETATTR() for file %s", 1473 be_lun->dev_path); 1474 return (error); 1475 } 1476 1477 /* 1478 * Verify that we have the ability to upgrade to exclusive 1479 * access on this file so we can trap errors at open instead 1480 * of reporting them during first access. 
1481 */ 1482 if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) { 1483 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY); 1484 if (be_lun->vn->v_iflag & VI_DOOMED) { 1485 error = EBADF; 1486 snprintf(req->error_str, sizeof(req->error_str), 1487 "error locking file %s", be_lun->dev_path); 1488 return (error); 1489 } 1490 } 1491 1492 1493 file_data->cred = crhold(curthread->td_ucred); 1494 if (params->lun_size_bytes != 0) 1495 be_lun->size_bytes = params->lun_size_bytes; 1496 else 1497 be_lun->size_bytes = vattr.va_size; 1498 /* 1499 * We set the multi thread flag for file operations because all 1500 * filesystems (in theory) are capable of allowing multiple readers 1501 * of a file at once. So we want to get the maximum possible 1502 * concurrency. 1503 */ 1504 be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD; 1505 1506 /* 1507 * XXX KDM vattr.va_blocksize may be larger than 512 bytes here. 1508 * With ZFS, it is 131072 bytes. Block sizes that large don't work 1509 * with disklabel and UFS on FreeBSD at least. Large block sizes 1510 * may not work with other OSes as well. So just export a sector 1511 * size of 512 bytes, which should work with any OS or 1512 * application. Since our backing is a file, any block size will 1513 * work fine for the backing store. 1514 */ 1515#if 0 1516 be_lun->blocksize= vattr.va_blocksize; 1517#endif 1518 if (params->blocksize_bytes != 0) 1519 be_lun->blocksize = params->blocksize_bytes; 1520 else 1521 be_lun->blocksize = 512; 1522 1523 /* 1524 * Sanity check. The media size has to be at least one 1525 * sector long. 1526 */ 1527 if (be_lun->size_bytes < be_lun->blocksize) { 1528 error = EINVAL; 1529 snprintf(req->error_str, sizeof(req->error_str), 1530 "file %s size %ju < block size %u", be_lun->dev_path, 1531 (uintmax_t)be_lun->size_bytes, be_lun->blocksize); 1532 } 1533 return (error); 1534} 1535 1536static int 1537ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1538{ 1539 struct ctl_lun_create_params *params; 1540 struct vattr vattr; 1541 struct cdev *dev; 1542 struct cdevsw *devsw; 1543 int error; 1544 off_t ps, pss, po, pos; 1545 1546 params = &req->reqdata.create; 1547 1548 be_lun->dev_type = CTL_BE_BLOCK_DEV; 1549 be_lun->dispatch = ctl_be_block_dispatch_dev; 1550 be_lun->lun_flush = ctl_be_block_flush_dev; 1551 be_lun->unmap = ctl_be_block_unmap_dev; 1552 be_lun->backend.dev.cdev = be_lun->vn->v_rdev; 1553 be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev, 1554 &be_lun->backend.dev.dev_ref); 1555 if (be_lun->backend.dev.csw == NULL) 1556 panic("Unable to retrieve device switch"); 1557 1558 error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED); 1559 if (error) { 1560 snprintf(req->error_str, sizeof(req->error_str), 1561 "%s: error getting vnode attributes for device %s", 1562 __func__, be_lun->dev_path); 1563 return (error); 1564 } 1565 1566 dev = be_lun->vn->v_rdev; 1567 devsw = dev->si_devsw; 1568 if (!devsw->d_ioctl) { 1569 snprintf(req->error_str, sizeof(req->error_str), 1570 "%s: no d_ioctl for device %s!", __func__, 1571 be_lun->dev_path); 1572 return (ENODEV); 1573 } 1574 1575 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, 1576 (caddr_t)&be_lun->blocksize, FREAD, 1577 curthread); 1578 if (error) { 1579 snprintf(req->error_str, sizeof(req->error_str), 1580 "%s: error %d returned for DIOCGSECTORSIZE ioctl " 1581 "on %s!", __func__, error, be_lun->dev_path); 1582 return (error); 1583 } 1584 1585 /* 1586 * If the user has asked for a blocksize that is greater than the 1587 * backing device's blocksize, we can do it only if 
the blocksize 1588 * the user is asking for is an even multiple of the underlying 1589 * device's blocksize. 1590 */ 1591 if ((params->blocksize_bytes != 0) 1592 && (params->blocksize_bytes > be_lun->blocksize)) { 1593 uint32_t bs_multiple, tmp_blocksize; 1594 1595 bs_multiple = params->blocksize_bytes / be_lun->blocksize; 1596 1597 tmp_blocksize = bs_multiple * be_lun->blocksize; 1598 1599 if (tmp_blocksize == params->blocksize_bytes) { 1600 be_lun->blocksize = params->blocksize_bytes; 1601 } else { 1602 snprintf(req->error_str, sizeof(req->error_str), 1603 "%s: requested blocksize %u is not an even " 1604 "multiple of backing device blocksize %u", 1605 __func__, params->blocksize_bytes, 1606 be_lun->blocksize); 1607 return (EINVAL); 1608 1609 } 1610 } else if ((params->blocksize_bytes != 0) 1611 && (params->blocksize_bytes != be_lun->blocksize)) { 1612 snprintf(req->error_str, sizeof(req->error_str), 1613 "%s: requested blocksize %u < backing device " 1614 "blocksize %u", __func__, params->blocksize_bytes, 1615 be_lun->blocksize); 1616 return (EINVAL); 1617 } 1618 1619 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, 1620 (caddr_t)&be_lun->size_bytes, FREAD, 1621 curthread); 1622 if (error) { 1623 snprintf(req->error_str, sizeof(req->error_str), 1624 "%s: error %d returned for DIOCGMEDIASIZE " 1625 " ioctl on %s!", __func__, error, 1626 be_lun->dev_path); 1627 return (error); 1628 } 1629 1630 if (params->lun_size_bytes != 0) { 1631 if (params->lun_size_bytes > be_lun->size_bytes) { 1632 snprintf(req->error_str, sizeof(req->error_str), 1633 "%s: requested LUN size %ju > backing device " 1634 "size %ju", __func__, 1635 (uintmax_t)params->lun_size_bytes, 1636 (uintmax_t)be_lun->size_bytes); 1637 return (EINVAL); 1638 } 1639 1640 be_lun->size_bytes = params->lun_size_bytes; 1641 } 1642 1643 error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE, 1644 (caddr_t)&ps, FREAD, curthread); 1645 if (error) 1646 ps = po = 0; 1647 else { 1648 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET, 1649 (caddr_t)&po, FREAD, curthread); 1650 if (error) 1651 po = 0; 1652 } 1653 pss = ps / be_lun->blocksize; 1654 pos = po / be_lun->blocksize; 1655 if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) && 1656 ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) { 1657 be_lun->pblockexp = fls(pss) - 1; 1658 be_lun->pblockoff = (pss - pos) % pss; 1659 } 1660 1661 return (0); 1662} 1663 1664static int 1665ctl_be_block_close(struct ctl_be_block_lun *be_lun) 1666{ 1667 DROP_GIANT(); 1668 if (be_lun->vn) { 1669 int flags = FREAD | FWRITE; 1670 1671 switch (be_lun->dev_type) { 1672 case CTL_BE_BLOCK_DEV: 1673 if (be_lun->backend.dev.csw) { 1674 dev_relthread(be_lun->backend.dev.cdev, 1675 be_lun->backend.dev.dev_ref); 1676 be_lun->backend.dev.csw = NULL; 1677 be_lun->backend.dev.cdev = NULL; 1678 } 1679 break; 1680 case CTL_BE_BLOCK_FILE: 1681 break; 1682 case CTL_BE_BLOCK_NONE: 1683 break; 1684 default: 1685 panic("Unexpected backend type."); 1686 break; 1687 } 1688 1689 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 1690 be_lun->vn = NULL; 1691 1692 switch (be_lun->dev_type) { 1693 case CTL_BE_BLOCK_DEV: 1694 break; 1695 case CTL_BE_BLOCK_FILE: 1696 if (be_lun->backend.file.cred != NULL) { 1697 crfree(be_lun->backend.file.cred); 1698 be_lun->backend.file.cred = NULL; 1699 } 1700 break; 1701 case CTL_BE_BLOCK_NONE: 1702 break; 1703 default: 1704 panic("Unexpected backend type."); 1705 break; 1706 } 1707 } 1708 PICKUP_GIANT(); 1709 1710 return (0); 1711} 1712 1713static int 1714ctl_be_block_open(struct 
ctl_be_block_softc *softc, 1715 struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1716{ 1717 struct nameidata nd; 1718 int flags; 1719 int error; 1720 1721 /* 1722 * XXX KDM allow a read-only option? 1723 */ 1724 flags = FREAD | FWRITE; 1725 error = 0; 1726 1727 if (rootvnode == NULL) { 1728 snprintf(req->error_str, sizeof(req->error_str), 1729 "%s: Root filesystem is not mounted", __func__); 1730 return (1); 1731 } 1732 1733 if (!curthread->td_proc->p_fd->fd_cdir) { 1734 curthread->td_proc->p_fd->fd_cdir = rootvnode; 1735 VREF(rootvnode); 1736 } 1737 if (!curthread->td_proc->p_fd->fd_rdir) { 1738 curthread->td_proc->p_fd->fd_rdir = rootvnode; 1739 VREF(rootvnode); 1740 } 1741 if (!curthread->td_proc->p_fd->fd_jdir) { 1742 curthread->td_proc->p_fd->fd_jdir = rootvnode; 1743 VREF(rootvnode); 1744 } 1745 1746 again: 1747 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 1748 error = vn_open(&nd, &flags, 0, NULL); 1749 if (error) { 1750 /* 1751 * This is the only reasonable guess we can make as far as 1752 * path if the user doesn't give us a fully qualified path. 1753 * If they want to specify a file, they need to specify the 1754 * full path. 1755 */ 1756 if (be_lun->dev_path[0] != '/') { 1757 char *dev_path = "/dev/"; 1758 char *dev_name; 1759 1760 /* Try adding device path at beginning of name */ 1761 dev_name = malloc(strlen(be_lun->dev_path) 1762 + strlen(dev_path) + 1, 1763 M_CTLBLK, M_WAITOK); 1764 if (dev_name) { 1765 sprintf(dev_name, "%s%s", dev_path, 1766 be_lun->dev_path); 1767 free(be_lun->dev_path, M_CTLBLK); 1768 be_lun->dev_path = dev_name; 1769 goto again; 1770 } 1771 } 1772 snprintf(req->error_str, sizeof(req->error_str), 1773 "%s: error opening %s", __func__, be_lun->dev_path); 1774 return (error); 1775 } 1776 1777 NDFREE(&nd, NDF_ONLY_PNBUF); 1778 1779 be_lun->vn = nd.ni_vp; 1780 1781 /* We only support disks and files. 
*/ 1782 if (vn_isdisk(be_lun->vn, &error)) { 1783 error = ctl_be_block_open_dev(be_lun, req); 1784 } else if (be_lun->vn->v_type == VREG) { 1785 error = ctl_be_block_open_file(be_lun, req); 1786 } else { 1787 error = EINVAL; 1788 snprintf(req->error_str, sizeof(req->error_str), 1789 "%s is not a disk or plain file", be_lun->dev_path); 1790 } 1791 VOP_UNLOCK(be_lun->vn, 0); 1792 1793 if (error != 0) { 1794 ctl_be_block_close(be_lun); 1795 return (error); 1796 } 1797 1798 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1; 1799 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift; 1800 1801 return (0); 1802} 1803 1804static int 1805ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 1806{ 1807 struct ctl_be_block_lun *be_lun; 1808 struct ctl_lun_create_params *params; 1809 char num_thread_str[16]; 1810 char tmpstr[32]; 1811 char *value; 1812 int retval, num_threads, unmap; 1813 int tmp_num_threads; 1814 1815 params = &req->reqdata.create; 1816 retval = 0; 1817 1818 num_threads = cbb_num_threads; 1819 1820 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 1821 1822 be_lun->softc = softc; 1823 STAILQ_INIT(&be_lun->input_queue); 1824 STAILQ_INIT(&be_lun->config_write_queue); 1825 STAILQ_INIT(&be_lun->datamove_queue); 1826 sprintf(be_lun->lunname, "cblk%d", softc->num_luns); 1827 mtx_init(&be_lun->lock, be_lun->lunname, NULL, MTX_DEF); 1828 ctl_init_opts(&be_lun->ctl_be_lun, req); 1829 1830 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, 1831 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 1832 1833 if (be_lun->lun_zone == NULL) { 1834 snprintf(req->error_str, sizeof(req->error_str), 1835 "%s: error allocating UMA zone", __func__); 1836 goto bailout_error; 1837 } 1838 1839 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 1840 be_lun->ctl_be_lun.lun_type = params->device_type; 1841 else 1842 be_lun->ctl_be_lun.lun_type = T_DIRECT; 1843 1844 if (be_lun->ctl_be_lun.lun_type == T_DIRECT) { 1845 value = ctl_get_opt(&be_lun->ctl_be_lun, "file"); 1846 if (value == NULL) { 1847 snprintf(req->error_str, sizeof(req->error_str), 1848 "%s: no file argument specified", __func__); 1849 goto bailout_error; 1850 } 1851 be_lun->dev_path = strdup(value, M_CTLBLK); 1852 1853 retval = ctl_be_block_open(softc, be_lun, req); 1854 if (retval != 0) { 1855 retval = 0; 1856 goto bailout_error; 1857 } 1858 1859 /* 1860 * Tell the user the size of the file/device. 1861 */ 1862 params->lun_size_bytes = be_lun->size_bytes; 1863 1864 /* 1865 * The maximum LBA is the size - 1. 1866 */ 1867 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1; 1868 } else { 1869 /* 1870 * For processor devices, we don't have any size. 1871 */ 1872 be_lun->blocksize = 0; 1873 be_lun->pblockexp = 0; 1874 be_lun->pblockoff = 0; 1875 be_lun->size_blocks = 0; 1876 be_lun->size_bytes = 0; 1877 be_lun->ctl_be_lun.maxlba = 0; 1878 params->lun_size_bytes = 0; 1879 1880 /* 1881 * Default to just 1 thread for processor devices. 1882 */ 1883 num_threads = 1; 1884 } 1885 1886 /* 1887 * XXX This searching loop might be refactored to be combined with 1888 * the loop above, 1889 */ 1890 value = ctl_get_opt(&be_lun->ctl_be_lun, "num_threads"); 1891 if (value != NULL) { 1892 tmp_num_threads = strtol(value, NULL, 0); 1893 1894 /* 1895 * We don't let the user specify less than one 1896 * thread, but hope he's clueful enough not to 1897 * specify 1000 threads. 
		 */
		if (tmp_num_threads < 1) {
			snprintf(req->error_str, sizeof(req->error_str),
				 "%s: invalid number of threads %s",
				 __func__, value);
			goto bailout_error;
		}
		num_threads = tmp_num_threads;
	}
	unmap = 0;
	value = ctl_get_opt(&be_lun->ctl_be_lun, "unmap");
	if (value != NULL && strcmp(value, "on") == 0)
		unmap = 1;

	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
	be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
	if (unmap)
		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
	be_lun->ctl_be_lun.be_lun = be_lun;
	be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
	be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
	be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
	/* Tell the user the blocksize we ended up using */
	params->blocksize_bytes = be_lun->blocksize;
	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
		be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
	} else
		be_lun->ctl_be_lun.req_lun_id = 0;

	be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
	be_lun->ctl_be_lun.lun_config_status =
		ctl_be_block_lun_config_status;
	be_lun->ctl_be_lun.be = &ctl_be_block_driver;

	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
			 softc->num_luns);
		strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
			ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
			sizeof(tmpstr)));

		/* Tell the user what we used for a serial number */
		strncpy((char *)params->serial_num, tmpstr,
			ctl_min(sizeof(params->serial_num), sizeof(tmpstr)));
	} else {
		strncpy((char *)be_lun->ctl_be_lun.serial_num,
			params->serial_num,
			ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
			sizeof(params->serial_num)));
	}
	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
		strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
			ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
			sizeof(tmpstr)));

		/* Tell the user what we used for a device ID */
		strncpy((char *)params->device_id, tmpstr,
			ctl_min(sizeof(params->device_id), sizeof(tmpstr)));
	} else {
		strncpy((char *)be_lun->ctl_be_lun.device_id,
			params->device_id,
			ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
			sizeof(params->device_id)));
	}

	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);

	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);

	if (be_lun->io_taskqueue == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s: Unable to create taskqueue", __func__);
		goto bailout_error;
	}

	/*
	 * Note that we start the same number of threads by default for
	 * both the file case and the block device case.  For the file
	 * case, we need multiple threads to allow concurrency, because the
	 * vnode interface is designed to be a blocking interface.  For the
	 * block device case, ZFS zvols at least will block the caller's
	 * context in many instances, and so we need multiple threads to
	 * overcome that problem.  Other block devices don't need as many
	 * threads, but they shouldn't cause too many problems.
	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);

	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);

	if (be_lun->io_taskqueue == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: Unable to create taskqueue", __func__);
		goto bailout_error;
	}

	/*
	 * Note that we start the same number of threads by default for
	 * both the file case and the block device case. For the file
	 * case, we need multiple threads to allow concurrency, because the
	 * vnode interface is designed to be a blocking interface. For the
	 * block device case, ZFS zvols at least will block the caller's
	 * context in many instances, and so we need multiple threads to
	 * overcome that problem. Other block devices don't need as many
	 * threads, but they shouldn't cause too many problems.
	 *
	 * If the user wants to just have a single thread for a block
	 * device, he can specify that when the LUN is created, or change
	 * the tunable/sysctl to alter the default number of threads.
	 */
	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
	    /*num threads*/num_threads, /*priority*/PWAIT,
	    /*thread name*/"%s taskq", be_lun->lunname);

	if (retval != 0)
		goto bailout_error;

	be_lun->num_threads = num_threads;

	mtx_lock(&softc->lock);
	softc->num_luns++;
	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);

	mtx_unlock(&softc->lock);

	retval = ctl_add_lun(&be_lun->ctl_be_lun);
	if (retval != 0) {
		mtx_lock(&softc->lock);
		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
		    links);
		softc->num_luns--;
		mtx_unlock(&softc->lock);
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: ctl_add_lun() returned error %d, see dmesg for "
		    "details", __func__, retval);
		retval = 0;
		goto bailout_error;
	}

	mtx_lock(&softc->lock);

	/*
	 * Tell the config_status routine that we're waiting so it won't
	 * clean up the LUN in the event of an error.
	 */
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;

	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
		if (retval == EINTR)
			break;
	}
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;

	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: LUN configuration error, see dmesg for details",
		    __func__);
		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
		    links);
		softc->num_luns--;
		mtx_unlock(&softc->lock);
		goto bailout_error;
	} else {
		params->req_lun_id = be_lun->ctl_be_lun.lun_id;
	}

	mtx_unlock(&softc->lock);

	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
	    be_lun->blocksize, DEVSTAT_ALL_SUPPORTED,
	    be_lun->ctl_be_lun.lun_type | DEVSTAT_TYPE_IF_OTHER,
	    DEVSTAT_PRIORITY_OTHER);

	req->status = CTL_LUN_OK;

	return (retval);

bailout_error:
	req->status = CTL_LUN_ERROR;

	if (be_lun->io_taskqueue != NULL)
		taskqueue_free(be_lun->io_taskqueue);
	ctl_be_block_close(be_lun);
	if (be_lun->dev_path != NULL)
		free(be_lun->dev_path, M_CTLBLK);
	if (be_lun->lun_zone != NULL)
		uma_zdestroy(be_lun->lun_zone);
	ctl_free_opts(&be_lun->ctl_be_lun);
	mtx_destroy(&be_lun->lock);
	free(be_lun, M_CTLBLK);

	return (retval);
}

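/*
 * Handle a LUN removal request: disable and invalidate the LUN in CTL,
 * wait for the shutdown callback to mark it unconfigured, then unlink it
 * from the softc and release the taskqueue, backing store and other
 * per-LUN resources.
 */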
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	int retval;

	params = &req->reqdata.rm;

	mtx_lock(&softc->lock);

	be_lun = NULL;

	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->ctl_be_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: LUN %u is not managed by the block backend",
		    __func__, params->lun_id);
		goto bailout_error;
	}

	retval = ctl_disable_lun(&be_lun->ctl_be_lun);

	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: error %d returned from ctl_disable_lun() for "
		    "LUN %d", __func__, retval, params->lun_id);
		goto bailout_error;
	}

	retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: error %d returned from ctl_invalidate_lun() for "
		    "LUN %d", __func__, retval, params->lun_id);
		goto bailout_error;
	}

	mtx_lock(&softc->lock);

	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;

	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
		if (retval == EINTR)
			break;
	}

	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;

	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: interrupted waiting for LUN to be freed",
		    __func__);
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);

	softc->num_luns--;
	mtx_unlock(&softc->lock);

	taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);

	taskqueue_free(be_lun->io_taskqueue);

	ctl_be_block_close(be_lun);

	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);

	uma_zdestroy(be_lun->lun_zone);

	ctl_free_opts(&be_lun->ctl_be_lun);
	free(be_lun->dev_path, M_CTLBLK);

	free(be_lun, M_CTLBLK);

	req->status = CTL_LUN_OK;

	return (0);

bailout_error:

	req->status = CTL_LUN_ERROR;

	return (0);
}

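/*
 * LUN resize support.  ctl_be_block_modify() looks up the LUN and calls one
 * of the two helpers below to recompute its size in bytes:
 * ctl_be_block_modify_file() uses VOP_GETATTR() on a regular backing file,
 * while ctl_be_block_modify_dev() queries the backing device with the
 * DIOCGMEDIASIZE ioctl.
 */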
static int
ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
			 struct ctl_lun_req *req)
{
	struct vattr vattr;
	int error;
	struct ctl_lun_modify_params *params;

	params = &req->reqdata.modify;

	if (params->lun_size_bytes != 0) {
		be_lun->size_bytes = params->lun_size_bytes;
	} else {
		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error != 0) {
			snprintf(req->error_str, sizeof(req->error_str),
			    "error calling VOP_GETATTR() for file %s",
			    be_lun->dev_path);
			return (error);
		}

		be_lun->size_bytes = vattr.va_size;
	}

	return (0);
}

static int
ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
			struct ctl_lun_req *req)
{
	struct cdev *dev;
	struct cdevsw *devsw;
	int error;
	struct ctl_lun_modify_params *params;
	uint64_t size_bytes;

	params = &req->reqdata.modify;

	dev = be_lun->vn->v_rdev;
	devsw = dev->si_devsw;
	if (!devsw->d_ioctl) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: no d_ioctl for device %s!", __func__,
		    be_lun->dev_path);
		return (ENODEV);
	}

	error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
	    (caddr_t)&size_bytes, FREAD, curthread);
	if (error) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: error %d returned for DIOCGMEDIASIZE ioctl "
		    "on %s!", __func__, error, be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > size_bytes) {
			snprintf(req->error_str, sizeof(req->error_str),
			    "%s: requested LUN size %ju > backing device "
			    "size %ju", __func__,
			    (uintmax_t)params->lun_size_bytes,
			    (uintmax_t)size_bytes);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else {
		be_lun->size_bytes = size_bytes;
	}

	return (0);
}

static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	int error;

	params = &req->reqdata.modify;

	mtx_lock(&softc->lock);

	be_lun = NULL;

	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->ctl_be_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s: LUN %u is not managed by the block backend",
		    __func__, params->lun_id);
		goto bailout_error;
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes < be_lun->blocksize) {
			snprintf(req->error_str, sizeof(req->error_str),
			    "%s: LUN size %ju < blocksize %u", __func__,
			    params->lun_size_bytes, be_lun->blocksize);
			goto bailout_error;
		}
	}

	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

	if (be_lun->vn->v_type == VREG)
		error = ctl_be_block_modify_file(be_lun, req);
	else
		error = ctl_be_block_modify_dev(be_lun, req);

	VOP_UNLOCK(be_lun->vn, 0);

	if (error != 0)
		goto bailout_error;

	be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;

	/*
	 * The maximum LBA is the size - 1.
	 *
	 * XXX: Note that this field is being updated without locking,
	 * which might cause problems on 32-bit architectures.
	 */
	be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
	ctl_lun_capacity_changed(&be_lun->ctl_be_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	req->status = CTL_LUN_OK;

	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;

	return (0);
}

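/*
 * Callbacks invoked by CTL for this backend.  ctl_be_block_lun_shutdown()
 * runs once CTL has finished tearing a LUN down, and
 * ctl_be_block_lun_config_status() reports the result of LUN addition;
 * both update the UNCONFIGURED/CONFIG_ERR flags and wake up any thread
 * sleeping in ctl_be_block_create() or ctl_be_block_rm().
 */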
static void
ctl_be_block_lun_shutdown(void *be_lun)
{
	struct ctl_be_block_lun *lun;
	struct ctl_be_block_softc *softc;

	lun = (struct ctl_be_block_lun *)be_lun;

	softc = lun->softc;

	mtx_lock(&softc->lock);
	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
		wakeup(lun);
	mtx_unlock(&softc->lock);
}

static void
ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
{
	struct ctl_be_block_lun *lun;
	struct ctl_be_block_softc *softc;

	lun = (struct ctl_be_block_lun *)be_lun;
	softc = lun->softc;

	if (status == CTL_LUN_CONFIG_OK) {
		mtx_lock(&softc->lock);
		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
			wakeup(lun);
		mtx_unlock(&softc->lock);

		/*
		 * We successfully added the LUN, attempt to enable it.
		 */
		if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
			printf("%s: ctl_enable_lun() failed!\n", __func__);
			if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
				printf("%s: ctl_invalidate_lun() failed!\n",
				    __func__);
			}
		}

		return;
	}

	mtx_lock(&softc->lock);
	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
	wakeup(lun);
	mtx_unlock(&softc->lock);
}

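/*
 * Entry point for configuration-changing SCSI commands.  Cache sync,
 * WRITE SAME and UNMAP requests are queued to the worker threads, while
 * START STOP UNIT is handled inline by starting or stopping the LUN.
 */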
static int
ctl_be_block_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *ctl_be_lun;
	int retval;

	retval = 0;

	DPRINTF("entered\n");

	ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
		CTL_PRIV_BACKEND_LUN].ptr;
	be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out. When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		mtx_lock(&be_lun->lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
		    links);
		mtx_unlock(&be_lun->lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;

		if (cdb->how & SSS_START)
			retval = ctl_start_lun(ctl_be_lun);
		else {
			retval = ctl_stop_lun(ctl_be_lun);
			/*
			 * XXX KDM Copan-specific offline behavior.
			 * Figure out a reasonable way to port this?
			 */
#ifdef NEEDTOPORT
			if ((retval == 0)
			 && (cdb->byte2 & SSS_ONOFFLINE))
				retval = ctl_lun_offline(ctl_be_lun);
#endif
		}

		/*
		 * In general, the above routines should not fail. They
		 * just set state for the LUN. So we've got something
		 * pretty wrong here if we can't start or stop the LUN.
		 */
		if (retval != 0) {
			ctl_set_internal_failure(&io->scsiio,
			    /*sks_valid*/ 1, /*retry_count*/ 0xf051);
			retval = CTL_RETVAL_COMPLETE;
		} else {
			ctl_set_success(&io->scsiio);
		}
		ctl_config_write_done(io);
		break;
	}
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

static int
ctl_be_block_config_read(union ctl_io *io)
{
	return (0);
}

static int
ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
{
	struct ctl_be_block_lun *lun;
	int retval;

	lun = (struct ctl_be_block_lun *)be_lun;
	retval = 0;

	retval = sbuf_printf(sb, "<num_threads>");

	if (retval != 0)
		goto bailout;

	retval = sbuf_printf(sb, "%d", lun->num_threads);

	if (retval != 0)
		goto bailout;

	retval = sbuf_printf(sb, "</num_threads>");

bailout:

	return (retval);
}

int
ctl_be_block_init(void)
{
	struct ctl_be_block_softc *softc;
	int retval;

	softc = &backend_block_softc;
	retval = 0;

	mtx_init(&softc->lock, "ctlblk", NULL, MTX_DEF);
	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	STAILQ_INIT(&softc->disk_list);
	STAILQ_INIT(&softc->lun_list);

	return (retval);
}