/* ctl_backend_block.c, stable/10 revision 288774 */
1/*-
2 * Copyright (c) 2003 Silicon Graphics International Corp.
3 * Copyright (c) 2009-2011 Spectra Logic Corporation
4 * Copyright (c) 2012 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * Portions of this software were developed by Edward Tomasz Napierala
8 * under sponsorship from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions, and the following disclaimer,
15 *    without modification.
16 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
17 *    substantially similar to the "NO WARRANTY" disclaimer below
18 *    ("Disclaimer") and any redistribution must be conditioned upon
19 *    including a substantially similar Disclaimer requirement for further
20 *    binary redistribution.
21 *
22 * NO WARRANTY
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGES.
34 *
35 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
36 */
37/*
38 * CAM Target Layer driver backend for block devices.
39 *
40 * Author: Ken Merry <ken@FreeBSD.org>
41 */
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: stable/10/sys/cam/ctl/ctl_backend_block.c 288774 2015-10-05 10:41:08Z mav $");
44
45#include <opt_kdtrace.h>
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/types.h>
51#include <sys/kthread.h>
52#include <sys/bio.h>
53#include <sys/fcntl.h>
54#include <sys/limits.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/condvar.h>
58#include <sys/malloc.h>
59#include <sys/conf.h>
60#include <sys/ioccom.h>
61#include <sys/queue.h>
62#include <sys/sbuf.h>
63#include <sys/endian.h>
64#include <sys/uio.h>
65#include <sys/buf.h>
66#include <sys/taskqueue.h>
67#include <sys/vnode.h>
68#include <sys/namei.h>
69#include <sys/mount.h>
70#include <sys/disk.h>
71#include <sys/fcntl.h>
72#include <sys/filedesc.h>
73#include <sys/filio.h>
74#include <sys/proc.h>
75#include <sys/pcpu.h>
76#include <sys/module.h>
77#include <sys/sdt.h>
78#include <sys/devicestat.h>
79#include <sys/sysctl.h>
80
81#include <geom/geom.h>
82
83#include <cam/cam.h>
84#include <cam/scsi/scsi_all.h>
85#include <cam/scsi/scsi_da.h>
86#include <cam/ctl/ctl_io.h>
87#include <cam/ctl/ctl.h>
88#include <cam/ctl/ctl_backend.h>
89#include <cam/ctl/ctl_ioctl.h>
90#include <cam/ctl/ctl_ha.h>
91#include <cam/ctl/ctl_scsi_all.h>
92#include <cam/ctl/ctl_private.h>
93#include <cam/ctl/ctl_error.h>
94
95/*
96 * The idea here is that we'll allocate enough S/G space to hold a 1MB
97 * I/O.  If we get an I/O larger than that, we'll split it.
98 */
99#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
100#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
101#define	CTLBLK_MAX_SEG		MAXPHYS
102#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
103#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
104
105#ifdef CTLBLK_DEBUG
106#define DPRINTF(fmt, args...) \
107    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
108#else
109#define DPRINTF(fmt, args...) do {} while(0)
110#endif
111
112#define PRIV(io)	\
113    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
114#define ARGS(io)	\
115    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
116
117SDT_PROVIDER_DEFINE(cbb);
118
119typedef enum {
120	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
121	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
122	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
123} ctl_be_block_lun_flags;
124
125typedef enum {
126	CTL_BE_BLOCK_NONE,
127	CTL_BE_BLOCK_DEV,
128	CTL_BE_BLOCK_FILE
129} ctl_be_block_type;
130
131struct ctl_be_block_filedata {
132	struct ucred *cred;
133};
134
135union ctl_be_block_bedata {
136	struct ctl_be_block_filedata file;
137};
138
139struct ctl_be_block_io;
140struct ctl_be_block_lun;
141
142typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
143			       struct ctl_be_block_io *beio);
144typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
145				  const char *attrname);
146
147/*
148 * Backend LUN structure.  There is a 1:1 mapping between a block device
149 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
150 */
struct ctl_be_block_lun {
	struct ctl_lun_create_params params;	/* creation parameters from the LUN request */
	char lunname[32];			/* name for this LUN; presumably used for devstat/taskqueue — confirm in creation path */
	char *dev_path;				/* path of the backing file or device */
	ctl_be_block_type dev_type;		/* NONE, DEV or FILE backing */
	struct vnode *vn;			/* vnode of the backing file/device */
	union ctl_be_block_bedata backend;	/* per-backend-type private data */
	/* Per-backend-type I/O methods, selected at open time. */
	cbb_dispatch_t dispatch;		/* read/write dispatch */
	cbb_dispatch_t lun_flush;		/* cache flush */
	cbb_dispatch_t unmap;			/* UNMAP/delete */
	cbb_dispatch_t get_lba_status;		/* GET LBA STATUS */
	cbb_getattr_t getattr;			/* attribute query */
	uma_zone_t lun_zone;			/* zone backing the S/G segment buffers */
	uint64_t size_blocks;			/* LUN size in blocks */
	uint64_t size_bytes;			/* LUN size in bytes */
	struct ctl_be_block_softc *softc;	/* back pointer to the module softc */
	struct devstat *disk_stats;		/* devstat accounting for this LUN */
	ctl_be_block_lun_flags flags;
	STAILQ_ENTRY(ctl_be_block_lun) links;	/* linkage on softc->lun_list */
	struct ctl_be_lun cbe_lun;		/* generic CTL backend LUN data */
	struct taskqueue *io_taskqueue;		/* queue running io_task for backend I/O */
	struct task io_task;
	int num_threads;			/* worker threads for this LUN */
	/* I/O queues; presumably drained by ctl_be_block_worker() — protected by queue_lock. */
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;		/* protects devstat transactions and bio completion counters */
	struct mtx_padalign queue_lock;		/* protects the four queues above */
};
181
182/*
183 * Overall softc structure for the block backend module.
184 */
struct ctl_be_block_softc {
	struct mtx			 lock;		/* presumably protects num_luns and lun_list — confirm in create/rm paths */
	int				 num_luns;	/* number of LUNs on lun_list */
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;	/* all configured block backend LUNs */
};
190
191static struct ctl_be_block_softc backend_block_softc;
192
193/*
194 * Per-I/O information.
195 */
struct ctl_be_block_io {
	union ctl_io			*io;		/* the CTL I/O this beio serves */
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];	/* data segments; compare uses both halves */
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];	/* iovec view of sg_segs for uio-based I/O */
	int				bio_cmd;	/* BIO_READ, BIO_WRITE, BIO_FLUSH or BIO_DELETE */
	int				num_segs;	/* valid sg_segs entries (first half only for compare) */
	int				num_bios_sent;	/* bios issued to the backing device */
	int				num_bios_done;	/* bios completed; protected by the LUN io_lock */
	int				send_complete;	/* set once all bios have been sent */
	int				num_errors;	/* bios completed with an error; protected by io_lock */
	struct bintime			ds_t0;		/* devstat transaction start time */
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;		/* total transfer length in bytes */
	uint64_t			io_offset;	/* starting byte offset on the backing store */
	int				io_arg;		/* per-command argument (e.g. async vs. sync flush) */
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
216
217extern struct ctl_softc *control_softc;
218
219static int cbb_num_threads = 14;
220TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
221SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
222	    "CAM Target Layer Block Backend");
223SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW,
224           &cbb_num_threads, 0, "Number of threads per backing file");
225
226static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
227static void ctl_free_beio(struct ctl_be_block_io *beio);
228static void ctl_complete_beio(struct ctl_be_block_io *beio);
229static int ctl_be_block_move_done(union ctl_io *io);
230static void ctl_be_block_biodone(struct bio *bio);
231static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
232				    struct ctl_be_block_io *beio);
233static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
234				       struct ctl_be_block_io *beio);
235static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
236				  struct ctl_be_block_io *beio);
237static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
238					 const char *attrname);
239static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
240				   struct ctl_be_block_io *beio);
241static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
242				   struct ctl_be_block_io *beio);
243static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
244				      struct ctl_be_block_io *beio);
245static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
246					 const char *attrname);
247static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
248				    union ctl_io *io);
249static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
250				    union ctl_io *io);
251static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
252				  union ctl_io *io);
253static void ctl_be_block_worker(void *context, int pending);
254static int ctl_be_block_submit(union ctl_io *io);
255static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
256				   int flag, struct thread *td);
257static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
258				  struct ctl_lun_req *req);
259static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
260				 struct ctl_lun_req *req);
261static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
262static int ctl_be_block_open(struct ctl_be_block_softc *softc,
263			     struct ctl_be_block_lun *be_lun,
264			     struct ctl_lun_req *req);
265static int ctl_be_block_create(struct ctl_be_block_softc *softc,
266			       struct ctl_lun_req *req);
267static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
268			   struct ctl_lun_req *req);
269static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
270				  struct ctl_lun_req *req);
271static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
272				 struct ctl_lun_req *req);
273static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
274			   struct ctl_lun_req *req);
275static void ctl_be_block_lun_shutdown(void *be_lun);
276static void ctl_be_block_lun_config_status(void *be_lun,
277					   ctl_lun_config_status status);
278static int ctl_be_block_config_write(union ctl_io *io);
279static int ctl_be_block_config_read(union ctl_io *io);
280static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
281static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
282int ctl_be_block_init(void);
283
/*
 * Backend method table registered with CTL via CTL_BACKEND_DECLARE()
 * below; CTL calls through these entry points for all block-backend
 * LUN I/O and configuration.
 */
static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.data_submit = ctl_be_block_submit,
	.data_move_done = ctl_be_block_move_done,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};
297
298MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
299CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
300
301static uma_zone_t beio_zone;
302
303static struct ctl_be_block_io *
304ctl_alloc_beio(struct ctl_be_block_softc *softc)
305{
306	struct ctl_be_block_io *beio;
307
308	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
309	beio->softc = softc;
310	return (beio);
311}
312
/*
 * Release a beio and every S/G segment buffer attached to it.  For
 * compare commands there are two parallel S/G lists (initiator data
 * and media data), so the matching entry in the second half of the
 * list is freed as well.
 */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	int duplicate_free;
	int i;

	duplicate_free = 0;

	for (i = 0; i < beio->num_segs; i++) {
		/* A NULL address means this segment was already freed. */
		if (beio->sg_segs[i].addr == NULL)
			duplicate_free++;

		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
		beio->sg_segs[i].addr = NULL;

		/* For compare we had two equal S/G lists. */
		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
			uma_zfree(beio->lun->lun_zone,
			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
		}
	}

	/* Diagnostic only; uma_zfree(zone, NULL) above is a harmless no-op. */
	if (duplicate_free > 0) {
		printf("%s: %d duplicate frees out of %d segments\n", __func__,
		       duplicate_free, beio->num_segs);
	}

	uma_zfree(beio_zone, beio);
}
343
344static void
345ctl_complete_beio(struct ctl_be_block_io *beio)
346{
347	union ctl_io *io = beio->io;
348
349	if (beio->beio_cont != NULL) {
350		beio->beio_cont(beio);
351	} else {
352		ctl_free_beio(beio);
353		ctl_data_submit_done(io);
354	}
355}
356
/*
 * Byte-compare two buffers of the given size; return the index of the
 * first differing byte, or 'size' if the buffers are identical.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t n;

	n = 0;
	while (n < size && a[n] == b[n])
		n++;

	return (n);
}
368
/*
 * Compare the two halves of the beio S/G list (data received from the
 * initiator vs. data read from the media).  On a mismatch, set
 * MISCOMPARE sense with the byte offset of the first difference in the
 * INFORMATION field; otherwise report success.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;	/* running count of matching bytes */
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		/* A short match within this segment means a miscompare. */
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}
398
/*
 * Called by CTL when the DMA to/from the initiator for this I/O has
 * completed.  Final status is set here for reads, compares, and
 * transfers with port errors; a successful write is requeued to the
 * LUN's task queue so the backend I/O can run in a sleepable context.
 */
static int
ctl_be_block_move_done(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
	struct bintime cur_bt;
#endif

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	be_lun = beio->lun;

	DPRINTF("entered\n");

#ifdef CTL_TIME_IO
	/* Account the elapsed DMA time for this I/O. */
	getbintime(&cur_bt);
	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
	io->io_hdr.num_dmas++;
#endif
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read commands, and write
	 * commands with errors.
	 */
	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
		;	/* Aborted: leave status alone and complete below. */
	} else if ((io->io_hdr.port_status == 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		lbalen = ARGS(beio->io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	} else if ((io->io_hdr.port_status != 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
		/*
		 * For hardware error sense keys, the sense key
		 * specific value is defined to be a retry count,
		 * but we use it to pass back an internal FETD
		 * error code.  XXX KDM  Hopefully the FETD is only
		 * using 16 bits for an error code, since that's
		 * all the space we have in the sks field.
		 */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/
					 io->io_hdr.port_status);
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed
	 * successfully.  We now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (0);
}
485
/*
 * Completion callback for bios issued to dev-backed LUNs.  Completions
 * are counted under the LUN io_lock; only the last bio of the beio
 * closes out the devstat transaction and sets final status.  Errors
 * are mapped to SCSI sense; successful writes/flushes/deletes complete
 * here, while successful reads start the datamove to the initiator.
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0)
		beio->num_errors++;

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	if (beio->num_errors > 0) {
		/* 'error' holds the errno from the last failed bio. */
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there is a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
578
/*
 * Flush (lun_flush) handler for file-backed LUNs: fsync the backing
 * vnode and complete the I/O.  beio->io_arg selects MNT_NOWAIT
 * (asynchronous) vs. MNT_WAIT (synchronous) semantics.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	/* A shared vnode lock suffices if the filesystem allows shared writes. */
	if (MNT_SHARED_WRITES(mountpoint)
	 || ((mountpoint == NULL)
	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;

	vn_lock(be_lun->vn, lock_flags | LK_RETRY);

	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there is a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}
628
629SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
630SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
631SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
632SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
633
/*
 * Read/write dispatch for file-backed LUNs.  Builds a kernel uio over
 * the beio S/G list and performs the transfer through VOP_READ() /
 * VOP_WRITE() with devstat accounting around it.  On success, writes
 * and verifies complete here; reads continue with a datamove to the
 * initiator.
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	/* Map SCSI DPO/FUA bits to VFS I/O flags. */
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Point the iovecs at the S/G segment buffers. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should clean the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		/* Shared vnode lock if the filesystem supports shared writes. */
		if (MNT_SHARED_WRITES(mountpoint)
		 || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;

		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
800
/*
 * GET LBA STATUS support for file-backed LUNs.  Uses the FIOSEEKHOLE /
 * FIOSEEKDATA vnode ioctls to find how far from lbalen->lba the file
 * is mapped (status 0) or deallocated (status 1), and fills in the
 * first status descriptor.  If neither probe advances, the range up to
 * the end of the LUN is reported with unknown/mapped status.
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn, 0);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	/* Descriptor length is in blocks, clamped to 32 bits. */
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
839
840static uint64_t
841ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
842{
843	struct vattr		vattr;
844	struct statfs		statfs;
845	uint64_t		val;
846	int			error;
847
848	val = UINT64_MAX;
849	if (be_lun->vn == NULL)
850		return (val);
851	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
852	if (strcmp(attrname, "blocksused") == 0) {
853		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
854		if (error == 0)
855			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
856	}
857	if (strcmp(attrname, "blocksavail") == 0 &&
858	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
859		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
860		if (error == 0)
861			val = statfs.f_bavail * statfs.f_bsize /
862			    be_lun->cbe_lun.blocksize;
863	}
864	VOP_UNLOCK(be_lun->vn, 0);
865	return (val);
866}
867
/*
 * Read/write dispatch for ZVOL-backed (character device) LUNs.  Builds
 * a kernel uio over the beio S/G list and calls the device's d_read /
 * d_write routines directly, bypassing the GEOM bio path.  On success,
 * writes and verifies complete here; reads continue with a datamove.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	/* Map SCSI DPO/FUA bits to I/O flags, as in the file backend. */
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Point the iovecs at the S/G segment buffers. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/* Hold a reference on the cdev across the transfer. */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ)
			error = csw->d_read(dev, &xuio, flags);
		else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;	/* device went away */

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	else
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
971
/*
 * GET LBA STATUS support for ZVOL-backed LUNs.  Same idea as the file
 * variant, but the FIOSEEKHOLE / FIOSEEKDATA probes go through the
 * cdev's d_ioctl routine.  If the device is gone or neither probe
 * advances, the range up to the end of the LUN is reported.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	/* Descriptor length is in blocks, clamped to 32 bits. */
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
1018
1019static void
1020ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1021		       struct ctl_be_block_io *beio)
1022{
1023	struct bio *bio;
1024	union ctl_io *io;
1025	struct cdevsw *csw;
1026	struct cdev *dev;
1027	int ref;
1028
1029	io = beio->io;
1030
1031	DPRINTF("entered\n");
1032
1033	/* This can't fail, it's a blocking allocation. */
1034	bio = g_alloc_bio();
1035
1036	bio->bio_cmd	    = BIO_FLUSH;
1037	bio->bio_offset	    = 0;
1038	bio->bio_data	    = 0;
1039	bio->bio_done	    = ctl_be_block_biodone;
1040	bio->bio_caller1    = beio;
1041	bio->bio_pblkno	    = 0;
1042
1043	/*
1044	 * We don't need to acquire the LUN lock here, because we are only
1045	 * sending one bio, and so there is no other context to synchronize
1046	 * with.
1047	 */
1048	beio->num_bios_sent = 1;
1049	beio->send_complete = 1;
1050
1051	binuptime(&beio->ds_t0);
1052	mtx_lock(&be_lun->io_lock);
1053	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1054	mtx_unlock(&be_lun->io_lock);
1055
1056	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1057	if (csw) {
1058		bio->bio_dev = dev;
1059		csw->d_strategy(bio);
1060		dev_relthread(dev, ref);
1061	} else {
1062		bio->bio_error = ENXIO;
1063		ctl_be_block_biodone(bio);
1064	}
1065}
1066
/*
 * Issue BIO_DELETE requests to the backing cdev for the byte range
 * [off, off + len).  The range is split into chunks no larger than the
 * biggest multiple of the block size that fits in a signed long (the
 * type of bio_length).  "last" is non-zero when this is the final range
 * of the overall request, so completion accounting can be closed out.
 */
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	/* Largest block-size-aligned length representable in a long. */
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		/*
		 * Account for the bio under the LUN lock.  send_complete
		 * must be set before dispatching the final bio so the
		 * completion path can tell when all bios are both sent
		 * and done.
		 */
		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			/* Backing device is gone; fail the bio. */
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1110
1111static void
1112ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1113		       struct ctl_be_block_io *beio)
1114{
1115	union ctl_io *io;
1116	struct ctl_ptr_len_flags *ptrlen;
1117	struct scsi_unmap_desc *buf, *end;
1118	uint64_t len;
1119
1120	io = beio->io;
1121
1122	DPRINTF("entered\n");
1123
1124	binuptime(&beio->ds_t0);
1125	mtx_lock(&be_lun->io_lock);
1126	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1127	mtx_unlock(&be_lun->io_lock);
1128
1129	if (beio->io_offset == -1) {
1130		beio->io_len = 0;
1131		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1132		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1133		end = buf + ptrlen->len / sizeof(*buf);
1134		for (; buf < end; buf++) {
1135			len = (uint64_t)scsi_4btoul(buf->length) *
1136			    be_lun->cbe_lun.blocksize;
1137			beio->io_len += len;
1138			ctl_be_block_unmap_dev_range(be_lun, beio,
1139			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1140			    len, (end - buf < 2) ? TRUE : FALSE);
1141		}
1142	} else
1143		ctl_be_block_unmap_dev_range(be_lun, beio,
1144		    beio->io_offset, beio->io_len, TRUE);
1145}
1146
/*
 * Dispatch a read or write to the backing cdev.  The beio's S/G list is
 * translated into a chain of struct bio requests, each limited to the
 * device's maximum I/O size.  All bios are built and counted first, and
 * only then fired at the device, so the completion accounting cannot
 * race with in-flight completions.
 */
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	/* Build one or more bios per S/G segment, queued locally. */
	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	/*
	 * All bios are counted before any are dispatched, so it is safe
	 * to set send_complete here already.
	 */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			/* Backing device is gone; fail each bio. */
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1225
1226static uint64_t
1227ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1228{
1229	struct diocgattr_arg	arg;
1230	struct cdevsw *csw;
1231	struct cdev *dev;
1232	int error, ref;
1233
1234	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1235	if (csw == NULL)
1236		return (UINT64_MAX);
1237	strlcpy(arg.name, attrname, sizeof(arg.name));
1238	arg.len = sizeof(arg.value.off);
1239	if (csw->d_ioctl) {
1240		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1241		    curthread);
1242	} else
1243		error = ENODEV;
1244	dev_relthread(dev, ref);
1245	if (error != 0)
1246		return (UINT64_MAX);
1247	return (arg.value.off);
1248}
1249
1250static void
1251ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1252			    union ctl_io *io)
1253{
1254	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1255	struct ctl_be_block_io *beio;
1256	struct ctl_lba_len_flags *lbalen;
1257
1258	DPRINTF("entered\n");
1259	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1260	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1261
1262	beio->io_len = lbalen->len * cbe_lun->blocksize;
1263	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1264	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1265	beio->bio_cmd = BIO_FLUSH;
1266	beio->ds_trans_type = DEVSTAT_NO_DATA;
1267	DPRINTF("SYNC\n");
1268	be_lun->lun_flush(be_lun, beio);
1269}
1270
1271static void
1272ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1273{
1274	union ctl_io *io;
1275
1276	io = beio->io;
1277	ctl_free_beio(beio);
1278	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1279	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1280	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1281		ctl_config_write_done(io);
1282		return;
1283	}
1284
1285	ctl_be_block_config_write(io);
1286}
1287
/*
 * Dispatch a WRITE SAME (10/16) request.  With the UNMAP or ANCHOR bit
 * set, the range is handed to the backend's unmap method; otherwise the
 * single data block is replicated into S/G buffers and written out.  If
 * the range does not fit into CTLBLK_MAX_SEGS segments, the remainder
 * is rescheduled through the ctl_be_block_cw_done_ws continuation.
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(beio->io);

	/* Reject unsupported flag bits, or UNMAP/ANCHOR without unmap. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/* Physical block size (pb) and offset (pbo) in bytes. */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.  Trim the segment
		 * so the next one starts on a physical block boundary
		 * when possible, else keep logical block alignment.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/*
		 * Replicate the data block through the segment, stamping
		 * each block with its LBA when SWS_LBDATA is set.
		 */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
1386
1387static void
1388ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1389			    union ctl_io *io)
1390{
1391	struct ctl_be_block_io *beio;
1392	struct ctl_ptr_len_flags *ptrlen;
1393
1394	DPRINTF("entered\n");
1395
1396	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1397	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1398
1399	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1400		ctl_free_beio(beio);
1401		ctl_set_invalid_field(&io->scsiio,
1402				      /*sks_valid*/ 0,
1403				      /*command*/ 1,
1404				      /*field*/ 0,
1405				      /*bit_valid*/ 0,
1406				      /*bit*/ 0);
1407		ctl_config_write_done(io);
1408		return;
1409	}
1410
1411	beio->io_len = 0;
1412	beio->io_offset = -1;
1413	beio->bio_cmd = BIO_DELETE;
1414	beio->ds_trans_type = DEVSTAT_FREE;
1415	DPRINTF("UNMAP\n");
1416	be_lun->unmap(be_lun, beio);
1417}
1418
1419static void
1420ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1421{
1422	union ctl_io *io;
1423
1424	io = beio->io;
1425	ctl_free_beio(beio);
1426	ctl_config_read_done(io);
1427}
1428
1429static void
1430ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1431			 union ctl_io *io)
1432{
1433	struct ctl_be_block_io *beio;
1434	struct ctl_be_block_softc *softc;
1435
1436	DPRINTF("entered\n");
1437
1438	softc = be_lun->softc;
1439	beio = ctl_alloc_beio(softc);
1440	beio->io = io;
1441	beio->lun = be_lun;
1442	beio->beio_cont = ctl_be_block_cr_done;
1443	PRIV(io)->ptr = (void *)beio;
1444
1445	switch (io->scsiio.cdb[0]) {
1446	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1447		beio->bio_cmd = -1;
1448		beio->ds_trans_type = DEVSTAT_NO_DATA;
1449		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1450		beio->io_len = 0;
1451		if (be_lun->get_lba_status)
1452			be_lun->get_lba_status(be_lun, beio);
1453		else
1454			ctl_be_block_cr_done(beio);
1455		break;
1456	default:
1457		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1458		break;
1459	}
1460}
1461
1462static void
1463ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1464{
1465	union ctl_io *io;
1466
1467	io = beio->io;
1468	ctl_free_beio(beio);
1469	ctl_config_write_done(io);
1470}
1471
1472static void
1473ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1474			 union ctl_io *io)
1475{
1476	struct ctl_be_block_io *beio;
1477	struct ctl_be_block_softc *softc;
1478
1479	DPRINTF("entered\n");
1480
1481	softc = be_lun->softc;
1482	beio = ctl_alloc_beio(softc);
1483	beio->io = io;
1484	beio->lun = be_lun;
1485	beio->beio_cont = ctl_be_block_cw_done;
1486	switch (io->scsiio.tag_type) {
1487	case CTL_TAG_ORDERED:
1488		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1489		break;
1490	case CTL_TAG_HEAD_OF_QUEUE:
1491		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1492		break;
1493	case CTL_TAG_UNTAGGED:
1494	case CTL_TAG_SIMPLE:
1495	case CTL_TAG_ACA:
1496	default:
1497		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1498		break;
1499	}
1500	PRIV(io)->ptr = (void *)beio;
1501
1502	switch (io->scsiio.cdb[0]) {
1503	case SYNCHRONIZE_CACHE:
1504	case SYNCHRONIZE_CACHE_16:
1505		ctl_be_block_cw_dispatch_sync(be_lun, io);
1506		break;
1507	case WRITE_SAME_10:
1508	case WRITE_SAME_16:
1509		ctl_be_block_cw_dispatch_ws(be_lun, io);
1510		break;
1511	case UNMAP:
1512		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1513		break;
1514	default:
1515		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1516		break;
1517	}
1518}
1519
1520SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
1521SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
1522SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
1523SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1524
1525static void
1526ctl_be_block_next(struct ctl_be_block_io *beio)
1527{
1528	struct ctl_be_block_lun *be_lun;
1529	union ctl_io *io;
1530
1531	io = beio->io;
1532	be_lun = beio->lun;
1533	ctl_free_beio(beio);
1534	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1535	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1536	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1537		ctl_data_submit_done(io);
1538		return;
1539	}
1540
1541	io->io_hdr.status &= ~CTL_STATUS_MASK;
1542	io->io_hdr.status |= CTL_STATUS_NONE;
1543
1544	mtx_lock(&be_lun->queue_lock);
1545	/*
1546	 * XXX KDM make sure that links is okay to use at this point.
1547	 * Otherwise, we either need to add another field to ctl_io_hdr,
1548	 * or deal with resource allocation here.
1549	 */
1550	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1551	mtx_unlock(&be_lun->queue_lock);
1552
1553	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1554}
1555
/*
 * Dispatch a READ/WRITE/COMPARE data request.  One chunk of up to
 * CTLBLK_MAX_IO_SIZE bytes (half that for compare, which needs two
 * buffer sets) is allocated and either dispatched to the backend
 * (read) or fetched from the initiator via datamove (write).  When the
 * request is larger than one chunk, ctl_be_block_next() requeues the
 * remainder.
 */
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	/* bptrlen->len counts LBAs already covered by previous chunks. */
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	/* Map the CTL tag type onto the devstat tag type. */
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	/* Compare needs a second buffer set, so halve the chunk size. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	/* More LBAs remain after this chunk; requeue via continuation. */
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
1673
/*
 * Per-LUN worker task.  Drains the LUN's queues in priority order --
 * datamove completions first, then config writes, config reads, and
 * finally new input I/O -- dispatching each to the appropriate handler.
 * The queue lock is dropped while a single I/O is processed and
 * re-taken for the next scan.
 */
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_OFFLINE status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				return;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				return;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				return;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				return;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}
1756
1757/*
1758 * Entry point from CTL to the backend for I/O.  We queue everything to a
1759 * work thread, so this just puts the I/O on a queue and wakes up the
1760 * thread.
1761 */
1762static int
1763ctl_be_block_submit(union ctl_io *io)
1764{
1765	struct ctl_be_block_lun *be_lun;
1766	struct ctl_be_lun *cbe_lun;
1767
1768	DPRINTF("entered\n");
1769
1770	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1771		CTL_PRIV_BACKEND_LUN].ptr;
1772	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1773
1774	/*
1775	 * Make sure we only get SCSI I/O.
1776	 */
1777	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1778		"%#x) encountered", io->io_hdr.io_type));
1779
1780	PRIV(io)->len = 0;
1781
1782	mtx_lock(&be_lun->queue_lock);
1783	/*
1784	 * XXX KDM make sure that links is okay to use at this point.
1785	 * Otherwise, we either need to add another field to ctl_io_hdr,
1786	 * or deal with resource allocation here.
1787	 */
1788	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1789	mtx_unlock(&be_lun->queue_lock);
1790	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1791
1792	return (CTL_RETVAL_COMPLETE);
1793}
1794
1795static int
1796ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1797			int flag, struct thread *td)
1798{
1799	struct ctl_be_block_softc *softc;
1800	int error;
1801
1802	softc = &backend_block_softc;
1803
1804	error = 0;
1805
1806	switch (cmd) {
1807	case CTL_LUN_REQ: {
1808		struct ctl_lun_req *lun_req;
1809
1810		lun_req = (struct ctl_lun_req *)addr;
1811
1812		switch (lun_req->reqtype) {
1813		case CTL_LUNREQ_CREATE:
1814			error = ctl_be_block_create(softc, lun_req);
1815			break;
1816		case CTL_LUNREQ_RM:
1817			error = ctl_be_block_rm(softc, lun_req);
1818			break;
1819		case CTL_LUNREQ_MODIFY:
1820			error = ctl_be_block_modify(softc, lun_req);
1821			break;
1822		default:
1823			lun_req->status = CTL_LUN_ERROR;
1824			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1825				 "invalid LUN request type %d",
1826				 lun_req->reqtype);
1827			break;
1828		}
1829		break;
1830	}
1831	default:
1832		error = ENOTTY;
1833		break;
1834	}
1835
1836	return (error);
1837}
1838
/*
 * Finish setting up a file-backed LUN: install the file-backend
 * methods, validate exclusive access to the vnode, and derive the LUN
 * size and logical/physical/UNMAP block geometry from the file's
 * attributes and user-supplied options.  Returns 0 on success or an
 * errno, with req->error_str filled in on failure.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	char			     *value;
	struct vattr		      vattr;
	off_t			      ps, pss, po, pos, us, uss, uo, uos;
	int			      error;

	error = 0;
	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	/* Files don't support UNMAP; use the file dispatch methods. */
	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/*
	 * Verify that we have the ability to upgrade to exclusive
	 * access on this file so we can trap errors at open instead
	 * of reporting them during first access.
	 */
	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
		/* Vnode may be doomed while the lock was being upgraded. */
		if (be_lun->vn->v_iflag & VI_DOOMED) {
			error = EBADF;
			snprintf(req->error_str, sizeof(req->error_str),
				 "error locking file %s", be_lun->dev_path);
			return (error);
		}
	}

	/* LUN size: user-specified, or the file's current size. */
	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Defaults for physical/UNMAP geometry below. */
	us = ps = vattr.va_blocksize;
	uo = po = 0;

	/*
	 * Physical block geometry: accept the option values only if the
	 * size is a power-of-2 multiple of the logical block size and
	 * the offset is block-aligned and within the physical block.
	 */
	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	/* UNMAP granularity geometry, validated the same way. */
	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}
1951
1952static int
1953ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1954{
1955	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1956	struct ctl_lun_create_params *params;
1957	struct cdevsw		     *csw;
1958	struct cdev		     *dev;
1959	char			     *value;
1960	int			      error, atomic, maxio, ref, unmap, tmp;
1961	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
1962
1963	params = &be_lun->params;
1964
1965	be_lun->dev_type = CTL_BE_BLOCK_DEV;
1966	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1967	if (csw == NULL)
1968		return (ENXIO);
1969	if (strcmp(csw->d_name, "zvol") == 0) {
1970		be_lun->dispatch = ctl_be_block_dispatch_zvol;
1971		be_lun->get_lba_status = ctl_be_block_gls_zvol;
1972		atomic = maxio = CTLBLK_MAX_IO_SIZE;
1973	} else {
1974		be_lun->dispatch = ctl_be_block_dispatch_dev;
1975		be_lun->get_lba_status = NULL;
1976		atomic = 0;
1977		maxio = dev->si_iosize_max;
1978		if (maxio <= 0)
1979			maxio = DFLTPHYS;
1980		if (maxio > CTLBLK_MAX_IO_SIZE)
1981			maxio = CTLBLK_MAX_IO_SIZE;
1982	}
1983	be_lun->lun_flush = ctl_be_block_flush_dev;
1984	be_lun->getattr = ctl_be_block_getattr_dev;
1985	be_lun->unmap = ctl_be_block_unmap_dev;
1986
1987	if (!csw->d_ioctl) {
1988		dev_relthread(dev, ref);
1989		snprintf(req->error_str, sizeof(req->error_str),
1990			 "no d_ioctl for device %s!", be_lun->dev_path);
1991		return (ENODEV);
1992	}
1993
1994	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1995			       curthread);
1996	if (error) {
1997		dev_relthread(dev, ref);
1998		snprintf(req->error_str, sizeof(req->error_str),
1999			 "error %d returned for DIOCGSECTORSIZE ioctl "
2000			 "on %s!", error, be_lun->dev_path);
2001		return (error);
2002	}
2003
2004	/*
2005	 * If the user has asked for a blocksize that is greater than the
2006	 * backing device's blocksize, we can do it only if the blocksize
2007	 * the user is asking for is an even multiple of the underlying
2008	 * device's blocksize.
2009	 */
2010	if ((params->blocksize_bytes != 0) &&
2011	    (params->blocksize_bytes >= tmp)) {
2012		if (params->blocksize_bytes % tmp == 0) {
2013			cbe_lun->blocksize = params->blocksize_bytes;
2014		} else {
2015			dev_relthread(dev, ref);
2016			snprintf(req->error_str, sizeof(req->error_str),
2017				 "requested blocksize %u is not an even "
2018				 "multiple of backing device blocksize %u",
2019				 params->blocksize_bytes, tmp);
2020			return (EINVAL);
2021		}
2022	} else if (params->blocksize_bytes != 0) {
2023		dev_relthread(dev, ref);
2024		snprintf(req->error_str, sizeof(req->error_str),
2025			 "requested blocksize %u < backing device "
2026			 "blocksize %u", params->blocksize_bytes, tmp);
2027		return (EINVAL);
2028	} else
2029		cbe_lun->blocksize = tmp;
2030
2031	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2032			     curthread);
2033	if (error) {
2034		dev_relthread(dev, ref);
2035		snprintf(req->error_str, sizeof(req->error_str),
2036			 "error %d returned for DIOCGMEDIASIZE "
2037			 " ioctl on %s!", error,
2038			 be_lun->dev_path);
2039		return (error);
2040	}
2041
2042	if (params->lun_size_bytes != 0) {
2043		if (params->lun_size_bytes > otmp) {
2044			dev_relthread(dev, ref);
2045			snprintf(req->error_str, sizeof(req->error_str),
2046				 "requested LUN size %ju > backing device "
2047				 "size %ju",
2048				 (uintmax_t)params->lun_size_bytes,
2049				 (uintmax_t)otmp);
2050			return (EINVAL);
2051		}
2052
2053		be_lun->size_bytes = params->lun_size_bytes;
2054	} else
2055		be_lun->size_bytes = otmp;
2056	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2057	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2058	    0 : (be_lun->size_blocks - 1);
2059
2060	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2061	    curthread);
2062	if (error)
2063		ps = po = 0;
2064	else {
2065		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2066		    FREAD, curthread);
2067		if (error)
2068			po = 0;
2069	}
2070	us = ps;
2071	uo = po;
2072
2073	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
2074	if (value != NULL)
2075		ctl_expand_number(value, &ps);
2076	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
2077	if (value != NULL)
2078		ctl_expand_number(value, &po);
2079	pss = ps / cbe_lun->blocksize;
2080	pos = po / cbe_lun->blocksize;
2081	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2082	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2083		cbe_lun->pblockexp = fls(pss) - 1;
2084		cbe_lun->pblockoff = (pss - pos) % pss;
2085	}
2086
2087	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
2088	if (value != NULL)
2089		ctl_expand_number(value, &us);
2090	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
2091	if (value != NULL)
2092		ctl_expand_number(value, &uo);
2093	uss = us / cbe_lun->blocksize;
2094	uos = uo / cbe_lun->blocksize;
2095	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2096	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2097		cbe_lun->ublockexp = fls(uss) - 1;
2098		cbe_lun->ublockoff = (uss - uos) % uss;
2099	}
2100
2101	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2102	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2103
2104	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2105		unmap = 1;
2106	} else {
2107		struct diocgattr_arg	arg;
2108
2109		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2110		arg.len = sizeof(arg.value.i);
2111		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2112		    curthread);
2113		unmap = (error == 0) ? arg.value.i : 0;
2114	}
2115	value = ctl_get_opt(&cbe_lun->options, "unmap");
2116	if (value != NULL)
2117		unmap = (strcmp(value, "on") == 0);
2118	if (unmap)
2119		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2120	else
2121		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2122
2123	dev_relthread(dev, ref);
2124	return (0);
2125}
2126
2127static int
2128ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2129{
2130	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2131	int flags;
2132
2133	if (be_lun->vn) {
2134		flags = FREAD;
2135		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2136			flags |= FWRITE;
2137		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2138		be_lun->vn = NULL;
2139
2140		switch (be_lun->dev_type) {
2141		case CTL_BE_BLOCK_DEV:
2142			break;
2143		case CTL_BE_BLOCK_FILE:
2144			if (be_lun->backend.file.cred != NULL) {
2145				crfree(be_lun->backend.file.cred);
2146				be_lun->backend.file.cred = NULL;
2147			}
2148			break;
2149		case CTL_BE_BLOCK_NONE:
2150			break;
2151		default:
2152			panic("Unexpected backend type.");
2153			break;
2154		}
2155		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2156	}
2157	return (0);
2158}
2159
/*
 * Open the backing store named by the "file" LUN option and dispatch
 * to the device- or file-specific setup routine.  Returns 0 on
 * success; on failure returns a non-zero errno (or 1 for parameter
 * errors) with req->error_str filled in.
 */
static int
ctl_be_block_open(struct ctl_be_block_softc *softc,
		  struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	char		*value;
	int		 error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	/*
	 * This can run from a kernel context that may not have its
	 * cwd/root/jail directories set up; point any missing ones at
	 * the root vnode so the namei() lookup below can proceed.
	 */
	if (!curthread->td_proc->p_fd->fd_cdir) {
		curthread->td_proc->p_fd->fd_cdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_rdir) {
		curthread->td_proc->p_fd->fd_rdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_jdir) {
		curthread->td_proc->p_fd->fd_jdir = rootvnode;
		VREF(rootvnode);
	}

	value = ctl_get_opt(&cbe_lun->options, "file");
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	/* Replace any previous path with the requested one. */
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	/* Open read-write unless the "readonly" option is set to "on". */
	flags = FREAD;
	value = ctl_get_opt(&cbe_lun->options, "readonly");
	if (value == NULL || strcmp(value, "on") != 0)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
	error = vn_open(&nd, &flags, 0, NULL);
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		/* Fall back to a read-only open on a read-only backing. */
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * This is the only reasonable guess we can make as far as
		 * path if the user doesn't give us a fully qualified path.
		 * If they want to specify a file, they need to specify the
		 * full path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
				be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			/* The new path starts with '/', so this retries once. */
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	/* Record the access mode we actually ended up with. */
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE(&nd, NDF_ONLY_PNBUF);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s is not a disk or plain file", be_lun->dev_path);
	}
	VOP_UNLOCK(be_lun->vn, 0);

	if (error != 0)
		ctl_be_block_close(be_lun);
	/*
	 * Default serialization: off for raw devices, read-serialized
	 * otherwise; the "serseq" option overrides either default.
	 */
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	value = ctl_get_opt(&cbe_lun->options, "serseq");
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}
2263
2264static int
2265ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2266{
2267	struct ctl_be_lun *cbe_lun;
2268	struct ctl_be_block_lun *be_lun;
2269	struct ctl_lun_create_params *params;
2270	char num_thread_str[16];
2271	char tmpstr[32];
2272	char *value;
2273	int retval, num_threads;
2274	int tmp_num_threads;
2275
2276	params = &req->reqdata.create;
2277	retval = 0;
2278	req->status = CTL_LUN_OK;
2279
2280	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2281	cbe_lun = &be_lun->cbe_lun;
2282	cbe_lun->be_lun = be_lun;
2283	be_lun->params = req->reqdata.create;
2284	be_lun->softc = softc;
2285	STAILQ_INIT(&be_lun->input_queue);
2286	STAILQ_INIT(&be_lun->config_read_queue);
2287	STAILQ_INIT(&be_lun->config_write_queue);
2288	STAILQ_INIT(&be_lun->datamove_queue);
2289	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2290	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2291	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2292	ctl_init_opts(&cbe_lun->options,
2293	    req->num_be_args, req->kern_be_args);
2294	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2295	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2296	if (be_lun->lun_zone == NULL) {
2297		snprintf(req->error_str, sizeof(req->error_str),
2298			 "error allocating UMA zone");
2299		goto bailout_error;
2300	}
2301
2302	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2303		cbe_lun->lun_type = params->device_type;
2304	else
2305		cbe_lun->lun_type = T_DIRECT;
2306	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2307	cbe_lun->flags = 0;
2308	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2309	if (value != NULL) {
2310		if (strcmp(value, "primary") == 0)
2311			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2312	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2313		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2314
2315	if (cbe_lun->lun_type == T_DIRECT) {
2316		be_lun->size_bytes = params->lun_size_bytes;
2317		if (params->blocksize_bytes != 0)
2318			cbe_lun->blocksize = params->blocksize_bytes;
2319		else
2320			cbe_lun->blocksize = 512;
2321		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2322		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2323		    0 : (be_lun->size_blocks - 1);
2324
2325		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2326		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2327			retval = ctl_be_block_open(softc, be_lun, req);
2328			if (retval != 0) {
2329				retval = 0;
2330				req->status = CTL_LUN_WARNING;
2331			}
2332		}
2333		num_threads = cbb_num_threads;
2334	} else {
2335		num_threads = 1;
2336	}
2337
2338	/*
2339	 * XXX This searching loop might be refactored to be combined with
2340	 * the loop above,
2341	 */
2342	value = ctl_get_opt(&cbe_lun->options, "num_threads");
2343	if (value != NULL) {
2344		tmp_num_threads = strtol(value, NULL, 0);
2345
2346		/*
2347		 * We don't let the user specify less than one
2348		 * thread, but hope he's clueful enough not to
2349		 * specify 1000 threads.
2350		 */
2351		if (tmp_num_threads < 1) {
2352			snprintf(req->error_str, sizeof(req->error_str),
2353				 "invalid number of threads %s",
2354				 num_thread_str);
2355			goto bailout_error;
2356		}
2357		num_threads = tmp_num_threads;
2358	}
2359
2360	if (be_lun->vn == NULL)
2361		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2362	/* Tell the user the blocksize we ended up using */
2363	params->lun_size_bytes = be_lun->size_bytes;
2364	params->blocksize_bytes = cbe_lun->blocksize;
2365	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2366		cbe_lun->req_lun_id = params->req_lun_id;
2367		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2368	} else
2369		cbe_lun->req_lun_id = 0;
2370
2371	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2372	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2373	cbe_lun->be = &ctl_be_block_driver;
2374
2375	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2376		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
2377			 softc->num_luns);
2378		strncpy((char *)cbe_lun->serial_num, tmpstr,
2379			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2380
2381		/* Tell the user what we used for a serial number */
2382		strncpy((char *)params->serial_num, tmpstr,
2383			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2384	} else {
2385		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2386			MIN(sizeof(cbe_lun->serial_num),
2387			sizeof(params->serial_num)));
2388	}
2389	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2390		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
2391		strncpy((char *)cbe_lun->device_id, tmpstr,
2392			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2393
2394		/* Tell the user what we used for a device ID */
2395		strncpy((char *)params->device_id, tmpstr,
2396			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2397	} else {
2398		strncpy((char *)cbe_lun->device_id, params->device_id,
2399			MIN(sizeof(cbe_lun->device_id),
2400			    sizeof(params->device_id)));
2401	}
2402
2403	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2404
2405	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2406	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2407
2408	if (be_lun->io_taskqueue == NULL) {
2409		snprintf(req->error_str, sizeof(req->error_str),
2410			 "unable to create taskqueue");
2411		goto bailout_error;
2412	}
2413
2414	/*
2415	 * Note that we start the same number of threads by default for
2416	 * both the file case and the block device case.  For the file
2417	 * case, we need multiple threads to allow concurrency, because the
2418	 * vnode interface is designed to be a blocking interface.  For the
2419	 * block device case, ZFS zvols at least will block the caller's
2420	 * context in many instances, and so we need multiple threads to
2421	 * overcome that problem.  Other block devices don't need as many
2422	 * threads, but they shouldn't cause too many problems.
2423	 *
2424	 * If the user wants to just have a single thread for a block
2425	 * device, he can specify that when the LUN is created, or change
2426	 * the tunable/sysctl to alter the default number of threads.
2427	 */
2428	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2429					 /*num threads*/num_threads,
2430					 /*priority*/PWAIT,
2431					 /*thread name*/
2432					 "%s taskq", be_lun->lunname);
2433
2434	if (retval != 0)
2435		goto bailout_error;
2436
2437	be_lun->num_threads = num_threads;
2438
2439	mtx_lock(&softc->lock);
2440	softc->num_luns++;
2441	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2442
2443	mtx_unlock(&softc->lock);
2444
2445	retval = ctl_add_lun(&be_lun->cbe_lun);
2446	if (retval != 0) {
2447		mtx_lock(&softc->lock);
2448		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2449			      links);
2450		softc->num_luns--;
2451		mtx_unlock(&softc->lock);
2452		snprintf(req->error_str, sizeof(req->error_str),
2453			 "ctl_add_lun() returned error %d, see dmesg for "
2454			 "details", retval);
2455		retval = 0;
2456		goto bailout_error;
2457	}
2458
2459	mtx_lock(&softc->lock);
2460
2461	/*
2462	 * Tell the config_status routine that we're waiting so it won't
2463	 * clean up the LUN in the event of an error.
2464	 */
2465	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2466
2467	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2468		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2469		if (retval == EINTR)
2470			break;
2471	}
2472	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2473
2474	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2475		snprintf(req->error_str, sizeof(req->error_str),
2476			 "LUN configuration error, see dmesg for details");
2477		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2478			      links);
2479		softc->num_luns--;
2480		mtx_unlock(&softc->lock);
2481		goto bailout_error;
2482	} else {
2483		params->req_lun_id = cbe_lun->lun_id;
2484	}
2485
2486	mtx_unlock(&softc->lock);
2487
2488	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2489					       cbe_lun->blocksize,
2490					       DEVSTAT_ALL_SUPPORTED,
2491					       cbe_lun->lun_type
2492					       | DEVSTAT_TYPE_IF_OTHER,
2493					       DEVSTAT_PRIORITY_OTHER);
2494
2495	return (retval);
2496
2497bailout_error:
2498	req->status = CTL_LUN_ERROR;
2499
2500	if (be_lun->io_taskqueue != NULL)
2501		taskqueue_free(be_lun->io_taskqueue);
2502	ctl_be_block_close(be_lun);
2503	if (be_lun->dev_path != NULL)
2504		free(be_lun->dev_path, M_CTLBLK);
2505	if (be_lun->lun_zone != NULL)
2506		uma_zdestroy(be_lun->lun_zone);
2507	ctl_free_opts(&cbe_lun->options);
2508	mtx_destroy(&be_lun->queue_lock);
2509	mtx_destroy(&be_lun->io_lock);
2510	free(be_lun, M_CTLBLK);
2511
2512	return (retval);
2513}
2514
2515static int
2516ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2517{
2518	struct ctl_lun_rm_params *params;
2519	struct ctl_be_block_lun *be_lun;
2520	struct ctl_be_lun *cbe_lun;
2521	int retval;
2522
2523	params = &req->reqdata.rm;
2524
2525	mtx_lock(&softc->lock);
2526	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2527		if (be_lun->cbe_lun.lun_id == params->lun_id)
2528			break;
2529	}
2530	mtx_unlock(&softc->lock);
2531
2532	if (be_lun == NULL) {
2533		snprintf(req->error_str, sizeof(req->error_str),
2534			 "LUN %u is not managed by the block backend",
2535			 params->lun_id);
2536		goto bailout_error;
2537	}
2538	cbe_lun = &be_lun->cbe_lun;
2539
2540	retval = ctl_disable_lun(cbe_lun);
2541	if (retval != 0) {
2542		snprintf(req->error_str, sizeof(req->error_str),
2543			 "error %d returned from ctl_disable_lun() for "
2544			 "LUN %d", retval, params->lun_id);
2545		goto bailout_error;
2546	}
2547
2548	if (be_lun->vn != NULL) {
2549		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2550		ctl_lun_offline(cbe_lun);
2551		taskqueue_drain_all(be_lun->io_taskqueue);
2552		ctl_be_block_close(be_lun);
2553	}
2554
2555	retval = ctl_invalidate_lun(cbe_lun);
2556	if (retval != 0) {
2557		snprintf(req->error_str, sizeof(req->error_str),
2558			 "error %d returned from ctl_invalidate_lun() for "
2559			 "LUN %d", retval, params->lun_id);
2560		goto bailout_error;
2561	}
2562
2563	mtx_lock(&softc->lock);
2564	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2565	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2566                retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2567                if (retval == EINTR)
2568                        break;
2569        }
2570	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2571
2572	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2573		snprintf(req->error_str, sizeof(req->error_str),
2574			 "interrupted waiting for LUN to be freed");
2575		mtx_unlock(&softc->lock);
2576		goto bailout_error;
2577	}
2578
2579	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2580
2581	softc->num_luns--;
2582	mtx_unlock(&softc->lock);
2583
2584	taskqueue_drain_all(be_lun->io_taskqueue);
2585	taskqueue_free(be_lun->io_taskqueue);
2586
2587	if (be_lun->disk_stats != NULL)
2588		devstat_remove_entry(be_lun->disk_stats);
2589
2590	uma_zdestroy(be_lun->lun_zone);
2591
2592	ctl_free_opts(&cbe_lun->options);
2593	free(be_lun->dev_path, M_CTLBLK);
2594	mtx_destroy(&be_lun->queue_lock);
2595	mtx_destroy(&be_lun->io_lock);
2596	free(be_lun, M_CTLBLK);
2597
2598	req->status = CTL_LUN_OK;
2599
2600	return (0);
2601
2602bailout_error:
2603
2604	req->status = CTL_LUN_ERROR;
2605
2606	return (0);
2607}
2608
2609static int
2610ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2611			 struct ctl_lun_req *req)
2612{
2613	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2614	struct vattr vattr;
2615	int error;
2616	struct ctl_lun_create_params *params = &be_lun->params;
2617
2618	if (params->lun_size_bytes != 0) {
2619		be_lun->size_bytes = params->lun_size_bytes;
2620	} else  {
2621		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2622		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2623		VOP_UNLOCK(be_lun->vn, 0);
2624		if (error != 0) {
2625			snprintf(req->error_str, sizeof(req->error_str),
2626				 "error calling VOP_GETATTR() for file %s",
2627				 be_lun->dev_path);
2628			return (error);
2629		}
2630		be_lun->size_bytes = vattr.va_size;
2631	}
2632	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2633	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2634	    0 : (be_lun->size_blocks - 1);
2635	return (0);
2636}
2637
2638static int
2639ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2640			struct ctl_lun_req *req)
2641{
2642	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2643	struct ctl_lun_create_params *params = &be_lun->params;
2644	struct cdevsw *csw;
2645	struct cdev *dev;
2646	uint64_t size_bytes;
2647	int error, ref;
2648
2649	csw = devvn_refthread(be_lun->vn, &dev, &ref);
2650	if (csw == NULL)
2651		return (ENXIO);
2652	if (csw->d_ioctl == NULL) {
2653		dev_relthread(dev, ref);
2654		snprintf(req->error_str, sizeof(req->error_str),
2655			 "no d_ioctl for device %s!", be_lun->dev_path);
2656		return (ENODEV);
2657	}
2658
2659	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD,
2660	    curthread);
2661	dev_relthread(dev, ref);
2662	if (error) {
2663		snprintf(req->error_str, sizeof(req->error_str),
2664			 "error %d returned for DIOCGMEDIASIZE ioctl "
2665			 "on %s!", error, be_lun->dev_path);
2666		return (error);
2667	}
2668
2669	if (params->lun_size_bytes != 0) {
2670		if (params->lun_size_bytes > size_bytes) {
2671			snprintf(req->error_str, sizeof(req->error_str),
2672				 "requested LUN size %ju > backing device "
2673				 "size %ju",
2674				 (uintmax_t)params->lun_size_bytes,
2675				 (uintmax_t)size_bytes);
2676			return (EINVAL);
2677		}
2678		be_lun->size_bytes = params->lun_size_bytes;
2679	} else {
2680		be_lun->size_bytes = size_bytes;
2681	}
2682	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2683	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2684	    0 : (be_lun->size_blocks - 1);
2685	return (0);
2686}
2687
/*
 * Handle a LUN modify request: apply new options, re-evaluate the HA
 * primary/secondary role, resize or (re)open/close the backing store
 * as appropriate, and notify CTL of any capacity change.  Always
 * returns 0; the outcome is reported through req->status.
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	/* Look the LUN up by its CTL-assigned ID. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;
	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);

	/*
	 * Recompute the HA role from the (possibly updated) "ha_role"
	 * option, falling back to the shelf-wide default, and notify
	 * CTL if the role changed.
	 */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = ctl_get_opt(&cbe_lun->options, "ha_role");
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/*
		 * This node should have the backing store open: open it
		 * if needed, otherwise just refresh the size, and bring
		 * the LUN back online if it was offline.
		 */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(softc, be_lun, req);
		else if (vn_isdisk(be_lun->vn, &error))
			error = ctl_be_block_modify_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG)
			error = ctl_be_block_modify_file(be_lun, req);
		else
			error = EINVAL;
		if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE;
			ctl_lun_online(cbe_lun);
		}
	} else {
		/*
		 * Secondary role: take the LUN offline, drain I/O and
		 * close the backing store if it was open.
		 */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
			ctl_lun_offline(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
2775
2776static void
2777ctl_be_block_lun_shutdown(void *be_lun)
2778{
2779	struct ctl_be_block_lun *lun;
2780	struct ctl_be_block_softc *softc;
2781
2782	lun = (struct ctl_be_block_lun *)be_lun;
2783
2784	softc = lun->softc;
2785
2786	mtx_lock(&softc->lock);
2787	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2788	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2789		wakeup(lun);
2790	mtx_unlock(&softc->lock);
2791
2792}
2793
2794static void
2795ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2796{
2797	struct ctl_be_block_lun *lun;
2798	struct ctl_be_block_softc *softc;
2799
2800	lun = (struct ctl_be_block_lun *)be_lun;
2801	softc = lun->softc;
2802
2803	if (status == CTL_LUN_CONFIG_OK) {
2804		mtx_lock(&softc->lock);
2805		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2806		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2807			wakeup(lun);
2808		mtx_unlock(&softc->lock);
2809
2810		/*
2811		 * We successfully added the LUN, attempt to enable it.
2812		 */
2813		if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2814			printf("%s: ctl_enable_lun() failed!\n", __func__);
2815			if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2816				printf("%s: ctl_invalidate_lun() failed!\n",
2817				       __func__);
2818			}
2819		}
2820
2821		return;
2822	}
2823
2824
2825	mtx_lock(&softc->lock);
2826	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2827	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2828	wakeup(lun);
2829	mtx_unlock(&softc->lock);
2830}
2831
2832
/*
 * Entry point for configuration-type write commands.  Cache syncs,
 * WRITE SAME and UNMAP are queued to the LUN's worker thread; START
 * STOP UNIT is handled inline by flipping LUN state.  Returns a
 * CTL_RETVAL_* code.
 */
static int
ctl_be_block_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	retval = 0;

	DPRINTF("entered\n");

	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
		CTL_PRIV_BACKEND_LUN].ptr;
	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out.  When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		/* Hand the request off to the worker thread. */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
				   links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;

		if (cdb->how & SSS_START)
			retval = ctl_start_lun(cbe_lun);
		else {
			retval = ctl_stop_lun(cbe_lun);
			/*
			 * XXX KDM Copan-specific offline behavior.
			 * Figure out a reasonable way to port this?
			 */
#ifdef NEEDTOPORT
			if ((retval == 0)
			 && (cdb->byte2 & SSS_ONOFFLINE))
				retval = ctl_lun_offline(cbe_lun);
#endif
		}

		/*
		 * In general, the above routines should not fail.  They
		 * just set state for the LUN.  So we've got something
		 * pretty wrong here if we can't start or stop the LUN.
		 */
		if (retval != 0) {
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xf051);
			retval = CTL_RETVAL_COMPLETE;
		} else {
			ctl_set_success(&io->scsiio);
		}
		ctl_config_write_done(io);
		break;
	}
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}
2913
2914static int
2915ctl_be_block_config_read(union ctl_io *io)
2916{
2917	struct ctl_be_block_lun *be_lun;
2918	struct ctl_be_lun *cbe_lun;
2919	int retval = 0;
2920
2921	DPRINTF("entered\n");
2922
2923	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2924		CTL_PRIV_BACKEND_LUN].ptr;
2925	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2926
2927	switch (io->scsiio.cdb[0]) {
2928	case SERVICE_ACTION_IN:
2929		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2930			mtx_lock(&be_lun->queue_lock);
2931			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2932			    &io->io_hdr, links);
2933			mtx_unlock(&be_lun->queue_lock);
2934			taskqueue_enqueue(be_lun->io_taskqueue,
2935			    &be_lun->io_task);
2936			retval = CTL_RETVAL_QUEUED;
2937			break;
2938		}
2939		ctl_set_invalid_field(&io->scsiio,
2940				      /*sks_valid*/ 1,
2941				      /*command*/ 1,
2942				      /*field*/ 1,
2943				      /*bit_valid*/ 1,
2944				      /*bit*/ 4);
2945		ctl_config_read_done(io);
2946		retval = CTL_RETVAL_COMPLETE;
2947		break;
2948	default:
2949		ctl_set_invalid_opcode(&io->scsiio);
2950		ctl_config_read_done(io);
2951		retval = CTL_RETVAL_COMPLETE;
2952		break;
2953	}
2954
2955	return (retval);
2956}
2957
2958static int
2959ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2960{
2961	struct ctl_be_block_lun *lun;
2962	int retval;
2963
2964	lun = (struct ctl_be_block_lun *)be_lun;
2965	retval = 0;
2966
2967	retval = sbuf_printf(sb, "\t<num_threads>");
2968
2969	if (retval != 0)
2970		goto bailout;
2971
2972	retval = sbuf_printf(sb, "%d", lun->num_threads);
2973
2974	if (retval != 0)
2975		goto bailout;
2976
2977	retval = sbuf_printf(sb, "</num_threads>\n");
2978
2979bailout:
2980
2981	return (retval);
2982}
2983
2984static uint64_t
2985ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2986{
2987	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2988
2989	if (lun->getattr == NULL)
2990		return (UINT64_MAX);
2991	return (lun->getattr(lun, attrname));
2992}
2993
2994int
2995ctl_be_block_init(void)
2996{
2997	struct ctl_be_block_softc *softc;
2998	int retval;
2999
3000	softc = &backend_block_softc;
3001	retval = 0;
3002
3003	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
3004	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
3005	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3006	STAILQ_INIT(&softc->lun_list);
3007
3008	return (retval);
3009}
3010