/* ctl_backend_block.c revision 288763 */
/*-
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <ken@FreeBSD.org>
 */
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: stable/10/sys/cam/ctl/ctl_backend_block.c 288763 2015-10-05 09:25:04Z mav $");
44
45#include <opt_kdtrace.h>
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/types.h>
51#include <sys/kthread.h>
52#include <sys/bio.h>
53#include <sys/fcntl.h>
54#include <sys/limits.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/condvar.h>
58#include <sys/malloc.h>
59#include <sys/conf.h>
60#include <sys/ioccom.h>
61#include <sys/queue.h>
62#include <sys/sbuf.h>
63#include <sys/endian.h>
64#include <sys/uio.h>
65#include <sys/buf.h>
66#include <sys/taskqueue.h>
67#include <sys/vnode.h>
68#include <sys/namei.h>
69#include <sys/mount.h>
70#include <sys/disk.h>
71#include <sys/fcntl.h>
72#include <sys/filedesc.h>
73#include <sys/filio.h>
74#include <sys/proc.h>
75#include <sys/pcpu.h>
76#include <sys/module.h>
77#include <sys/sdt.h>
78#include <sys/devicestat.h>
79#include <sys/sysctl.h>
80
81#include <geom/geom.h>
82
83#include <cam/cam.h>
84#include <cam/scsi/scsi_all.h>
85#include <cam/scsi/scsi_da.h>
86#include <cam/ctl/ctl_io.h>
87#include <cam/ctl/ctl.h>
88#include <cam/ctl/ctl_backend.h>
89#include <cam/ctl/ctl_ioctl.h>
90#include <cam/ctl/ctl_ha.h>
91#include <cam/ctl/ctl_scsi_all.h>
92#include <cam/ctl/ctl_private.h>
93#include <cam/ctl/ctl_error.h>
94
/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
/* Each S/G segment is at most one MAXPHYS-sized buffer. */
#define	CTLBLK_MAX_SEG		MAXPHYS
/* Segments per half; COMPARE uses the two halves as two parallel S/G lists. */
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)

/* Debug printf, compiled away unless CTLBLK_DEBUG is defined. */
#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

/* Backend-private area of a ctl_io: pointer/len/flags used to stash the beio. */
#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
/* LBA/length/flags parsed from the CDB by CTL for this io. */
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
116
/* DTrace provider for this backend's static probes. */
SDT_PROVIDER_DEFINE(cbb);

/* LUN lifecycle/state flags (bitmask). */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

/* Kind of backing store behind a LUN. */
typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

/* Backend data specific to file-backed LUNs. */
struct ctl_be_block_filedata {
	struct ucred *cred;	/* credentials passed to VOP_READ/VOP_WRITE */
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

/* Handler that executes (part of) a backend I/O against a LUN. */
typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
/* Handler that returns the value of a named LUN attribute. */
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);
146
/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_lun_create_params params;
	char lunname[32];
	char *dev_path;			/* path to the backing file/device */
	ctl_be_block_type dev_type;	/* file, device, or none */
	struct vnode *vn;		/* vnode of the backing store */
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;	/* read/write I/O handler */
	cbb_dispatch_t lun_flush;	/* cache flush handler */
	cbb_dispatch_t unmap;		/* block deallocation handler */
	cbb_dispatch_t get_lba_status;	/* GET LBA STATUS handler */
	cbb_getattr_t getattr;		/* attribute query handler */
	uma_zone_t lun_zone;		/* zone backing the S/G segment buffers */
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;	/* per-LUN I/O statistics */
	ctl_be_block_lun_flags flags;
	STAILQ_ENTRY(ctl_be_block_lun) links;
	struct ctl_be_lun cbe_lun;
	struct taskqueue *io_taskqueue;	/* runs io_task to drain the queues below */
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue; /* writes whose DMA finished */
	struct mtx_padalign io_lock;	/* guards devstat calls and bio counters */
	struct mtx_padalign queue_lock;	/* guards the STAILQ queues above */
};
181
/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct mtx			 lock;		/* module-wide lock */
	int				 num_luns;	/* LUNs currently on lun_list */
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;
};

/* The single module-wide instance of the block backend softc. */
static struct ctl_be_block_softc backend_block_softc;
192
/*
 * Per-I/O information.  One of these tracks a single CTL io through the
 * backend: the S/G list describing its buffers, devstat bookkeeping, and
 * (for device-backed LUNs) the count of child bios in flight.
 */
struct ctl_be_block_io {
	union ctl_io			*io;	/* CTL io this beio serves */
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS]; /* mirrors sg_segs for uio I/O */
	int				bio_cmd;	/* BIO_READ/WRITE/FLUSH/DELETE */
	int				num_segs;	/* valid entries in sg_segs[] */
	int				num_bios_sent;	/* child bios dispatched */
	int				num_bios_done;	/* child bios completed */
	int				send_complete;	/* set once all bios are sent */
	int				num_errors;	/* child bios that failed */
	struct bintime			ds_t0;		/* devstat start timestamp */
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;		/* transfer length in bytes */
	uint64_t			io_offset;	/* byte offset into backing store */
	int				io_arg;		/* op-specific argument (e.g. flush mode) */
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
216
extern struct ctl_softc *control_softc;

/* Worker threads per backing store; loader tunable and run-time sysctl. */
static int cbb_num_threads = 14;
TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW,
           &cbb_num_threads, 0, "Number of threads per backing file");
225
226static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
227static void ctl_free_beio(struct ctl_be_block_io *beio);
228static void ctl_complete_beio(struct ctl_be_block_io *beio);
229static int ctl_be_block_move_done(union ctl_io *io);
230static void ctl_be_block_biodone(struct bio *bio);
231static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
232				    struct ctl_be_block_io *beio);
233static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
234				       struct ctl_be_block_io *beio);
235static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
236				  struct ctl_be_block_io *beio);
237static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
238					 const char *attrname);
239static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
240				   struct ctl_be_block_io *beio);
241static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
242				   struct ctl_be_block_io *beio);
243static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
244				      struct ctl_be_block_io *beio);
245static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
246					 const char *attrname);
247static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
248				    union ctl_io *io);
249static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
250				    union ctl_io *io);
251static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
252				  union ctl_io *io);
253static void ctl_be_block_worker(void *context, int pending);
254static int ctl_be_block_submit(union ctl_io *io);
255static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
256				   int flag, struct thread *td);
257static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
258				  struct ctl_lun_req *req);
259static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
260				 struct ctl_lun_req *req);
261static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
262static int ctl_be_block_open(struct ctl_be_block_softc *softc,
263			     struct ctl_be_block_lun *be_lun,
264			     struct ctl_lun_req *req);
265static int ctl_be_block_create(struct ctl_be_block_softc *softc,
266			       struct ctl_lun_req *req);
267static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
268			   struct ctl_lun_req *req);
269static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
270				  struct ctl_lun_req *req);
271static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
272				 struct ctl_lun_req *req);
273static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
274			   struct ctl_lun_req *req);
275static void ctl_be_block_lun_shutdown(void *be_lun);
276static void ctl_be_block_lun_config_status(void *be_lun,
277					   ctl_lun_config_status status);
278static int ctl_be_block_config_write(union ctl_io *io);
279static int ctl_be_block_config_read(union ctl_io *io);
280static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
281static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
282int ctl_be_block_init(void);
283
/*
 * Backend driver registration: the entry points CTL uses to hand I/O,
 * configuration requests, and ioctls to this module.
 */
static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.data_submit = ctl_be_block_submit,
	.data_move_done = ctl_be_block_move_done,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

/* UMA zone from which struct ctl_be_block_io is allocated. */
static uma_zone_t beio_zone;
303static struct ctl_be_block_io *
304ctl_alloc_beio(struct ctl_be_block_softc *softc)
305{
306	struct ctl_be_block_io *beio;
307
308	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
309	beio->softc = softc;
310	return (beio);
311}
312
/*
 * Release a per-I/O structure: return every S/G segment buffer to the
 * LUN's buffer zone (and, for COMPARE commands, the second S/G list
 * stored in the upper half of sg_segs[]), then free the beio itself.
 * Logs a warning if any segment appears to have been freed already.
 */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	int duplicate_free;
	int i;

	duplicate_free = 0;

	for (i = 0; i < beio->num_segs; i++) {
		/* A NULL address means this slot was freed once before. */
		if (beio->sg_segs[i].addr == NULL)
			duplicate_free++;

		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
		beio->sg_segs[i].addr = NULL;

		/* For compare we had two equal S/G lists. */
		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
			uma_zfree(beio->lun->lun_zone,
			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
		}
	}

	if (duplicate_free > 0) {
		printf("%s: %d duplicate frees out of %d segments\n", __func__,
		       duplicate_free, beio->num_segs);
	}

	uma_zfree(beio_zone, beio);
}
343
344static void
345ctl_complete_beio(struct ctl_be_block_io *beio)
346{
347	union ctl_io *io = beio->io;
348
349	if (beio->beio_cont != NULL) {
350		beio->beio_cont(beio);
351	} else {
352		ctl_free_beio(beio);
353		ctl_data_submit_done(io);
354	}
355}
356
/*
 * Byte-compare two buffers of the given size.
 *
 * Returns the index of the first differing byte, or 'size' if the
 * buffers are identical.  The index (not just a boolean) is needed by
 * ctl_be_block_compare() to report the exact miscompare offset in the
 * sense data's INFORMATION field, which memcmp() cannot provide.
 * Neither buffer is modified, hence the const qualifiers.
 */
static size_t
cmp(const uint8_t *a, const uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}
368
/*
 * Complete a COMPARE command.  The lower half of the S/G list holds the
 * initiator-supplied data, the upper half the data read from the
 * backing store; walk both and, on the first mismatch, return
 * MISCOMPARE sense with the byte offset of the difference in the
 * INFORMATION field.  Otherwise report success.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		/* res short of the segment length means a mismatch there. */
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}
398
/*
 * DMA-completion callback, invoked by CTL when a data move to/from the
 * initiator finishes.  Sets final status for reads and COMPAREs; for a
 * write whose DMA succeeded, queues the io on the LUN's datamove queue
 * for a worker thread to perform the backing-store I/O — this routine
 * is generally called in the SIM's interrupt context and must not
 * block.  Always returns 0.
 */
static int
ctl_be_block_move_done(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
	struct bintime cur_bt;
#endif

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	be_lun = beio->lun;

	DPRINTF("entered\n");

#ifdef CTL_TIME_IO
	/* Accumulate the elapsed DMA time for this io. */
	getbintime(&cur_bt);
	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
	io->io_hdr.num_dmas++;
#endif
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read commands, and write
	 * commands with errors.
	 */
	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
		;
	} else if ((io->io_hdr.port_status == 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		lbalen = ARGS(beio->io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	} else if ((io->io_hdr.port_status != 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
		/*
		 * For hardware error sense keys, the sense key
		 * specific value is defined to be a retry count,
		 * but we use it to pass back an internal FETD
		 * error code.  XXX KDM  Hopefully the FETD is only
		 * using 16 bits for an error code, since that's
		 * all the space we have in the sks field.
		 */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/
					 io->io_hdr.port_status);
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed
	 * successfully.  We now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (0);
}
485
/*
 * GEOM bio completion callback for device-backed I/O.  Counts errors
 * and completed bios under io_lock; when the final outstanding bio for
 * this beio completes, closes the devstat transaction and then either
 * fails the io with a sense code mapped from errno, completes it
 * (writes, flushes, deletes, verifies), or starts the DMA of read data
 * back to the initiator.
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0)
		beio->num_errors++;

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	if (beio->num_errors > 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there is a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL)
			ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
574
/*
 * Flush handler for file-backed LUNs: VOP_FSYNC() the backing vnode.
 * A non-zero beio->io_arg selects an asynchronous sync (MNT_NOWAIT)
 * instead of MNT_WAIT.  Wraps the operation in devstat accounting and
 * completes the beio with success or an internal-failure sense code.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	/* Take the vnode lock shared when the filesystem permits it. */
	if (MNT_SHARED_WRITES(mountpoint)
	 || ((mountpoint == NULL)
	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;

	vn_lock(be_lun->vn, lock_flags | LK_RETRY);

	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there is a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}
624
/* DTrace probes marking the start and end of file/zvol-path read and write I/O. */
SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
629
/*
 * Read/write handler for file-backed LUNs.  Builds a kernel-space uio
 * over the beio's S/G list and issues VOP_READ()/VOP_WRITE() against
 * the backing vnode, translating the DPO hint to IO_DIRECT and the FUA
 * hint (writes only) to IO_SYNC.  On success a read continues with a
 * DMA to the initiator; a write (or verify) completes here.  Errors are
 * mapped to space-allocation-failure, write-protected, or medium-error
 * sense.
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	int flags;
	int error, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array backing the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		/* Lock shared when the filesystem allows concurrent writers. */
		if (MNT_SHARED_WRITES(mountpoint)
		 || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;

		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
        }

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		char path_str[32];

		ctl_scsi_path_string(io, path_str, sizeof(path_str));
		printf("%s%s command returned errno %d\n", path_str,
		       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL)
			ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
781
/*
 * GET LBA STATUS handler for file-backed LUNs.  Uses the FIOSEEKHOLE/
 * FIOSEEKDATA vnode ioctls to determine whether the extent starting at
 * the requested LBA is mapped (status 0) or deallocated (status 1) and
 * how far that state extends; when neither seek advances, the rest of
 * the LUN is reported as status 0.
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn, 0);

	/* Fill in the single descriptor of the GET LBA STATUS response. */
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
820
821static uint64_t
822ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
823{
824	struct vattr		vattr;
825	struct statfs		statfs;
826	uint64_t		val;
827	int			error;
828
829	val = UINT64_MAX;
830	if (be_lun->vn == NULL)
831		return (val);
832	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
833	if (strcmp(attrname, "blocksused") == 0) {
834		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
835		if (error == 0)
836			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
837	}
838	if (strcmp(attrname, "blocksavail") == 0 &&
839	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
840		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
841		if (error == 0)
842			val = statfs.f_bavail * statfs.f_bsize /
843			    be_lun->cbe_lun.blocksize;
844	}
845	VOP_UNLOCK(be_lun->vn, 0);
846	return (val);
847}
848
/*
 * Read/write handler for ZVOL-backed LUNs.  Like the file path, builds
 * a kernel-space uio over the beio's S/G list, but calls the character
 * device's d_read/d_write entry points directly instead of going
 * through VOP_READ/VOP_WRITE.  DPO maps to IO_DIRECT and FUA (writes)
 * to IO_SYNC.  A device that has gone away yields ENXIO, reported as a
 * medium error.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array backing the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/* Hold a thread reference on the cdev across the I/O call. */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ)
			error = csw->d_read(dev, &xuio, flags);
		else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	else
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL)
			ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
948
/*
 * GET LBA STATUS handler for ZVOL-backed LUNs.  Same approach as the
 * file variant, but issues FIOSEEKHOLE/FIOSEEKDATA through the cdev's
 * d_ioctl entry point.  If the device is gone, or neither seek
 * advances, the rest of the LUN is reported as mapped/unknown
 * (status 0).
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	/* Fill in the single descriptor of the GET LBA STATUS response. */
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
995
/*
 * Flush handler for device-backed LUNs: issues a single BIO_FLUSH to
 * the underlying cdev.  Completion (including the ENXIO case for a
 * vanished device) is handled by ctl_be_block_biodone().
 */
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	io = beio->io;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		/* Device is gone; fail the bio through the normal path. */
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}
1043
/*
 * Issue BIO_DELETE requests covering [off, off + len) on the backing
 * cdev, splitting the range into chunks no larger than the biggest
 * blocksize-aligned value that fits in a long (bio_length limit).
 * "last" indicates this is the final range of the request, so the
 * send_complete flag can be raised with the last bio.
 */
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	/* Largest multiple of the blocksize that fits in a long. */
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		/* Count the bio under the lock so biodone sees it. */
		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1087
/*
 * UNMAP/WRITE SAME-with-UNMAP backend for cdev-backed LUNs.  An
 * io_offset of -1 signals an UNMAP command carrying a list of
 * scsi_unmap_desc descriptors; otherwise a single precomputed range
 * (from WRITE SAME) is deleted.
 */
static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->io_offset == -1) {
		/* UNMAP: walk the descriptor list, one range at a time. */
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			/* Mark the final descriptor so completion fires. */
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}
1123
/*
 * Read/write dispatch for cdev-backed LUNs.  Translates the beio's
 * scatter/gather segments into a queue of bios (each bounded by the
 * device's si_iosize_max), accounts them all under the LUN lock, and
 * then fires them at the device's strategy routine.
 */
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	/* Build one or more bios for each S/G segment. */
	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	/*
	 * All bios are counted before any is dispatched, so biodone
	 * cannot see a partial count; mark the send complete here.
	 */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1202
1203static uint64_t
1204ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1205{
1206	struct diocgattr_arg	arg;
1207	struct cdevsw *csw;
1208	struct cdev *dev;
1209	int error, ref;
1210
1211	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1212	if (csw == NULL)
1213		return (UINT64_MAX);
1214	strlcpy(arg.name, attrname, sizeof(arg.name));
1215	arg.len = sizeof(arg.value.off);
1216	if (csw->d_ioctl) {
1217		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1218		    curthread);
1219	} else
1220		error = ENODEV;
1221	dev_relthread(dev, ref);
1222	if (error != 0)
1223		return (UINT64_MAX);
1224	return (arg.value.off);
1225}
1226
1227static void
1228ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1229			    union ctl_io *io)
1230{
1231	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1232	struct ctl_be_block_io *beio;
1233	struct ctl_lba_len_flags *lbalen;
1234
1235	DPRINTF("entered\n");
1236	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1237	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1238
1239	beio->io_len = lbalen->len * cbe_lun->blocksize;
1240	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1241	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1242	beio->bio_cmd = BIO_FLUSH;
1243	beio->ds_trans_type = DEVSTAT_NO_DATA;
1244	DPRINTF("SYNC\n");
1245	be_lun->lun_flush(be_lun, beio);
1246}
1247
1248static void
1249ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1250{
1251	union ctl_io *io;
1252
1253	io = beio->io;
1254	ctl_free_beio(beio);
1255	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1256	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1257	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1258		ctl_config_write_done(io);
1259		return;
1260	}
1261
1262	ctl_be_block_config_write(io);
1263}
1264
/*
 * Dispatch a WRITE SAME command.  Invalid flag combinations are
 * rejected; SWS_UNMAP/SWS_ANCHOR requests are routed to the backend's
 * unmap method; otherwise the single-block pattern is replicated into
 * scatter/gather buffers (aligned to physical block boundaries where
 * possible) and written out, rerunning via ctl_be_block_cw_done_ws
 * when the range doesn't fit in one pass.
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(beio->io);

	/* Reject unknown flags, and UNMAP/ANCHOR without unmap support. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/* Physical block size and offset (bytes) for alignment below. */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/* Trim so the segment ends on a physical block
			 * boundary when possible; otherwise fall back to
			 * logical block alignment. */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/* Replicate the pattern block, optionally stamping LBAs. */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
1363
1364static void
1365ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1366			    union ctl_io *io)
1367{
1368	struct ctl_be_block_io *beio;
1369	struct ctl_ptr_len_flags *ptrlen;
1370
1371	DPRINTF("entered\n");
1372
1373	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1374	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1375
1376	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1377		ctl_free_beio(beio);
1378		ctl_set_invalid_field(&io->scsiio,
1379				      /*sks_valid*/ 0,
1380				      /*command*/ 1,
1381				      /*field*/ 0,
1382				      /*bit_valid*/ 0,
1383				      /*bit*/ 0);
1384		ctl_config_write_done(io);
1385		return;
1386	}
1387
1388	beio->io_len = 0;
1389	beio->io_offset = -1;
1390	beio->bio_cmd = BIO_DELETE;
1391	beio->ds_trans_type = DEVSTAT_FREE;
1392	DPRINTF("UNMAP\n");
1393	be_lun->unmap(be_lun, beio);
1394}
1395
1396static void
1397ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1398{
1399	union ctl_io *io;
1400
1401	io = beio->io;
1402	ctl_free_beio(beio);
1403	ctl_config_read_done(io);
1404}
1405
1406static void
1407ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1408			 union ctl_io *io)
1409{
1410	struct ctl_be_block_io *beio;
1411	struct ctl_be_block_softc *softc;
1412
1413	DPRINTF("entered\n");
1414
1415	softc = be_lun->softc;
1416	beio = ctl_alloc_beio(softc);
1417	beio->io = io;
1418	beio->lun = be_lun;
1419	beio->beio_cont = ctl_be_block_cr_done;
1420	PRIV(io)->ptr = (void *)beio;
1421
1422	switch (io->scsiio.cdb[0]) {
1423	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1424		beio->bio_cmd = -1;
1425		beio->ds_trans_type = DEVSTAT_NO_DATA;
1426		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1427		beio->io_len = 0;
1428		if (be_lun->get_lba_status)
1429			be_lun->get_lba_status(be_lun, beio);
1430		else
1431			ctl_be_block_cr_done(beio);
1432		break;
1433	default:
1434		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1435		break;
1436	}
1437}
1438
1439static void
1440ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1441{
1442	union ctl_io *io;
1443
1444	io = beio->io;
1445	ctl_free_beio(beio);
1446	ctl_config_write_done(io);
1447}
1448
1449static void
1450ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1451			 union ctl_io *io)
1452{
1453	struct ctl_be_block_io *beio;
1454	struct ctl_be_block_softc *softc;
1455
1456	DPRINTF("entered\n");
1457
1458	softc = be_lun->softc;
1459	beio = ctl_alloc_beio(softc);
1460	beio->io = io;
1461	beio->lun = be_lun;
1462	beio->beio_cont = ctl_be_block_cw_done;
1463	switch (io->scsiio.tag_type) {
1464	case CTL_TAG_ORDERED:
1465		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1466		break;
1467	case CTL_TAG_HEAD_OF_QUEUE:
1468		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1469		break;
1470	case CTL_TAG_UNTAGGED:
1471	case CTL_TAG_SIMPLE:
1472	case CTL_TAG_ACA:
1473	default:
1474		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1475		break;
1476	}
1477	PRIV(io)->ptr = (void *)beio;
1478
1479	switch (io->scsiio.cdb[0]) {
1480	case SYNCHRONIZE_CACHE:
1481	case SYNCHRONIZE_CACHE_16:
1482		ctl_be_block_cw_dispatch_sync(be_lun, io);
1483		break;
1484	case WRITE_SAME_10:
1485	case WRITE_SAME_16:
1486		ctl_be_block_cw_dispatch_ws(be_lun, io);
1487		break;
1488	case UNMAP:
1489		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1490		break;
1491	default:
1492		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1493		break;
1494	}
1495}
1496
/*
 * Statically-defined tracing (SDT) probe points marking the start of
 * read/write processing and the completion of buffer allocation.
 */
SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1501
/*
 * Continuation for chunked data I/O: free the completed beio and, if
 * the request was not aborted or failed, reset its status and requeue
 * it on the input queue so the worker processes the next chunk.
 */
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	/* Clear the per-chunk status so the next pass starts clean. */
	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}
1532
/*
 * Main read/write dispatch.  Allocates a beio, sizes this pass (one
 * pass covers at most CTLBLK_MAX_IO_SIZE bytes, or half that for
 * COMPARE, which needs a second buffer set), builds the scatter/gather
 * list, and either kicks off the backend read or starts the datamove
 * to fetch write data from the initiator.  If the request doesn't fit
 * in one pass, ctl_be_block_next reruns it for the remainder.
 */
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	/* bptrlen->len tracks LBAs already handled by previous passes. */
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	/* COMPARE needs room for both the read-back and datamove data. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
1650
/*
 * Taskqueue worker for a LUN.  Drains the four I/O queues in priority
 * order (datamove, config write, config read, input), dropping the
 * queue lock while dispatching each item and re-taking it for the next
 * iteration.  Returns when all queues are empty.
 */
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_OFFLINE status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				return;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				return;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				return;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				return;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}
1733
1734/*
1735 * Entry point from CTL to the backend for I/O.  We queue everything to a
1736 * work thread, so this just puts the I/O on a queue and wakes up the
1737 * thread.
1738 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;

	DPRINTF("entered\n");

	/* Recover the backend LUN from the io's private area. */
	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
		CTL_PRIV_BACKEND_LUN].ptr;
	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;

	/*
	 * Make sure we only get SCSI I/O.
	 */
	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
		"%#x) encountered", io->io_hdr.io_type));

	/* No LBAs processed yet; used by the chunked-dispatch logic. */
	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}
1771
/*
 * Character-device ioctl entry point for the block backend.  Handles
 * CTL_LUN_REQ by routing create/remove/modify LUN requests to the
 * matching handler; everything else returns ENOTTY.
 */
static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc;
	int error;

	softc = &backend_block_softc;

	error = 0;

	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
1815
/*
 * Configure a LUN backed by a regular file: install the file-based
 * method table, validate that exclusive access is obtainable, size
 * the LUN from the request or the file itself, and derive logical,
 * physical, and unmap block geometry from the vnode attributes and
 * the pblocksize/pblockoffset/ublocksize/ublockoffset options.
 * Returns 0 on success or an errno, with req->error_str filled in on
 * failure.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	char			     *value;
	struct vattr		      vattr;
	off_t			      ps, pss, po, pos, us, uss, uo, uos;
	int			      error;

	error = 0;
	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	/* File-backed LUNs go through the VFS dispatch methods. */
	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/*
	 * Verify that we have the ability to upgrade to exclusive
	 * access on this file so we can trap errors at open instead
	 * of reporting them during first access.
	 */
	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
		if (be_lun->vn->v_iflag & VI_DOOMED) {
			error = EBADF;
			snprintf(req->error_str, sizeof(req->error_str),
				 "error locking file %s", be_lun->dev_path);
			return (error);
		}
	}

	/* Hold the opening thread's credentials for later file I/O. */
	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	us = ps = vattr.va_blocksize;
	uo = po = 0;

	/* Allow options to override physical block size/offset. */
	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/* Accept only power-of-2 multiples of the logical block size. */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	/* Same validation for the unmap (UNMAP granularity) geometry. */
	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}
1928
1929static int
1930ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1931{
1932	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1933	struct ctl_lun_create_params *params;
1934	struct cdevsw		     *csw;
1935	struct cdev		     *dev;
1936	char			     *value;
1937	int			      error, atomic, maxio, ref, unmap, tmp;
1938	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
1939
1940	params = &be_lun->params;
1941
1942	be_lun->dev_type = CTL_BE_BLOCK_DEV;
1943	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1944	if (csw == NULL)
1945		return (ENXIO);
1946	if (strcmp(csw->d_name, "zvol") == 0) {
1947		be_lun->dispatch = ctl_be_block_dispatch_zvol;
1948		be_lun->get_lba_status = ctl_be_block_gls_zvol;
1949		atomic = maxio = CTLBLK_MAX_IO_SIZE;
1950	} else {
1951		be_lun->dispatch = ctl_be_block_dispatch_dev;
1952		be_lun->get_lba_status = NULL;
1953		atomic = 0;
1954		maxio = dev->si_iosize_max;
1955		if (maxio <= 0)
1956			maxio = DFLTPHYS;
1957		if (maxio > CTLBLK_MAX_IO_SIZE)
1958			maxio = CTLBLK_MAX_IO_SIZE;
1959	}
1960	be_lun->lun_flush = ctl_be_block_flush_dev;
1961	be_lun->getattr = ctl_be_block_getattr_dev;
1962	be_lun->unmap = ctl_be_block_unmap_dev;
1963
1964	if (!csw->d_ioctl) {
1965		dev_relthread(dev, ref);
1966		snprintf(req->error_str, sizeof(req->error_str),
1967			 "no d_ioctl for device %s!", be_lun->dev_path);
1968		return (ENODEV);
1969	}
1970
1971	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1972			       curthread);
1973	if (error) {
1974		dev_relthread(dev, ref);
1975		snprintf(req->error_str, sizeof(req->error_str),
1976			 "error %d returned for DIOCGSECTORSIZE ioctl "
1977			 "on %s!", error, be_lun->dev_path);
1978		return (error);
1979	}
1980
1981	/*
1982	 * If the user has asked for a blocksize that is greater than the
1983	 * backing device's blocksize, we can do it only if the blocksize
1984	 * the user is asking for is an even multiple of the underlying
1985	 * device's blocksize.
1986	 */
1987	if ((params->blocksize_bytes != 0) &&
1988	    (params->blocksize_bytes >= tmp)) {
1989		if (params->blocksize_bytes % tmp == 0) {
1990			cbe_lun->blocksize = params->blocksize_bytes;
1991		} else {
1992			dev_relthread(dev, ref);
1993			snprintf(req->error_str, sizeof(req->error_str),
1994				 "requested blocksize %u is not an even "
1995				 "multiple of backing device blocksize %u",
1996				 params->blocksize_bytes, tmp);
1997			return (EINVAL);
1998		}
1999	} else if (params->blocksize_bytes != 0) {
2000		dev_relthread(dev, ref);
2001		snprintf(req->error_str, sizeof(req->error_str),
2002			 "requested blocksize %u < backing device "
2003			 "blocksize %u", params->blocksize_bytes, tmp);
2004		return (EINVAL);
2005	} else
2006		cbe_lun->blocksize = tmp;
2007
2008	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2009			     curthread);
2010	if (error) {
2011		dev_relthread(dev, ref);
2012		snprintf(req->error_str, sizeof(req->error_str),
2013			 "error %d returned for DIOCGMEDIASIZE "
2014			 " ioctl on %s!", error,
2015			 be_lun->dev_path);
2016		return (error);
2017	}
2018
2019	if (params->lun_size_bytes != 0) {
2020		if (params->lun_size_bytes > otmp) {
2021			dev_relthread(dev, ref);
2022			snprintf(req->error_str, sizeof(req->error_str),
2023				 "requested LUN size %ju > backing device "
2024				 "size %ju",
2025				 (uintmax_t)params->lun_size_bytes,
2026				 (uintmax_t)otmp);
2027			return (EINVAL);
2028		}
2029
2030		be_lun->size_bytes = params->lun_size_bytes;
2031	} else
2032		be_lun->size_bytes = otmp;
2033	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2034	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2035	    0 : (be_lun->size_blocks - 1);
2036
2037	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2038	    curthread);
2039	if (error)
2040		ps = po = 0;
2041	else {
2042		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2043		    FREAD, curthread);
2044		if (error)
2045			po = 0;
2046	}
2047	us = ps;
2048	uo = po;
2049
2050	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
2051	if (value != NULL)
2052		ctl_expand_number(value, &ps);
2053	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
2054	if (value != NULL)
2055		ctl_expand_number(value, &po);
2056	pss = ps / cbe_lun->blocksize;
2057	pos = po / cbe_lun->blocksize;
2058	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2059	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2060		cbe_lun->pblockexp = fls(pss) - 1;
2061		cbe_lun->pblockoff = (pss - pos) % pss;
2062	}
2063
2064	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
2065	if (value != NULL)
2066		ctl_expand_number(value, &us);
2067	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
2068	if (value != NULL)
2069		ctl_expand_number(value, &uo);
2070	uss = us / cbe_lun->blocksize;
2071	uos = uo / cbe_lun->blocksize;
2072	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2073	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2074		cbe_lun->ublockexp = fls(uss) - 1;
2075		cbe_lun->ublockoff = (uss - uos) % uss;
2076	}
2077
2078	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2079	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2080
2081	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2082		unmap = 1;
2083	} else {
2084		struct diocgattr_arg	arg;
2085
2086		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2087		arg.len = sizeof(arg.value.i);
2088		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2089		    curthread);
2090		unmap = (error == 0) ? arg.value.i : 0;
2091	}
2092	value = ctl_get_opt(&cbe_lun->options, "unmap");
2093	if (value != NULL)
2094		unmap = (strcmp(value, "on") == 0);
2095	if (unmap)
2096		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2097	else
2098		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2099
2100	dev_relthread(dev, ref);
2101	return (0);
2102}
2103
2104static int
2105ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2106{
2107	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2108	int flags;
2109
2110	if (be_lun->vn) {
2111		flags = FREAD;
2112		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2113			flags |= FWRITE;
2114		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2115		be_lun->vn = NULL;
2116
2117		switch (be_lun->dev_type) {
2118		case CTL_BE_BLOCK_DEV:
2119			break;
2120		case CTL_BE_BLOCK_FILE:
2121			if (be_lun->backend.file.cred != NULL) {
2122				crfree(be_lun->backend.file.cred);
2123				be_lun->backend.file.cred = NULL;
2124			}
2125			break;
2126		case CTL_BE_BLOCK_NONE:
2127			break;
2128		default:
2129			panic("Unexpected backend type.");
2130			break;
2131		}
2132		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2133	}
2134	return (0);
2135}
2136
/*
 * Open the backing store for a LUN, named by the "file" option.  The
 * backing store may be a disk device node or a regular file; once the
 * vnode is opened we dispatch to the type-specific open routine.
 * Returns 0 on success, or non-zero with req->error_str filled in.
 */
static int
ctl_be_block_open(struct ctl_be_block_softc *softc,
		  struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	char		*value;
	int		 error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	/*
	 * namei() needs cwd/root/jail directory vnodes for the calling
	 * thread; if any are unset (e.g. a pure kernel thread), point
	 * them at the root vnode, taking a reference for each.
	 */
	if (!curthread->td_proc->p_fd->fd_cdir) {
		curthread->td_proc->p_fd->fd_cdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_rdir) {
		curthread->td_proc->p_fd->fd_rdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_jdir) {
		curthread->td_proc->p_fd->fd_jdir = rootvnode;
		VREF(rootvnode);
	}

	value = ctl_get_opt(&cbe_lun->options, "file");
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	/* Replace any path left over from a previous open attempt. */
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	/* Open read-write unless the "readonly" option is "on". */
	flags = FREAD;
	value = ctl_get_opt(&cbe_lun->options, "readonly");
	if (value == NULL || strcmp(value, "on") != 0)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
	error = vn_open(&nd, &flags, 0, NULL);
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		/* Backing store refused write access; retry read-only. */
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * This is the only reasonable guess we can make as far as
		 * path if the user doesn't give us a fully qualified path.
		 * If they want to specify a file, they need to specify the
		 * full path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			/*
			 * Retry with "/dev/" prepended; after this the
			 * path starts with '/', so we cannot loop again.
			 */
			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
				be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	/* Record the access mode we actually got in the LUN flags. */
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE(&nd, NDF_ONLY_PNBUF);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s is not a disk or plain file", be_lun->dev_path);
	}
	/* vn_open() returned the vnode locked; drop the lock now. */
	VOP_UNLOCK(be_lun->vn, 0);

	if (error != 0)
		ctl_be_block_close(be_lun);
	/*
	 * Default serialization mode depends on the dispatch routine the
	 * type-specific open selected; the "serseq" option overrides it.
	 */
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	value = ctl_get_opt(&cbe_lun->options, "serseq");
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}
2240
2241static int
2242ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2243{
2244	struct ctl_be_lun *cbe_lun;
2245	struct ctl_be_block_lun *be_lun;
2246	struct ctl_lun_create_params *params;
2247	char num_thread_str[16];
2248	char tmpstr[32];
2249	char *value;
2250	int retval, num_threads;
2251	int tmp_num_threads;
2252
2253	params = &req->reqdata.create;
2254	retval = 0;
2255	req->status = CTL_LUN_OK;
2256
2257	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2258	cbe_lun = &be_lun->cbe_lun;
2259	cbe_lun->be_lun = be_lun;
2260	be_lun->params = req->reqdata.create;
2261	be_lun->softc = softc;
2262	STAILQ_INIT(&be_lun->input_queue);
2263	STAILQ_INIT(&be_lun->config_read_queue);
2264	STAILQ_INIT(&be_lun->config_write_queue);
2265	STAILQ_INIT(&be_lun->datamove_queue);
2266	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2267	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2268	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2269	ctl_init_opts(&cbe_lun->options,
2270	    req->num_be_args, req->kern_be_args);
2271	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2272	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2273	if (be_lun->lun_zone == NULL) {
2274		snprintf(req->error_str, sizeof(req->error_str),
2275			 "error allocating UMA zone");
2276		goto bailout_error;
2277	}
2278
2279	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2280		cbe_lun->lun_type = params->device_type;
2281	else
2282		cbe_lun->lun_type = T_DIRECT;
2283	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2284	cbe_lun->flags = 0;
2285	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2286	if (value != NULL) {
2287		if (strcmp(value, "primary") == 0)
2288			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2289	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2290		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2291
2292	if (cbe_lun->lun_type == T_DIRECT) {
2293		be_lun->size_bytes = params->lun_size_bytes;
2294		if (params->blocksize_bytes != 0)
2295			cbe_lun->blocksize = params->blocksize_bytes;
2296		else
2297			cbe_lun->blocksize = 512;
2298		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2299		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2300		    0 : (be_lun->size_blocks - 1);
2301
2302		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2303		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2304			retval = ctl_be_block_open(softc, be_lun, req);
2305			if (retval != 0) {
2306				retval = 0;
2307				req->status = CTL_LUN_WARNING;
2308			}
2309		}
2310		num_threads = cbb_num_threads;
2311	} else {
2312		num_threads = 1;
2313	}
2314
2315	/*
2316	 * XXX This searching loop might be refactored to be combined with
2317	 * the loop above,
2318	 */
2319	value = ctl_get_opt(&cbe_lun->options, "num_threads");
2320	if (value != NULL) {
2321		tmp_num_threads = strtol(value, NULL, 0);
2322
2323		/*
2324		 * We don't let the user specify less than one
2325		 * thread, but hope he's clueful enough not to
2326		 * specify 1000 threads.
2327		 */
2328		if (tmp_num_threads < 1) {
2329			snprintf(req->error_str, sizeof(req->error_str),
2330				 "invalid number of threads %s",
2331				 num_thread_str);
2332			goto bailout_error;
2333		}
2334		num_threads = tmp_num_threads;
2335	}
2336
2337	if (be_lun->vn == NULL)
2338		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2339	/* Tell the user the blocksize we ended up using */
2340	params->lun_size_bytes = be_lun->size_bytes;
2341	params->blocksize_bytes = cbe_lun->blocksize;
2342	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2343		cbe_lun->req_lun_id = params->req_lun_id;
2344		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2345	} else
2346		cbe_lun->req_lun_id = 0;
2347
2348	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2349	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2350	cbe_lun->be = &ctl_be_block_driver;
2351
2352	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2353		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
2354			 softc->num_luns);
2355		strncpy((char *)cbe_lun->serial_num, tmpstr,
2356			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2357
2358		/* Tell the user what we used for a serial number */
2359		strncpy((char *)params->serial_num, tmpstr,
2360			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2361	} else {
2362		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2363			MIN(sizeof(cbe_lun->serial_num),
2364			sizeof(params->serial_num)));
2365	}
2366	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2367		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
2368		strncpy((char *)cbe_lun->device_id, tmpstr,
2369			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2370
2371		/* Tell the user what we used for a device ID */
2372		strncpy((char *)params->device_id, tmpstr,
2373			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2374	} else {
2375		strncpy((char *)cbe_lun->device_id, params->device_id,
2376			MIN(sizeof(cbe_lun->device_id),
2377			    sizeof(params->device_id)));
2378	}
2379
2380	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2381
2382	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2383	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2384
2385	if (be_lun->io_taskqueue == NULL) {
2386		snprintf(req->error_str, sizeof(req->error_str),
2387			 "unable to create taskqueue");
2388		goto bailout_error;
2389	}
2390
2391	/*
2392	 * Note that we start the same number of threads by default for
2393	 * both the file case and the block device case.  For the file
2394	 * case, we need multiple threads to allow concurrency, because the
2395	 * vnode interface is designed to be a blocking interface.  For the
2396	 * block device case, ZFS zvols at least will block the caller's
2397	 * context in many instances, and so we need multiple threads to
2398	 * overcome that problem.  Other block devices don't need as many
2399	 * threads, but they shouldn't cause too many problems.
2400	 *
2401	 * If the user wants to just have a single thread for a block
2402	 * device, he can specify that when the LUN is created, or change
2403	 * the tunable/sysctl to alter the default number of threads.
2404	 */
2405	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2406					 /*num threads*/num_threads,
2407					 /*priority*/PWAIT,
2408					 /*thread name*/
2409					 "%s taskq", be_lun->lunname);
2410
2411	if (retval != 0)
2412		goto bailout_error;
2413
2414	be_lun->num_threads = num_threads;
2415
2416	mtx_lock(&softc->lock);
2417	softc->num_luns++;
2418	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2419
2420	mtx_unlock(&softc->lock);
2421
2422	retval = ctl_add_lun(&be_lun->cbe_lun);
2423	if (retval != 0) {
2424		mtx_lock(&softc->lock);
2425		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2426			      links);
2427		softc->num_luns--;
2428		mtx_unlock(&softc->lock);
2429		snprintf(req->error_str, sizeof(req->error_str),
2430			 "ctl_add_lun() returned error %d, see dmesg for "
2431			 "details", retval);
2432		retval = 0;
2433		goto bailout_error;
2434	}
2435
2436	mtx_lock(&softc->lock);
2437
2438	/*
2439	 * Tell the config_status routine that we're waiting so it won't
2440	 * clean up the LUN in the event of an error.
2441	 */
2442	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2443
2444	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2445		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2446		if (retval == EINTR)
2447			break;
2448	}
2449	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2450
2451	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2452		snprintf(req->error_str, sizeof(req->error_str),
2453			 "LUN configuration error, see dmesg for details");
2454		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2455			      links);
2456		softc->num_luns--;
2457		mtx_unlock(&softc->lock);
2458		goto bailout_error;
2459	} else {
2460		params->req_lun_id = cbe_lun->lun_id;
2461	}
2462
2463	mtx_unlock(&softc->lock);
2464
2465	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2466					       cbe_lun->blocksize,
2467					       DEVSTAT_ALL_SUPPORTED,
2468					       cbe_lun->lun_type
2469					       | DEVSTAT_TYPE_IF_OTHER,
2470					       DEVSTAT_PRIORITY_OTHER);
2471
2472	return (retval);
2473
2474bailout_error:
2475	req->status = CTL_LUN_ERROR;
2476
2477	if (be_lun->io_taskqueue != NULL)
2478		taskqueue_free(be_lun->io_taskqueue);
2479	ctl_be_block_close(be_lun);
2480	if (be_lun->dev_path != NULL)
2481		free(be_lun->dev_path, M_CTLBLK);
2482	if (be_lun->lun_zone != NULL)
2483		uma_zdestroy(be_lun->lun_zone);
2484	ctl_free_opts(&cbe_lun->options);
2485	mtx_destroy(&be_lun->queue_lock);
2486	mtx_destroy(&be_lun->io_lock);
2487	free(be_lun, M_CTLBLK);
2488
2489	return (retval);
2490}
2491
/*
 * Handle a LUN removal request: disable the LUN, close its backing
 * store, invalidate it with CTL, wait for the shutdown callback to
 * mark it unconfigured, then free all of its resources.  The outcome
 * is reported via req->status; the function itself always returns 0.
 */
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	/* Look the LUN up by ID under the softc lock. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	retval = ctl_disable_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_disable_lun() for "
			 "LUN %d", retval, params->lun_id);
		goto bailout_error;
	}

	if (be_lun->vn != NULL) {
		/* Drain outstanding work before closing the backing store. */
		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
		ctl_lun_offline(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	retval = ctl_invalidate_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_invalidate_lun() for "
			 "LUN %d", retval, params->lun_id);
		goto bailout_error;
	}

	/*
	 * Wait for the lun_shutdown callback to mark the LUN
	 * unconfigured.  The WAITING flag tells the callback that a
	 * thread is sleeping on this LUN.
	 */
	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
                retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
                if (retval == EINTR)
                        break;
        }
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;

	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		/* The sleep was interrupted; the LUN was not freed. */
		snprintf(req->error_str, sizeof(req->error_str),
			 "interrupted waiting for LUN to be freed");
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);

	softc->num_luns--;
	mtx_unlock(&softc->lock);

	/* Tear down the worker taskqueue and per-LUN resources. */
	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);

	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);

	uma_zdestroy(be_lun->lun_zone);

	ctl_free_opts(&cbe_lun->options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);
	free(be_lun, M_CTLBLK);

	req->status = CTL_LUN_OK;

	return (0);

bailout_error:

	req->status = CTL_LUN_ERROR;

	return (0);
}
2585
2586static int
2587ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2588			 struct ctl_lun_req *req)
2589{
2590	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2591	struct vattr vattr;
2592	int error;
2593	struct ctl_lun_create_params *params = &be_lun->params;
2594
2595	if (params->lun_size_bytes != 0) {
2596		be_lun->size_bytes = params->lun_size_bytes;
2597	} else  {
2598		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2599		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2600		VOP_UNLOCK(be_lun->vn, 0);
2601		if (error != 0) {
2602			snprintf(req->error_str, sizeof(req->error_str),
2603				 "error calling VOP_GETATTR() for file %s",
2604				 be_lun->dev_path);
2605			return (error);
2606		}
2607		be_lun->size_bytes = vattr.va_size;
2608	}
2609	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2610	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2611	    0 : (be_lun->size_blocks - 1);
2612	return (0);
2613}
2614
2615static int
2616ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2617			struct ctl_lun_req *req)
2618{
2619	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2620	struct ctl_lun_create_params *params = &be_lun->params;
2621	struct cdevsw *csw;
2622	struct cdev *dev;
2623	uint64_t size_bytes;
2624	int error, ref;
2625
2626	csw = devvn_refthread(be_lun->vn, &dev, &ref);
2627	if (csw == NULL)
2628		return (ENXIO);
2629	if (csw->d_ioctl == NULL) {
2630		dev_relthread(dev, ref);
2631		snprintf(req->error_str, sizeof(req->error_str),
2632			 "no d_ioctl for device %s!", be_lun->dev_path);
2633		return (ENODEV);
2634	}
2635
2636	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD,
2637	    curthread);
2638	dev_relthread(dev, ref);
2639	if (error) {
2640		snprintf(req->error_str, sizeof(req->error_str),
2641			 "error %d returned for DIOCGMEDIASIZE ioctl "
2642			 "on %s!", error, be_lun->dev_path);
2643		return (error);
2644	}
2645
2646	if (params->lun_size_bytes != 0) {
2647		if (params->lun_size_bytes > size_bytes) {
2648			snprintf(req->error_str, sizeof(req->error_str),
2649				 "requested LUN size %ju > backing device "
2650				 "size %ju",
2651				 (uintmax_t)params->lun_size_bytes,
2652				 (uintmax_t)size_bytes);
2653			return (EINVAL);
2654		}
2655		be_lun->size_bytes = params->lun_size_bytes;
2656	} else {
2657		be_lun->size_bytes = size_bytes;
2658	}
2659	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2660	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2661	    0 : (be_lun->size_blocks - 1);
2662	return (0);
2663}
2664
/*
 * Handle a LUN modify request: update options (including the HA role)
 * and re-derive the LUN size from the backing store.  A secondary
 * node closes its backing store and takes the LUN offline.  The
 * outcome is reported via req->status; always returns 0.
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	/* Look the LUN up by ID under the softc lock. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;
	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);

	/*
	 * Recompute the HA role: the "ha_role" option wins, otherwise the
	 * active-shelf flag decides.  Notify CTL if the role changed.
	 */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = ctl_get_opt(&cbe_lun->options, "ha_role");
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/* Serving I/O: (re)open the backing store and resize. */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(softc, be_lun, req);
		else if (vn_isdisk(be_lun->vn, &error))
			error = ctl_be_block_modify_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG)
			error = ctl_be_block_modify_file(be_lun, req);
		else
			error = EINVAL;
		if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE;
			ctl_lun_online(cbe_lun);
		}
	} else {
		/* Not serving I/O: close the backing store, go offline. */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
			ctl_lun_offline(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
2752
2753static void
2754ctl_be_block_lun_shutdown(void *be_lun)
2755{
2756	struct ctl_be_block_lun *lun;
2757	struct ctl_be_block_softc *softc;
2758
2759	lun = (struct ctl_be_block_lun *)be_lun;
2760
2761	softc = lun->softc;
2762
2763	mtx_lock(&softc->lock);
2764	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2765	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2766		wakeup(lun);
2767	mtx_unlock(&softc->lock);
2768
2769}
2770
2771static void
2772ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2773{
2774	struct ctl_be_block_lun *lun;
2775	struct ctl_be_block_softc *softc;
2776
2777	lun = (struct ctl_be_block_lun *)be_lun;
2778	softc = lun->softc;
2779
2780	if (status == CTL_LUN_CONFIG_OK) {
2781		mtx_lock(&softc->lock);
2782		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2783		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2784			wakeup(lun);
2785		mtx_unlock(&softc->lock);
2786
2787		/*
2788		 * We successfully added the LUN, attempt to enable it.
2789		 */
2790		if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2791			printf("%s: ctl_enable_lun() failed!\n", __func__);
2792			if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2793				printf("%s: ctl_invalidate_lun() failed!\n",
2794				       __func__);
2795			}
2796		}
2797
2798		return;
2799	}
2800
2801
2802	mtx_lock(&softc->lock);
2803	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2804	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2805	wakeup(lun);
2806	mtx_unlock(&softc->lock);
2807}
2808
2809
/*
 * Handle configuration-type writes.  Cache/write-same/unmap commands
 * are queued to the worker thread for processing; START STOP UNIT is
 * completed inline.  Unknown opcodes are rejected.
 */
static int
ctl_be_block_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	retval = 0;

	DPRINTF("entered\n");

	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
		CTL_PRIV_BACKEND_LUN].ptr;
	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out.  When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		/* Queue to the worker; completion happens from there. */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
				   links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;

		if (cdb->how & SSS_START)
			retval = ctl_start_lun(cbe_lun);
		else {
			retval = ctl_stop_lun(cbe_lun);
			/*
			 * XXX KDM Copan-specific offline behavior.
			 * Figure out a reasonable way to port this?
			 */
#ifdef NEEDTOPORT
			if ((retval == 0)
			 && (cdb->byte2 & SSS_ONOFFLINE))
				retval = ctl_lun_offline(cbe_lun);
#endif
		}

		/*
		 * In general, the above routines should not fail.  They
		 * just set state for the LUN.  So we've got something
		 * pretty wrong here if we can't start or stop the LUN.
		 */
		if (retval != 0) {
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xf051);
			retval = CTL_RETVAL_COMPLETE;
		} else {
			ctl_set_success(&io->scsiio);
		}
		ctl_config_write_done(io);
		break;
	}
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}
2890
2891static int
2892ctl_be_block_config_read(union ctl_io *io)
2893{
2894	struct ctl_be_block_lun *be_lun;
2895	struct ctl_be_lun *cbe_lun;
2896	int retval = 0;
2897
2898	DPRINTF("entered\n");
2899
2900	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2901		CTL_PRIV_BACKEND_LUN].ptr;
2902	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2903
2904	switch (io->scsiio.cdb[0]) {
2905	case SERVICE_ACTION_IN:
2906		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2907			mtx_lock(&be_lun->queue_lock);
2908			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2909			    &io->io_hdr, links);
2910			mtx_unlock(&be_lun->queue_lock);
2911			taskqueue_enqueue(be_lun->io_taskqueue,
2912			    &be_lun->io_task);
2913			retval = CTL_RETVAL_QUEUED;
2914			break;
2915		}
2916		ctl_set_invalid_field(&io->scsiio,
2917				      /*sks_valid*/ 1,
2918				      /*command*/ 1,
2919				      /*field*/ 1,
2920				      /*bit_valid*/ 1,
2921				      /*bit*/ 4);
2922		ctl_config_read_done(io);
2923		retval = CTL_RETVAL_COMPLETE;
2924		break;
2925	default:
2926		ctl_set_invalid_opcode(&io->scsiio);
2927		ctl_config_read_done(io);
2928		retval = CTL_RETVAL_COMPLETE;
2929		break;
2930	}
2931
2932	return (retval);
2933}
2934
2935static int
2936ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2937{
2938	struct ctl_be_block_lun *lun;
2939	int retval;
2940
2941	lun = (struct ctl_be_block_lun *)be_lun;
2942	retval = 0;
2943
2944	retval = sbuf_printf(sb, "\t<num_threads>");
2945
2946	if (retval != 0)
2947		goto bailout;
2948
2949	retval = sbuf_printf(sb, "%d", lun->num_threads);
2950
2951	if (retval != 0)
2952		goto bailout;
2953
2954	retval = sbuf_printf(sb, "</num_threads>\n");
2955
2956bailout:
2957
2958	return (retval);
2959}
2960
2961static uint64_t
2962ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2963{
2964	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2965
2966	if (lun->getattr == NULL)
2967		return (UINT64_MAX);
2968	return (lun->getattr(lun, attrname));
2969}
2970
2971int
2972ctl_be_block_init(void)
2973{
2974	struct ctl_be_block_softc *softc;
2975	int retval;
2976
2977	softc = &backend_block_softc;
2978	retval = 0;
2979
2980	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2981	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2982	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2983	STAILQ_INIT(&softc->lun_list);
2984
2985	return (retval);
2986}
2987