1/*-
2 * Copyright (c) 1997-2007 Kenneth D. Merry
3 * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions, and the following disclaimer,
11 *    without modification.
12 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13 *    substantially similar to the "NO WARRANTY" disclaimer below
14 *    ("Disclaimer") and any redistribution must be conditioned upon
15 *    including a substantially similar Disclaimer requirement for further
16 *    binary redistribution.
17 *
18 * NO WARRANTY
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGES.
30 *
31 * Authors: Ken Merry           (Spectra Logic Corporation)
32 */
33
34/*
35 * This is eventually intended to be:
36 * - A basic data transfer/copy utility
37 * - A simple benchmark utility
38 * - An example of how to use the asynchronous pass(4) driver interface.
39 */
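/*
 * Illustrative invocation (device and file names are hypothetical; see
 * usage() and the camdd(8) manual page for the authoritative syntax):
 *
 *	camdd -i pass=da0,bs=512k,depth=4 -o file=/tmp/da0.img
 *
 * i.e. read from the da0 pass(4) instance in 512k blocks with up to four
 * commands outstanding, and write the data to a regular file.
 */
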
40#include <sys/cdefs.h>
41#include <sys/ioctl.h>
42#include <sys/stdint.h>
43#include <sys/types.h>
44#include <sys/endian.h>
45#include <sys/param.h>
46#include <sys/sbuf.h>
47#include <sys/stat.h>
48#include <sys/event.h>
49#include <sys/time.h>
50#include <sys/uio.h>
51#include <vm/vm.h>
52#include <sys/bus.h>
53#include <sys/bus_dma.h>
54#include <sys/mtio.h>
55#include <sys/conf.h>
56#include <sys/disk.h>
57
58#include <stdio.h>
59#include <stdlib.h>
60#include <semaphore.h>
61#include <string.h>
62#include <unistd.h>
63#include <inttypes.h>
64#include <limits.h>
65#include <fcntl.h>
66#include <ctype.h>
67#include <err.h>
68#include <libutil.h>
69#include <pthread.h>
70#include <assert.h>
71#include <bsdxml.h>
72
73#include <cam/cam.h>
74#include <cam/cam_debug.h>
75#include <cam/cam_ccb.h>
76#include <cam/scsi/scsi_all.h>
77#include <cam/scsi/scsi_da.h>
78#include <cam/scsi/scsi_pass.h>
79#include <cam/scsi/scsi_message.h>
80#include <cam/scsi/smp_all.h>
81#include <cam/nvme/nvme_all.h>
82#include <camlib.h>
83#include <mtlib.h>
84#include <zlib.h>
85
86typedef enum {
87	CAMDD_CMD_NONE		= 0x00000000,
88	CAMDD_CMD_HELP		= 0x00000001,
89	CAMDD_CMD_WRITE		= 0x00000002,
90	CAMDD_CMD_READ		= 0x00000003
91} camdd_cmdmask;
92
93typedef enum {
94	CAMDD_ARG_NONE		= 0x00000000,
95	CAMDD_ARG_VERBOSE	= 0x00000001,
96	CAMDD_ARG_ERR_RECOVER	= 0x00000080,
97} camdd_argmask;
98
99typedef enum {
100	CAMDD_DEV_NONE		= 0x00,
101	CAMDD_DEV_PASS		= 0x01,
102	CAMDD_DEV_FILE		= 0x02
103} camdd_dev_type;
104
105struct camdd_io_opts {
106	camdd_dev_type	dev_type;
107	char		*dev_name;
108	uint64_t	blocksize;
109	uint64_t	queue_depth;
110	uint64_t	offset;
111	int		min_cmd_size;
112	int		write_dev;
113	uint64_t	debug;
114};
115
116typedef enum {
117	CAMDD_BUF_NONE,
118	CAMDD_BUF_DATA,
119	CAMDD_BUF_INDIRECT
120} camdd_buf_type;
121
122struct camdd_buf_indirect {
123	/*
124	 * Pointer to the source buffer.
125	 */
126	struct camdd_buf *src_buf;
127
128	/*
129	 * Offset into the source buffer, in bytes.
130	 */
131	uint64_t	  offset;
132	/*
133	 * Pointer to the starting point in the source buffer.
134	 */
135	uint8_t		 *start_ptr;
136
137	/*
138	 * Length of this chunk in bytes.
139	 */
140	size_t		  len;
141};
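
/*
 * Invariant sketch (not code from this file): an indirect buffer describes a
 * byte range within its source data buffer, so for an indirect buffer "ind"
 * one would expect
 *
 *	ind->start_ptr == ind->src_buf->buf_type_spec.data.buf + ind->offset
 *
 * with the chunk covering ind->len bytes starting at that address.
 */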
142
143struct camdd_buf_data {
144	/*
	 * Buffer allocated when we allocate this camdd_buf.  This should
	 * be the blocksize of this device.
147	 */
148	uint8_t			*buf;
149
150	/*
151	 * The amount of backing store allocated in buf.  Generally this
152	 * will be the blocksize of the device.
153	 */
154	uint32_t		 alloc_len;
155
156	/*
157	 * The amount of data that was put into the buffer (on reads) or
158	 * the amount of data we have put onto the src_list so far (on
159	 * writes).
160	 */
161	uint32_t		 fill_len;
162
163	/*
164	 * The amount of data that was not transferred.
165	 */
166	uint32_t		 resid;
167
168	/*
169	 * Starting byte offset on the reader.
170	 */
171	uint64_t		 src_start_offset;
172
173	/*
174	 * CCB used for pass(4) device targets.
175	 */
176	union ccb		 ccb;
177
178	/*
179	 * Number of scatter/gather segments.
180	 */
181	int			 sg_count;
182
183	/*
184	 * Set if we had to tack on an extra buffer to round the transfer
185	 * up to a sector size.
186	 */
187	int			 extra_buf;
188
189	/*
190	 * Scatter/gather list used generally when we're the writer for a
191	 * pass(4) device.
192	 */
193	bus_dma_segment_t	*segs;
194
195	/*
196	 * Scatter/gather list used generally when we're the writer for a
	 * file or block device.
198	 */
199	struct iovec		*iovec;
200};
201
202union camdd_buf_types {
203	struct camdd_buf_indirect	indirect;
204	struct camdd_buf_data		data;
205};
206
207typedef enum {
208	CAMDD_STATUS_NONE,
209	CAMDD_STATUS_OK,
210	CAMDD_STATUS_SHORT_IO,
211	CAMDD_STATUS_EOF,
212	CAMDD_STATUS_ERROR
213} camdd_buf_status;
214
215struct camdd_buf {
216	camdd_buf_type		 buf_type;
217	union camdd_buf_types	 buf_type_spec;
218
219	camdd_buf_status	 status;
220
221	uint64_t		 lba;
222	size_t			 len;
223
224	/*
225	 * A reference count of how many indirect buffers point to this
226	 * buffer.
227	 */
228	int			 refcount;
229
230	/*
231	 * A link back to our parent device.
232	 */
233	struct camdd_dev	*dev;
234	STAILQ_ENTRY(camdd_buf)  links;
235	STAILQ_ENTRY(camdd_buf)  work_links;
236
237	/*
238	 * A count of the buffers on the src_list.
239	 */
240	int			 src_count;
241
242	/*
243	 * List of buffers from our partner thread that are the components
244	 * of this buffer for the I/O.  Uses src_links.
245	 */
246	STAILQ_HEAD(,camdd_buf)	 src_list;
247	STAILQ_ENTRY(camdd_buf)  src_links;
248};
249
250#define	NUM_DEV_TYPES	2
251
252struct camdd_dev_pass {
253	int			 scsi_dev_type;
254	int			 protocol;
255	struct cam_device	*dev;
256	uint64_t		 max_sector;
257	uint32_t		 block_len;
258	uint32_t		 cpi_maxio;
259};
260
261typedef enum {
262	CAMDD_FILE_NONE,
263	CAMDD_FILE_REG,
264	CAMDD_FILE_STD,
265	CAMDD_FILE_PIPE,
266	CAMDD_FILE_DISK,
267	CAMDD_FILE_TAPE,
268	CAMDD_FILE_TTY,
269	CAMDD_FILE_MEM
270} camdd_file_type;
271
272typedef enum {
273	CAMDD_FF_NONE 		= 0x00,
274	CAMDD_FF_CAN_SEEK	= 0x01
275} camdd_file_flags;
276
277struct camdd_dev_file {
278	int			 fd;
279	struct stat		 sb;
280	char			 filename[MAXPATHLEN + 1];
281	camdd_file_type		 file_type;
282	camdd_file_flags	 file_flags;
283	uint8_t			*tmp_buf;
284};
285
286struct camdd_dev_block {
287	int			 fd;
288	uint64_t		 size_bytes;
289	uint32_t		 block_len;
290};
291
292union camdd_dev_spec {
293	struct camdd_dev_pass	pass;
294	struct camdd_dev_file	file;
295	struct camdd_dev_block	block;
296};
297
298typedef enum {
299	CAMDD_DEV_FLAG_NONE		= 0x00,
300	CAMDD_DEV_FLAG_EOF		= 0x01,
301	CAMDD_DEV_FLAG_PEER_EOF		= 0x02,
302	CAMDD_DEV_FLAG_ACTIVE		= 0x04,
303	CAMDD_DEV_FLAG_EOF_SENT		= 0x08,
304	CAMDD_DEV_FLAG_EOF_QUEUED	= 0x10
305} camdd_dev_flags;
306
307struct camdd_dev {
308	camdd_dev_type		 dev_type;
309	union camdd_dev_spec	 dev_spec;
310	camdd_dev_flags		 flags;
311	char			 device_name[MAXPATHLEN+1];
312	uint32_t		 blocksize;
313	uint32_t		 sector_size;
314	uint64_t		 max_sector;
315	uint64_t		 sector_io_limit;
316	int			 min_cmd_size;
317	int			 write_dev;
318	int			 retry_count;
319	int			 io_timeout;
320	int			 debug;
321	uint64_t		 start_offset_bytes;
322	uint64_t		 next_io_pos_bytes;
323	uint64_t		 next_peer_pos_bytes;
324	uint64_t		 next_completion_pos_bytes;
325	uint64_t		 peer_bytes_queued;
326	uint64_t		 bytes_transferred;
327	uint32_t		 target_queue_depth;
328	uint32_t		 cur_active_io;
329	uint8_t			*extra_buf;
330	uint32_t		 extra_buf_len;
331	struct camdd_dev	*peer_dev;
332	pthread_mutex_t		 mutex;
333	pthread_cond_t		 cond;
334	int			 kq;
335
336	int			 (*run)(struct camdd_dev *dev);
337	int			 (*fetch)(struct camdd_dev *dev);
338
339	/*
340	 * Buffers that are available for I/O.  Uses links.
341	 */
342	STAILQ_HEAD(,camdd_buf)	 free_queue;
343
344	/*
345	 * Free indirect buffers.  These are used for breaking a large
346	 * buffer into multiple pieces.
347	 */
348	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
349
350	/*
351	 * Buffers that have been queued to the kernel.  Uses links.
352	 */
353	STAILQ_HEAD(,camdd_buf)	 active_queue;
354
355	/*
356	 * Will generally contain one of our buffers that is waiting for enough
357	 * I/O from our partner thread to be able to execute.  This will
358	 * generally happen when our per-I/O-size is larger than the
359	 * partner thread's per-I/O-size.  Uses links.
360	 */
361	STAILQ_HEAD(,camdd_buf)	 pending_queue;
362
363	/*
364	 * Number of buffers on the pending queue
365	 */
366	int			 num_pending_queue;
367
368	/*
369	 * Buffers that are filled and ready to execute.  This is used when
370	 * our partner (reader) thread sends us blocks that are larger than
371	 * our blocksize, and so we have to split them into multiple pieces.
372	 */
373	STAILQ_HEAD(,camdd_buf)	 run_queue;
374
375	/*
376	 * Number of buffers on the run queue.
377	 */
378	int			 num_run_queue;
379
380	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
381
382	int			 num_reorder_queue;
383
384	/*
385	 * Buffers that have been queued to us by our partner thread
386	 * (generally the reader thread) to be written out.  Uses
387	 * work_links.
388	 */
389	STAILQ_HEAD(,camdd_buf)	 work_queue;
390
391	/*
392	 * Buffers that have been completed by our partner thread.  Uses
393	 * work_links.
394	 */
395	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
396
397	/*
398	 * Number of buffers on the peer done queue.
399	 */
400	uint32_t		 num_peer_done_queue;
401
402	/*
403	 * A list of buffers that we have queued to our peer thread.  Uses
404	 * links.
405	 */
406	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
407
408	/*
409	 * Number of buffers on the peer work queue.
410	 */
411	uint32_t		 num_peer_work_queue;
412};
413
414static sem_t camdd_sem;
415static sig_atomic_t need_exit = 0;
416static sig_atomic_t error_exit = 0;
417static sig_atomic_t need_status = 0;
418
419#ifndef min
#define	min(a, b) (((a) < (b)) ? (a) : (b))
421#endif
422
423
424/* Generically useful offsets into the peripheral private area */
425#define ppriv_ptr0 periph_priv.entries[0].ptr
426#define ppriv_ptr1 periph_priv.entries[1].ptr
427#define ppriv_field0 periph_priv.entries[0].field
428#define ppriv_field1 periph_priv.entries[1].field
429
430#define	ccb_buf	ppriv_ptr0
431
432#define	CAMDD_FILE_DEFAULT_BLOCK	524288
433#define	CAMDD_FILE_DEFAULT_DEPTH	1
434#define	CAMDD_PASS_MAX_BLOCK		1048576
435#define	CAMDD_PASS_DEFAULT_DEPTH	6
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
437
438static int parse_btl(char *tstr, int *bus, int *target, int *lun);
439void camdd_free_dev(struct camdd_dev *dev);
440struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
441				  struct kevent *new_ke, int num_ke,
442				  int retry_count, int timeout);
443static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
444					 camdd_buf_type buf_type);
445void camdd_release_buf(struct camdd_buf *buf);
446struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
447int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
448			uint32_t sector_size, uint32_t *num_sectors_used,
449			int *double_buf_needed);
450uint32_t camdd_buf_get_len(struct camdd_buf *buf);
451void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
452int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
453		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
454int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
455         camdd_argmask arglist, int probe_retry_count,
456         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
457int camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
458         camdd_argmask arglist, int probe_retry_count,
459         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
460struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
461				   int retry_count, int timeout);
462struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
463				   struct camdd_io_opts *io_opts,
464				   camdd_argmask arglist, int probe_retry_count,
465				   int probe_timeout, int io_retry_count,
466				   int io_timeout);
467void nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
468		void (*cbfcnp)(struct cam_periph *, union ccb *),
469		uint32_t nsid, int readop, uint64_t lba,
470		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
471		uint32_t timeout);
472void *camdd_file_worker(void *arg);
473camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
474int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
475int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
476int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
477void camdd_peer_done(struct camdd_buf *buf);
478void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
479			int *error_count);
480int camdd_pass_fetch(struct camdd_dev *dev);
481int camdd_file_run(struct camdd_dev *dev);
482int camdd_pass_run(struct camdd_dev *dev);
483int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
484int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
485void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
486		     uint32_t *peer_depth, uint32_t *our_bytes,
487		     uint32_t *peer_bytes);
488void *camdd_worker(void *arg);
489void camdd_sig_handler(int sig);
490void camdd_print_status(struct camdd_dev *camdd_dev,
491			struct camdd_dev *other_dev,
492			struct timespec *start_time);
493int camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist,
494	     int num_io_opts, uint64_t max_io, int retry_count, int timeout);
495int camdd_parse_io_opts(char *args, int is_write,
496			struct camdd_io_opts *io_opts);
497void usage(void);
498
499/*
500 * Parse out a bus, or a bus, target and lun in the following
501 * format:
502 * bus
503 * bus:target
504 * bus:target:lun
505 *
506 * Returns the number of parsed components, or 0.
507 */
508static int
509parse_btl(char *tstr, int *bus, int *target, int *lun)
510{
511	char *tmpstr;
512	int convs = 0;
513
	while (isspace((unsigned char)*tstr) && (*tstr != '\0'))
515		tstr++;
516
517	tmpstr = (char *)strtok(tstr, ":");
518	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
519		*bus = strtol(tmpstr, NULL, 0);
520		convs++;
521		tmpstr = (char *)strtok(NULL, ":");
522		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
523			*target = strtol(tmpstr, NULL, 0);
524			convs++;
525			tmpstr = (char *)strtok(NULL, ":");
526			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
527				*lun = strtol(tmpstr, NULL, 0);
528				convs++;
529			}
530		}
531	}
532
533	return convs;
534}
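
/*
 * For example (illustrative values): parse_btl("1:2:0", &bus, &target, &lun)
 * sets bus = 1, target = 2 and lun = 0 and returns 3, while parse_btl("3",
 * ...) sets only bus = 3 and returns 1.
 */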
535
536/*
537 * XXX KDM clean up and free all of the buffers on the queue!
538 */
539void
540camdd_free_dev(struct camdd_dev *dev)
541{
542	if (dev == NULL)
543		return;
544
545	switch (dev->dev_type) {
546	case CAMDD_DEV_FILE: {
547		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
548
549		if (file_dev->fd != -1)
550			close(file_dev->fd);
551		free(file_dev->tmp_buf);
552		break;
553	}
554	case CAMDD_DEV_PASS: {
555		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
556
557		if (pass_dev->dev != NULL)
558			cam_close_device(pass_dev->dev);
559		break;
560	}
561	default:
562		break;
563	}
564
565	free(dev);
566}
567
568struct camdd_dev *
569camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
570		int retry_count, int timeout)
571{
572	struct camdd_dev *dev = NULL;
573	struct kevent *ke;
574	size_t ke_size;
575	int retval = 0;
576
577	dev = calloc(1, sizeof(*dev));
578	if (dev == NULL) {
579		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
580		goto bailout;
581	}
582
583	dev->dev_type = dev_type;
584	dev->io_timeout = timeout;
585	dev->retry_count = retry_count;
586	STAILQ_INIT(&dev->free_queue);
587	STAILQ_INIT(&dev->free_indirect_queue);
588	STAILQ_INIT(&dev->active_queue);
589	STAILQ_INIT(&dev->pending_queue);
590	STAILQ_INIT(&dev->run_queue);
591	STAILQ_INIT(&dev->reorder_queue);
592	STAILQ_INIT(&dev->work_queue);
593	STAILQ_INIT(&dev->peer_done_queue);
594	STAILQ_INIT(&dev->peer_work_queue);
595	retval = pthread_mutex_init(&dev->mutex, NULL);
596	if (retval != 0) {
597		warnc(retval, "%s: failed to initialize mutex", __func__);
598		goto bailout;
599	}
600
601	retval = pthread_cond_init(&dev->cond, NULL);
602	if (retval != 0) {
603		warnc(retval, "%s: failed to initialize condition variable",
604		      __func__);
605		goto bailout;
606	}
607
608	dev->kq = kqueue();
609	if (dev->kq == -1) {
610		warn("%s: Unable to create kqueue", __func__);
611		goto bailout;
612	}
613
614	ke_size = sizeof(struct kevent) * (num_ke + 4);
615	ke = calloc(1, ke_size);
616	if (ke == NULL) {
617		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
618		goto bailout;
619	}
620	if (num_ke > 0)
621		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
622
623	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
624	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
625	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
626	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
627	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
628	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
629
630	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
631	if (retval == -1) {
632		warn("%s: Unable to register kevents", __func__);
633		goto bailout;
634	}
635
636
637	return (dev);
638
639bailout:
640	free(dev);
641
642	return (NULL);
643}
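
/*
 * A minimal sketch (assumed usage, mirroring camdd_queue_peer_buf() later in
 * this file) of how one thread fires the EVFILT_USER event registered above
 * for its peer's work queue:
 *
 *	struct kevent ke;
 *
 *	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
 *	    NOTE_TRIGGER, 0, NULL);
 *	kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
 */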
644
645static struct camdd_buf *
646camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
647{
648	struct camdd_buf *buf = NULL;
649	uint8_t *data_ptr = NULL;
650
651	/*
652	 * We only need to allocate data space for data buffers.
653	 */
654	switch (buf_type) {
655	case CAMDD_BUF_DATA:
656		data_ptr = malloc(dev->blocksize);
657		if (data_ptr == NULL) {
658			warn("unable to allocate %u bytes", dev->blocksize);
659			goto bailout_error;
660		}
661		break;
662	default:
663		break;
664	}
665
666	buf = calloc(1, sizeof(*buf));
667	if (buf == NULL) {
668		warn("unable to allocate %zu bytes", sizeof(*buf));
669		goto bailout_error;
670	}
671
672	buf->buf_type = buf_type;
673	buf->dev = dev;
674	switch (buf_type) {
675	case CAMDD_BUF_DATA: {
676		struct camdd_buf_data *data;
677
678		data = &buf->buf_type_spec.data;
679
680		data->alloc_len = dev->blocksize;
681		data->buf = data_ptr;
682		break;
683	}
684	case CAMDD_BUF_INDIRECT:
685		break;
686	default:
687		break;
688	}
689	STAILQ_INIT(&buf->src_list);
690
691	return (buf);
692
693bailout_error:
694	free(data_ptr);
695
696	return (NULL);
697}
698
699void
700camdd_release_buf(struct camdd_buf *buf)
701{
702	struct camdd_dev *dev;
703
704	dev = buf->dev;
705
706	switch (buf->buf_type) {
707	case CAMDD_BUF_DATA: {
708		struct camdd_buf_data *data;
709
710		data = &buf->buf_type_spec.data;
711
712		if (data->segs != NULL) {
713			if (data->extra_buf != 0) {
714				void *extra_buf;
715
716				extra_buf = (void *)
717				    data->segs[data->sg_count - 1].ds_addr;
718				free(extra_buf);
719				data->extra_buf = 0;
720			}
721			free(data->segs);
722			data->segs = NULL;
723			data->sg_count = 0;
724		} else if (data->iovec != NULL) {
725			if (data->extra_buf != 0) {
726				free(data->iovec[data->sg_count - 1].iov_base);
727				data->extra_buf = 0;
728			}
729			free(data->iovec);
730			data->iovec = NULL;
731			data->sg_count = 0;
732		}
733		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
734		break;
735	}
736	case CAMDD_BUF_INDIRECT:
737		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
738		break;
739	default:
740		err(1, "%s: Invalid buffer type %d for released buffer",
741		    __func__, buf->buf_type);
742		break;
743	}
744}
745
746struct camdd_buf *
747camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
748{
749	struct camdd_buf *buf = NULL;
750
751	switch (buf_type) {
752	case CAMDD_BUF_DATA:
753		buf = STAILQ_FIRST(&dev->free_queue);
754		if (buf != NULL) {
755			struct camdd_buf_data *data;
756			uint8_t *data_ptr;
757			uint32_t alloc_len;
758
759			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
760			data = &buf->buf_type_spec.data;
761			data_ptr = data->buf;
762			alloc_len = data->alloc_len;
763			bzero(buf, sizeof(*buf));
764			data->buf = data_ptr;
765			data->alloc_len = alloc_len;
766		}
767		break;
768	case CAMDD_BUF_INDIRECT:
769		buf = STAILQ_FIRST(&dev->free_indirect_queue);
770		if (buf != NULL) {
771			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
772
773			bzero(buf, sizeof(*buf));
774		}
775		break;
776	default:
777		warnx("Unknown buffer type %d requested", buf_type);
778		break;
779	}
780
781
782	if (buf == NULL)
783		return (camdd_alloc_buf(dev, buf_type));
784	else {
785		STAILQ_INIT(&buf->src_list);
786		buf->dev = dev;
787		buf->buf_type = buf_type;
788
789		return (buf);
790	}
791}
792
793int
794camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
795		    uint32_t *num_sectors_used, int *double_buf_needed)
796{
797	struct camdd_buf *tmp_buf;
798	struct camdd_buf_data *data;
799	uint8_t *extra_buf = NULL;
800	size_t extra_buf_len = 0;
801	int extra_buf_attached = 0;
802	int i, retval = 0;
803
804	data = &buf->buf_type_spec.data;
805
806	data->sg_count = buf->src_count;
807	/*
808	 * Compose a scatter/gather list from all of the buffers in the list.
809	 * If the length of the buffer isn't a multiple of the sector size,
810	 * we'll have to add an extra buffer.  This should only happen
811	 * at the end of a transfer.
812	 */
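	/*
	 * Worked example (illustrative numbers): with sector_size = 512 and
	 * data->fill_len = 1000, the remainder is 488, so a 24-byte
	 * zero-filled extra buffer is appended (sg_count grows by one) and
	 * the transfer is rounded up to (1000 + 24) / 512 = 2 sectors.
	 */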
813	if ((data->fill_len % sector_size) != 0) {
814		extra_buf_len = sector_size - (data->fill_len % sector_size);
815		extra_buf = calloc(extra_buf_len, 1);
816		if (extra_buf == NULL) {
817			warn("%s: unable to allocate %zu bytes for extra "
818			    "buffer space", __func__, extra_buf_len);
819			retval = 1;
820			goto bailout;
821		}
822		data->extra_buf = 1;
823		data->sg_count++;
824	}
825	if (iovec == 0) {
826		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
827		if (data->segs == NULL) {
828			warn("%s: unable to allocate %zu bytes for S/G list",
829			    __func__, sizeof(bus_dma_segment_t) *
830			    data->sg_count);
831			retval = 1;
832			goto bailout;
833		}
834
835	} else {
836		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
837		if (data->iovec == NULL) {
838			warn("%s: unable to allocate %zu bytes for S/G list",
839			    __func__, sizeof(struct iovec) * data->sg_count);
840			retval = 1;
841			goto bailout;
842		}
843	}
844
845	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
846	     i < buf->src_count && tmp_buf != NULL; i++,
847	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
848
849		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
850			struct camdd_buf_data *tmp_data;
851
852			tmp_data = &tmp_buf->buf_type_spec.data;
853			if (iovec == 0) {
854				data->segs[i].ds_addr =
855				    (bus_addr_t) tmp_data->buf;
856				data->segs[i].ds_len = tmp_data->fill_len -
857				    tmp_data->resid;
858			} else {
859				data->iovec[i].iov_base = tmp_data->buf;
860				data->iovec[i].iov_len = tmp_data->fill_len -
861				    tmp_data->resid;
862			}
863			if (((tmp_data->fill_len - tmp_data->resid) %
864			     sector_size) != 0)
865				*double_buf_needed = 1;
866		} else {
867			struct camdd_buf_indirect *tmp_ind;
868
869			tmp_ind = &tmp_buf->buf_type_spec.indirect;
870			if (iovec == 0) {
871				data->segs[i].ds_addr =
872				    (bus_addr_t)tmp_ind->start_ptr;
873				data->segs[i].ds_len = tmp_ind->len;
874			} else {
875				data->iovec[i].iov_base = tmp_ind->start_ptr;
876				data->iovec[i].iov_len = tmp_ind->len;
877			}
878			if ((tmp_ind->len % sector_size) != 0)
879				*double_buf_needed = 1;
880		}
881	}
882
883	if (extra_buf != NULL) {
884		if (iovec == 0) {
885			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
886			data->segs[i].ds_len = extra_buf_len;
887		} else {
888			data->iovec[i].iov_base = extra_buf;
889			data->iovec[i].iov_len = extra_buf_len;
890		}
891		extra_buf_attached = 1;
892		i++;
893	}
894	if ((tmp_buf != NULL) || (i != data->sg_count)) {
895		warnx("buffer source count does not match "
896		      "number of buffers in list!");
897		retval = 1;
898		goto bailout;
899	}
900
901bailout:
902	if (retval == 0) {
903		*num_sectors_used = (data->fill_len + extra_buf_len) /
904		    sector_size;
905	} else if (extra_buf_attached == 0) {
906		/*
907		 * If extra_buf isn't attached yet, we need to free it
908		 * to avoid leaking.
909		 */
910		free(extra_buf);
911		data->extra_buf = 0;
912		data->sg_count--;
913	}
914	return (retval);
915}
916
917uint32_t
918camdd_buf_get_len(struct camdd_buf *buf)
919{
920	uint32_t len = 0;
921
922	if (buf->buf_type != CAMDD_BUF_DATA) {
923		struct camdd_buf_indirect *indirect;
924
925		indirect = &buf->buf_type_spec.indirect;
926		len = indirect->len;
927	} else {
928		struct camdd_buf_data *data;
929
930		data = &buf->buf_type_spec.data;
931		len = data->fill_len;
932	}
933
934	return (len);
935}
936
937void
938camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
939{
940	struct camdd_buf_data *data;
941
942	assert(buf->buf_type == CAMDD_BUF_DATA);
943
944	data = &buf->buf_type_spec.data;
945
946	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
947	buf->src_count++;
948
949	data->fill_len += camdd_buf_get_len(child_buf);
950}
951
952typedef enum {
953	CAMDD_TS_MAX_BLK,
954	CAMDD_TS_MIN_BLK,
955	CAMDD_TS_BLK_GRAN,
956	CAMDD_TS_EFF_IOSIZE
957} camdd_status_item_index;
958
959static struct camdd_status_items {
960	const char *name;
961	struct mt_status_entry *entry;
962} req_status_items[] = {
963	{ "max_blk", NULL },
964	{ "min_blk", NULL },
965	{ "blk_gran", NULL },
966	{ "max_effective_iosize", NULL }
967};
968
969int
970camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
971		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
972{
973	struct mt_status_data status_data;
974	char *xml_str = NULL;
975	unsigned int i;
976	int retval = 0;
977
978	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
979	if (retval != 0)
980		err(1, "Couldn't get XML string from %s", filename);
981
982	retval = mt_get_status(xml_str, &status_data);
983	if (retval != XML_STATUS_OK) {
984		warn("couldn't get status for %s", filename);
985		retval = 1;
986		goto bailout;
987	} else
988		retval = 0;
989
990	if (status_data.error != 0) {
991		warnx("%s", status_data.error_str);
992		retval = 1;
993		goto bailout;
994	}
995
996	for (i = 0; i < nitems(req_status_items); i++) {
		char *name;
998
999		name = __DECONST(char *, req_status_items[i].name);
1000		req_status_items[i].entry = mt_status_entry_find(&status_data,
1001		    name);
1002		if (req_status_items[i].entry == NULL) {
1003			errx(1, "Cannot find status entry %s",
1004			    req_status_items[i].name);
1005		}
1006	}
1007
1008	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
	*max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
	*min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1011	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1012bailout:
1013
1014	free(xml_str);
1015	mt_status_free(&status_data);
1016
1017	return (retval);
1018}
1019
1020struct camdd_dev *
1021camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1022    int timeout)
1023{
1024	struct camdd_dev *dev = NULL;
1025	struct camdd_dev_file *file_dev;
1026	uint64_t blocksize = io_opts->blocksize;
1027
1028	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1029	if (dev == NULL)
1030		goto bailout;
1031
1032	file_dev = &dev->dev_spec.file;
1033	file_dev->fd = fd;
1034	strlcpy(file_dev->filename, io_opts->dev_name,
1035	    sizeof(file_dev->filename));
1036	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1037	if (blocksize == 0)
1038		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1039	else
1040		dev->blocksize = blocksize;
1041
1042	if ((io_opts->queue_depth != 0)
1043	 && (io_opts->queue_depth != 1)) {
1044		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1045		    "command supported", (uintmax_t)io_opts->queue_depth,
1046		    io_opts->dev_name);
1047	}
1048	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1049	dev->run = camdd_file_run;
1050	dev->fetch = NULL;
1051
1052	/*
1053	 * We can effectively access files on byte boundaries.  We'll reset
1054	 * this for devices like disks that can be accessed on sector
1055	 * boundaries.
1056	 */
1057	dev->sector_size = 1;
1058
1059	if ((fd != STDIN_FILENO)
1060	 && (fd != STDOUT_FILENO)) {
1061		int retval;
1062
1063		retval = fstat(fd, &file_dev->sb);
1064		if (retval != 0) {
1065			warn("Cannot stat %s", dev->device_name);
1066			goto bailout_error;
1067		}
1068		if (S_ISREG(file_dev->sb.st_mode)) {
1069			file_dev->file_type = CAMDD_FILE_REG;
1070		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1071			int type;
1072
1073			if (ioctl(fd, FIODTYPE, &type) == -1)
1074				err(1, "FIODTYPE ioctl failed on %s",
1075				    dev->device_name);
1076			else {
1077				if (type & D_TAPE)
1078					file_dev->file_type = CAMDD_FILE_TAPE;
1079				else if (type & D_DISK)
1080					file_dev->file_type = CAMDD_FILE_DISK;
1081				else if (type & D_MEM)
1082					file_dev->file_type = CAMDD_FILE_MEM;
1083				else if (type & D_TTY)
1084					file_dev->file_type = CAMDD_FILE_TTY;
1085			}
1086		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1087			errx(1, "cannot operate on directory %s",
1088			    dev->device_name);
1089		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1090			file_dev->file_type = CAMDD_FILE_PIPE;
1091		} else
1092			errx(1, "Cannot determine file type for %s",
1093			    dev->device_name);
1094
1095		switch (file_dev->file_type) {
1096		case CAMDD_FILE_REG:
1097			if (file_dev->sb.st_size != 0)
1098				dev->max_sector = file_dev->sb.st_size - 1;
1099			else
1100				dev->max_sector = 0;
1101			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1102			break;
1103		case CAMDD_FILE_TAPE: {
1104			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1105			/*
1106			 * Check block limits and maximum effective iosize.
1107			 * Make sure the blocksize is within the block
1108			 * limits (and a multiple of the minimum blocksize)
1109			 * and that the blocksize is <= maximum effective
1110			 * iosize.
1111			 */
1112			retval = camdd_probe_tape(fd, dev->device_name,
1113			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1114			if (retval != 0)
1115				errx(1, "Unable to probe tape %s",
1116				    dev->device_name);
1117
1118			/*
1119			 * The blocksize needs to be <= the maximum
1120			 * effective I/O size of the tape device.  Note
1121			 * that this also takes into account the maximum
1122			 * blocksize reported by READ BLOCK LIMITS.
1123			 */
1124			if (dev->blocksize > max_iosize) {
1125				warnx("Blocksize %u too big for %s, limiting "
1126				    "to %ju", dev->blocksize, dev->device_name,
1127				    max_iosize);
1128				dev->blocksize = max_iosize;
1129			}
1130
1131			/*
			 * The blocksize needs to be at least min_blk.
1133			 */
1134			if (dev->blocksize < min_blk) {
1135				warnx("Blocksize %u too small for %s, "
1136				    "increasing to %ju", dev->blocksize,
1137				    dev->device_name, min_blk);
1138				dev->blocksize = min_blk;
1139			}
1140
1141			/*
1142			 * And the blocksize needs to be a multiple of
1143			 * the block granularity.
1144			 */
1145			if ((blk_gran != 0)
1146			 && (dev->blocksize % (1 << blk_gran))) {
1147				warnx("Blocksize %u for %s not a multiple of "
1148				    "%d, adjusting to %d", dev->blocksize,
1149				    dev->device_name, (1 << blk_gran),
1150				    dev->blocksize & ~((1 << blk_gran) - 1));
1151				dev->blocksize &= ~((1 << blk_gran) - 1);
1152			}
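			/*
			 * For example (illustrative numbers): with a
			 * blk_gran of 2 the granularity is 1 << 2 = 4 bytes,
			 * so a blocksize of 524290 would be rounded down to
			 * 524288 by the mask above.
			 */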
1153
1154			if (dev->blocksize == 0) {
1155				errx(1, "Unable to derive valid blocksize for "
1156				    "%s", dev->device_name);
1157			}
1158
1159			/*
1160			 * For tape drives, set the sector size to the
1161			 * blocksize so that we make sure not to write
1162			 * less than the blocksize out to the drive.
1163			 */
1164			dev->sector_size = dev->blocksize;
1165			break;
1166		}
1167		case CAMDD_FILE_DISK: {
1168			off_t media_size;
1169			unsigned int sector_size;
1170
1171			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1172
1173			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1174				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1175				    dev->device_name);
1176			}
1177
1178			if (sector_size == 0) {
1179				errx(1, "DIOCGSECTORSIZE ioctl returned "
1180				    "invalid sector size %u for %s",
1181				    sector_size, dev->device_name);
1182			}
1183
1184			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1185				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1186				    dev->device_name);
1187			}
1188
1189			if (media_size == 0) {
1190				errx(1, "DIOCGMEDIASIZE ioctl returned "
1191				    "invalid media size %ju for %s",
1192				    (uintmax_t)media_size, dev->device_name);
1193			}
1194
1195			if (dev->blocksize % sector_size) {
1196				errx(1, "%s blocksize %u not a multiple of "
1197				    "sector size %u", dev->device_name,
1198				    dev->blocksize, sector_size);
1199			}
1200
1201			dev->sector_size = sector_size;
1202			dev->max_sector = (media_size / sector_size) - 1;
1203			break;
1204		}
1205		case CAMDD_FILE_MEM:
1206			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1207			break;
1208		default:
1209			break;
1210		}
1211	}
1212
1213	if ((io_opts->offset != 0)
1214	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1215		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1216		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1217		goto bailout_error;
1218	}
1219#if 0
1220	else if ((io_opts->offset != 0)
1221		&& ((io_opts->offset % dev->sector_size) != 0)) {
1222		warnx("Offset %ju for %s is not a multiple of the "
1223		      "sector size %u", io_opts->offset,
1224		      io_opts->dev_name, dev->sector_size);
1225		goto bailout_error;
1226	} else {
1227		dev->start_offset_bytes = io_opts->offset;
1228	}
1229#endif
1230
1231bailout:
1232	return (dev);
1233
1234bailout_error:
1235	camdd_free_dev(dev);
1236	return (NULL);
1237}
1238
1239/*
1240 * Get a get device CCB for the specified device.
1241 */
1242int
1243camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1244{
1245        union ccb *ccb;
1246	int retval = 0;
1247
1248	ccb = cam_getccb(device);
1249
1250	if (ccb == NULL) {
1251		warnx("%s: couldn't allocate CCB", __func__);
1252		return -1;
1253	}
1254
1255	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1256
1257	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1258
1259	if (cam_send_ccb(device, ccb) < 0) {
1260		warn("%s: error sending Get Device Information CCB", __func__);
1261			cam_error_print(device, ccb, CAM_ESF_ALL,
1262					CAM_EPF_ALL, stderr);
1263		retval = -1;
1264		goto bailout;
1265	}
1266
1267	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1268			cam_error_print(device, ccb, CAM_ESF_ALL,
1269					CAM_EPF_ALL, stderr);
1270		retval = -1;
1271		goto bailout;
1272	}
1273
1274	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1275
1276bailout:
1277	cam_freeccb(ccb);
1278
1279	return retval;
1280}
1281
1282int
1283camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1284		 camdd_argmask arglist, int probe_retry_count,
1285		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1286{
1287	struct scsi_read_capacity_data rcap;
1288	struct scsi_read_capacity_data_long rcaplong;
1289	int retval = -1;
1290
1291	if (ccb == NULL) {
1292		warnx("%s: error passed ccb is NULL", __func__);
1293		goto bailout;
1294	}
1295
1296	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1297
1298	scsi_read_capacity(&ccb->csio,
1299			   /*retries*/ probe_retry_count,
1300			   /*cbfcnp*/ NULL,
1301			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1302			   &rcap,
1303			   SSD_FULL_SIZE,
1304			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1305
1306	/* Disable freezing the device queue */
1307	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1308
1309	if (arglist & CAMDD_ARG_ERR_RECOVER)
1310		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1311
1312	if (cam_send_ccb(cam_dev, ccb) < 0) {
1313		warn("error sending READ CAPACITY command");
1314
1315		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1316				CAM_EPF_ALL, stderr);
1317
1318		goto bailout;
1319	}
1320
1321	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1322		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1323		goto bailout;
1324	}
1325
1326	*maxsector = scsi_4btoul(rcap.addr);
1327	*block_len = scsi_4btoul(rcap.length);
1328
1329	/*
1330	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1331	 * and we need to issue the long READ CAPACITY to get the real
1332	 * capacity.  Otherwise, we're all set.
1333	 */
1334	if (*maxsector != 0xffffffff) {
1335		retval = 0;
1336		goto bailout;
1337	}
1338
1339	scsi_read_capacity_16(&ccb->csio,
1340			      /*retries*/ probe_retry_count,
1341			      /*cbfcnp*/ NULL,
1342			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1343			      /*lba*/ 0,
1344			      /*reladdr*/ 0,
1345			      /*pmi*/ 0,
1346			      (uint8_t *)&rcaplong,
1347			      sizeof(rcaplong),
1348			      /*sense_len*/ SSD_FULL_SIZE,
1349			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1350
1351	/* Disable freezing the device queue */
1352	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1353
1354	if (arglist & CAMDD_ARG_ERR_RECOVER)
1355		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1356
1357	if (cam_send_ccb(cam_dev, ccb) < 0) {
1358		warn("error sending READ CAPACITY (16) command");
1359		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1360				CAM_EPF_ALL, stderr);
1361		goto bailout;
1362	}
1363
1364	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1365		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1366		goto bailout;
1367	}
1368
1369	*maxsector = scsi_8btou64(rcaplong.addr);
1370	*block_len = scsi_4btoul(rcaplong.length);
1371
1372	retval = 0;
1373
1374bailout:
1375	return retval;
1376}
1377
1378int
1379camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
1380		 camdd_argmask arglist, int probe_retry_count,
1381		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1382{
1383	struct nvme_command *nc = NULL;
1384	struct nvme_namespace_data nsdata;
1385	uint32_t nsid = cam_dev->target_lun & UINT32_MAX;
1386	uint8_t format = 0, lbads = 0;
1387	int retval = -1;
1388
1389	if (ccb == NULL) {
1390		warnx("%s: error passed ccb is NULL", __func__);
1391		goto bailout;
1392	}
1393
1394	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
1395
1396	/* Send Identify Namespace to get block size and capacity */
1397	nc = &ccb->nvmeio.cmd;
1398	nc->opc = NVME_OPC_IDENTIFY;
1399
1400	nc->nsid = nsid;
1401	nc->cdw10 = 0; /* Identify Namespace is CNS = 0 */
1402
1403	cam_fill_nvmeadmin(&ccb->nvmeio,
1404			/*retries*/ probe_retry_count,
1405			/*cbfcnp*/ NULL,
1406			CAM_DIR_IN,
1407			(uint8_t *)&nsdata,
1408			sizeof(nsdata),
1409			probe_timeout);
1410
1411	/* Disable freezing the device queue */
1412	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1413
1414	if (arglist & CAMDD_ARG_ERR_RECOVER)
1415		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1416
1417	if (cam_send_ccb(cam_dev, ccb) < 0) {
1418		warn("error sending Identify Namespace command");
1419
1420		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1421				CAM_EPF_ALL, stderr);
1422
1423		goto bailout;
1424	}
1425
1426	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1427		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1428		goto bailout;
1429	}
1430
1431	*maxsector = nsdata.nsze;
1432	/* The LBA Data Size (LBADS) is reported as a power of 2 */
1433	format = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, nsdata.flbas);
1434	lbads = NVMEV(NVME_NS_DATA_LBAF_LBADS, nsdata.lbaf[format]);
1435	*block_len = 1 << lbads;
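	/*
	 * For example (illustrative values): an LBADS of 9 yields a 512-byte
	 * block length, and an LBADS of 12 yields 4096 bytes.
	 */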
1436
1437	retval = 0;
1438
1439bailout:
1440	return retval;
1441}
1442
1443/*
1444 * Need to implement this.  Do a basic probe:
1445 * - Check the inquiry data, make sure we're talking to a device that we
1446 *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1447 * - Send a test unit ready, make sure the device is available.
1448 * - Get the capacity and block size.
1449 */
1450struct camdd_dev *
1451camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1452		 camdd_argmask arglist, int probe_retry_count,
1453		 int probe_timeout, int io_retry_count, int io_timeout)
1454{
1455	union ccb *ccb;
1456	uint64_t maxsector = 0;
1457	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1458	uint32_t block_len = 0;
1459	struct camdd_dev *dev = NULL;
1460	struct camdd_dev_pass *pass_dev;
1461	struct kevent ke;
1462	struct ccb_getdev cgd;
1463	int retval;
1464	int scsi_dev_type = T_NODEVICE;
1465
1466	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1467		warnx("%s: error retrieving CGD", __func__);
1468		return NULL;
1469	}
1470
1471	ccb = cam_getccb(cam_dev);
1472
1473	if (ccb == NULL) {
1474		warnx("%s: error allocating ccb", __func__);
1475		goto bailout;
1476	}
1477
1478	switch (cgd.protocol) {
1479	case PROTO_SCSI:
1480		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1481
1482		/*
1483		 * For devices that support READ CAPACITY, we'll attempt to get the
1484		 * capacity.  Otherwise, we really don't support tape or other
1485		 * devices via SCSI passthrough, so just return an error in that case.
1486		 */
1487		switch (scsi_dev_type) {
1488		case T_DIRECT:
1489		case T_WORM:
1490		case T_CDROM:
1491		case T_OPTICAL:
1492		case T_RBC:
1493		case T_ZBC_HM:
1494			break;
1495		default:
1496			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1497			break; /*NOTREACHED*/
1498		}
1499
		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, arglist,
						probe_retry_count, probe_timeout,
						&maxsector, &block_len))) {
1503			goto bailout;
1504		}
1505		break;
1506	case PROTO_NVME:
		if ((retval = camdd_probe_pass_nvme(cam_dev, ccb, arglist,
						probe_retry_count, probe_timeout,
						&maxsector, &block_len))) {
1510			goto bailout;
1511		}
1512		break;
1513	default:
1514		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1515		break; /*NOTREACHED*/
1516	}
1517
1518	if (block_len == 0) {
1519		warnx("Sector size for %s%u is 0, cannot continue",
1520		    cam_dev->device_name, cam_dev->dev_unit_num);
1521		goto bailout_error;
1522	}
1523
1524	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1525
1526	ccb->ccb_h.func_code = XPT_PATH_INQ;
1527	ccb->ccb_h.flags = CAM_DIR_NONE;
1528	ccb->ccb_h.retry_count = 1;
1529
1530	if (cam_send_ccb(cam_dev, ccb) < 0) {
1531		warn("error sending XPT_PATH_INQ CCB");
1532
1533		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1534				CAM_EPF_ALL, stderr);
1535		goto bailout;
1536	}
1537
1538	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1539
1540	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1541			      io_timeout);
1542	if (dev == NULL)
1543		goto bailout;
1544
1545	pass_dev = &dev->dev_spec.pass;
1546	pass_dev->scsi_dev_type = scsi_dev_type;
1547	pass_dev->protocol = cgd.protocol;
1548	pass_dev->dev = cam_dev;
1549	pass_dev->max_sector = maxsector;
1550	pass_dev->block_len = block_len;
1551	pass_dev->cpi_maxio = ccb->cpi.maxio;
1552	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1553		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1554	dev->sector_size = block_len;
1555	dev->max_sector = maxsector;
1556
1557
1558	/*
1559	 * Determine the optimal blocksize to use for this device.
1560	 */
1561
1562	/*
1563	 * If the controller has not specified a maximum I/O size,
1564	 * just go with 128K as a somewhat conservative value.
1565	 */
1566	if (pass_dev->cpi_maxio == 0)
1567		cpi_maxio = 131072;
1568	else
1569		cpi_maxio = pass_dev->cpi_maxio;
1570
1571	/*
1572	 * If the controller has a large maximum I/O size, limit it
1573	 * to something smaller so that the kernel doesn't have trouble
1574	 * allocating buffers to copy data in and out for us.
1575	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1576	 */
1577	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1578
1579	/*
1580	 * If we weren't able to get a block size for some reason,
1581	 * default to 512 bytes.
1582	 */
1583	block_len = pass_dev->block_len;
1584	if (block_len == 0)
1585		block_len = 512;
1586
1587	/*
1588	 * Figure out how many blocksize chunks will fit in the
1589	 * maximum I/O size.
1590	 */
1591	pass_numblocks = max_iosize / block_len;
1592
1593	/*
	 * And finally, multiply the number of blocks by the LBA
	 * length to get our maximum block size.
1596	 */
1597	dev->blocksize = pass_numblocks * block_len;
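	/*
	 * For example (illustrative numbers): with no controller-reported
	 * maximum I/O size (cpi_maxio == 0) and a 512-byte block length,
	 * this works out to 131072 / 512 = 256 blocks, i.e. a default
	 * blocksize of 128k.
	 */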
1598
1599	if (io_opts->blocksize != 0) {
1600		if ((io_opts->blocksize % dev->sector_size) != 0) {
1601			warnx("Blocksize %ju for %s is not a multiple of "
1602			      "sector size %u", (uintmax_t)io_opts->blocksize,
1603			      dev->device_name, dev->sector_size);
1604			goto bailout_error;
1605		}
1606		dev->blocksize = io_opts->blocksize;
1607	}
1608	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1609	if (io_opts->queue_depth != 0)
1610		dev->target_queue_depth = io_opts->queue_depth;
1611
1612	if (io_opts->offset != 0) {
1613		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1614			warnx("Offset %ju is past the end of device %s",
1615			    io_opts->offset, dev->device_name);
1616			goto bailout_error;
1617		}
1618#if 0
1619		else if ((io_opts->offset % dev->sector_size) != 0) {
1620			warnx("Offset %ju for %s is not a multiple of the "
1621			      "sector size %u", io_opts->offset,
1622			      dev->device_name, dev->sector_size);
1623			goto bailout_error;
1624		}
1625		dev->start_offset_bytes = io_opts->offset;
1626#endif
1627	}
1628
1629	dev->min_cmd_size = io_opts->min_cmd_size;
1630
1631	dev->run = camdd_pass_run;
1632	dev->fetch = camdd_pass_fetch;
1633
1634bailout:
1635	cam_freeccb(ccb);
1636
1637	return (dev);
1638
1639bailout_error:
1640	cam_freeccb(ccb);
1641
1642	camdd_free_dev(dev);
1643
1644	return (NULL);
1645}
1646
1647void
1648nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
1649		void (*cbfcnp)(struct cam_periph *, union ccb *),
1650		uint32_t nsid, int readop, uint64_t lba,
1651		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
1652		uint32_t timeout)
1653{
1654	struct nvme_command *nc = &nvmeio->cmd;
1655
1656	nc->opc = readop ? NVME_OPC_READ : NVME_OPC_WRITE;
1657
1658	nc->nsid = nsid;
1659
1660	nc->cdw10 = lba & UINT32_MAX;
1661	nc->cdw11 = lba >> 32;
1662
1663	/* NLB (bits 15:0) is a zero based value */
1664	nc->cdw12 = (block_count - 1) & UINT16_MAX;
1665
1666	cam_fill_nvmeio(nvmeio,
1667			retries,
1668			cbfcnp,
1669			readop ? CAM_DIR_IN : CAM_DIR_OUT,
1670			data_ptr,
1671			dxfer_len,
1672			timeout);
1673}
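
/*
 * For example (illustrative values): for lba = 0x0000000123456789 the command
 * above carries cdw10 = 0x23456789 and cdw11 = 0x00000001, and a block_count
 * of 8 is encoded as cdw12 = 7 (NLB is zero-based).
 */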
1674
1675void *
1676camdd_worker(void *arg)
1677{
1678	struct camdd_dev *dev = arg;
1679	struct camdd_buf *buf;
1680	struct timespec ts, *kq_ts;
1681
1682	ts.tv_sec = 0;
1683	ts.tv_nsec = 0;
1684
1685	pthread_mutex_lock(&dev->mutex);
1686
1687	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1688
1689	for (;;) {
1690		struct kevent ke;
1691		int retval = 0;
1692
1693		/*
1694		 * XXX KDM check the reorder queue depth?
1695		 */
1696		if (dev->write_dev == 0) {
1697			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1698			uint32_t target_depth = dev->target_queue_depth;
1699			uint32_t peer_target_depth =
1700			    dev->peer_dev->target_queue_depth;
1701			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1702
1703			camdd_get_depth(dev, &our_depth, &peer_depth,
1704					&our_bytes, &peer_bytes);
1705
1706#if 0
1707			while (((our_depth < target_depth)
1708			     && (peer_depth < peer_target_depth))
1709			    || ((peer_bytes + our_bytes) <
1710				 (peer_blocksize * 2))) {
1711#endif
1712			while (((our_depth + peer_depth) <
1713			        (target_depth + peer_target_depth))
1714			    || ((peer_bytes + our_bytes) <
1715				(peer_blocksize * 3))) {
1716
1717				retval = camdd_queue(dev, NULL);
1718				if (retval == 1)
1719					break;
1720				else if (retval != 0) {
1721					error_exit = 1;
1722					goto bailout;
1723				}
1724
1725				camdd_get_depth(dev, &our_depth, &peer_depth,
1726						&our_bytes, &peer_bytes);
1727			}
1728		}
1729		/*
1730		 * See if we have any I/O that is ready to execute.
1731		 */
1732		buf = STAILQ_FIRST(&dev->run_queue);
1733		if (buf != NULL) {
1734			while (dev->target_queue_depth > dev->cur_active_io) {
1735				retval = dev->run(dev);
1736				if (retval == -1) {
1737					dev->flags |= CAMDD_DEV_FLAG_EOF;
1738					error_exit = 1;
1739					break;
1740				} else if (retval != 0) {
1741					break;
1742				}
1743			}
1744		}
1745
1746		/*
1747		 * We've reached EOF, or our partner has reached EOF.
1748		 */
1749		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1750		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1751			if (dev->write_dev != 0) {
1752			 	if ((STAILQ_EMPTY(&dev->work_queue))
1753				 && (dev->num_run_queue == 0)
1754				 && (dev->cur_active_io == 0)) {
1755					goto bailout;
1756				}
1757			} else {
1758				/*
1759				 * If we're the reader, and the writer
1760				 * got EOF, he is already done.  If we got
1761				 * the EOF, then we need to wait until
1762				 * everything is flushed out for the writer.
1763				 */
1764				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1765					goto bailout;
1766				} else if ((dev->num_peer_work_queue == 0)
1767					&& (dev->num_peer_done_queue == 0)
1768					&& (dev->cur_active_io == 0)
1769					&& (dev->num_run_queue == 0)) {
1770					goto bailout;
1771				}
1772			}
1773			/*
1774			 * XXX KDM need to do something about the pending
1775			 * queue and cleanup resources.
1776			 */
1777		}
1778
1779		if ((dev->write_dev == 0)
1780		 && (dev->cur_active_io == 0)
1781		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1782			kq_ts = &ts;
1783		else
1784			kq_ts = NULL;
1785
1786		/*
1787		 * Run kevent to see if there are events to process.
1788		 */
1789		pthread_mutex_unlock(&dev->mutex);
1790		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1791		pthread_mutex_lock(&dev->mutex);
1792		if (retval == -1) {
1793			warn("%s: error returned from kevent",__func__);
1794			goto bailout;
1795		} else if (retval != 0) {
1796			switch (ke.filter) {
1797			case EVFILT_READ:
1798				if (dev->fetch != NULL) {
1799					retval = dev->fetch(dev);
1800					if (retval == -1) {
1801						error_exit = 1;
1802						goto bailout;
1803					}
1804				}
1805				break;
1806			case EVFILT_SIGNAL:
1807				/*
1808				 * We register for this so we don't get
1809				 * an error as a result of a SIGINFO or a
1810				 * SIGINT.  It will actually get handled
1811				 * by the signal handler.  If we get a
1812				 * SIGINT, bail out without printing an
1813				 * error message.  Any other signals
1814				 * will result in the error message above.
1815				 */
1816				if (ke.ident == SIGINT)
1817					goto bailout;
1818				break;
1819			case EVFILT_USER:
1820				retval = 0;
1821				/*
1822				 * Check to see if the other thread has
1823				 * queued any I/O for us to do.  (In this
1824				 * case we're the writer.)
1825				 */
1826				for (buf = STAILQ_FIRST(&dev->work_queue);
1827				     buf != NULL;
1828				     buf = STAILQ_FIRST(&dev->work_queue)) {
1829					STAILQ_REMOVE_HEAD(&dev->work_queue,
1830							   work_links);
1831					retval = camdd_queue(dev, buf);
1832					/*
1833					 * We keep going unless we get an
1834					 * actual error.  If we get EOF, we
1835					 * still want to remove the buffers
					 * from the queue and send them back
1837					 * to the reader thread.
1838					 */
1839					if (retval == -1) {
1840						error_exit = 1;
1841						goto bailout;
1842					} else
1843						retval = 0;
1844				}
1845
1846				/*
1847				 * Next check to see if the other thread has
1848				 * queued any completed buffers back to us.
1849				 * (In this case we're the reader.)
1850				 */
1851				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1852				     buf != NULL;
1853				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1854					STAILQ_REMOVE_HEAD(
1855					    &dev->peer_done_queue, work_links);
1856					dev->num_peer_done_queue--;
1857					camdd_peer_done(buf);
1858				}
1859				break;
1860			default:
1861				warnx("%s: unknown kevent filter %d",
1862				      __func__, ke.filter);
1863				break;
1864			}
1865		}
1866	}
1867
1868bailout:
1869
1870	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1871
1872	/* XXX KDM cleanup resources here? */
1873
1874	pthread_mutex_unlock(&dev->mutex);
1875
1876	need_exit = 1;
1877	sem_post(&camdd_sem);
1878
1879	return (NULL);
1880}
1881
1882/*
1883 * Simplistic translation of CCB status to our local status.
1884 */
1885camdd_buf_status
1886camdd_ccb_status(union ccb *ccb, int protocol)
1887{
1888	camdd_buf_status status = CAMDD_STATUS_NONE;
1889	cam_status ccb_status;
1890
1891	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1892
1893	switch (protocol) {
1894	case PROTO_SCSI:
1895		switch (ccb_status) {
1896		case CAM_REQ_CMP: {
1897			if (ccb->csio.resid == 0) {
1898				status = CAMDD_STATUS_OK;
1899			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1900				status = CAMDD_STATUS_SHORT_IO;
1901			} else {
1902				status = CAMDD_STATUS_EOF;
1903			}
1904			break;
1905		}
1906		case CAM_SCSI_STATUS_ERROR: {
1907			switch (ccb->csio.scsi_status) {
1908			case SCSI_STATUS_OK:
1909			case SCSI_STATUS_COND_MET:
1910			case SCSI_STATUS_INTERMED:
1911			case SCSI_STATUS_INTERMED_COND_MET:
1912				status = CAMDD_STATUS_OK;
1913				break;
1914			case SCSI_STATUS_CMD_TERMINATED:
1915			case SCSI_STATUS_CHECK_COND:
1916			case SCSI_STATUS_QUEUE_FULL:
1917			case SCSI_STATUS_BUSY:
1918			case SCSI_STATUS_RESERV_CONFLICT:
1919			default:
1920				status = CAMDD_STATUS_ERROR;
1921				break;
1922			}
1923			break;
1924		}
1925		default:
1926			status = CAMDD_STATUS_ERROR;
1927			break;
1928		}
1929		break;
1930	case PROTO_NVME:
1931		switch (ccb_status) {
1932		case CAM_REQ_CMP:
1933			status = CAMDD_STATUS_OK;
1934			break;
1935		default:
1936			status = CAMDD_STATUS_ERROR;
1937			break;
1938		}
1939		break;
1940	default:
1941		status = CAMDD_STATUS_ERROR;
1942		break;
1943	}
1944
1945	return (status);
1946}
1947
1948/*
1949 * Queue a buffer to our peer's work thread for writing.
1950 *
1951 * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1952 */
1953int
1954camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1955{
1956	struct kevent ke;
1957	STAILQ_HEAD(, camdd_buf) local_queue;
1958	struct camdd_buf *buf1, *buf2;
1959	struct camdd_buf_data *data = NULL;
1960	uint64_t peer_bytes_queued = 0;
1961	int active = 1;
1962	int retval = 0;
1963
1964	STAILQ_INIT(&local_queue);
1965
1966	/*
1967	 * Since we're the reader, we need to queue our I/O to the writer
1968	 * in sequential order in order to make sure it gets written out
1969	 * in sequential order.
1970	 *
1971	 * Check the next expected I/O starting offset.  If this doesn't
1972	 * match, put it on the reorder queue.
1973	 */
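	/*
	 * For example (illustrative numbers): with a 512-byte sector size
	 * and next_completion_pos_bytes at 8192, a completed buffer with
	 * lba 32 (byte offset 16384) is parked on the reorder queue, while
	 * one with lba 16 (byte offset 8192) is passed through to the writer
	 * and advances next_completion_pos_bytes by its length.
	 */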
1974	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1975
1976		/*
1977		 * If there is nothing on the queue, there is no sorting
1978		 * needed.
1979		 */
1980		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1981			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1982			dev->num_reorder_queue++;
1983			goto bailout;
1984		}
1985
1986		/*
1987		 * Sort in ascending order by starting LBA.  There should
1988		 * be no identical LBAs.
1989		 */
1990		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1991		     buf1 = buf2) {
1992			buf2 = STAILQ_NEXT(buf1, links);
1993			if (buf->lba < buf1->lba) {
1994				/*
1995				 * If we're less than the first one, then
1996				 * we insert at the head of the list
1997				 * because this has to be the first element
1998				 * on the list.
1999				 */
2000				STAILQ_INSERT_HEAD(&dev->reorder_queue,
2001						   buf, links);
2002				dev->num_reorder_queue++;
2003				break;
2004			} else if (buf->lba > buf1->lba) {
2005				if (buf2 == NULL) {
2006					STAILQ_INSERT_TAIL(&dev->reorder_queue,
2007					    buf, links);
2008					dev->num_reorder_queue++;
2009					break;
2010				} else if (buf->lba < buf2->lba) {
2011					STAILQ_INSERT_AFTER(&dev->reorder_queue,
2012					    buf1, buf, links);
2013					dev->num_reorder_queue++;
2014					break;
2015				}
2016			} else {
2017				errx(1, "Found buffers with duplicate LBA %ju!",
2018				     (uintmax_t)buf->lba);
2019			}
2020		}
2021		goto bailout;
2022	} else {
2023
2024		/*
2025		 * We're the next expected I/O completion, so put ourselves
2026		 * on the local queue to be sent to the writer.  We use
2027		 * work_links here so that we can queue this to the
2028		 * peer_work_queue before taking the buffer off of the
2029		 * local_queue.
2030		 */
2031		dev->next_completion_pos_bytes += buf->len;
2032		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
2033
2034		/*
2035		 * Go through the reorder queue looking for more sequential
2036		 * I/O and add it to the local queue.
2037		 */
2038		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2039		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
2040			/*
2041			 * As soon as we see an I/O that is out of sequence,
2042			 * we're done.
2043			 */
2044			if ((buf1->lba * dev->sector_size) !=
2045			     dev->next_completion_pos_bytes)
2046				break;
2047
2048			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
2049			dev->num_reorder_queue--;
2050			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
2051			dev->next_completion_pos_bytes += buf1->len;
2052		}
2053	}
2054
2055	/*
2056	 * Set up the event to let the other thread know that it has work
2057	 * pending.
2058	 */
2059	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
2060	       NOTE_TRIGGER, 0, NULL);
2061
2062	/*
2063	 * Put this on our shadow queue so that we know what we've queued
2064	 * to the other thread.
2065	 */
2066	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
2067		if (buf1->buf_type != CAMDD_BUF_DATA) {
2068			errx(1, "%s: should have a data buffer, not an "
2069			    "indirect buffer", __func__);
2070		}
2071		data = &buf1->buf_type_spec.data;
2072
2073		/*
2074		 * We only need to send one EOF to the writer, and don't
2075		 * need to continue sending EOFs after that.
2076		 */
2077		if (buf1->status == CAMDD_STATUS_EOF) {
2078			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
2079				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
2080				    work_links);
2081				camdd_release_buf(buf1);
2082				retval = 1;
2083				continue;
2084			}
2085			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2086		}
2087
2088
2089		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2090		peer_bytes_queued += (data->fill_len - data->resid);
2091		dev->peer_bytes_queued += (data->fill_len - data->resid);
2092		dev->num_peer_work_queue++;
2093	}
2094
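	/*
	 * If the only buffer we had was a duplicate EOF that we released
	 * above, the local queue is now empty and there is nothing to hand
	 * to the writer.
	 */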
2095	if (STAILQ_FIRST(&local_queue) == NULL)
2096		goto bailout;
2097
2098	/*
2099	 * Drop our mutex before picking up the other thread's mutex, to
2100	 * avoid lock order deadlocks.
2101	 */
2102	pthread_mutex_unlock(&dev->mutex);
2103	pthread_mutex_lock(&dev->peer_dev->mutex);
2104
2105	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2106		/*
2107		 * Put the buffers on the other thread's incoming work queue.
2108		 */
2109		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2110		     buf1 = STAILQ_FIRST(&local_queue)) {
2111			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2112			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2113					   work_links);
2114		}
2115		/*
2116		 * Send an event to the other thread's kqueue to let it know
2117		 * that there is something on the work queue.
2118		 */
2119		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2120		if (retval == -1)
2121			warn("%s: unable to add peer work_queue kevent",
2122			     __func__);
2123		else
2124			retval = 0;
2125	} else
2126		active = 0;
2127
2128	pthread_mutex_unlock(&dev->peer_dev->mutex);
2129	pthread_mutex_lock(&dev->mutex);
2130
2131	/*
2132	 * If the other side isn't active, run through the queue and
2133	 * release all of the buffers.
2134	 */
2135	if (active == 0) {
2136		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2137		     buf1 = STAILQ_FIRST(&local_queue)) {
2138			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2139			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2140				      links);
2141			dev->num_peer_work_queue--;
2142			camdd_release_buf(buf1);
2143		}
2144		dev->peer_bytes_queued -= peer_bytes_queued;
2145		retval = 1;
2146	}
2147
2148bailout:
2149	return (retval);
2150}
2151
2152/*
2153 * Return a buffer to the reader thread when we have completed writing it.
2154 */
2155int
2156camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2157{
2158	struct kevent ke;
2159	int retval = 0;
2160
2161	/*
2162	 * Set up the event to let the other thread know that we have
2163	 * completed a buffer.
2164	 */
2165	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2166	       NOTE_TRIGGER, 0, NULL);
2167
2168	/*
2169	 * Drop our lock and acquire the other thread's lock before
2170	 * manipulating its queues.
2171	 */
2172	pthread_mutex_unlock(&dev->mutex);
2173	pthread_mutex_lock(&dev->peer_dev->mutex);
2174
2175	/*
2176	 * Put the buffer on the reader thread's peer done queue now that
2177	 * we have completed it.
2178	 */
2179	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2180			   work_links);
2181	dev->peer_dev->num_peer_done_queue++;
2182
2183	/*
2184	 * Send an event to the peer thread to let it know that we've added
2185	 * something to its peer done queue.
2186	 */
2187	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2188	if (retval == -1)
2189		warn("%s: unable to add peer_done_queue kevent", __func__);
2190	else
2191		retval = 0;
2192
2193	/*
2194	 * Drop the other thread's lock and reacquire ours.
2195	 */
2196	pthread_mutex_unlock(&dev->peer_dev->mutex);
2197	pthread_mutex_lock(&dev->mutex);
2198
2199	return (retval);
2200}
2201
2202/*
2203 * Free a buffer that was written out by the writer thread and returned to
2204 * the reader thread.
2205 */
2206void
2207camdd_peer_done(struct camdd_buf *buf)
2208{
2209	struct camdd_dev *dev;
2210	struct camdd_buf_data *data;
2211
2212	dev = buf->dev;
2213	if (buf->buf_type != CAMDD_BUF_DATA) {
2214		errx(1, "%s: should have a data buffer, not an "
2215		    "indirect buffer", __func__);
2216	}
2217
2218	data = &buf->buf_type_spec.data;
2219
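	/*
	 * Undo the accounting that camdd_queue_peer_buf() did when this
	 * buffer was handed to the writer.
	 */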
2220	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2221	dev->num_peer_work_queue--;
2222	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2223
2224	if (buf->status == CAMDD_STATUS_EOF)
2225		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2226
2227	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2228}
2229
2230/*
2231 * Assumes caller holds the lock for this device.
2232 */
2233void
2234camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2235		   int *error_count)
2236{
2237	int retval = 0;
2238
2239	/*
2240	 * If we're the reader, we need to send the completed I/O
2241	 * to the writer.  If we're the writer, we need to just
2242	 * free up resources, or let the reader know if we've
2243	 * encountered an error.
2244	 */
2245	if (dev->write_dev == 0) {
2246		retval = camdd_queue_peer_buf(dev, buf);
2247		if (retval != 0)
2248			(*error_count)++;
2249	} else {
2250		struct camdd_buf *tmp_buf, *next_buf;
2251
2252		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2253				    next_buf) {
2254			struct camdd_buf *src_buf;
2255			struct camdd_buf_indirect *indirect;
2256
2257			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2258				      camdd_buf, src_links);
2259
2260			tmp_buf->status = buf->status;
2261
2262			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2263				camdd_complete_peer_buf(dev, tmp_buf);
2264				continue;
2265			}
2266
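			/*
			 * This is an indirect buffer pointing into a slice of
			 * the reader's data buffer.  Drop the reference; the
			 * source buffer only goes back to the reader once all
			 * slices referencing it have completed.
			 */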
2267			indirect = &tmp_buf->buf_type_spec.indirect;
2268			src_buf = indirect->src_buf;
2269			src_buf->refcount--;
2270			/*
2271			 * XXX KDM we probably need to account for
2272			 * exactly how many bytes we were able to
2273			 * write.  Allocate the residual to the
2274			 * first N buffers?  Or just track the
2275			 * number of bytes written?  Right now the reader
2276			 * doesn't do anything with a residual.
2277			 */
2278			src_buf->status = buf->status;
2279			if (src_buf->refcount <= 0)
2280				camdd_complete_peer_buf(dev, src_buf);
2281			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2282					   tmp_buf, links);
2283		}
2284
2285		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2286	}
2287}
2288
2289/*
2290 * Fetch all completed commands from the pass(4) device.
2291 *
2292 * Returns the number of commands received, or -1 if any of the commands
2293 * completed with an error.  Returns 0 if no commands are available.
2294 */
2295int
2296camdd_pass_fetch(struct camdd_dev *dev)
2297{
2298	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2299	union ccb ccb;
2300	int retval = 0, num_fetched = 0, error_count = 0;
2301
2302	pthread_mutex_unlock(&dev->mutex);
2303	/*
2304	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2305	 */
2306	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2307		struct camdd_buf *buf;
2308		struct camdd_buf_data *data;
2309		cam_status ccb_status;
2310		union ccb *buf_ccb;
2311
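		/*
		 * Recover the camdd_buf pointer that camdd_pass_run() stashed
		 * in the CCB before queueing it to the kernel.
		 */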
2312		buf = ccb.ccb_h.ccb_buf;
2313		data = &buf->buf_type_spec.data;
2314		buf_ccb = &data->ccb;
2315
2316		num_fetched++;
2317
2318		/*
2319		 * Copy the CCB back out so we get status, sense data, etc.
2320		 */
2321		bcopy(&ccb, buf_ccb, sizeof(ccb));
2322
2323		pthread_mutex_lock(&dev->mutex);
2324
2325		/*
2326		 * We're now done, so take this off the active queue.
2327		 */
2328		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2329		dev->cur_active_io--;
2330
2331		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2332		if (ccb_status != CAM_REQ_CMP) {
2333			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2334					CAM_EPF_ALL, stderr);
2335		}
2336
2337		switch (pass_dev->protocol) {
2338		case PROTO_SCSI:
2339			data->resid = ccb.csio.resid;
2340			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2341			break;
2342		case PROTO_NVME:
2343			data->resid = 0;
2344			dev->bytes_transferred += ccb.nvmeio.dxfer_len;
2345			break;
2346		default:
2347			return (-1);
2348			break;
2349		}
2350
2351		if (buf->status == CAMDD_STATUS_NONE)
2352			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2353		if (buf->status == CAMDD_STATUS_ERROR)
2354			error_count++;
2355		else if (buf->status == CAMDD_STATUS_EOF) {
2356			/*
2357			 * Once we queue this buffer to our partner thread,
2358			 * it will know that we've hit EOF.
2359			 */
2360			dev->flags |= CAMDD_DEV_FLAG_EOF;
2361		}
2362
2363		camdd_complete_buf(dev, buf, &error_count);
2364
2365		/*
2366		 * Unlock in preparation for the ioctl call.
2367		 */
2368		pthread_mutex_unlock(&dev->mutex);
2369	}
2370
2371	pthread_mutex_lock(&dev->mutex);
2372
2373	if (error_count > 0)
2374		return (-1);
2375	else
2376		return (num_fetched);
2377}
2378
2379/*
2380 * Returns -1 for error, 0 for success/continue, and 1 for resource
2381 * shortage/stop processing.
2382 */
2383int
2384camdd_file_run(struct camdd_dev *dev)
2385{
2386	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2387	struct camdd_buf_data *data;
2388	struct camdd_buf *buf;
2389	off_t io_offset;
2390	int retval = 0, write_dev = dev->write_dev;
2391	int error_count = 0, no_resources = 0, double_buf_needed = 0;
2392	uint32_t num_sectors = 0, db_len = 0;
2393
2394	buf = STAILQ_FIRST(&dev->run_queue);
2395	if (buf == NULL) {
2396		no_resources = 1;
2397		goto bailout;
2398	} else if ((dev->write_dev == 0)
2399		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
2400				  CAMDD_DEV_FLAG_EOF_SENT))) {
2401		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2402		dev->num_run_queue--;
2403		buf->status = CAMDD_STATUS_EOF;
2404		error_count++;
2405		goto bailout;
2406	}
2407
2408	/*
2409	 * If we're writing, we need to go through the source buffer list
2410	 * and create an S/G list.
2411	 */
2412	if (write_dev != 0) {
2413		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2414		    dev->sector_size, &num_sectors, &double_buf_needed);
2415		if (retval != 0) {
2416			no_resources = 1;
2417			goto bailout;
2418		}
2419	}
2420
2421	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2422	dev->num_run_queue--;
2423
2424	data = &buf->buf_type_spec.data;
2425
2426	/*
2427	 * pread(2) and pwrite(2) offsets are byte offsets.
2428	 */
2429	io_offset = buf->lba * dev->sector_size;
2430
2431	/*
2432	 * Unlock the mutex while we read or write.
2433	 */
2434	pthread_mutex_unlock(&dev->mutex);
2435
2436	/*
2437	 * Note that we don't need to double buffer if we're the reader
2438	 * because in that case, we have allocated a single buffer of
2439	 * sufficient size to do the read.  This copy is necessary on
2440	 * writes because if one of the components of the S/G list is not
2441	 * a sector size multiple, the kernel will reject the write.  This
2442	 * is unfortunate but not surprising.  So this will make sure that
2443	 * we're using a single buffer that is a multiple of the sector size.
2444	 */
2445	if ((double_buf_needed != 0)
2446	 && (data->sg_count > 1)
2447	 && (write_dev != 0)) {
2448		uint32_t cur_offset;
2449		int i;
2450
2451		if (file_dev->tmp_buf == NULL)
2452			file_dev->tmp_buf = calloc(dev->blocksize, 1);
2453		if (file_dev->tmp_buf == NULL) {
2454			buf->status = CAMDD_STATUS_ERROR;
2455			error_count++;
2456			pthread_mutex_lock(&dev->mutex);
2457			goto bailout;
2458		}
2459		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2460			bcopy(data->iovec[i].iov_base,
2461			    &file_dev->tmp_buf[cur_offset],
2462			    data->iovec[i].iov_len);
2463			cur_offset += data->iovec[i].iov_len;
2464		}
2465		db_len = cur_offset;
2466	}
2467
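	/*
	 * Seekable files and devices get pread(2)/pwrite(2) at the byte
	 * offset computed above; non-seekable devices (e.g. tapes, pipes,
	 * stdin/stdout) get plain read(2)/write(2) at the current position.
	 */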
2468	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2469		if (write_dev == 0) {
2470			/*
2471			 * XXX KDM is there any way we would need a S/G
2472			 * list here?
2473			 */
2474			retval = pread(file_dev->fd, data->buf,
2475			    buf->len, io_offset);
2476		} else {
2477			if (double_buf_needed != 0) {
2478				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2479				    db_len, io_offset);
2480			} else if (data->sg_count == 0) {
2481				retval = pwrite(file_dev->fd, data->buf,
2482				    data->fill_len, io_offset);
2483			} else {
2484				retval = pwritev(file_dev->fd, data->iovec,
2485				    data->sg_count, io_offset);
2486			}
2487		}
2488	} else {
2489		if (write_dev == 0) {
2490			/*
2491			 * XXX KDM is there any way we would need a S/G
2492			 * list here?
2493			 */
2494			retval = read(file_dev->fd, data->buf, buf->len);
2495		} else {
2496			if (double_buf_needed != 0) {
2497				retval = write(file_dev->fd, file_dev->tmp_buf,
2498				    db_len);
2499			} else if (data->sg_count == 0) {
2500				retval = write(file_dev->fd, data->buf,
2501				    data->fill_len);
2502			} else {
2503				retval = writev(file_dev->fd, data->iovec,
2504				    data->sg_count);
2505			}
2506		}
2507	}
2508
2509	/* We're done, re-acquire the lock */
2510	pthread_mutex_lock(&dev->mutex);
2511
2512	if (retval >= (ssize_t)data->fill_len) {
2513		/*
2514		 * If the number of bytes transferred is more than the request
2515		 * size, that indicates an overrun, which should only happen
2516		 * at the end of a transfer if we have to round up to a sector
2517		 * boundary.
2518		 */
2519		if (buf->status == CAMDD_STATUS_NONE)
2520			buf->status = CAMDD_STATUS_OK;
2521		data->resid = 0;
2522		dev->bytes_transferred += retval;
2523	} else if (retval == -1) {
2524		warn("Error %s %s", (write_dev) ? "writing to" :
2525		    "reading from", file_dev->filename);
2526
2527		buf->status = CAMDD_STATUS_ERROR;
2528		data->resid = data->fill_len;
2529		error_count++;
2530
2531		if (dev->debug == 0)
2532			goto bailout;
2533
2534		if ((double_buf_needed != 0)
2535		 && (write_dev != 0)) {
2536			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2537			    "offset %ju\n", __func__, file_dev->fd,
2538			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2539			    (uintmax_t)io_offset);
2540		} else if (data->sg_count == 0) {
2541			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2542			    "offset %ju\n", __func__, file_dev->fd, data->buf,
2543			    data->fill_len, (uintmax_t)buf->lba,
2544			    (uintmax_t)io_offset);
2545		} else {
2546			int i;
2547
2548			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2549			    "offset %ju\n", __func__, file_dev->fd,
2550			    data->fill_len, (uintmax_t)buf->lba,
2551			    (uintmax_t)io_offset);
2552
2553			for (i = 0; i < data->sg_count; i++) {
2554				fprintf(stderr, "index %d ptr %p len %zu\n",
2555				    i, data->iovec[i].iov_base,
2556				    data->iovec[i].iov_len);
2557			}
2558		}
2559	} else if (retval == 0) {
2560		buf->status = CAMDD_STATUS_EOF;
2561		if (dev->debug != 0)
2562			printf("%s: got EOF from %s!\n", __func__,
2563			    file_dev->filename);
2564		data->resid = data->fill_len;
2565		error_count++;
2566	} else if (retval < (ssize_t)data->fill_len) {
2567		if (buf->status == CAMDD_STATUS_NONE)
2568			buf->status = CAMDD_STATUS_SHORT_IO;
2569		data->resid = data->fill_len - retval;
2570		dev->bytes_transferred += retval;
2571	}
2572
2573bailout:
2574	if (buf != NULL) {
2575		if (buf->status == CAMDD_STATUS_EOF) {
2576			struct camdd_buf *buf2;
2577			dev->flags |= CAMDD_DEV_FLAG_EOF;
2578			STAILQ_FOREACH(buf2, &dev->run_queue, links)
2579				buf2->status = CAMDD_STATUS_EOF;
2580		}
2581
2582		camdd_complete_buf(dev, buf, &error_count);
2583	}
2584
2585	if (error_count != 0)
2586		return (-1);
2587	else if (no_resources != 0)
2588		return (1);
2589	else
2590		return (0);
2591}
2592
2593/*
2594 * Execute one command from the run queue.  Returns 0 for success, 1 for
2595 * stop processing, and -1 for error.
2596 */
2597int
2598camdd_pass_run(struct camdd_dev *dev)
2599{
2600	struct camdd_buf *buf = NULL;
2601	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2602	struct camdd_buf_data *data;
2603	uint32_t num_blocks, sectors_used = 0;
2604	union ccb *ccb;
2605	int retval = 0, is_write = dev->write_dev;
2606	int double_buf_needed = 0;
2607
2608	buf = STAILQ_FIRST(&dev->run_queue);
2609	if (buf == NULL) {
2610		retval = 1;
2611		goto bailout;
2612	}
2613
2614	/*
2615	 * If we're writing, we need to go through the source buffer list
2616	 * and create an S/G list.
2617	 */
2618	if (is_write != 0) {
2619		retval = camdd_buf_sg_create(buf, /*iovec*/ 0, dev->sector_size,
2620		    &sectors_used, &double_buf_needed);
2621		if (retval != 0) {
2622			retval = -1;
2623			goto bailout;
2624		}
2625	}
2626
2627	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2628	dev->num_run_queue--;
2629
2630	data = &buf->buf_type_spec.data;
2631
2632	/*
2633	 * In almost every case the transfer length should be our configured
2634	 * blocksize.  The exception may be at the end of an I/O stream
2635	 * for a partial block, or at the end of a device.
2636	 */
2637	if (is_write != 0)
2638		num_blocks = sectors_used;
2639	else
2640		num_blocks = data->fill_len / pass_dev->block_len;
2641
2642	ccb = &data->ccb;
2643
2644	switch (pass_dev->protocol) {
2645	case PROTO_SCSI:
2646		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2647
2648		scsi_read_write(&ccb->csio,
2649				/*retries*/ dev->retry_count,
2650				/*cbfcnp*/ NULL,
2651				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2652				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2653					   SCSI_RW_WRITE,
2654				/*byte2*/ 0,
2655				/*minimum_cmd_size*/ dev->min_cmd_size,
2656				/*lba*/ buf->lba,
2657				/*block_count*/ num_blocks,
2658				/*data_ptr*/ (data->sg_count != 0) ?
2659					     (uint8_t *)data->segs : data->buf,
2660				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2661				/*sense_len*/ SSD_FULL_SIZE,
2662				/*timeout*/ dev->io_timeout);
2663
2664		if (data->sg_count != 0) {
2665			ccb->csio.sglist_cnt = data->sg_count;
2666		}
2667		break;
2668	case PROTO_NVME:
2669		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
2670
2671		nvme_read_write(&ccb->nvmeio,
2672				/*retries*/ dev->retry_count,
2673				/*cbfcnp*/ NULL,
2674				/*nsid*/ pass_dev->dev->target_lun & UINT32_MAX,
2675				/*readop*/ dev->write_dev == 0,
2676				/*lba*/ buf->lba,
2677				/*block_count*/ num_blocks,
2678				/*data_ptr*/ (data->sg_count != 0) ?
2679					     (uint8_t *)data->segs : data->buf,
2680				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2681				/*timeout*/ dev->io_timeout);
2682
2683		ccb->nvmeio.sglist_cnt = data->sg_count;
2684		break;
2685	default:
2686		retval = -1;
2687		goto bailout;
2688	}
2689
2690	/* Disable freezing the device queue */
2691	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2692
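	/*
	 * If the user asked for retries, let the in-kernel pass(4) error
	 * recovery code handle them for this CCB.
	 */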
2693	if (dev->retry_count != 0)
2694		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2695
2696	if (data->sg_count != 0) {
2697		ccb->ccb_h.flags |= CAM_DATA_SG;
2698	}
2699
2700	/*
2701	 * Store a pointer to the buffer in the CCB.  The kernel will
2702	 * restore this when we get it back, and we'll use it to identify
2703	 * the buffer this CCB came from.
2704	 */
2705	ccb->ccb_h.ccb_buf = buf;
2706
2707	/*
2708	 * Unlock our mutex in preparation for issuing the ioctl.
2709	 */
2710	pthread_mutex_unlock(&dev->mutex);
2711	/*
2712	 * Queue the CCB to the pass(4) driver.
2713	 */
2714	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2715		pthread_mutex_lock(&dev->mutex);
2716
2717		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2718		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2719		warn("%s: CCB address is %p", __func__, ccb);
2720		retval = -1;
2721
2722		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2723	} else {
2724		pthread_mutex_lock(&dev->mutex);
2725
2726		dev->cur_active_io++;
2727		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2728	}
2729
2730bailout:
2731	return (retval);
2732}
2733
2734int
2735camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2736{
2737	uint32_t num_blocks;
2738	int retval = 0;
2739
2740	*lba = dev->next_io_pos_bytes / dev->sector_size;
2741	*len = dev->blocksize;
2742	num_blocks = *len / dev->sector_size;
2743
2744	/*
2745	 * If max_sector is 0, then we have no set limit.  This can happen
2746	 * if we're writing to a file in a filesystem, or reading from
2747	 * something like /dev/zero.
2748	 */
2749	if ((dev->max_sector != 0)
2750	 || (dev->sector_io_limit != 0)) {
2751		uint64_t max_sector;
2752
2753		if ((dev->max_sector != 0)
2754		 && (dev->sector_io_limit != 0))
2755			max_sector = min(dev->sector_io_limit, dev->max_sector);
2756		else if (dev->max_sector != 0)
2757			max_sector = dev->max_sector;
2758		else
2759			max_sector = dev->sector_io_limit;
2760
2761
2762		/*
2763		 * Check to see whether we're starting off past the end of
2764		 * the device.  If so, we need to just send an EOF
2765		 * notification to the writer.
2766		 */
2767		if (*lba > max_sector) {
2768			*len = 0;
2769			retval = 1;
2770		} else if (((*lba + num_blocks) > max_sector + 1)
2771			|| ((*lba + num_blocks) < *lba)) {
2772			/*
2773			 * If we get here (but pass the first check), we
2774			 * can trim the request length down to go to the
2775			 * end of the device.
2776			 */
2777			num_blocks = (max_sector + 1) - *lba;
2778			*len = num_blocks * dev->sector_size;
2779			retval = 1;
2780		}
2781	}
2782
2783	dev->next_io_pos_bytes += *len;
2784
2785	return (retval);
2786}
2787
2788/*
2789 * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2790 */
2791int
2792camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2793{
2794	struct camdd_buf *buf = NULL;
2795	struct camdd_buf_data *data;
2796	size_t new_len;
2797	struct camdd_buf_data *rb_data;
2798	int is_write = dev->write_dev;
2799	int eof_flush_needed = 0;
2800	int retval = 0;
2801
2802	/*
2803	 * If we've gotten EOF or our partner has, we should not continue
2804	 * queueing I/O.  If we're a writer, though, we should continue
2805	 * to write any buffers that don't have EOF status.
2806	 */
2807	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2808	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2809	  && (is_write == 0))) {
2810		/*
2811		 * Tell the worker thread that we have seen EOF.
2812		 */
2813		retval = 1;
2814
2815		/*
2816		 * If we're the writer, send the buffer back with EOF status.
2817		 */
2818		if (is_write) {
2819			read_buf->status = CAMDD_STATUS_EOF;
2820
2821			camdd_complete_peer_buf(dev, read_buf);
2822		}
2823		goto bailout;
2824	}
2825
2826	if (is_write == 0) {
2827		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2828		if (buf == NULL) {
2829			retval = -1;
2830			goto bailout;
2831		}
2832		data = &buf->buf_type_spec.data;
2833
2834		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2835		if (retval != 0) {
2836			buf->status = CAMDD_STATUS_EOF;
2837
2838			if ((buf->len == 0)
2839			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2840			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2841				camdd_release_buf(buf);
2842				goto bailout;
2843			}
2844			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2845		}
2846
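		/*
		 * Record the byte offset in the source stream where this
		 * buffer starts; the writer uses it (in debug mode) to sanity
		 * check that buffers arrive in order.
		 */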
2847		data->fill_len = buf->len;
2848		data->src_start_offset = buf->lba * dev->sector_size;
2849
2850		/*
2851		 * Put this on the run queue.
2852		 */
2853		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2854		dev->num_run_queue++;
2855
2856		/* We're done. */
2857		goto bailout;
2858	}
2859
2860	/*
2861	 * Check for new EOF status from the reader.
2862	 */
2863	if ((read_buf->status == CAMDD_STATUS_EOF)
2864	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
2865		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2866		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2867		 && (read_buf->len == 0)) {
2868			camdd_complete_peer_buf(dev, read_buf);
2869			retval = 1;
2870			goto bailout;
2871		} else
2872			eof_flush_needed = 1;
2873	}
2874
2875	/*
2876	 * See if we have a buffer we're composing with pieces from our
2877	 * partner thread.
2878	 */
2879	buf = STAILQ_FIRST(&dev->pending_queue);
2880	if (buf == NULL) {
2881		uint64_t lba;
2882		ssize_t len;
2883
2884		retval = camdd_get_next_lba_len(dev, &lba, &len);
2885		if (retval != 0) {
2886			read_buf->status = CAMDD_STATUS_EOF;
2887
2888			if (len == 0) {
2889				dev->flags |= CAMDD_DEV_FLAG_EOF;
2890				camdd_complete_peer_buf(dev, read_buf);
2891				goto bailout;
2892			}
2893		}
2894
2895		/*
2896		 * If we don't have a pending buffer, we need to grab a new
2897		 * one from the free list or allocate another one.
2898		 */
2899		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2900		if (buf == NULL) {
2901			retval = 1;
2902			goto bailout;
2903		}
2904
2905		buf->lba = lba;
2906		buf->len = len;
2907
2908		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2909		dev->num_pending_queue++;
2910	}
2911
2912	data = &buf->buf_type_spec.data;
2913
2914	rb_data = &read_buf->buf_type_spec.data;
2915
2916	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2917	 && (dev->debug != 0)) {
2918		printf("%s: WARNING: reader offset %#jx != expected offset "
2919		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2920		    (uintmax_t)dev->next_peer_pos_bytes);
2921	}
2922	dev->next_peer_pos_bytes = rb_data->src_start_offset +
2923	    (rb_data->fill_len - rb_data->resid);
2924
2925	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2926	if (new_len < buf->len) {
2927		/*
2928		 * There are three cases here:
2929		 * 1. We need more data to fill up a block, so we put
2930		 *    this I/O on the queue and wait for more I/O.
2931		 * 2. We have a pending buffer in the queue that is
2932		 *    smaller than our blocksize, but we got an EOF.  So we
2933		 *    need to go ahead and flush the write out.
2934		 * 3. We got an error.
2935		 */
2936
2937		/*
2938		 * Increment our fill length.
2939		 */
2940		data->fill_len += (rb_data->fill_len - rb_data->resid);
2941
2942		/*
2943		 * Add the new read buffer to the list for writing.
2944		 */
2945		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2946
2947		/* Increment the count */
2948		buf->src_count++;
2949
2950		if (eof_flush_needed == 0) {
2951			/*
2952			 * We need to exit, because we don't have enough
2953			 * data yet.
2954			 */
2955			goto bailout;
2956		} else {
2957			/*
2958			 * Take the buffer off of the pending queue.
2959			 */
2960			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2961				      links);
2962			dev->num_pending_queue--;
2963
2964			/*
2965			 * If we need an EOF flush, but there is no data
2966			 * to flush, go ahead and return this buffer.
2967			 */
2968			if (data->fill_len == 0) {
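				/*
				 * Note that camdd_complete_buf() only
				 * dereferences error_count on the read side,
				 * so passing 0 here is safe for a write
				 * device.
				 */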
2969				camdd_complete_buf(dev, buf, /*error_count*/0);
2970				retval = 1;
2971				goto bailout;
2972			}
2973
2974			/*
2975			 * Put this on the next queue for execution.
2976			 */
2977			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2978			dev->num_run_queue++;
2979		}
2980	} else if (new_len == buf->len) {
2981		/*
2982		 * We have enough data to completely fill one block,
2983		 * so we're ready to issue the I/O.
2984		 */
2985
2986		/*
2987		 * Take the buffer off of the pending queue.
2988		 */
2989		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2990		dev->num_pending_queue--;
2991
2992		/*
2993		 * Add the new read buffer to the list for writing.
2994		 */
2995		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2996
2997		/* Increment the count */
2998		buf->src_count++;
2999
3000		/*
3001		 * Increment our fill length.
3002		 */
3003		data->fill_len += (rb_data->fill_len - rb_data->resid);
3004
3005		/*
3006		 * Put this on the next queue for execution.
3007		 */
3008		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3009		dev->num_run_queue++;
3010	} else {
3011		struct camdd_buf *idb;
3012		struct camdd_buf_indirect *indirect;
3013		uint32_t len_to_go, cur_offset;
3014
3015
3016		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3017		if (idb == NULL) {
3018			retval = 1;
3019			goto bailout;
3020		}
3021		indirect = &idb->buf_type_spec.indirect;
3022		indirect->src_buf = read_buf;
3023		read_buf->refcount++;
3024		indirect->offset = 0;
3025		indirect->start_ptr = rb_data->buf;
3026		/*
3027		 * We've already established that there is more
3028		 * data in read_buf than we have room for in our
3029		 * current write request.  So this particular chunk
3030		 * of the request should just be the remainder
3031		 * needed to fill up a block.
3032		 */
3033		indirect->len = buf->len - (data->fill_len - data->resid);
3034
3035		camdd_buf_add_child(buf, idb);
3036
3037		/*
3038		 * This buffer is ready to execute, so we can take
3039		 * it off the pending queue and put it on the run
3040		 * queue.
3041		 */
3042		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
3043			      links);
3044		dev->num_pending_queue--;
3045		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3046		dev->num_run_queue++;
3047
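		/*
		 * cur_offset tracks how much of the reader's buffer we have
		 * consumed so far; the loop below carves the remainder into
		 * additional blocksize pieces.
		 */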
3048		cur_offset = indirect->offset + indirect->len;
3049
3050		/*
3051		 * The resulting I/O would be too large to fit in
3052		 * one block.  We need to split this I/O into
3053		 * multiple pieces.  Allocate as many buffers as needed.
3054		 */
3055		for (len_to_go = rb_data->fill_len - rb_data->resid -
3056		     indirect->len; len_to_go > 0;) {
3057			struct camdd_buf *new_buf;
3058			struct camdd_buf_data *new_data;
3059			uint64_t lba;
3060			ssize_t len;
3061
3062			retval = camdd_get_next_lba_len(dev, &lba, &len);
3063			if ((retval != 0)
3064			 && (len == 0)) {
3065				/*
3066				 * The device has already been marked
3067				 * as EOF, and there is no space left.
3068				 */
3069				goto bailout;
3070			}
3071
3072			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
3073			if (new_buf == NULL) {
3074				retval = 1;
3075				goto bailout;
3076			}
3077
3078			new_buf->lba = lba;
3079			new_buf->len = len;
3080
3081			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3082			if (idb == NULL) {
3083				retval = 1;
3084				goto bailout;
3085			}
3086
3087			indirect = &idb->buf_type_spec.indirect;
3088
3089			indirect->src_buf = read_buf;
3090			read_buf->refcount++;
3091			indirect->offset = cur_offset;
3092			indirect->start_ptr = rb_data->buf + cur_offset;
3093			indirect->len = min(len_to_go, new_buf->len);
3094#if 0
3095			if (((indirect->len % dev->sector_size) != 0)
3096			 || ((indirect->offset % dev->sector_size) != 0)) {
3097				warnx("offset %ju len %ju not aligned with "
3098				    "sector size %u", (uintmax_t)indirect->offset,
3099				    (uintmax_t)indirect->len, dev->sector_size);
3100			}
3101#endif
3102			cur_offset += indirect->len;
3103			len_to_go -= indirect->len;
3104
3105			camdd_buf_add_child(new_buf, idb);
3106
3107			new_data = &new_buf->buf_type_spec.data;
3108
3109			if ((new_data->fill_len == new_buf->len)
3110			 || (eof_flush_needed != 0)) {
3111				STAILQ_INSERT_TAIL(&dev->run_queue,
3112						   new_buf, links);
3113				dev->num_run_queue++;
3114			} else if (new_data->fill_len < buf->len) {
3115				STAILQ_INSERT_TAIL(&dev->pending_queue,
3116						   new_buf, links);
3117				dev->num_pending_queue++;
3118			} else {
3119				warnx("%s: too much data in new "
3120				      "buffer!", __func__);
3121				retval = 1;
3122				goto bailout;
3123			}
3124		}
3125	}
3126
3127bailout:
3128	return (retval);
3129}
3130
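/*
 * Report how much work this device and its peer currently have outstanding,
 * both as a queue depth and as a byte count.
 */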
3131void
3132camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3133		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3134{
3135	*our_depth = dev->cur_active_io + dev->num_run_queue;
3136	if (dev->num_peer_work_queue >
3137	    dev->num_peer_done_queue)
3138		*peer_depth = dev->num_peer_work_queue -
3139			      dev->num_peer_done_queue;
3140	else
3141		*peer_depth = 0;
3142	*our_bytes = *our_depth * dev->blocksize;
3143	*peer_bytes = dev->peer_bytes_queued;
3144}
3145
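/*
 * SIGINFO requests a progress report; any other signal we catch (SIGINT)
 * requests shutdown.  Either way, wake the main loop via the semaphore.
 */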
3146void
3147camdd_sig_handler(int sig)
3148{
3149	if (sig == SIGINFO)
3150		need_status = 1;
3151	else {
3152		need_exit = 1;
3153		error_exit = 1;
3154	}
3155
3156	sem_post(&camdd_sem);
3157}
3158
3159void
3160camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3161		   struct timespec *start_time)
3162{
3163	struct timespec done_time;
3164	uint64_t total_ns;
3165	long double mb_sec, total_sec;
3166	int error = 0;
3167
3168	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3169	if (error != 0) {
3170		warn("Unable to get done time");
3171		return;
3172	}
3173
3174	timespecsub(&done_time, start_time, &done_time);
3175
3176	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3177	total_sec = total_ns;
3178	total_sec /= 1000000000;
3179
3180	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3181		"%.4Lf seconds elapsed\n",
3182		(uintmax_t)camdd_dev->bytes_transferred,
3183		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3184		camdd_dev->device_name,
3185		(uintmax_t)other_dev->bytes_transferred,
3186		(other_dev->write_dev == 0) ? "read from" : "written to",
3187		other_dev->device_name, total_sec);
3188
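	/*
	 * Throughput is based on the smaller of the two byte counts (the
	 * amount that actually made it end to end), converted to MB
	 * (1024 * 1024 bytes) and divided by the elapsed time.
	 */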
3189	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3190	mb_sec /= 1024 * 1024;
3191	mb_sec *= 1000000000;
3192	mb_sec /= total_ns;
3193	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3194}
3195
3196int
3197camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist, int num_io_opts,
3198	 uint64_t max_io, int retry_count, int timeout)
3199{
3200	struct cam_device *new_cam_dev = NULL;
3201	struct camdd_dev *devs[2];
3202	struct timespec start_time;
3203	pthread_t threads[2];
3204	int unit = 0;
3205	int error = 0;
3206	int i;
3207
3208	bzero(devs, sizeof(devs));
3209
3210	if (num_io_opts != 2) {
3211		warnx("Must have one input and one output path");
3212		error = 1;
3213		goto bailout;
3214	}
3215
3216	for (i = 0; i < num_io_opts; i++) {
3217		switch (io_opts[i].dev_type) {
3218		case CAMDD_DEV_PASS: {
3219			if (isdigit((unsigned char)io_opts[i].dev_name[0])) {
3220				int bus = 0, target = 0, lun = 0;
3221				int rv;
3222
3223				/* device specified as bus:target[:lun] */
3224				rv = parse_btl(io_opts[i].dev_name, &bus,
3225				    &target, &lun);
3226				if (rv < 2) {
3227					warnx("numeric device specification "
3228					     "must be either bus:target, or "
3229					     "bus:target:lun");
3230					error = 1;
3231					goto bailout;
3232				}
3233				/* default to 0 if lun was not specified */
3234				if (rv == 2) {
3235					lun = 0;
3236				}
3237				new_cam_dev = cam_open_btl(bus, target, lun,
3238				    O_RDWR, NULL);
3239			} else {
3240				char name[30];
3241
3242				if (cam_get_device(io_opts[i].dev_name, name,
3243						   sizeof name, &unit) == -1) {
3244					warnx("%s", cam_errbuf);
3245					error = 1;
3246					goto bailout;
3247				}
3248				new_cam_dev = cam_open_spec_device(name, unit,
3249				    O_RDWR, NULL);
3250			}
3251
3252			if (new_cam_dev == NULL) {
3253				warnx("%s", cam_errbuf);
3254				error = 1;
3255				goto bailout;
3256			}
3257
3258			devs[i] = camdd_probe_pass(new_cam_dev,
3259			    /*io_opts*/ &io_opts[i],
3260			    arglist,
3261			    /*probe_retry_count*/ 3,
3262			    /*probe_timeout*/ 5000,
3263			    /*io_retry_count*/ retry_count,
3264			    /*io_timeout*/ timeout);
3265			if (devs[i] == NULL) {
3266				warn("Unable to probe device %s%u",
3267				     new_cam_dev->device_name,
3268				     new_cam_dev->dev_unit_num);
3269				error = 1;
3270				goto bailout;
3271			}
3272			break;
3273		}
3274		case CAMDD_DEV_FILE: {
3275			int fd = -1;
3276
3277			if (io_opts[i].dev_name[0] == '-') {
3278				if (io_opts[i].write_dev != 0)
3279					fd = STDOUT_FILENO;
3280				else
3281					fd = STDIN_FILENO;
3282			} else {
3283				if (io_opts[i].write_dev != 0) {
3284					fd = open(io_opts[i].dev_name,
3285					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3286				} else {
3287					fd = open(io_opts[i].dev_name,
3288					    O_RDONLY);
3289				}
3290			}
3291			if (fd == -1) {
3292				warn("error opening file %s",
3293				    io_opts[i].dev_name);
3294				error = 1;
3295				goto bailout;
3296			}
3297
3298			devs[i] = camdd_probe_file(fd, &io_opts[i],
3299			    retry_count, timeout);
3300			if (devs[i] == NULL) {
3301				error = 1;
3302				goto bailout;
3303			}
3304
3305			break;
3306		}
3307		default:
3308			warnx("Unknown device type %d (%s)",
3309			    io_opts[i].dev_type, io_opts[i].dev_name);
3310			error = 1;
3311			goto bailout;
3312			break; /*NOTREACHED*/
3313		}
3314
3315		devs[i]->write_dev = io_opts[i].write_dev;
3316
3317		devs[i]->start_offset_bytes = io_opts[i].offset;
3318
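		/*
		 * If the user gave a maximum I/O amount, convert it into the
		 * last sector we are allowed to touch, relative to the
		 * starting offset.
		 */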
3319		if (max_io != 0) {
3320			devs[i]->sector_io_limit =
3321			    (devs[i]->start_offset_bytes /
3322			    devs[i]->sector_size) +
3323			    (max_io / devs[i]->sector_size) - 1;
3324		}
3325
3326		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3327		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3328	}
3329
3330	devs[0]->peer_dev = devs[1];
3331	devs[1]->peer_dev = devs[0];
3332	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3333	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3334
3335	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3336
3337	signal(SIGINFO, camdd_sig_handler);
3338	signal(SIGINT, camdd_sig_handler);
3339
3340	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3341	if (error != 0) {
3342		warn("Unable to get start time");
3343		goto bailout;
3344	}
3345
3346	for (i = 0; i < num_io_opts; i++) {
3347		error = pthread_create(&threads[i], NULL, camdd_worker,
3348				       (void *)devs[i]);
3349		if (error != 0) {
3350			warnc(error, "pthread_create() failed");
3351			goto bailout;
3352		}
3353	}
3354
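	/*
	 * Wait for status requests (SIGINFO) or for a worker thread or
	 * signal handler to ask us to shut down; on shutdown, wake both
	 * workers so they can exit.
	 */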
3355	for (;;) {
3356		if ((sem_wait(&camdd_sem) == -1)
3357		 || (need_exit != 0)) {
3358			struct kevent ke;
3359
3360			for (i = 0; i < num_io_opts; i++) {
3361				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3362				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3363
3364				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3365
3366				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3367						NULL);
3368				if (error == -1)
3369					warn("%s: unable to wake up thread",
3370					    __func__);
3371				error = 0;
3372			}
3373			break;
3374		} else if (need_status != 0) {
3375			camdd_print_status(devs[0], devs[1], &start_time);
3376			need_status = 0;
3377		}
3378	}
3379	for (i = 0; i < num_io_opts; i++) {
3380		pthread_join(threads[i], NULL);
3381	}
3382
3383	camdd_print_status(devs[0], devs[1], &start_time);
3384
3385bailout:
3386
3387	for (i = 0; i < num_io_opts; i++)
3388		camdd_free_dev(devs[i]);
3389
3390	return (error + error_exit);
3391}
3392
3393void
3394usage(void)
3395{
3396	fprintf(stderr,
3397"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
3398"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
3399"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
3400"              <-i|-o file=/dev/nsa0,bs=512K>\n"
3401"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
3402"Option description\n"
3403"-i <arg=val>  Specify input device/file and parameters\n"
3404"-o <arg=val>  Specify output device/file and parameters\n"
3405"Input and Output parameters\n"
3406"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
3407"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
3408"              or - for stdin/stdout\n"
3409"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
3410"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
3411"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
3412"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
3413"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
3414"Optional arguments\n"
3415"-C retry_cnt  Specify a retry count for pass(4) devices\n"
3416"-E            Enable CAM error recovery for pass(4) devices\n"
3417"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
3418"              using K, G, M, etc. suffixes\n"
3419"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
3420"-v            Enable verbose error recovery\n"
3421"-h            Print this message\n");
3422}
3423
3424
3425int
3426camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3427{
3428	char *tmpstr, *tmpstr2;
3429	char *orig_tmpstr = NULL;
3430	int retval = 0;
3431
3432	io_opts->write_dev = is_write;
3433
3434	tmpstr = strdup(args);
3435	if (tmpstr == NULL) {
3436		warn("strdup failed");
3437		retval = 1;
3438		goto bailout;
3439	}
3440	orig_tmpstr = tmpstr;
3441	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3442		char *name, *value;
3443
3444		/*
3445		 * If the user creates an empty parameter by putting in two
3446		 * commas, skip over it and look for the next field.
3447		 */
3448		if (*tmpstr2 == '\0')
3449			continue;
3450
3451		name = strsep(&tmpstr2, "=");
3452		if (*name == '\0') {
3453			warnx("Got empty I/O parameter name");
3454			retval = 1;
3455			goto bailout;
3456		}
3457		value = strsep(&tmpstr2, "=");
3458		if ((value == NULL)
3459		 || (*value == '\0')) {
3460			warnx("Empty I/O parameter value for %s", name);
3461			retval = 1;
3462			goto bailout;
3463		}
3464		if (strncasecmp(name, "file", 4) == 0) {
3465			io_opts->dev_type = CAMDD_DEV_FILE;
3466			io_opts->dev_name = strdup(value);
3467			if (io_opts->dev_name == NULL) {
3468				warn("Error allocating memory");
3469				retval = 1;
3470				goto bailout;
3471			}
3472		} else if (strncasecmp(name, "pass", 4) == 0) {
3473			io_opts->dev_type = CAMDD_DEV_PASS;
3474			io_opts->dev_name = strdup(value);
3475			if (io_opts->dev_name == NULL) {
3476				warn("Error allocating memory");
3477				retval = 1;
3478				goto bailout;
3479			}
3480		} else if ((strncasecmp(name, "bs", 2) == 0)
3481			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3482			retval = expand_number(value, &io_opts->blocksize);
3483			if (retval == -1) {
3484				warn("expand_number(3) failed on %s=%s", name,
3485				    value);
3486				retval = 1;
3487				goto bailout;
3488			}
3489		} else if (strncasecmp(name, "depth", 5) == 0) {
3490			char *endptr;
3491
3492			io_opts->queue_depth = strtoull(value, &endptr, 0);
3493			if (*endptr != '\0') {
3494				warnx("invalid queue depth %s", value);
3495				retval = 1;
3496				goto bailout;
3497			}
3498		} else if (strncasecmp(name, "mcs", 3) == 0) {
3499			char *endptr;
3500
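			/*
			 * SCSI READ/WRITE CDBs come in 6, 10, 12 and 16 byte
			 * variants, so a minimum command size larger than 16
			 * (or negative) makes no sense.
			 */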
3501			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3502			if ((*endptr != '\0')
3503			 || ((io_opts->min_cmd_size > 16)
3504			  || (io_opts->min_cmd_size < 0))) {
3505				warnx("invalid minimum cmd size %s", value);
3506				retval = 1;
3507				goto bailout;
3508			}
3509		} else if (strncasecmp(name, "offset", 6) == 0) {
3510			retval = expand_number(value, &io_opts->offset);
3511			if (retval == -1) {
3512				warn("expand_number(3) failed on %s=%s", name,
3513				    value);
3514				retval = 1;
3515				goto bailout;
3516			}
3517		} else if (strncasecmp(name, "debug", 5) == 0) {
3518			char *endptr;
3519
3520			io_opts->debug = strtoull(value, &endptr, 0);
3521			if (*endptr != '\0') {
3522				warnx("invalid debug level %s", value);
3523				retval = 1;
3524				goto bailout;
3525			}
3526		} else {
3527			warnx("Unrecognized parameter %s=%s", name, value);
3528		}
3529	}
3530bailout:
3531	free(orig_tmpstr);
3532
3533	return (retval);
3534}
3535
3536int
3537main(int argc, char **argv)
3538{
3539	int c;
3540	camdd_argmask arglist = CAMDD_ARG_NONE;
3541	int timeout = 0, retry_count = 1;
3542	int error = 0;
3543	uint64_t max_io = 0;
3544	struct camdd_io_opts *opt_list = NULL;
3545
3546	if (argc == 1) {
3547		usage();
3548		exit(1);
3549	}
3550
3551	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3552	if (opt_list == NULL) {
3553		warn("Unable to allocate option list");
3554		error = 1;
3555		goto bailout;
3556	}
3557
3558	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3559		switch (c) {
3560		case 'C':
3561			retry_count = strtol(optarg, NULL, 0);
3562			if (retry_count < 0)
3563				errx(1, "retry count %d is < 0",
3564				     retry_count);
3565			break;
3566		case 'E':
3567			arglist |= CAMDD_ARG_ERR_RECOVER;
3568			break;
3569		case 'i':
3570		case 'o':
3571			if (((c == 'i')
3572			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3573			 || ((c == 'o')
3574			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3575				errx(1, "Only one input and output path "
3576				    "allowed");
3577			}
3578			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3579			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3580			if (error != 0)
3581				goto bailout;
3582			break;
3583		case 'm':
3584			error = expand_number(optarg, &max_io);
3585			if (error == -1) {
3586				warn("invalid maximum I/O amount %s", optarg);
3587				error = 1;
3588				goto bailout;
3589			}
3590			break;
3591		case 't':
3592			timeout = strtol(optarg, NULL, 0);
3593			if (timeout < 0)
3594				errx(1, "invalid timeout %d", timeout);
3595			/* Convert the timeout from seconds to ms */
3596			timeout *= 1000;
3597			break;
3598		case 'v':
3599			arglist |= CAMDD_ARG_VERBOSE;
3600			break;
3601		case 'h':
3602		default:
3603			usage();
3604			exit(1);
3605			break; /*NOTREACHED*/
3606		}
3607	}
3608
3609	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3610	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3611		errx(1, "Must specify both -i and -o");
3612
3613	/*
3614	 * Set the timeout if the user hasn't specified one.
3615	 */
3616	if (timeout == 0)
3617		timeout = CAMDD_PASS_RW_TIMEOUT;
3618
3619	error = camdd_rw(opt_list, arglist, 2, max_io, retry_count, timeout);
3620
3621bailout:
3622	free(opt_list);
3623
3624	exit(error);
3625}
3626