1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2003-2009 Silicon Graphics International Corp.
5 * Copyright (c) 2012 The FreeBSD Foundation
6 * Copyright (c) 2014-2017 Alexander Motin <mav@FreeBSD.org>
7 * Copyright (c) 2017 Jakub Wojciech Klama <jceel@FreeBSD.org>
8 * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>
9 * All rights reserved.
10 *
11 * Portions of this software were developed by Edward Tomasz Napierala
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions, and the following disclaimer,
19 *    without modification.
20 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
21 *    substantially similar to the "NO WARRANTY" disclaimer below
22 *    ("Disclaimer") and any redistribution must be conditioned upon
23 *    including a substantially similar Disclaimer requirement for further
24 *    binary redistribution.
25 *
26 * NO WARRANTY
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGES.
38 *
39 * $Id$
40 */
41/*
42 * CAM Target Layer, a SCSI device emulation subsystem.
43 *
44 * Author: Ken Merry <ken@FreeBSD.org>
45 */
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/ctype.h>
50#include <sys/kernel.h>
51#include <sys/types.h>
52#include <sys/kthread.h>
53#include <sys/bio.h>
54#include <sys/fcntl.h>
55#include <sys/lock.h>
56#include <sys/module.h>
57#include <sys/mutex.h>
58#include <sys/condvar.h>
59#include <sys/malloc.h>
60#include <sys/conf.h>
61#include <sys/ioccom.h>
62#include <sys/queue.h>
63#include <sys/sbuf.h>
64#include <sys/smp.h>
65#include <sys/endian.h>
66#include <sys/proc.h>
67#include <sys/sched.h>
68#include <sys/sysctl.h>
69#include <sys/nv.h>
70#include <sys/dnv.h>
71#include <vm/uma.h>
72
73#include <cam/cam.h>
74#include <cam/scsi/scsi_all.h>
75#include <cam/scsi/scsi_cd.h>
76#include <cam/scsi/scsi_da.h>
77#include <cam/ctl/ctl_io.h>
78#include <cam/ctl/ctl.h>
79#include <cam/ctl/ctl_frontend.h>
80#include <cam/ctl/ctl_util.h>
81#include <cam/ctl/ctl_backend.h>
82#include <cam/ctl/ctl_ioctl.h>
83#include <cam/ctl/ctl_ha.h>
84#include <cam/ctl/ctl_private.h>
85#include <cam/ctl/ctl_debug.h>
86#include <cam/ctl/ctl_nvme_all.h>
87#include <cam/ctl/ctl_scsi_all.h>
88#include <cam/ctl/ctl_error.h>
89
90struct ctl_softc *control_softc = NULL;
91
92/*
93 * Template mode pages.
94 */
95
96/*
97 * Note that these are default values only.  The actual values will be
98 * filled in when the user does a mode sense.
99 */
100const static struct scsi_da_rw_recovery_page rw_er_page_default = {
101	/*page_code*/SMS_RW_ERROR_RECOVERY_PAGE,
102	/*page_length*/sizeof(struct scsi_da_rw_recovery_page) - 2,
103	/*byte3*/SMS_RWER_AWRE|SMS_RWER_ARRE,
104	/*read_retry_count*/0,
105	/*correction_span*/0,
106	/*head_offset_count*/0,
107	/*data_strobe_offset_cnt*/0,
108	/*byte8*/SMS_RWER_LBPERE,
109	/*write_retry_count*/0,
110	/*reserved2*/0,
111	/*recovery_time_limit*/{0, 0},
112};
113
114const static struct scsi_da_rw_recovery_page rw_er_page_changeable = {
115	/*page_code*/SMS_RW_ERROR_RECOVERY_PAGE,
116	/*page_length*/sizeof(struct scsi_da_rw_recovery_page) - 2,
117	/*byte3*/SMS_RWER_PER,
118	/*read_retry_count*/0,
119	/*correction_span*/0,
120	/*head_offset_count*/0,
121	/*data_strobe_offset_cnt*/0,
122	/*byte8*/SMS_RWER_LBPERE,
123	/*write_retry_count*/0,
124	/*reserved2*/0,
125	/*recovery_time_limit*/{0, 0},
126};
127
128const static struct scsi_da_verify_recovery_page verify_er_page_default = {
129	/*page_code*/SMS_VERIFY_ERROR_RECOVERY_PAGE,
130	/*page_length*/sizeof(struct scsi_da_verify_recovery_page) - 2,
131	/*byte3*/0,
132	/*read_retry_count*/0,
133	/*reserved*/{ 0, 0, 0, 0, 0, 0 },
134	/*recovery_time_limit*/{0, 0},
135};
136
137const static struct scsi_da_verify_recovery_page verify_er_page_changeable = {
138	/*page_code*/SMS_VERIFY_ERROR_RECOVERY_PAGE,
139	/*page_length*/sizeof(struct scsi_da_verify_recovery_page) - 2,
140	/*byte3*/SMS_VER_PER,
141	/*read_retry_count*/0,
142	/*reserved*/{ 0, 0, 0, 0, 0, 0 },
143	/*recovery_time_limit*/{0, 0},
144};
145
146const static struct scsi_caching_page caching_page_default = {
147	/*page_code*/SMS_CACHING_PAGE,
148	/*page_length*/sizeof(struct scsi_caching_page) - 2,
149	/*flags1*/ SCP_DISC | SCP_WCE,
150	/*ret_priority*/ 0,
151	/*disable_pf_transfer_len*/ {0xff, 0xff},
152	/*min_prefetch*/ {0, 0},
153	/*max_prefetch*/ {0xff, 0xff},
154	/*max_pf_ceiling*/ {0xff, 0xff},
155	/*flags2*/ 0,
156	/*cache_segments*/ 0,
157	/*cache_seg_size*/ {0, 0},
158	/*reserved*/ 0,
159	/*non_cache_seg_size*/ {0, 0, 0}
160};
161
162const static struct scsi_caching_page caching_page_changeable = {
163	/*page_code*/SMS_CACHING_PAGE,
164	/*page_length*/sizeof(struct scsi_caching_page) - 2,
165	/*flags1*/ SCP_WCE | SCP_RCD,
166	/*ret_priority*/ 0,
167	/*disable_pf_transfer_len*/ {0, 0},
168	/*min_prefetch*/ {0, 0},
169	/*max_prefetch*/ {0, 0},
170	/*max_pf_ceiling*/ {0, 0},
171	/*flags2*/ 0,
172	/*cache_segments*/ 0,
173	/*cache_seg_size*/ {0, 0},
174	/*reserved*/ 0,
175	/*non_cache_seg_size*/ {0, 0, 0}
176};
177
178const static struct scsi_control_page control_page_default = {
179	/*page_code*/SMS_CONTROL_MODE_PAGE,
180	/*page_length*/sizeof(struct scsi_control_page) - 2,
181	/*rlec*/0,
182	/*queue_flags*/SCP_QUEUE_ALG_RESTRICTED,
183	/*eca_and_aen*/0,
184	/*flags4*/SCP_TAS,
185	/*aen_holdoff_period*/{0, 0},
186	/*busy_timeout_period*/{0, 0},
187	/*extended_selftest_completion_time*/{0, 0}
188};
189
190const static struct scsi_control_page control_page_changeable = {
191	/*page_code*/SMS_CONTROL_MODE_PAGE,
192	/*page_length*/sizeof(struct scsi_control_page) - 2,
193	/*rlec*/SCP_DSENSE,
194	/*queue_flags*/SCP_QUEUE_ALG_MASK | SCP_NUAR,
195	/*eca_and_aen*/SCP_SWP,
196	/*flags4*/0,
197	/*aen_holdoff_period*/{0, 0},
198	/*busy_timeout_period*/{0, 0},
199	/*extended_selftest_completion_time*/{0, 0}
200};
201
202#define CTL_CEM_LEN	(sizeof(struct scsi_control_ext_page) - 4)
203
204const static struct scsi_control_ext_page control_ext_page_default = {
205	/*page_code*/SMS_CONTROL_MODE_PAGE | SMPH_SPF,
206	/*subpage_code*/0x01,
207	/*page_length*/{CTL_CEM_LEN >> 8, CTL_CEM_LEN},
208	/*flags*/0,
209	/*prio*/0,
210	/*max_sense*/0
211};
212
213const static struct scsi_control_ext_page control_ext_page_changeable = {
214	/*page_code*/SMS_CONTROL_MODE_PAGE | SMPH_SPF,
215	/*subpage_code*/0x01,
216	/*page_length*/{CTL_CEM_LEN >> 8, CTL_CEM_LEN},
217	/*flags*/0,
218	/*prio*/0,
219	/*max_sense*/0xff
220};
221
222const static struct scsi_info_exceptions_page ie_page_default = {
223	/*page_code*/SMS_INFO_EXCEPTIONS_PAGE,
224	/*page_length*/sizeof(struct scsi_info_exceptions_page) - 2,
225	/*info_flags*/SIEP_FLAGS_EWASC,
226	/*mrie*/SIEP_MRIE_NO,
227	/*interval_timer*/{0, 0, 0, 0},
228	/*report_count*/{0, 0, 0, 1}
229};
230
231const static struct scsi_info_exceptions_page ie_page_changeable = {
232	/*page_code*/SMS_INFO_EXCEPTIONS_PAGE,
233	/*page_length*/sizeof(struct scsi_info_exceptions_page) - 2,
234	/*info_flags*/SIEP_FLAGS_EWASC | SIEP_FLAGS_DEXCPT | SIEP_FLAGS_TEST |
235	    SIEP_FLAGS_LOGERR,
236	/*mrie*/0x0f,
237	/*interval_timer*/{0xff, 0xff, 0xff, 0xff},
238	/*report_count*/{0xff, 0xff, 0xff, 0xff}
239};
240
241#define CTL_LBPM_LEN	(sizeof(struct ctl_logical_block_provisioning_page) - 4)
242
243const static struct ctl_logical_block_provisioning_page lbp_page_default = {{
244	/*page_code*/SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF,
245	/*subpage_code*/0x02,
246	/*page_length*/{CTL_LBPM_LEN >> 8, CTL_LBPM_LEN},
247	/*flags*/0,
248	/*reserved*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
249	/*descr*/{}},
250	{{/*flags*/0,
251	  /*resource*/0x01,
252	  /*reserved*/{0, 0},
253	  /*count*/{0, 0, 0, 0}},
254	 {/*flags*/0,
255	  /*resource*/0x02,
256	  /*reserved*/{0, 0},
257	  /*count*/{0, 0, 0, 0}},
258	 {/*flags*/0,
259	  /*resource*/0xf1,
260	  /*reserved*/{0, 0},
261	  /*count*/{0, 0, 0, 0}},
262	 {/*flags*/0,
263	  /*resource*/0xf2,
264	  /*reserved*/{0, 0},
265	  /*count*/{0, 0, 0, 0}}
266	}
267};
268
269const static struct ctl_logical_block_provisioning_page lbp_page_changeable = {{
270	/*page_code*/SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF,
271	/*subpage_code*/0x02,
272	/*page_length*/{CTL_LBPM_LEN >> 8, CTL_LBPM_LEN},
273	/*flags*/SLBPP_SITUA,
274	/*reserved*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
275	/*descr*/{}},
276	{{/*flags*/0,
277	  /*resource*/0,
278	  /*reserved*/{0, 0},
279	  /*count*/{0, 0, 0, 0}},
280	 {/*flags*/0,
281	  /*resource*/0,
282	  /*reserved*/{0, 0},
283	  /*count*/{0, 0, 0, 0}},
284	 {/*flags*/0,
285	  /*resource*/0,
286	  /*reserved*/{0, 0},
287	  /*count*/{0, 0, 0, 0}},
288	 {/*flags*/0,
289	  /*resource*/0,
290	  /*reserved*/{0, 0},
291	  /*count*/{0, 0, 0, 0}}
292	}
293};
294
295const static struct scsi_cddvd_capabilities_page cddvd_page_default = {
296	/*page_code*/SMS_CDDVD_CAPS_PAGE,
297	/*page_length*/sizeof(struct scsi_cddvd_capabilities_page) - 2,
298	/*caps1*/0x3f,
299	/*caps2*/0x00,
300	/*caps3*/0xf0,
301	/*caps4*/0x00,
302	/*caps5*/0x29,
303	/*caps6*/0x00,
304	/*obsolete*/{0, 0},
305	/*nvol_levels*/{0, 0},
306	/*buffer_size*/{8, 0},
307	/*obsolete2*/{0, 0},
308	/*reserved*/0,
309	/*digital*/0,
310	/*obsolete3*/0,
311	/*copy_management*/0,
312	/*reserved2*/0,
313	/*rotation_control*/0,
314	/*cur_write_speed*/0,
315	/*num_speed_descr*/0,
316};
317
318const static struct scsi_cddvd_capabilities_page cddvd_page_changeable = {
319	/*page_code*/SMS_CDDVD_CAPS_PAGE,
320	/*page_length*/sizeof(struct scsi_cddvd_capabilities_page) - 2,
321	/*caps1*/0,
322	/*caps2*/0,
323	/*caps3*/0,
324	/*caps4*/0,
325	/*caps5*/0,
326	/*caps6*/0,
327	/*obsolete*/{0, 0},
328	/*nvol_levels*/{0, 0},
329	/*buffer_size*/{0, 0},
330	/*obsolete2*/{0, 0},
331	/*reserved*/0,
332	/*digital*/0,
333	/*obsolete3*/0,
334	/*copy_management*/0,
335	/*reserved2*/0,
336	/*rotation_control*/0,
337	/*cur_write_speed*/0,
338	/*num_speed_descr*/0,
339};
340
341SYSCTL_NODE(_kern_cam, OID_AUTO, ctl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
342    "CAM Target Layer");
343static int worker_threads = -1;
344SYSCTL_INT(_kern_cam_ctl, OID_AUTO, worker_threads, CTLFLAG_RDTUN,
345    &worker_threads, 1, "Number of worker threads");
346static int ctl_debug = CTL_DEBUG_NONE;
347SYSCTL_INT(_kern_cam_ctl, OID_AUTO, debug, CTLFLAG_RWTUN,
348    &ctl_debug, 0, "Enabled debug flags");
349static int ctl_lun_map_size = 1024;
350SYSCTL_INT(_kern_cam_ctl, OID_AUTO, lun_map_size, CTLFLAG_RWTUN,
351    &ctl_lun_map_size, 0, "Size of per-port LUN map (max LUN + 1)");
352#ifdef  CTL_TIME_IO
353static int ctl_time_io_secs = CTL_TIME_IO_DEFAULT_SECS;
354SYSCTL_INT(_kern_cam_ctl, OID_AUTO, time_io_secs, CTLFLAG_RWTUN,
355    &ctl_time_io_secs, 0, "Log requests taking more seconds");
356#endif
357
358/*
359 * Maximum number of LUNs we support.  MUST be a power of 2.
360 */
361#define	CTL_DEFAULT_MAX_LUNS	1024
362static int ctl_max_luns = CTL_DEFAULT_MAX_LUNS;
363TUNABLE_INT("kern.cam.ctl.max_luns", &ctl_max_luns);
364SYSCTL_INT(_kern_cam_ctl, OID_AUTO, max_luns, CTLFLAG_RDTUN,
365    &ctl_max_luns, CTL_DEFAULT_MAX_LUNS, "Maximum number of LUNs");
366
367/*
368 * Maximum number of ports registered at one time.
369 */
370#define	CTL_DEFAULT_MAX_PORTS		1024
371static int ctl_max_ports = CTL_DEFAULT_MAX_PORTS;
372TUNABLE_INT("kern.cam.ctl.max_ports", &ctl_max_ports);
373SYSCTL_INT(_kern_cam_ctl, OID_AUTO, max_ports, CTLFLAG_RDTUN,
374    &ctl_max_ports, CTL_DEFAULT_MAX_LUNS, "Maximum number of ports");
375
376/*
377 * Maximum number of initiators we support.
378 */
379#define	CTL_MAX_INITIATORS	(CTL_MAX_INIT_PER_PORT * ctl_max_ports)
380
381/*
382 * Supported pages (0x00), Serial number (0x80), Device ID (0x83),
383 * Extended INQUIRY Data (0x86), Mode Page Policy (0x87),
384 * SCSI Ports (0x88), Third-party Copy (0x8F), SCSI Feature Sets (0x92),
385 * Block limits (0xB0), Block Device Characteristics (0xB1) and
386 * Logical Block Provisioning (0xB2)
387 */
388#define SCSI_EVPD_NUM_SUPPORTED_PAGES	11
389
390static void ctl_isc_event_handler(ctl_ha_channel chanel, ctl_ha_event event,
391				  int param);
392static void ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest);
393static void ctl_copy_sense_data_back(union ctl_io *src, union ctl_ha_msg *dest);
394static int ctl_init(void);
395static int ctl_shutdown(void);
396static int ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td);
397static int ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td);
398static void ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio);
399static void ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num,
400			      struct ctl_ooa *ooa_hdr,
401			      struct ctl_ooa_entry *kern_entries);
402static int ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
403		     struct thread *td);
404static int ctl_enable_lun(struct ctl_lun *lun);
405static int ctl_disable_lun(struct ctl_lun *lun);
406static int ctl_free_lun(struct ctl_lun *lun);
407
408static int ctl_do_mode_select(union ctl_io *io);
409static int ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun,
410			   uint64_t res_key, uint64_t sa_res_key,
411			   uint8_t type, uint32_t residx,
412			   struct ctl_scsiio *ctsio,
413			   struct scsi_per_res_out *cdb,
414			   struct scsi_per_res_out_parms* param);
415static void ctl_pro_preempt_other(struct ctl_lun *lun,
416				  union ctl_ha_msg *msg);
417static void ctl_hndl_per_res_out_on_other_sc(union ctl_io *io);
418static int ctl_inquiry_evpd_supported(struct ctl_scsiio *ctsio, int alloc_len);
419static int ctl_inquiry_evpd_serial(struct ctl_scsiio *ctsio, int alloc_len);
420static int ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len);
421static int ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len);
422static int ctl_inquiry_evpd_mpp(struct ctl_scsiio *ctsio, int alloc_len);
423static int ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio,
424					 int alloc_len);
425static int ctl_inquiry_evpd_sfs(struct ctl_scsiio *ctsio, int alloc_len);
426static int ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio,
427					 int alloc_len);
428static int ctl_inquiry_evpd_bdc(struct ctl_scsiio *ctsio, int alloc_len);
429static int ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len);
430static int ctl_inquiry_evpd(struct ctl_scsiio *ctsio);
431static int ctl_inquiry_std(struct ctl_scsiio *ctsio);
432static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len);
433static ctl_action ctl_extent_check(union ctl_io *io1, union ctl_io *io2,
434    bool seq);
435static ctl_action ctl_seq_check(union ctl_io *io1, union ctl_io *io2);
436static ctl_action ctl_check_for_blockage(struct ctl_lun *lun,
437    union ctl_io *pending_io, const uint8_t *serialize_row,
438    union ctl_io *ooa_io);
439static ctl_action ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io,
440				union ctl_io **starting_io);
441static void ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io,
442    bool skip);
443static void ctl_try_unblock_others(struct ctl_lun *lun, union ctl_io *io,
444    bool skip);
445static int ctl_scsiio_lun_check(struct ctl_lun *lun,
446				const struct ctl_cmd_entry *entry,
447				struct ctl_scsiio *ctsio);
448static void ctl_failover_lun(union ctl_io *io);
449static void ctl_scsiio_precheck(struct ctl_scsiio *ctsio);
450static int ctl_scsiio(struct ctl_scsiio *ctsio);
451static void ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio);
452static int ctl_nvmeio(struct ctl_nvmeio *ctnio);
453
454static int ctl_target_reset(union ctl_io *io);
455static void ctl_do_lun_reset(struct ctl_lun *lun, uint32_t initidx,
456			 ctl_ua_type ua_type);
457static int ctl_lun_reset(union ctl_io *io);
458static int ctl_abort_task(union ctl_io *io);
459static int ctl_abort_task_set(union ctl_io *io);
460static int ctl_query_task(union ctl_io *io, int task_set);
461static void ctl_i_t_nexus_loss(struct ctl_softc *softc, uint32_t initidx,
462			      ctl_ua_type ua_type);
463static int ctl_i_t_nexus_reset(union ctl_io *io);
464static int ctl_query_async_event(union ctl_io *io);
465static void ctl_run_task(union ctl_io *io);
466#ifdef CTL_IO_DELAY
467static void ctl_datamove_timer_wakeup(void *arg);
468static void ctl_done_timer_wakeup(void *arg);
469#endif /* CTL_IO_DELAY */
470
471static void ctl_send_datamove_done(union ctl_io *io, int have_lock);
472static void ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq);
473static int ctl_datamove_remote_dm_write_cb(union ctl_io *io, bool samethr);
474static void ctl_datamove_remote_write(union ctl_io *io);
475static int ctl_datamove_remote_dm_read_cb(union ctl_io *io, bool samethr);
476static void ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq);
477static int ctl_datamove_remote_sgl_setup(union ctl_io *io);
478static int ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
479				    ctl_ha_dt_cb callback);
480static void ctl_datamove_remote_read(union ctl_io *io);
481static void ctl_datamove_remote(union ctl_io *io);
482static void ctl_process_done(union ctl_io *io);
483static void ctl_thresh_thread(void *arg);
484static void ctl_work_thread(void *arg);
485static void ctl_enqueue_incoming(union ctl_io *io);
486static void ctl_enqueue_rtr(union ctl_io *io);
487static void ctl_enqueue_done(union ctl_io *io);
488static void ctl_enqueue_isc(union ctl_io *io);
489static const struct ctl_cmd_entry *
490    ctl_get_cmd_entry(struct ctl_scsiio *ctsio, int *sa);
491static const struct ctl_cmd_entry *
492    ctl_validate_command(struct ctl_scsiio *ctsio);
493static int ctl_cmd_applicable(uint8_t lun_type,
494    const struct ctl_cmd_entry *entry);
495static int ctl_ha_init(void);
496static int ctl_ha_shutdown(void);
497
498static uint64_t ctl_get_prkey(struct ctl_lun *lun, uint32_t residx);
499static void ctl_clr_prkey(struct ctl_lun *lun, uint32_t residx);
500static void ctl_alloc_prkey(struct ctl_lun *lun, uint32_t residx);
501static void ctl_set_prkey(struct ctl_lun *lun, uint32_t residx, uint64_t key);
502
503/*
504 * Load the serialization table.  This isn't very pretty, but is probably
505 * the easiest way to do it.
506 */
507#include "ctl_ser_table.c"
508
509/*
510 * We only need to define open, close and ioctl routines for this driver.
511 */
512static struct cdevsw ctl_cdevsw = {
513	.d_version =	D_VERSION,
514	.d_flags =	0,
515	.d_open =	ctl_open,
516	.d_close =	ctl_close,
517	.d_ioctl =	ctl_ioctl,
518	.d_name =	"ctl",
519};
520
521MALLOC_DEFINE(M_CTL, "ctlmem", "Memory used for CTL");
522
523static int ctl_module_event_handler(module_t, int /*modeventtype_t*/, void *);
524
525static moduledata_t ctl_moduledata = {
526	"ctl",
527	ctl_module_event_handler,
528	NULL
529};
530
531DECLARE_MODULE(ctl, ctl_moduledata, SI_SUB_CONFIGURE, SI_ORDER_THIRD);
532MODULE_VERSION(ctl, 1);
533
534static void
535ctl_be_move_done(union ctl_io *io, bool samethr)
536{
537	switch (io->io_hdr.io_type) {
538	case CTL_IO_SCSI:
539		io->scsiio.be_move_done(io, samethr);
540		break;
541	case CTL_IO_NVME:
542	case CTL_IO_NVME_ADMIN:
543		io->nvmeio.be_move_done(io, samethr);
544		break;
545	default:
546		__assert_unreachable();
547	}
548}
549
550static void
551ctl_continue_io(union ctl_io *io)
552{
553	switch (io->io_hdr.io_type) {
554	case CTL_IO_SCSI:
555		io->scsiio.io_cont(io);
556		break;
557	case CTL_IO_NVME:
558	case CTL_IO_NVME_ADMIN:
559		io->nvmeio.io_cont(io);
560		break;
561	default:
562		__assert_unreachable();
563	}
564}
565
566static struct ctl_frontend ha_frontend =
567{
568	.name = "ha",
569	.init = ctl_ha_init,
570	.shutdown = ctl_ha_shutdown,
571};
572
573static int
574ctl_ha_init(void)
575{
576	struct ctl_softc *softc = control_softc;
577
578	if (ctl_pool_create(softc, "othersc", CTL_POOL_ENTRIES_OTHER_SC,
579	                    &softc->othersc_pool) != 0)
580		return (ENOMEM);
581	if (ctl_ha_msg_init(softc) != CTL_HA_STATUS_SUCCESS) {
582		ctl_pool_free(softc->othersc_pool);
583		return (EIO);
584	}
585	if (ctl_ha_msg_register(CTL_HA_CHAN_CTL, ctl_isc_event_handler)
586	    != CTL_HA_STATUS_SUCCESS) {
587		ctl_ha_msg_destroy(softc);
588		ctl_pool_free(softc->othersc_pool);
589		return (EIO);
590	}
591	return (0);
592};
593
594static int
595ctl_ha_shutdown(void)
596{
597	struct ctl_softc *softc = control_softc;
598	struct ctl_port *port;
599
600	ctl_ha_msg_shutdown(softc);
601	if (ctl_ha_msg_deregister(CTL_HA_CHAN_CTL) != CTL_HA_STATUS_SUCCESS)
602		return (EIO);
603	if (ctl_ha_msg_destroy(softc) != CTL_HA_STATUS_SUCCESS)
604		return (EIO);
605	ctl_pool_free(softc->othersc_pool);
606	while ((port = STAILQ_FIRST(&ha_frontend.port_list)) != NULL) {
607		ctl_port_deregister(port);
608		free(port->port_name, M_CTL);
609		free(port, M_CTL);
610	}
611	return (0);
612};
613
614static void
615ctl_ha_datamove(union ctl_io *io)
616{
617	struct ctl_lun *lun = CTL_LUN(io);
618	struct ctl_sg_entry *sgl;
619	union ctl_ha_msg msg;
620	uint32_t sg_entries_sent;
621	int do_sg_copy, i, j;
622
623	CTL_IO_ASSERT(io, SCSI);
624
625	memset(&msg.dt, 0, sizeof(msg.dt));
626	msg.hdr.msg_type = CTL_MSG_DATAMOVE;
627	msg.hdr.original_sc = io->io_hdr.remote_io;
628	msg.hdr.serializing_sc = io;
629	msg.hdr.nexus = io->io_hdr.nexus;
630	msg.hdr.status = io->io_hdr.status;
631	msg.dt.flags = io->io_hdr.flags;
632
633	/*
634	 * We convert everything into a S/G list here.  We can't
635	 * pass by reference, only by value between controllers.
636	 * So we can't pass a pointer to the S/G list, only as many
637	 * S/G entries as we can fit in here.  If it's possible for
638	 * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries,
639	 * then we need to break this up into multiple transfers.
640	 */
641	if (ctl_kern_sg_entries(io) == 0) {
642		msg.dt.kern_sg_entries = 1;
643#if 0
644		if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
645			msg.dt.sg_list[0].addr = ctl_kern_data_ptr(io);
646		} else {
647			/* XXX KDM use busdma here! */
648			msg.dt.sg_list[0].addr =
649			    (void *)vtophys(ctl_kern_data_ptr(io));
650		}
651#else
652		KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0,
653		    ("HA does not support BUS_ADDR"));
654		msg.dt.sg_list[0].addr = ctl_kern_data_ptr(io);
655#endif
656		msg.dt.sg_list[0].len = ctl_kern_data_len(io);
657		do_sg_copy = 0;
658	} else {
659		msg.dt.kern_sg_entries = ctl_kern_sg_entries(io);
660		do_sg_copy = 1;
661	}
662
663	msg.dt.kern_data_len = ctl_kern_data_len(io);
664	msg.dt.kern_total_len = ctl_kern_total_len(io);
665	msg.dt.kern_data_resid = ctl_kern_data_resid(io);
666	msg.dt.kern_rel_offset = ctl_kern_rel_offset(io);
667	msg.dt.sg_sequence = 0;
668
669	/*
670	 * Loop until we've sent all of the S/G entries.  On the
671	 * other end, we'll recompose these S/G entries into one
672	 * contiguous list before processing.
673	 */
674	for (sg_entries_sent = 0; sg_entries_sent < msg.dt.kern_sg_entries;
675	    msg.dt.sg_sequence++) {
676		msg.dt.cur_sg_entries = MIN((sizeof(msg.dt.sg_list) /
677		    sizeof(msg.dt.sg_list[0])),
678		    msg.dt.kern_sg_entries - sg_entries_sent);
679		if (do_sg_copy != 0) {
680			sgl = (struct ctl_sg_entry *)ctl_kern_data_ptr(io);
681			for (i = sg_entries_sent, j = 0;
682			     i < msg.dt.cur_sg_entries; i++, j++) {
683#if 0
684				if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
685					msg.dt.sg_list[j].addr = sgl[i].addr;
686				} else {
687					/* XXX KDM use busdma here! */
688					msg.dt.sg_list[j].addr =
689					    (void *)vtophys(sgl[i].addr);
690				}
691#else
692				KASSERT((io->io_hdr.flags &
693				    CTL_FLAG_BUS_ADDR) == 0,
694				    ("HA does not support BUS_ADDR"));
695				msg.dt.sg_list[j].addr = sgl[i].addr;
696#endif
697				msg.dt.sg_list[j].len = sgl[i].len;
698			}
699		}
700
701		sg_entries_sent += msg.dt.cur_sg_entries;
702		msg.dt.sg_last = (sg_entries_sent >= msg.dt.kern_sg_entries);
703		if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
704		    sizeof(msg.dt) - sizeof(msg.dt.sg_list) +
705		    sizeof(struct ctl_sg_entry) * msg.dt.cur_sg_entries,
706		    M_WAITOK) > CTL_HA_STATUS_SUCCESS) {
707			io->io_hdr.port_status = 31341;
708			ctl_datamove_done(io, true);
709			return;
710		}
711		msg.dt.sent_sg_entries = sg_entries_sent;
712	}
713
714	/*
715	 * Officially handover the request from us to peer.
716	 * If failover has just happened, then we must return error.
717	 * If failover happen just after, then it is not our problem.
718	 */
719	if (lun)
720		mtx_lock(&lun->lun_lock);
721	if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
722		if (lun)
723			mtx_unlock(&lun->lun_lock);
724		io->io_hdr.port_status = 31342;
725		ctl_datamove_done(io, true);
726		return;
727	}
728	io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
729	io->io_hdr.flags |= CTL_FLAG_DMA_INPROG;
730	if (lun)
731		mtx_unlock(&lun->lun_lock);
732}
733
734static void
735ctl_ha_done(union ctl_io *io)
736{
737	union ctl_ha_msg msg;
738
739	if (io->io_hdr.io_type == CTL_IO_SCSI) {
740		memset(&msg, 0, sizeof(msg));
741		msg.hdr.msg_type = CTL_MSG_FINISH_IO;
742		msg.hdr.original_sc = io->io_hdr.remote_io;
743		msg.hdr.nexus = io->io_hdr.nexus;
744		msg.hdr.status = io->io_hdr.status;
745		msg.scsi.scsi_status = io->scsiio.scsi_status;
746		msg.scsi.tag_num = io->scsiio.tag_num;
747		msg.scsi.tag_type = io->scsiio.tag_type;
748		msg.scsi.sense_len = io->scsiio.sense_len;
749		memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data,
750		    io->scsiio.sense_len);
751		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
752		    sizeof(msg.scsi) - sizeof(msg.scsi.sense_data) +
753		    msg.scsi.sense_len, M_WAITOK);
754	}
755	ctl_free_io(io);
756}
757
758static void
759ctl_isc_handler_finish_xfer(struct ctl_softc *ctl_softc,
760			    union ctl_ha_msg *msg_info)
761{
762	struct ctl_scsiio *ctsio;
763
764	if (msg_info->hdr.original_sc == NULL) {
765		printf("%s: original_sc == NULL!\n", __func__);
766		/* XXX KDM now what? */
767		return;
768	}
769
770	ctsio = &msg_info->hdr.original_sc->scsiio;
771	ctsio->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC;
772	ctsio->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
773	ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO;
774	ctsio->io_hdr.status = msg_info->hdr.status;
775	ctsio->scsi_status = msg_info->scsi.scsi_status;
776	ctsio->sense_len = msg_info->scsi.sense_len;
777	memcpy(&ctsio->sense_data, &msg_info->scsi.sense_data,
778	       msg_info->scsi.sense_len);
779	ctl_enqueue_isc((union ctl_io *)ctsio);
780}
781
782static void
783ctl_isc_handler_finish_ser_only(struct ctl_softc *ctl_softc,
784				union ctl_ha_msg *msg_info)
785{
786	struct ctl_scsiio *ctsio;
787
788	if (msg_info->hdr.serializing_sc == NULL) {
789		printf("%s: serializing_sc == NULL!\n", __func__);
790		/* XXX KDM now what? */
791		return;
792	}
793
794	ctsio = &msg_info->hdr.serializing_sc->scsiio;
795	ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO;
796	ctl_enqueue_isc((union ctl_io *)ctsio);
797}
798
799void
800ctl_isc_announce_lun(struct ctl_lun *lun)
801{
802	struct ctl_softc *softc = lun->ctl_softc;
803	union ctl_ha_msg *msg;
804	struct ctl_ha_msg_lun_pr_key pr_key;
805	int i, k;
806
807	if (softc->ha_link != CTL_HA_LINK_ONLINE)
808		return;
809	mtx_lock(&lun->lun_lock);
810	i = sizeof(msg->lun);
811	if (lun->lun_devid)
812		i += lun->lun_devid->len;
813	i += sizeof(pr_key) * lun->pr_key_count;
814alloc:
815	mtx_unlock(&lun->lun_lock);
816	msg = malloc(i, M_CTL, M_WAITOK);
817	mtx_lock(&lun->lun_lock);
818	k = sizeof(msg->lun);
819	if (lun->lun_devid)
820		k += lun->lun_devid->len;
821	k += sizeof(pr_key) * lun->pr_key_count;
822	if (i < k) {
823		free(msg, M_CTL);
824		i = k;
825		goto alloc;
826	}
827	bzero(&msg->lun, sizeof(msg->lun));
828	msg->hdr.msg_type = CTL_MSG_LUN_SYNC;
829	msg->hdr.nexus.targ_lun = lun->lun;
830	msg->hdr.nexus.targ_mapped_lun = lun->lun;
831	msg->lun.flags = lun->flags;
832	msg->lun.pr_generation = lun->pr_generation;
833	msg->lun.pr_res_idx = lun->pr_res_idx;
834	msg->lun.pr_res_type = lun->pr_res_type;
835	msg->lun.pr_key_count = lun->pr_key_count;
836	i = 0;
837	if (lun->lun_devid) {
838		msg->lun.lun_devid_len = lun->lun_devid->len;
839		memcpy(&msg->lun.data[i], lun->lun_devid->data,
840		    msg->lun.lun_devid_len);
841		i += msg->lun.lun_devid_len;
842	}
843	for (k = 0; k < CTL_MAX_INITIATORS; k++) {
844		if ((pr_key.pr_key = ctl_get_prkey(lun, k)) == 0)
845			continue;
846		pr_key.pr_iid = k;
847		memcpy(&msg->lun.data[i], &pr_key, sizeof(pr_key));
848		i += sizeof(pr_key);
849	}
850	mtx_unlock(&lun->lun_lock);
851	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->lun, sizeof(msg->lun) + i,
852	    M_WAITOK);
853	free(msg, M_CTL);
854
855	if (lun->flags & CTL_LUN_PRIMARY_SC) {
856		for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
857			ctl_isc_announce_mode(lun, -1,
858			    lun->mode_pages.index[i].page_code & SMPH_PC_MASK,
859			    lun->mode_pages.index[i].subpage);
860		}
861	}
862}
863
864void
865ctl_isc_announce_port(struct ctl_port *port)
866{
867	struct ctl_softc *softc = port->ctl_softc;
868	union ctl_ha_msg *msg;
869	int i;
870
871	if (port->targ_port < softc->port_min ||
872	    port->targ_port >= softc->port_max ||
873	    softc->ha_link != CTL_HA_LINK_ONLINE)
874		return;
875	i = sizeof(msg->port) + strlen(port->port_name) + 1;
876	if (port->lun_map)
877		i += port->lun_map_size * sizeof(uint32_t);
878	if (port->port_devid)
879		i += port->port_devid->len;
880	if (port->target_devid)
881		i += port->target_devid->len;
882	if (port->init_devid)
883		i += port->init_devid->len;
884	msg = malloc(i, M_CTL, M_WAITOK);
885	bzero(&msg->port, sizeof(msg->port));
886	msg->hdr.msg_type = CTL_MSG_PORT_SYNC;
887	msg->hdr.nexus.targ_port = port->targ_port;
888	msg->port.port_type = port->port_type;
889	msg->port.physical_port = port->physical_port;
890	msg->port.virtual_port = port->virtual_port;
891	msg->port.status = port->status;
892	i = 0;
893	msg->port.name_len = sprintf(&msg->port.data[i],
894	    "%d:%s", softc->ha_id, port->port_name) + 1;
895	i += msg->port.name_len;
896	if (port->lun_map) {
897		msg->port.lun_map_len = port->lun_map_size * sizeof(uint32_t);
898		memcpy(&msg->port.data[i], port->lun_map,
899		    msg->port.lun_map_len);
900		i += msg->port.lun_map_len;
901	}
902	if (port->port_devid) {
903		msg->port.port_devid_len = port->port_devid->len;
904		memcpy(&msg->port.data[i], port->port_devid->data,
905		    msg->port.port_devid_len);
906		i += msg->port.port_devid_len;
907	}
908	if (port->target_devid) {
909		msg->port.target_devid_len = port->target_devid->len;
910		memcpy(&msg->port.data[i], port->target_devid->data,
911		    msg->port.target_devid_len);
912		i += msg->port.target_devid_len;
913	}
914	if (port->init_devid) {
915		msg->port.init_devid_len = port->init_devid->len;
916		memcpy(&msg->port.data[i], port->init_devid->data,
917		    msg->port.init_devid_len);
918		i += msg->port.init_devid_len;
919	}
920	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->port, sizeof(msg->port) + i,
921	    M_WAITOK);
922	free(msg, M_CTL);
923}
924
925void
926ctl_isc_announce_iid(struct ctl_port *port, int iid)
927{
928	struct ctl_softc *softc = port->ctl_softc;
929	union ctl_ha_msg *msg;
930	int i, l;
931
932	if (port->targ_port < softc->port_min ||
933	    port->targ_port >= softc->port_max ||
934	    softc->ha_link != CTL_HA_LINK_ONLINE)
935		return;
936	mtx_lock(&softc->ctl_lock);
937	i = sizeof(msg->iid);
938	l = 0;
939	if (port->wwpn_iid[iid].name)
940		l = strlen(port->wwpn_iid[iid].name) + 1;
941	i += l;
942	msg = malloc(i, M_CTL, M_NOWAIT);
943	if (msg == NULL) {
944		mtx_unlock(&softc->ctl_lock);
945		return;
946	}
947	bzero(&msg->iid, sizeof(msg->iid));
948	msg->hdr.msg_type = CTL_MSG_IID_SYNC;
949	msg->hdr.nexus.targ_port = port->targ_port;
950	msg->hdr.nexus.initid = iid;
951	msg->iid.in_use = port->wwpn_iid[iid].in_use;
952	msg->iid.name_len = l;
953	msg->iid.wwpn = port->wwpn_iid[iid].wwpn;
954	if (port->wwpn_iid[iid].name)
955		strlcpy(msg->iid.data, port->wwpn_iid[iid].name, l);
956	mtx_unlock(&softc->ctl_lock);
957	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->iid, i, M_NOWAIT);
958	free(msg, M_CTL);
959}
960
961void
962ctl_isc_announce_mode(struct ctl_lun *lun, uint32_t initidx,
963    uint8_t page, uint8_t subpage)
964{
965	struct ctl_softc *softc = lun->ctl_softc;
966	union ctl_ha_msg *msg;
967	u_int i, l;
968
969	if (softc->ha_link != CTL_HA_LINK_ONLINE)
970		return;
971	for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
972		if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) ==
973		    page && lun->mode_pages.index[i].subpage == subpage)
974			break;
975	}
976	if (i == CTL_NUM_MODE_PAGES)
977		return;
978
979	/* Don't try to replicate pages not present on this device. */
980	if (lun->mode_pages.index[i].page_data == NULL)
981		return;
982
983	l = sizeof(msg->mode) + lun->mode_pages.index[i].page_len;
984	msg = malloc(l, M_CTL, M_WAITOK | M_ZERO);
985	msg->hdr.msg_type = CTL_MSG_MODE_SYNC;
986	msg->hdr.nexus.targ_port = initidx / CTL_MAX_INIT_PER_PORT;
987	msg->hdr.nexus.initid = initidx % CTL_MAX_INIT_PER_PORT;
988	msg->hdr.nexus.targ_lun = lun->lun;
989	msg->hdr.nexus.targ_mapped_lun = lun->lun;
990	msg->mode.page_code = page;
991	msg->mode.subpage = subpage;
992	msg->mode.page_len = lun->mode_pages.index[i].page_len;
993	memcpy(msg->mode.data, lun->mode_pages.index[i].page_data,
994	    msg->mode.page_len);
995	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->mode, l, M_WAITOK);
996	free(msg, M_CTL);
997}
998
999static void
1000ctl_isc_ha_link_up(struct ctl_softc *softc)
1001{
1002	struct ctl_port *port;
1003	struct ctl_lun *lun;
1004	union ctl_ha_msg msg;
1005	int i;
1006
1007	/* Announce this node parameters to peer for validation. */
1008	msg.login.msg_type = CTL_MSG_LOGIN;
1009	msg.login.version = CTL_HA_VERSION;
1010	msg.login.ha_mode = softc->ha_mode;
1011	msg.login.ha_id = softc->ha_id;
1012	msg.login.max_luns = ctl_max_luns;
1013	msg.login.max_ports = ctl_max_ports;
1014	msg.login.max_init_per_port = CTL_MAX_INIT_PER_PORT;
1015	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg.login, sizeof(msg.login),
1016	    M_WAITOK);
1017
1018	STAILQ_FOREACH(port, &softc->port_list, links) {
1019		ctl_isc_announce_port(port);
1020		for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
1021			if (port->wwpn_iid[i].in_use)
1022				ctl_isc_announce_iid(port, i);
1023		}
1024	}
1025	STAILQ_FOREACH(lun, &softc->lun_list, links)
1026		ctl_isc_announce_lun(lun);
1027}
1028
1029static void
1030ctl_isc_ha_link_down(struct ctl_softc *softc)
1031{
1032	struct ctl_port *port;
1033	struct ctl_lun *lun;
1034	union ctl_io *io;
1035	int i;
1036
1037	mtx_lock(&softc->ctl_lock);
1038	STAILQ_FOREACH(lun, &softc->lun_list, links) {
1039		mtx_lock(&lun->lun_lock);
1040		if (lun->flags & CTL_LUN_PEER_SC_PRIMARY) {
1041			lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
1042			ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
1043		}
1044		mtx_unlock(&lun->lun_lock);
1045
1046		mtx_unlock(&softc->ctl_lock);
1047		io = ctl_alloc_io(softc->othersc_pool);
1048		mtx_lock(&softc->ctl_lock);
1049		ctl_zero_io(io);
1050		io->io_hdr.msg_type = CTL_MSG_FAILOVER;
1051		io->io_hdr.nexus.targ_mapped_lun = lun->lun;
1052		ctl_enqueue_isc(io);
1053	}
1054
1055	STAILQ_FOREACH(port, &softc->port_list, links) {
1056		if (port->targ_port >= softc->port_min &&
1057		    port->targ_port < softc->port_max)
1058			continue;
1059		port->status &= ~CTL_PORT_STATUS_ONLINE;
1060		for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
1061			port->wwpn_iid[i].in_use = 0;
1062			free(port->wwpn_iid[i].name, M_CTL);
1063			port->wwpn_iid[i].name = NULL;
1064		}
1065	}
1066	mtx_unlock(&softc->ctl_lock);
1067}
1068
1069static void
1070ctl_isc_ua(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
1071{
1072	struct ctl_lun *lun;
1073	uint32_t iid;
1074
1075	if (len < sizeof(msg->ua)) {
1076		printf("%s: Received truncated message %d < %zu\n",
1077		    __func__, len, sizeof(msg->ua));
1078		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1079		return;
1080	}
1081
1082	mtx_lock(&softc->ctl_lock);
1083	if (msg->hdr.nexus.targ_mapped_lun >= ctl_max_luns ||
1084	    (lun = softc->ctl_luns[msg->hdr.nexus.targ_mapped_lun]) == NULL) {
1085		mtx_unlock(&softc->ctl_lock);
1086		return;
1087	}
1088	mtx_lock(&lun->lun_lock);
1089	mtx_unlock(&softc->ctl_lock);
1090	if (msg->ua.ua_type == CTL_UA_THIN_PROV_THRES && msg->ua.ua_set)
1091		memcpy(lun->ua_tpt_info, msg->ua.ua_info, 8);
1092	iid = ctl_get_initindex(&msg->hdr.nexus);
1093	if (msg->ua.ua_all) {
1094		if (msg->ua.ua_set)
1095			ctl_est_ua_all(lun, iid, msg->ua.ua_type);
1096		else
1097			ctl_clr_ua_all(lun, iid, msg->ua.ua_type);
1098	} else {
1099		if (msg->ua.ua_set)
1100			ctl_est_ua(lun, iid, msg->ua.ua_type);
1101		else
1102			ctl_clr_ua(lun, iid, msg->ua.ua_type);
1103	}
1104	mtx_unlock(&lun->lun_lock);
1105}
1106
1107static void
1108ctl_isc_lun_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
1109{
1110	struct ctl_lun *lun;
1111	struct ctl_ha_msg_lun_pr_key pr_key;
1112	int i, k;
1113	ctl_lun_flags oflags;
1114	uint32_t targ_lun;
1115
1116	if (len < offsetof(struct ctl_ha_msg_lun, data[0])) {
1117		printf("%s: Received truncated message %d < %zu\n",
1118		    __func__, len, offsetof(struct ctl_ha_msg_lun, data[0]));
1119		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1120		return;
1121	}
1122	i = msg->lun.lun_devid_len + msg->lun.pr_key_count * sizeof(pr_key);
1123	if (len < offsetof(struct ctl_ha_msg_lun, data[i])) {
1124		printf("%s: Received truncated message data %d < %zu\n",
1125		    __func__, len, offsetof(struct ctl_ha_msg_lun, data[i]));
1126		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1127		return;
1128	}
1129
1130	targ_lun = msg->hdr.nexus.targ_mapped_lun;
1131	mtx_lock(&softc->ctl_lock);
1132	if (targ_lun >= ctl_max_luns ||
1133	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
1134		mtx_unlock(&softc->ctl_lock);
1135		return;
1136	}
1137	mtx_lock(&lun->lun_lock);
1138	mtx_unlock(&softc->ctl_lock);
1139	if (lun->flags & CTL_LUN_DISABLED) {
1140		mtx_unlock(&lun->lun_lock);
1141		return;
1142	}
1143	i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0;
1144	if (msg->lun.lun_devid_len != i || (i > 0 &&
1145	    memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) {
1146		mtx_unlock(&lun->lun_lock);
1147		printf("%s: Received conflicting HA LUN %d\n",
1148		    __func__, targ_lun);
1149		return;
1150	} else {
1151		/* Record whether peer is primary. */
1152		oflags = lun->flags;
1153		if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) &&
1154		    (msg->lun.flags & CTL_LUN_DISABLED) == 0)
1155			lun->flags |= CTL_LUN_PEER_SC_PRIMARY;
1156		else
1157			lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
1158		if (oflags != lun->flags)
1159			ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
1160
1161		/* If peer is primary and we are not -- use data */
1162		if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
1163		    (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) {
1164			lun->pr_generation = msg->lun.pr_generation;
1165			lun->pr_res_idx = msg->lun.pr_res_idx;
1166			lun->pr_res_type = msg->lun.pr_res_type;
1167			lun->pr_key_count = msg->lun.pr_key_count;
1168			for (k = 0; k < CTL_MAX_INITIATORS; k++)
1169				ctl_clr_prkey(lun, k);
1170			for (k = 0; k < msg->lun.pr_key_count; k++) {
1171				memcpy(&pr_key, &msg->lun.data[i],
1172				    sizeof(pr_key));
1173				ctl_alloc_prkey(lun, pr_key.pr_iid);
1174				ctl_set_prkey(lun, pr_key.pr_iid,
1175				    pr_key.pr_key);
1176				i += sizeof(pr_key);
1177			}
1178		}
1179
1180		mtx_unlock(&lun->lun_lock);
1181		CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n",
1182		    __func__, targ_lun,
1183		    (msg->lun.flags & CTL_LUN_PRIMARY_SC) ?
1184		    "primary" : "secondary"));
1185
1186		/* If we are primary but peer doesn't know -- notify */
1187		if ((lun->flags & CTL_LUN_PRIMARY_SC) &&
1188		    (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0)
1189			ctl_isc_announce_lun(lun);
1190	}
1191}
1192
1193static void
1194ctl_isc_port_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
1195{
1196	struct ctl_port *port;
1197	struct ctl_lun *lun;
1198	int i, new;
1199
1200	if (len < offsetof(struct ctl_ha_msg_port, data[0])) {
1201		printf("%s: Received truncated message %d < %zu\n",
1202		    __func__, len, offsetof(struct ctl_ha_msg_port, data[0]));
1203		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1204		return;
1205	}
1206	i = msg->port.name_len + msg->port.lun_map_len +
1207	    msg->port.port_devid_len + msg->port.target_devid_len +
1208	    msg->port.init_devid_len;
1209	if (len < offsetof(struct ctl_ha_msg_port, data[i])) {
1210		printf("%s: Received truncated message data %d < %zu\n",
1211		    __func__, len, offsetof(struct ctl_ha_msg_port, data[i]));
1212		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1213		return;
1214	}
1215
1216	port = softc->ctl_ports[msg->hdr.nexus.targ_port];
1217	if (port == NULL) {
1218		CTL_DEBUG_PRINT(("%s: New port %d\n", __func__,
1219		    msg->hdr.nexus.targ_port));
1220		new = 1;
1221		port = malloc(sizeof(*port), M_CTL, M_WAITOK | M_ZERO);
1222		port->frontend = &ha_frontend;
1223		port->targ_port = msg->hdr.nexus.targ_port;
1224		port->fe_datamove = ctl_ha_datamove;
1225		port->fe_done = ctl_ha_done;
1226	} else if (port->frontend == &ha_frontend) {
1227		CTL_DEBUG_PRINT(("%s: Updated port %d\n", __func__,
1228		    msg->hdr.nexus.targ_port));
1229		new = 0;
1230	} else {
1231		printf("%s: Received conflicting HA port %d\n",
1232		    __func__, msg->hdr.nexus.targ_port);
1233		return;
1234	}
1235	port->port_type = msg->port.port_type;
1236	port->physical_port = msg->port.physical_port;
1237	port->virtual_port = msg->port.virtual_port;
1238	port->status = msg->port.status;
1239	i = 0;
1240	free(port->port_name, M_CTL);
1241	port->port_name = strndup(&msg->port.data[i], msg->port.name_len,
1242	    M_CTL);
1243	i += msg->port.name_len;
1244	if (msg->port.lun_map_len != 0) {
1245		if (port->lun_map == NULL ||
1246		    port->lun_map_size * sizeof(uint32_t) <
1247		    msg->port.lun_map_len) {
1248			port->lun_map_size = 0;
1249			free(port->lun_map, M_CTL);
1250			port->lun_map = malloc(msg->port.lun_map_len,
1251			    M_CTL, M_WAITOK);
1252		}
1253		memcpy(port->lun_map, &msg->port.data[i], msg->port.lun_map_len);
1254		port->lun_map_size = msg->port.lun_map_len / sizeof(uint32_t);
1255		i += msg->port.lun_map_len;
1256	} else {
1257		port->lun_map_size = 0;
1258		free(port->lun_map, M_CTL);
1259		port->lun_map = NULL;
1260	}
1261	if (msg->port.port_devid_len != 0) {
1262		if (port->port_devid == NULL ||
1263		    port->port_devid->len < msg->port.port_devid_len) {
1264			free(port->port_devid, M_CTL);
1265			port->port_devid = malloc(sizeof(struct ctl_devid) +
1266			    msg->port.port_devid_len, M_CTL, M_WAITOK);
1267		}
1268		memcpy(port->port_devid->data, &msg->port.data[i],
1269		    msg->port.port_devid_len);
1270		port->port_devid->len = msg->port.port_devid_len;
1271		i += msg->port.port_devid_len;
1272	} else {
1273		free(port->port_devid, M_CTL);
1274		port->port_devid = NULL;
1275	}
1276	if (msg->port.target_devid_len != 0) {
1277		if (port->target_devid == NULL ||
1278		    port->target_devid->len < msg->port.target_devid_len) {
1279			free(port->target_devid, M_CTL);
1280			port->target_devid = malloc(sizeof(struct ctl_devid) +
1281			    msg->port.target_devid_len, M_CTL, M_WAITOK);
1282		}
1283		memcpy(port->target_devid->data, &msg->port.data[i],
1284		    msg->port.target_devid_len);
1285		port->target_devid->len = msg->port.target_devid_len;
1286		i += msg->port.target_devid_len;
1287	} else {
1288		free(port->target_devid, M_CTL);
1289		port->target_devid = NULL;
1290	}
1291	if (msg->port.init_devid_len != 0) {
1292		if (port->init_devid == NULL ||
1293		    port->init_devid->len < msg->port.init_devid_len) {
1294			free(port->init_devid, M_CTL);
1295			port->init_devid = malloc(sizeof(struct ctl_devid) +
1296			    msg->port.init_devid_len, M_CTL, M_WAITOK);
1297		}
1298		memcpy(port->init_devid->data, &msg->port.data[i],
1299		    msg->port.init_devid_len);
1300		port->init_devid->len = msg->port.init_devid_len;
1301		i += msg->port.init_devid_len;
1302	} else {
1303		free(port->init_devid, M_CTL);
1304		port->init_devid = NULL;
1305	}
1306	if (new) {
1307		if (ctl_port_register(port) != 0) {
1308			printf("%s: ctl_port_register() failed with error\n",
1309			    __func__);
1310		}
1311	}
1312	mtx_lock(&softc->ctl_lock);
1313	STAILQ_FOREACH(lun, &softc->lun_list, links) {
1314		if (ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
1315			continue;
1316		mtx_lock(&lun->lun_lock);
1317		ctl_est_ua_all(lun, -1, CTL_UA_INQ_CHANGE);
1318		mtx_unlock(&lun->lun_lock);
1319	}
1320	mtx_unlock(&softc->ctl_lock);
1321}
1322
1323static void
1324ctl_isc_iid_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
1325{
1326	struct ctl_port *port;
1327	int i, iid;
1328
1329	if (len < offsetof(struct ctl_ha_msg_iid, data[0])) {
1330		printf("%s: Received truncated message %d < %zu\n",
1331		    __func__, len, offsetof(struct ctl_ha_msg_iid, data[0]));
1332		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1333		return;
1334	}
1335	i = msg->iid.name_len;
1336	if (len < offsetof(struct ctl_ha_msg_iid, data[i])) {
1337		printf("%s: Received truncated message data %d < %zu\n",
1338		    __func__, len, offsetof(struct ctl_ha_msg_iid, data[i]));
1339		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1340		return;
1341	}
1342
1343	port = softc->ctl_ports[msg->hdr.nexus.targ_port];
1344	if (port == NULL) {
1345		printf("%s: Received IID for unknown port %d\n",
1346		    __func__, msg->hdr.nexus.targ_port);
1347		return;
1348	}
1349	iid = msg->hdr.nexus.initid;
1350	if (port->wwpn_iid[iid].in_use != 0 &&
1351	    msg->iid.in_use == 0)
1352		ctl_i_t_nexus_loss(softc, iid, CTL_UA_POWERON);
1353	port->wwpn_iid[iid].in_use = msg->iid.in_use;
1354	port->wwpn_iid[iid].wwpn = msg->iid.wwpn;
1355	free(port->wwpn_iid[iid].name, M_CTL);
1356	if (msg->iid.name_len) {
1357		port->wwpn_iid[iid].name = strndup(&msg->iid.data[0],
1358		    msg->iid.name_len, M_CTL);
1359	} else
1360		port->wwpn_iid[iid].name = NULL;
1361}
1362
1363static void
1364ctl_isc_login(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
1365{
1366
1367	if (len < sizeof(msg->login)) {
1368		printf("%s: Received truncated message %d < %zu\n",
1369		    __func__, len, sizeof(msg->login));
1370		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1371		return;
1372	}
1373
1374	if (msg->login.version != CTL_HA_VERSION) {
1375		printf("CTL HA peers have different versions %d != %d\n",
1376		    msg->login.version, CTL_HA_VERSION);
1377		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1378		return;
1379	}
1380	if (msg->login.ha_mode != softc->ha_mode) {
1381		printf("CTL HA peers have different ha_mode %d != %d\n",
1382		    msg->login.ha_mode, softc->ha_mode);
1383		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1384		return;
1385	}
1386	if (msg->login.ha_id == softc->ha_id) {
1387		printf("CTL HA peers have same ha_id %d\n", msg->login.ha_id);
1388		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1389		return;
1390	}
1391	if (msg->login.max_luns != ctl_max_luns ||
1392	    msg->login.max_ports != ctl_max_ports ||
1393	    msg->login.max_init_per_port != CTL_MAX_INIT_PER_PORT) {
1394		printf("CTL HA peers have different limits\n");
1395		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1396		return;
1397	}
1398}
1399
1400static void
1401ctl_isc_mode_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
1402{
1403	struct ctl_lun *lun;
1404	u_int i;
1405	uint32_t initidx, targ_lun;
1406
1407	if (len < offsetof(struct ctl_ha_msg_mode, data[0])) {
1408		printf("%s: Received truncated message %d < %zu\n",
1409		    __func__, len, offsetof(struct ctl_ha_msg_mode, data[0]));
1410		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1411		return;
1412	}
1413	i = msg->mode.page_len;
1414	if (len < offsetof(struct ctl_ha_msg_mode, data[i])) {
1415		printf("%s: Received truncated message data %d < %zu\n",
1416		    __func__, len, offsetof(struct ctl_ha_msg_mode, data[i]));
1417		ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1418		return;
1419	}
1420
1421	targ_lun = msg->hdr.nexus.targ_mapped_lun;
1422	mtx_lock(&softc->ctl_lock);
1423	if (targ_lun >= ctl_max_luns ||
1424	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
1425		mtx_unlock(&softc->ctl_lock);
1426		return;
1427	}
1428	mtx_lock(&lun->lun_lock);
1429	mtx_unlock(&softc->ctl_lock);
1430	if (lun->flags & CTL_LUN_DISABLED) {
1431		mtx_unlock(&lun->lun_lock);
1432		return;
1433	}
1434	for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
1435		if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) ==
1436		    msg->mode.page_code &&
1437		    lun->mode_pages.index[i].subpage == msg->mode.subpage)
1438			break;
1439	}
1440	if (i == CTL_NUM_MODE_PAGES) {
1441		mtx_unlock(&lun->lun_lock);
1442		return;
1443	}
1444	memcpy(lun->mode_pages.index[i].page_data, msg->mode.data,
1445	    min(lun->mode_pages.index[i].page_len, msg->mode.page_len));
1446	initidx = ctl_get_initindex(&msg->hdr.nexus);
1447	if (initidx != -1)
1448		ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE);
1449	mtx_unlock(&lun->lun_lock);
1450}
1451
1452/*
1453 * ISC (Inter Shelf Communication) event handler.  Events from the HA
1454 * subsystem come in here.
1455 */
1456static void
1457ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
1458{
1459	struct ctl_softc *softc = control_softc;
1460	union ctl_io *io;
1461	struct ctl_prio *presio;
1462	ctl_ha_status isc_status;
1463
1464	CTL_DEBUG_PRINT(("CTL: Isc Msg event %d\n", event));
1465	if (event == CTL_HA_EVT_MSG_RECV) {
1466		union ctl_ha_msg *msg, msgbuf;
1467
1468		if (param > sizeof(msgbuf))
1469			msg = malloc(param, M_CTL, M_WAITOK);
1470		else
1471			msg = &msgbuf;
1472		isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_CTL, msg, param,
1473		    M_WAITOK);
1474		if (isc_status != CTL_HA_STATUS_SUCCESS) {
1475			printf("%s: Error receiving message: %d\n",
1476			    __func__, isc_status);
1477			if (msg != &msgbuf)
1478				free(msg, M_CTL);
1479			return;
1480		}
1481
1482		CTL_DEBUG_PRINT(("CTL: msg_type %d len %d\n",
1483		    msg->hdr.msg_type, param));
1484		switch (msg->hdr.msg_type) {
1485		case CTL_MSG_SERIALIZE:
1486			io = ctl_alloc_io(softc->othersc_pool);
1487			ctl_zero_io(io);
1488			// populate ctsio from msg
1489			io->io_hdr.io_type = CTL_IO_SCSI;
1490			io->io_hdr.msg_type = CTL_MSG_SERIALIZE;
1491			io->io_hdr.remote_io = msg->hdr.original_sc;
1492			io->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC |
1493					    CTL_FLAG_IO_ACTIVE;
1494			/*
1495			 * If we're in serialization-only mode, we don't
1496			 * want to go through full done processing.  Thus
1497			 * the COPY flag.
1498			 *
1499			 * XXX KDM add another flag that is more specific.
1500			 */
1501			if (softc->ha_mode != CTL_HA_MODE_XFER)
1502				io->io_hdr.flags |= CTL_FLAG_INT_COPY;
1503			io->io_hdr.nexus = msg->hdr.nexus;
1504			io->scsiio.priority = msg->scsi.priority;
1505			io->scsiio.tag_num = msg->scsi.tag_num;
1506			io->scsiio.tag_type = msg->scsi.tag_type;
1507#ifdef CTL_TIME_IO
1508			io->io_hdr.start_time = time_uptime;
1509			getbinuptime(&io->io_hdr.start_bt);
1510#endif /* CTL_TIME_IO */
1511			io->scsiio.cdb_len = msg->scsi.cdb_len;
1512			memcpy(io->scsiio.cdb, msg->scsi.cdb,
1513			       CTL_MAX_CDBLEN);
1514			if (softc->ha_mode == CTL_HA_MODE_XFER) {
1515				const struct ctl_cmd_entry *entry;
1516
1517				entry = ctl_get_cmd_entry(&io->scsiio, NULL);
1518				io->io_hdr.flags &= ~CTL_FLAG_DATA_MASK;
1519				io->io_hdr.flags |=
1520					entry->flags & CTL_FLAG_DATA_MASK;
1521			}
1522			ctl_enqueue_isc(io);
1523			break;
1524
1525		/* Performed on the Originating SC, XFER mode only */
1526		case CTL_MSG_DATAMOVE: {
1527			struct ctl_sg_entry *sgl;
1528			int i, j;
1529
1530			io = msg->hdr.original_sc;
1531			if (io == NULL) {
1532				printf("%s: original_sc == NULL!\n", __func__);
1533				/* XXX KDM do something here */
1534				break;
1535			}
1536			CTL_IO_ASSERT(io, SCSI);
1537
1538			io->io_hdr.msg_type = CTL_MSG_DATAMOVE;
1539			io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
1540			/*
1541			 * Keep track of this, we need to send it back over
1542			 * when the datamove is complete.
1543			 */
1544			io->io_hdr.remote_io = msg->hdr.serializing_sc;
1545			if (msg->hdr.status == CTL_SUCCESS)
1546				io->io_hdr.status = msg->hdr.status;
1547
1548			if (msg->dt.sg_sequence == 0) {
1549#ifdef CTL_TIME_IO
1550				getbinuptime(&io->io_hdr.dma_start_bt);
1551#endif
1552				i = msg->dt.kern_sg_entries +
1553				    msg->dt.kern_data_len /
1554				    CTL_HA_DATAMOVE_SEGMENT + 1;
1555				sgl = malloc(sizeof(*sgl) * i, M_CTL,
1556				    M_WAITOK | M_ZERO);
1557				CTL_RSGL(io) = sgl;
1558				CTL_LSGL(io) = &sgl[msg->dt.kern_sg_entries];
1559
1560				io->scsiio.kern_data_ptr = (uint8_t *)sgl;
1561
1562				io->scsiio.kern_sg_entries =
1563					msg->dt.kern_sg_entries;
1564				io->scsiio.rem_sg_entries =
1565					msg->dt.kern_sg_entries;
1566				io->scsiio.kern_data_len =
1567					msg->dt.kern_data_len;
1568				io->scsiio.kern_total_len =
1569					msg->dt.kern_total_len;
1570				io->scsiio.kern_data_resid =
1571					msg->dt.kern_data_resid;
1572				io->scsiio.kern_rel_offset =
1573					msg->dt.kern_rel_offset;
1574				io->io_hdr.flags &= ~CTL_FLAG_BUS_ADDR;
1575				io->io_hdr.flags |= msg->dt.flags &
1576				    CTL_FLAG_BUS_ADDR;
1577			} else
1578				sgl = (struct ctl_sg_entry *)
1579					io->scsiio.kern_data_ptr;
1580
1581			for (i = msg->dt.sent_sg_entries, j = 0;
1582			     i < (msg->dt.sent_sg_entries +
1583			     msg->dt.cur_sg_entries); i++, j++) {
1584				sgl[i].addr = msg->dt.sg_list[j].addr;
1585				sgl[i].len = msg->dt.sg_list[j].len;
1586			}
1587
1588			/*
1589			 * If this is the last piece of the I/O, we've got
1590			 * the full S/G list.  Queue processing in the thread.
1591			 * Otherwise wait for the next piece.
1592			 */
1593			if (msg->dt.sg_last != 0)
1594				ctl_enqueue_isc(io);
1595			break;
1596		}
1597		/* Performed on the Serializing (primary) SC, XFER mode only */
1598		case CTL_MSG_DATAMOVE_DONE: {
1599			if (msg->hdr.serializing_sc == NULL) {
1600				printf("%s: serializing_sc == NULL!\n",
1601				       __func__);
1602				/* XXX KDM now what? */
1603				break;
1604			}
1605			/*
1606			 * We grab the sense information here in case
1607			 * there was a failure, so we can return status
1608			 * back to the initiator.
1609			 */
1610			io = msg->hdr.serializing_sc;
1611			CTL_IO_ASSERT(io, SCSI);
1612
1613			io->io_hdr.msg_type = CTL_MSG_DATAMOVE_DONE;
1614			io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
1615			io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
1616			io->io_hdr.port_status = msg->scsi.port_status;
1617			io->scsiio.kern_data_resid = msg->scsi.kern_data_resid;
1618			if (msg->hdr.status != CTL_STATUS_NONE) {
1619				io->io_hdr.status = msg->hdr.status;
1620				io->scsiio.scsi_status = msg->scsi.scsi_status;
1621				io->scsiio.sense_len = msg->scsi.sense_len;
1622				memcpy(&io->scsiio.sense_data,
1623				    &msg->scsi.sense_data,
1624				    msg->scsi.sense_len);
1625				if (msg->hdr.status == CTL_SUCCESS)
1626					io->io_hdr.flags |= CTL_FLAG_STATUS_SENT;
1627			}
1628			ctl_enqueue_isc(io);
1629			break;
1630		}
1631
1632		/* Preformed on Originating SC, SER_ONLY mode */
1633		case CTL_MSG_R2R:
1634			io = msg->hdr.original_sc;
1635			if (io == NULL) {
1636				printf("%s: original_sc == NULL!\n",
1637				    __func__);
1638				break;
1639			}
1640			io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
1641			io->io_hdr.msg_type = CTL_MSG_R2R;
1642			io->io_hdr.remote_io = msg->hdr.serializing_sc;
1643			ctl_enqueue_isc(io);
1644			break;
1645
1646		/*
1647		 * Performed on Serializing(i.e. primary SC) SC in SER_ONLY
1648		 * mode.
1649		 * Performed on the Originating (i.e. secondary) SC in XFER
1650		 * mode
1651		 */
1652		case CTL_MSG_FINISH_IO:
1653			if (softc->ha_mode == CTL_HA_MODE_XFER)
1654				ctl_isc_handler_finish_xfer(softc, msg);
1655			else
1656				ctl_isc_handler_finish_ser_only(softc, msg);
1657			break;
1658
1659		/* Preformed on Originating SC */
1660		case CTL_MSG_BAD_JUJU:
1661			io = msg->hdr.original_sc;
1662			if (io == NULL) {
1663				printf("%s: Bad JUJU!, original_sc is NULL!\n",
1664				       __func__);
1665				break;
1666			}
1667			ctl_copy_sense_data(msg, io);
1668			/*
1669			 * IO should have already been cleaned up on other
1670			 * SC so clear this flag so we won't send a message
1671			 * back to finish the IO there.
1672			 */
1673			io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC;
1674			io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
1675
1676			/* io = msg->hdr.serializing_sc; */
1677			io->io_hdr.msg_type = CTL_MSG_BAD_JUJU;
1678			ctl_enqueue_isc(io);
1679			break;
1680
1681		/* Handle resets sent from the other side */
1682		case CTL_MSG_MANAGE_TASKS: {
1683			struct ctl_taskio *taskio;
1684			taskio = (struct ctl_taskio *)ctl_alloc_io(
1685			    softc->othersc_pool);
1686			ctl_zero_io((union ctl_io *)taskio);
1687			taskio->io_hdr.io_type = CTL_IO_TASK;
1688			taskio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC;
1689			taskio->io_hdr.nexus = msg->hdr.nexus;
1690			taskio->task_action = msg->task.task_action;
1691			taskio->tag_num = msg->task.tag_num;
1692			taskio->tag_type = msg->task.tag_type;
1693#ifdef CTL_TIME_IO
1694			taskio->io_hdr.start_time = time_uptime;
1695			getbinuptime(&taskio->io_hdr.start_bt);
1696#endif /* CTL_TIME_IO */
1697			ctl_run_task((union ctl_io *)taskio);
1698			break;
1699		}
1700		/* Persistent Reserve action which needs attention */
1701		case CTL_MSG_PERS_ACTION:
1702			presio = (struct ctl_prio *)ctl_alloc_io(
1703			    softc->othersc_pool);
1704			ctl_zero_io((union ctl_io *)presio);
1705			presio->io_hdr.msg_type = CTL_MSG_PERS_ACTION;
1706			presio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC;
1707			presio->io_hdr.nexus = msg->hdr.nexus;
1708			presio->pr_msg = msg->pr;
1709			ctl_enqueue_isc((union ctl_io *)presio);
1710			break;
1711		case CTL_MSG_UA:
1712			ctl_isc_ua(softc, msg, param);
1713			break;
1714		case CTL_MSG_PORT_SYNC:
1715			ctl_isc_port_sync(softc, msg, param);
1716			break;
1717		case CTL_MSG_LUN_SYNC:
1718			ctl_isc_lun_sync(softc, msg, param);
1719			break;
1720		case CTL_MSG_IID_SYNC:
1721			ctl_isc_iid_sync(softc, msg, param);
1722			break;
1723		case CTL_MSG_LOGIN:
1724			ctl_isc_login(softc, msg, param);
1725			break;
1726		case CTL_MSG_MODE_SYNC:
1727			ctl_isc_mode_sync(softc, msg, param);
1728			break;
1729		default:
1730			printf("Received HA message of unknown type %d\n",
1731			    msg->hdr.msg_type);
1732			ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
1733			break;
1734		}
1735		if (msg != &msgbuf)
1736			free(msg, M_CTL);
1737	} else if (event == CTL_HA_EVT_LINK_CHANGE) {
1738		printf("CTL: HA link status changed from %d to %d\n",
1739		    softc->ha_link, param);
1740		if (param == softc->ha_link)
1741			return;
1742		if (softc->ha_link == CTL_HA_LINK_ONLINE) {
1743			softc->ha_link = param;
1744			ctl_isc_ha_link_down(softc);
1745		} else {
1746			softc->ha_link = param;
1747			if (softc->ha_link == CTL_HA_LINK_ONLINE)
1748				ctl_isc_ha_link_up(softc);
1749		}
1750		return;
1751	} else {
1752		printf("ctl_isc_event_handler: Unknown event %d\n", event);
1753		return;
1754	}
1755}
1756
1757static void
1758ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest)
1759{
1760
1761	memcpy(&dest->scsiio.sense_data, &src->scsi.sense_data,
1762	    src->scsi.sense_len);
1763	dest->scsiio.scsi_status = src->scsi.scsi_status;
1764	dest->scsiio.sense_len = src->scsi.sense_len;
1765	dest->io_hdr.status = src->hdr.status;
1766}
1767
1768static void
1769ctl_copy_sense_data_back(union ctl_io *src, union ctl_ha_msg *dest)
1770{
1771
1772	memcpy(&dest->scsi.sense_data, &src->scsiio.sense_data,
1773	    src->scsiio.sense_len);
1774	dest->scsi.scsi_status = src->scsiio.scsi_status;
1775	dest->scsi.sense_len = src->scsiio.sense_len;
1776	dest->hdr.status = src->io_hdr.status;
1777}
1778
1779void
1780ctl_est_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua)
1781{
1782	struct ctl_softc *softc = lun->ctl_softc;
1783	ctl_ua_type *pu;
1784
1785	if (initidx < softc->init_min || initidx >= softc->init_max)
1786		return;
1787	mtx_assert(&lun->lun_lock, MA_OWNED);
1788	pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT];
1789	if (pu == NULL)
1790		return;
1791	pu[initidx % CTL_MAX_INIT_PER_PORT] |= ua;
1792}
1793
1794void
1795ctl_est_ua_port(struct ctl_lun *lun, int port, uint32_t except, ctl_ua_type ua)
1796{
1797	int i;
1798
1799	mtx_assert(&lun->lun_lock, MA_OWNED);
1800	if (lun->pending_ua[port] == NULL)
1801		return;
1802	for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
1803		if (port * CTL_MAX_INIT_PER_PORT + i == except)
1804			continue;
1805		lun->pending_ua[port][i] |= ua;
1806	}
1807}
1808
1809void
1810ctl_est_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua)
1811{
1812	struct ctl_softc *softc = lun->ctl_softc;
1813	int i;
1814
1815	mtx_assert(&lun->lun_lock, MA_OWNED);
1816	for (i = softc->port_min; i < softc->port_max; i++)
1817		ctl_est_ua_port(lun, i, except, ua);
1818}
1819
1820void
1821ctl_clr_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua)
1822{
1823	struct ctl_softc *softc = lun->ctl_softc;
1824	ctl_ua_type *pu;
1825
1826	if (initidx < softc->init_min || initidx >= softc->init_max)
1827		return;
1828	mtx_assert(&lun->lun_lock, MA_OWNED);
1829	pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT];
1830	if (pu == NULL)
1831		return;
1832	pu[initidx % CTL_MAX_INIT_PER_PORT] &= ~ua;
1833}
1834
1835void
1836ctl_clr_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua)
1837{
1838	struct ctl_softc *softc = lun->ctl_softc;
1839	int i, j;
1840
1841	mtx_assert(&lun->lun_lock, MA_OWNED);
1842	for (i = softc->port_min; i < softc->port_max; i++) {
1843		if (lun->pending_ua[i] == NULL)
1844			continue;
1845		for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) {
1846			if (i * CTL_MAX_INIT_PER_PORT + j == except)
1847				continue;
1848			lun->pending_ua[i][j] &= ~ua;
1849		}
1850	}
1851}
1852
1853void
1854ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx,
1855    ctl_ua_type ua_type)
1856{
1857	struct ctl_lun *lun;
1858
1859	mtx_assert(&ctl_softc->ctl_lock, MA_OWNED);
1860	STAILQ_FOREACH(lun, &ctl_softc->lun_list, links) {
1861		mtx_lock(&lun->lun_lock);
1862		ctl_clr_ua(lun, initidx, ua_type);
1863		mtx_unlock(&lun->lun_lock);
1864	}
1865}
1866
1867static int
1868ctl_ha_role_sysctl(SYSCTL_HANDLER_ARGS)
1869{
1870	struct ctl_softc *softc = (struct ctl_softc *)arg1;
1871	struct ctl_lun *lun;
1872	struct ctl_lun_req ireq;
1873	int error, value;
1874
1875	value = (softc->flags & CTL_FLAG_ACTIVE_SHELF) ? 0 : 1;
1876	error = sysctl_handle_int(oidp, &value, 0, req);
1877	if ((error != 0) || (req->newptr == NULL))
1878		return (error);
1879
1880	mtx_lock(&softc->ctl_lock);
1881	if (value == 0)
1882		softc->flags |= CTL_FLAG_ACTIVE_SHELF;
1883	else
1884		softc->flags &= ~CTL_FLAG_ACTIVE_SHELF;
1885	STAILQ_FOREACH(lun, &softc->lun_list, links) {
1886		mtx_unlock(&softc->ctl_lock);
1887		bzero(&ireq, sizeof(ireq));
1888		ireq.reqtype = CTL_LUNREQ_MODIFY;
1889		ireq.reqdata.modify.lun_id = lun->lun;
1890		lun->backend->ioctl(NULL, CTL_LUN_REQ, (caddr_t)&ireq, 0,
1891		    curthread);
1892		if (ireq.status != CTL_LUN_OK) {
1893			printf("%s: CTL_LUNREQ_MODIFY returned %d '%s'\n",
1894			    __func__, ireq.status, ireq.error_str);
1895		}
1896		mtx_lock(&softc->ctl_lock);
1897	}
1898	mtx_unlock(&softc->ctl_lock);
1899	return (0);
1900}
1901
1902static int
1903ctl_init(void)
1904{
1905	struct make_dev_args args;
1906	struct ctl_softc *softc;
1907	int i, error;
1908
1909	softc = control_softc = malloc(sizeof(*control_softc), M_DEVBUF,
1910			       M_WAITOK | M_ZERO);
1911
1912	make_dev_args_init(&args);
1913	args.mda_devsw = &ctl_cdevsw;
1914	args.mda_uid = UID_ROOT;
1915	args.mda_gid = GID_OPERATOR;
1916	args.mda_mode = 0600;
1917	args.mda_si_drv1 = softc;
1918	args.mda_si_drv2 = NULL;
1919	error = make_dev_s(&args, &softc->dev, "cam/ctl");
1920	if (error != 0) {
1921		free(softc, M_DEVBUF);
1922		control_softc = NULL;
1923		return (error);
1924	}
1925
1926	sysctl_ctx_init(&softc->sysctl_ctx);
1927	softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
1928		SYSCTL_STATIC_CHILDREN(_kern_cam), OID_AUTO, "ctl",
1929		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "CAM Target Layer");
1930
1931	if (softc->sysctl_tree == NULL) {
1932		printf("%s: unable to allocate sysctl tree\n", __func__);
1933		destroy_dev(softc->dev);
1934		free(softc, M_DEVBUF);
1935		control_softc = NULL;
1936		return (ENOMEM);
1937	}
1938
1939	mtx_init(&softc->ctl_lock, "CTL mutex", NULL, MTX_DEF);
1940	softc->io_zone = uma_zcreate("CTL IO", sizeof(union ctl_io),
1941	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1942	softc->flags = 0;
1943
1944	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
1945	    OID_AUTO, "ha_mode", CTLFLAG_RDTUN, (int *)&softc->ha_mode, 0,
1946	    "HA mode (0 - act/stby, 1 - serialize only, 2 - xfer)");
1947
1948	if (ctl_max_luns <= 0 || powerof2(ctl_max_luns) == 0) {
1949		printf("Bad value %d for kern.cam.ctl.max_luns, must be a power of two, using %d\n",
1950		    ctl_max_luns, CTL_DEFAULT_MAX_LUNS);
1951		ctl_max_luns = CTL_DEFAULT_MAX_LUNS;
1952	}
1953	softc->ctl_luns = malloc(sizeof(struct ctl_lun *) * ctl_max_luns,
1954	    M_DEVBUF, M_WAITOK | M_ZERO);
1955	softc->ctl_lun_mask = malloc(sizeof(uint32_t) *
1956	    ((ctl_max_luns + 31) / 32), M_DEVBUF, M_WAITOK | M_ZERO);
1957	if (ctl_max_ports <= 0 || powerof2(ctl_max_ports) == 0) {
1958		printf("Bad value %d for kern.cam.ctl.max_ports, must be a power of two, using %d\n",
1959		    ctl_max_ports, CTL_DEFAULT_MAX_PORTS);
1960		ctl_max_ports = CTL_DEFAULT_MAX_PORTS;
1961	}
1962	softc->ctl_port_mask = malloc(sizeof(uint32_t) *
1963	  ((ctl_max_ports + 31) / 32), M_DEVBUF, M_WAITOK | M_ZERO);
1964	softc->ctl_ports = malloc(sizeof(struct ctl_port *) * ctl_max_ports,
1965	     M_DEVBUF, M_WAITOK | M_ZERO);
1966
1967	/*
1968	 * In Copan's HA scheme, the "master" and "slave" roles are
1969	 * figured out through the slot the controller is in.  Although it
1970	 * is an active/active system, someone has to be in charge.
1971	 */
1972	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
1973	    OID_AUTO, "ha_id", CTLFLAG_RDTUN, &softc->ha_id, 0,
1974	    "HA head ID (0 - no HA)");
1975	if (softc->ha_id == 0 || softc->ha_id > NUM_HA_SHELVES) {
1976		softc->flags |= CTL_FLAG_ACTIVE_SHELF;
1977		softc->is_single = 1;
1978		softc->port_cnt = ctl_max_ports;
1979		softc->port_min = 0;
1980	} else {
1981		softc->port_cnt = ctl_max_ports / NUM_HA_SHELVES;
1982		softc->port_min = (softc->ha_id - 1) * softc->port_cnt;
1983	}
1984	softc->port_max = softc->port_min + softc->port_cnt;
1985	softc->init_min = softc->port_min * CTL_MAX_INIT_PER_PORT;
1986	softc->init_max = softc->port_max * CTL_MAX_INIT_PER_PORT;
1987
1988	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
1989	    OID_AUTO, "ha_link", CTLFLAG_RD, (int *)&softc->ha_link, 0,
1990	    "HA link state (0 - offline, 1 - unknown, 2 - online)");
1991
1992	STAILQ_INIT(&softc->lun_list);
1993	STAILQ_INIT(&softc->fe_list);
1994	STAILQ_INIT(&softc->port_list);
1995	STAILQ_INIT(&softc->be_list);
1996	ctl_tpc_init(softc);
1997
1998	if (worker_threads <= 0)
1999		worker_threads = max(1, mp_ncpus / 4);
2000	if (worker_threads > CTL_MAX_THREADS)
2001		worker_threads = CTL_MAX_THREADS;
2002
2003	for (i = 0; i < worker_threads; i++) {
2004		struct ctl_thread *thr = &softc->threads[i];
2005
2006		mtx_init(&thr->queue_lock, "CTL queue mutex", NULL, MTX_DEF);
2007		thr->ctl_softc = softc;
2008		STAILQ_INIT(&thr->incoming_queue);
2009		STAILQ_INIT(&thr->rtr_queue);
2010		STAILQ_INIT(&thr->done_queue);
2011		STAILQ_INIT(&thr->isc_queue);
2012
2013		error = kproc_kthread_add(ctl_work_thread, thr,
2014		    &softc->ctl_proc, &thr->thread, 0, 0, "ctl", "work%d", i);
2015		if (error != 0) {
2016			printf("error creating CTL work thread!\n");
2017			return (error);
2018		}
2019	}
2020	error = kproc_kthread_add(ctl_thresh_thread, softc,
2021	    &softc->ctl_proc, &softc->thresh_thread, 0, 0, "ctl", "thresh");
2022	if (error != 0) {
2023		printf("error creating CTL threshold thread!\n");
2024		return (error);
2025	}
2026
2027	SYSCTL_ADD_PROC(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
2028	    OID_AUTO, "ha_role",
2029	    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
2030	    softc, 0, ctl_ha_role_sysctl, "I", "HA role for this head");
2031
2032	if (softc->is_single == 0) {
2033		if (ctl_frontend_register(&ha_frontend) != 0)
2034			softc->is_single = 1;
2035	}
2036	return (0);
2037}
2038
2039static int
2040ctl_shutdown(void)
2041{
2042	struct ctl_softc *softc = control_softc;
2043	int i;
2044
2045	if (softc->is_single == 0)
2046		ctl_frontend_deregister(&ha_frontend);
2047
2048	destroy_dev(softc->dev);
2049
2050	/* Shutdown CTL threads. */
2051	softc->shutdown = 1;
2052	for (i = 0; i < worker_threads; i++) {
2053		struct ctl_thread *thr = &softc->threads[i];
2054		while (thr->thread != NULL) {
2055			wakeup(thr);
2056			if (thr->thread != NULL)
2057				pause("CTL thr shutdown", 1);
2058		}
2059		mtx_destroy(&thr->queue_lock);
2060	}
2061	while (softc->thresh_thread != NULL) {
2062		wakeup(softc->thresh_thread);
2063		if (softc->thresh_thread != NULL)
2064			pause("CTL thr shutdown", 1);
2065	}
2066
2067	ctl_tpc_shutdown(softc);
2068	uma_zdestroy(softc->io_zone);
2069	mtx_destroy(&softc->ctl_lock);
2070
2071	free(softc->ctl_luns, M_DEVBUF);
2072	free(softc->ctl_lun_mask, M_DEVBUF);
2073	free(softc->ctl_port_mask, M_DEVBUF);
2074	free(softc->ctl_ports, M_DEVBUF);
2075
2076	sysctl_ctx_free(&softc->sysctl_ctx);
2077
2078	free(softc, M_DEVBUF);
2079	control_softc = NULL;
2080	return (0);
2081}
2082
2083static int
2084ctl_module_event_handler(module_t mod, int what, void *arg)
2085{
2086
2087	switch (what) {
2088	case MOD_LOAD:
2089		return (ctl_init());
2090	case MOD_UNLOAD:
2091		return (ctl_shutdown());
2092	default:
2093		return (EOPNOTSUPP);
2094	}
2095}
2096
2097/*
2098 * XXX KDM should we do some access checks here?  Bump a reference count to
2099 * prevent a CTL module from being unloaded while someone has it open?
2100 */
2101static int
2102ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2103{
2104	return (0);
2105}
2106
2107static int
2108ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2109{
2110	return (0);
2111}
2112
2113/*
2114 * Remove an initiator by port number and initiator ID.
2115 * Returns 0 for success, -1 for failure.
2116 */
2117int
2118ctl_remove_initiator(struct ctl_port *port, int iid)
2119{
2120	struct ctl_softc *softc = port->ctl_softc;
2121	int last;
2122
2123	mtx_assert(&softc->ctl_lock, MA_NOTOWNED);
2124
2125	if (iid > CTL_MAX_INIT_PER_PORT) {
2126		printf("%s: initiator ID %u > maximun %u!\n",
2127		       __func__, iid, CTL_MAX_INIT_PER_PORT);
2128		return (-1);
2129	}
2130
2131	mtx_lock(&softc->ctl_lock);
2132	last = (--port->wwpn_iid[iid].in_use == 0);
2133	port->wwpn_iid[iid].last_use = time_uptime;
2134	mtx_unlock(&softc->ctl_lock);
2135	if (last)
2136		ctl_i_t_nexus_loss(softc, iid, CTL_UA_POWERON);
2137	ctl_isc_announce_iid(port, iid);
2138
2139	return (0);
2140}
2141
2142/*
2143 * Add an initiator to the initiator map.
2144 * Returns iid for success, < 0 for failure.
2145 */
2146int
2147ctl_add_initiator(struct ctl_port *port, int iid, uint64_t wwpn, char *name)
2148{
2149	struct ctl_softc *softc = port->ctl_softc;
2150	time_t best_time;
2151	int i, best;
2152
2153	mtx_assert(&softc->ctl_lock, MA_NOTOWNED);
2154
2155	if (iid >= CTL_MAX_INIT_PER_PORT) {
2156		printf("%s: WWPN %#jx initiator ID %u > maximum %u!\n",
2157		       __func__, wwpn, iid, CTL_MAX_INIT_PER_PORT);
2158		free(name, M_CTL);
2159		return (-1);
2160	}
2161
2162	mtx_lock(&softc->ctl_lock);
2163
2164	if (iid < 0 && (wwpn != 0 || name != NULL)) {
2165		for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
2166			if (wwpn != 0 && wwpn == port->wwpn_iid[i].wwpn) {
2167				iid = i;
2168				break;
2169			}
2170			if (name != NULL && port->wwpn_iid[i].name != NULL &&
2171			    strcmp(name, port->wwpn_iid[i].name) == 0) {
2172				iid = i;
2173				break;
2174			}
2175		}
2176	}
2177
2178	if (iid < 0) {
2179		for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
2180			if (port->wwpn_iid[i].in_use == 0 &&
2181			    port->wwpn_iid[i].wwpn == 0 &&
2182			    port->wwpn_iid[i].name == NULL) {
2183				iid = i;
2184				break;
2185			}
2186		}
2187	}
2188
2189	if (iid < 0) {
2190		best = -1;
2191		best_time = INT32_MAX;
2192		for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
2193			if (port->wwpn_iid[i].in_use == 0) {
2194				if (port->wwpn_iid[i].last_use < best_time) {
2195					best = i;
2196					best_time = port->wwpn_iid[i].last_use;
2197				}
2198			}
2199		}
2200		iid = best;
2201	}
2202
2203	if (iid < 0) {
2204		mtx_unlock(&softc->ctl_lock);
2205		free(name, M_CTL);
2206		return (-2);
2207	}
2208
2209	if (port->wwpn_iid[iid].in_use > 0 && (wwpn != 0 || name != NULL)) {
2210		/*
2211		 * This is not an error yet.
2212		 */
2213		if (wwpn != 0 && wwpn == port->wwpn_iid[iid].wwpn) {
2214#if 0
2215			printf("%s: port %d iid %u WWPN %#jx arrived"
2216			    " again\n", __func__, port->targ_port,
2217			    iid, (uintmax_t)wwpn);
2218#endif
2219			goto take;
2220		}
2221		if (name != NULL && port->wwpn_iid[iid].name != NULL &&
2222		    strcmp(name, port->wwpn_iid[iid].name) == 0) {
2223#if 0
2224			printf("%s: port %d iid %u name '%s' arrived"
2225			    " again\n", __func__, port->targ_port,
2226			    iid, name);
2227#endif
2228			goto take;
2229		}
2230
2231		/*
2232		 * This is an error, but what do we do about it?  The
2233		 * driver is telling us we have a new WWPN for this
2234		 * initiator ID, so we pretty much need to use it.
2235		 */
2236		printf("%s: port %d iid %u WWPN %#jx '%s' arrived,"
2237		    " but WWPN %#jx '%s' is still at that address\n",
2238		    __func__, port->targ_port, iid, wwpn, name,
2239		    (uintmax_t)port->wwpn_iid[iid].wwpn,
2240		    port->wwpn_iid[iid].name);
2241	}
2242take:
2243	free(port->wwpn_iid[iid].name, M_CTL);
2244	port->wwpn_iid[iid].name = name;
2245	port->wwpn_iid[iid].wwpn = wwpn;
2246	port->wwpn_iid[iid].in_use++;
2247	mtx_unlock(&softc->ctl_lock);
2248	ctl_isc_announce_iid(port, iid);
2249
2250	return (iid);
2251}
2252
2253static int
2254ctl_create_iid(struct ctl_port *port, int iid, uint8_t *buf)
2255{
2256	int len;
2257
2258	switch (port->port_type) {
2259	case CTL_PORT_FC:
2260	{
2261		struct scsi_transportid_fcp *id =
2262		    (struct scsi_transportid_fcp *)buf;
2263		if (port->wwpn_iid[iid].wwpn == 0)
2264			return (0);
2265		memset(id, 0, sizeof(*id));
2266		id->format_protocol = SCSI_PROTO_FC;
2267		scsi_u64to8b(port->wwpn_iid[iid].wwpn, id->n_port_name);
2268		return (sizeof(*id));
2269	}
2270	case CTL_PORT_ISCSI:
2271	{
2272		struct scsi_transportid_iscsi_port *id =
2273		    (struct scsi_transportid_iscsi_port *)buf;
2274		if (port->wwpn_iid[iid].name == NULL)
2275			return (0);
2276		memset(id, 0, 256);
2277		id->format_protocol = SCSI_TRN_ISCSI_FORMAT_PORT |
2278		    SCSI_PROTO_ISCSI;
2279		len = strlcpy(id->iscsi_name, port->wwpn_iid[iid].name, 252) + 1;
2280		len = roundup2(min(len, 252), 4);
2281		scsi_ulto2b(len, id->additional_length);
2282		return (sizeof(*id) + len);
2283	}
2284	case CTL_PORT_SAS:
2285	{
2286		struct scsi_transportid_sas *id =
2287		    (struct scsi_transportid_sas *)buf;
2288		if (port->wwpn_iid[iid].wwpn == 0)
2289			return (0);
2290		memset(id, 0, sizeof(*id));
2291		id->format_protocol = SCSI_PROTO_SAS;
2292		scsi_u64to8b(port->wwpn_iid[iid].wwpn, id->sas_address);
2293		return (sizeof(*id));
2294	}
2295	default:
2296	{
2297		struct scsi_transportid_spi *id =
2298		    (struct scsi_transportid_spi *)buf;
2299		memset(id, 0, sizeof(*id));
2300		id->format_protocol = SCSI_PROTO_SPI;
2301		scsi_ulto2b(iid, id->scsi_addr);
2302		scsi_ulto2b(port->targ_port, id->rel_trgt_port_id);
2303		return (sizeof(*id));
2304	}
2305	}
2306}
2307
2308/*
2309 * Serialize a command that went down the "wrong" side, and so was sent to
2310 * this controller for execution.  The logic is a little different than the
2311 * standard case in ctl_scsiio_precheck().  Errors in this case need to get
2312 * sent back to the other side, but in the success case, we execute the
2313 * command on this side (XFER mode) or tell the other side to execute it
2314 * (SER_ONLY mode).
2315 */
2316static void
2317ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio)
2318{
2319	struct ctl_softc *softc = CTL_SOFTC(ctsio);
2320	struct ctl_port *port = CTL_PORT(ctsio);
2321	union ctl_ha_msg msg_info;
2322	struct ctl_lun *lun;
2323	const struct ctl_cmd_entry *entry;
2324	union ctl_io *bio;
2325	uint32_t targ_lun;
2326
2327	targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun;
2328
2329	/* Make sure that we know about this port. */
2330	if (port == NULL || (port->status & CTL_PORT_STATUS_ONLINE) == 0) {
2331		ctl_set_internal_failure(ctsio, /*sks_valid*/ 0,
2332					 /*retry_count*/ 1);
2333		goto badjuju;
2334	}
2335
2336	/* Make sure that we know about this LUN. */
2337	mtx_lock(&softc->ctl_lock);
2338	if (targ_lun >= ctl_max_luns ||
2339	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
2340		mtx_unlock(&softc->ctl_lock);
2341
2342		/*
2343		 * The other node would not send this request to us unless
2344		 * received announce that we are primary node for this LUN.
2345		 * If this LUN does not exist now, it is probably result of
2346		 * a race, so respond to initiator in the most opaque way.
2347		 */
2348		ctl_set_busy(ctsio);
2349		goto badjuju;
2350	}
2351	mtx_lock(&lun->lun_lock);
2352	mtx_unlock(&softc->ctl_lock);
2353
2354	/*
2355	 * If the LUN is invalid, pretend that it doesn't exist.
2356	 * It will go away as soon as all pending I/Os completed.
2357	 */
2358	if (lun->flags & CTL_LUN_DISABLED) {
2359		mtx_unlock(&lun->lun_lock);
2360		ctl_set_busy(ctsio);
2361		goto badjuju;
2362	}
2363
2364	entry = ctl_get_cmd_entry(ctsio, NULL);
2365	ctsio->seridx = entry->seridx;
2366	if (ctl_scsiio_lun_check(lun, entry, ctsio) != 0) {
2367		mtx_unlock(&lun->lun_lock);
2368		goto badjuju;
2369	}
2370
2371	CTL_LUN(ctsio) = lun;
2372	CTL_BACKEND_LUN(ctsio) = lun->be_lun;
2373
2374	/*
2375	 * Every I/O goes into the OOA queue for a
2376	 * particular LUN, and stays there until completion.
2377	 */
2378#ifdef CTL_TIME_IO
2379	if (LIST_EMPTY(&lun->ooa_queue))
2380		lun->idle_time += getsbinuptime() - lun->last_busy;
2381#endif
2382	LIST_INSERT_HEAD(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
2383
2384	bio = (union ctl_io *)LIST_NEXT(&ctsio->io_hdr, ooa_links);
2385	switch (ctl_check_ooa(lun, (union ctl_io *)ctsio, &bio)) {
2386	case CTL_ACTION_PASS:
2387	case CTL_ACTION_SKIP:
2388		if (softc->ha_mode == CTL_HA_MODE_XFER) {
2389			ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
2390			ctl_enqueue_rtr((union ctl_io *)ctsio);
2391			mtx_unlock(&lun->lun_lock);
2392		} else {
2393			ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
2394			mtx_unlock(&lun->lun_lock);
2395
2396			/* send msg back to other side */
2397			msg_info.hdr.original_sc = ctsio->io_hdr.remote_io;
2398			msg_info.hdr.serializing_sc = (union ctl_io *)ctsio;
2399			msg_info.hdr.msg_type = CTL_MSG_R2R;
2400			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
2401			    sizeof(msg_info.hdr), M_WAITOK);
2402		}
2403		break;
2404	case CTL_ACTION_BLOCK:
2405		ctsio->io_hdr.blocker = bio;
2406		TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctsio->io_hdr,
2407				  blocked_links);
2408		mtx_unlock(&lun->lun_lock);
2409		break;
2410	case CTL_ACTION_OVERLAP:
2411		LIST_REMOVE(&ctsio->io_hdr, ooa_links);
2412		mtx_unlock(&lun->lun_lock);
2413		ctl_set_overlapped_cmd(ctsio);
2414		goto badjuju;
2415	case CTL_ACTION_OVERLAP_TAG:
2416		LIST_REMOVE(&ctsio->io_hdr, ooa_links);
2417		mtx_unlock(&lun->lun_lock);
2418		ctl_set_overlapped_tag(ctsio, ctsio->tag_num & 0xff);
2419badjuju:
2420		ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info);
2421		msg_info.hdr.original_sc = ctsio->io_hdr.remote_io;
2422		msg_info.hdr.serializing_sc = NULL;
2423		msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
2424		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
2425		    sizeof(msg_info.scsi), M_WAITOK);
2426		ctl_free_io((union ctl_io *)ctsio);
2427		break;
2428	default:
2429		__assert_unreachable();
2430	}
2431}
2432
2433/*
2434 * Returns 0 for success, errno for failure.
2435 */
2436static void
2437ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num,
2438		   struct ctl_ooa *ooa_hdr, struct ctl_ooa_entry *kern_entries)
2439{
2440	struct ctl_io_hdr *ioh;
2441
2442	mtx_lock(&lun->lun_lock);
2443	ioh = LIST_FIRST(&lun->ooa_queue);
2444	if (ioh == NULL) {
2445		mtx_unlock(&lun->lun_lock);
2446		return;
2447	}
2448	while (LIST_NEXT(ioh, ooa_links) != NULL)
2449		ioh = LIST_NEXT(ioh, ooa_links);
2450	for ( ; ioh; ioh = LIST_PREV(ioh, &lun->ooa_queue, ctl_io_hdr, ooa_links)) {
2451		union ctl_io *io = (union ctl_io *)ioh;
2452		struct ctl_ooa_entry *entry;
2453
2454		CTL_IO_ASSERT(io, SCSI);
2455
2456		/*
2457		 * If we've got more than we can fit, just count the
2458		 * remaining entries.
2459		 */
2460		if (*cur_fill_num >= ooa_hdr->alloc_num) {
2461			(*cur_fill_num)++;
2462			continue;
2463		}
2464
2465		entry = &kern_entries[*cur_fill_num];
2466
2467		entry->tag_num = io->scsiio.tag_num;
2468		entry->tag_type = io->scsiio.tag_type;
2469		entry->lun_num = lun->lun;
2470#ifdef CTL_TIME_IO
2471		entry->start_bt = io->io_hdr.start_bt;
2472#endif
2473		bcopy(io->scsiio.cdb, entry->cdb, io->scsiio.cdb_len);
2474		entry->cdb_len = io->scsiio.cdb_len;
2475		if (io->io_hdr.blocker != NULL)
2476			entry->cmd_flags |= CTL_OOACMD_FLAG_BLOCKED;
2477
2478		if (io->io_hdr.flags & CTL_FLAG_DMA_INPROG)
2479			entry->cmd_flags |= CTL_OOACMD_FLAG_DMA;
2480
2481		if (io->io_hdr.flags & CTL_FLAG_ABORT)
2482			entry->cmd_flags |= CTL_OOACMD_FLAG_ABORT;
2483
2484		if (io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR)
2485			entry->cmd_flags |= CTL_OOACMD_FLAG_RTR;
2486
2487		if (io->io_hdr.flags & CTL_FLAG_DMA_QUEUED)
2488			entry->cmd_flags |= CTL_OOACMD_FLAG_DMA_QUEUED;
2489
2490		if (io->io_hdr.flags & CTL_FLAG_STATUS_QUEUED)
2491			entry->cmd_flags |= CTL_OOACMD_FLAG_STATUS_QUEUED;
2492
2493		if (io->io_hdr.flags & CTL_FLAG_STATUS_SENT)
2494			entry->cmd_flags |= CTL_OOACMD_FLAG_STATUS_SENT;
2495		(*cur_fill_num)++;
2496	}
2497	mtx_unlock(&lun->lun_lock);
2498}
2499
2500/*
2501 * Escape characters that are illegal or not recommended in XML.
2502 */
2503int
2504ctl_sbuf_printf_esc(struct sbuf *sb, char *str, int size)
2505{
2506	char *end = str + size;
2507	int retval;
2508
2509	retval = 0;
2510
2511	for (; *str && str < end; str++) {
2512		switch (*str) {
2513		case '&':
2514			retval = sbuf_cat(sb, "&amp;");
2515			break;
2516		case '>':
2517			retval = sbuf_cat(sb, "&gt;");
2518			break;
2519		case '<':
2520			retval = sbuf_cat(sb, "&lt;");
2521			break;
2522		default:
2523			retval = sbuf_putc(sb, *str);
2524			break;
2525		}
2526
2527		if (retval != 0)
2528			break;
2529	}
2530
2531	return (retval);
2532}
2533
2534static void
2535ctl_id_sbuf(struct ctl_devid *id, struct sbuf *sb)
2536{
2537	struct scsi_vpd_id_descriptor *desc;
2538	int i;
2539
2540	if (id == NULL || id->len < 4)
2541		return;
2542	desc = (struct scsi_vpd_id_descriptor *)id->data;
2543	switch (desc->id_type & SVPD_ID_TYPE_MASK) {
2544	case SVPD_ID_TYPE_T10:
2545		sbuf_cat(sb, "t10.");
2546		break;
2547	case SVPD_ID_TYPE_EUI64:
2548		sbuf_cat(sb, "eui.");
2549		break;
2550	case SVPD_ID_TYPE_NAA:
2551		sbuf_cat(sb, "naa.");
2552		break;
2553	case SVPD_ID_TYPE_SCSI_NAME:
2554		break;
2555	}
2556	switch (desc->proto_codeset & SVPD_ID_CODESET_MASK) {
2557	case SVPD_ID_CODESET_BINARY:
2558		for (i = 0; i < desc->length; i++)
2559			sbuf_printf(sb, "%02x", desc->identifier[i]);
2560		break;
2561	case SVPD_ID_CODESET_ASCII:
2562		sbuf_printf(sb, "%.*s", (int)desc->length,
2563		    (char *)desc->identifier);
2564		break;
2565	case SVPD_ID_CODESET_UTF8:
2566		sbuf_cat(sb, (char *)desc->identifier);
2567		break;
2568	}
2569}
2570
2571static int
2572ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
2573	  struct thread *td)
2574{
2575	struct ctl_softc *softc = dev->si_drv1;
2576	struct ctl_port *port;
2577	struct ctl_lun *lun;
2578	int retval;
2579
2580	retval = 0;
2581
2582	switch (cmd) {
2583	case CTL_IO:
2584		retval = ctl_ioctl_io(dev, cmd, addr, flag, td);
2585		break;
2586	case CTL_ENABLE_PORT:
2587	case CTL_DISABLE_PORT:
2588	case CTL_SET_PORT_WWNS: {
2589		struct ctl_port *port;
2590		struct ctl_port_entry *entry;
2591
2592		entry = (struct ctl_port_entry *)addr;
2593
2594		mtx_lock(&softc->ctl_lock);
2595		STAILQ_FOREACH(port, &softc->port_list, links) {
2596			int action, done;
2597
2598			if (port->targ_port < softc->port_min ||
2599			    port->targ_port >= softc->port_max)
2600				continue;
2601
2602			action = 0;
2603			done = 0;
2604			if ((entry->port_type == CTL_PORT_NONE)
2605			 && (entry->targ_port == port->targ_port)) {
2606				/*
2607				 * If the user only wants to enable or
2608				 * disable or set WWNs on a specific port,
2609				 * do the operation and we're done.
2610				 */
2611				action = 1;
2612				done = 1;
2613			} else if (entry->port_type & port->port_type) {
2614				/*
2615				 * Compare the user's type mask with the
2616				 * particular frontend type to see if we
2617				 * have a match.
2618				 */
2619				action = 1;
2620				done = 0;
2621
2622				/*
2623				 * Make sure the user isn't trying to set
2624				 * WWNs on multiple ports at the same time.
2625				 */
2626				if (cmd == CTL_SET_PORT_WWNS) {
2627					printf("%s: Can't set WWNs on "
2628					       "multiple ports\n", __func__);
2629					retval = EINVAL;
2630					break;
2631				}
2632			}
2633			if (action == 0)
2634				continue;
2635
2636			/*
2637			 * XXX KDM we have to drop the lock here, because
2638			 * the online/offline operations can potentially
2639			 * block.  We need to reference count the frontends
2640			 * so they can't go away,
2641			 */
2642			if (cmd == CTL_ENABLE_PORT) {
2643				mtx_unlock(&softc->ctl_lock);
2644				ctl_port_online(port);
2645				mtx_lock(&softc->ctl_lock);
2646			} else if (cmd == CTL_DISABLE_PORT) {
2647				mtx_unlock(&softc->ctl_lock);
2648				ctl_port_offline(port);
2649				mtx_lock(&softc->ctl_lock);
2650			} else if (cmd == CTL_SET_PORT_WWNS) {
2651				ctl_port_set_wwns(port,
2652				    (entry->flags & CTL_PORT_WWNN_VALID) ?
2653				    1 : 0, entry->wwnn,
2654				    (entry->flags & CTL_PORT_WWPN_VALID) ?
2655				    1 : 0, entry->wwpn);
2656			}
2657			if (done != 0)
2658				break;
2659		}
2660		mtx_unlock(&softc->ctl_lock);
2661		break;
2662	}
2663	case CTL_GET_OOA: {
2664		struct ctl_ooa *ooa_hdr;
2665		struct ctl_ooa_entry *entries;
2666		uint32_t cur_fill_num;
2667
2668		ooa_hdr = (struct ctl_ooa *)addr;
2669
2670		if ((ooa_hdr->alloc_len == 0)
2671		 || (ooa_hdr->alloc_num == 0)) {
2672			printf("%s: CTL_GET_OOA: alloc len %u and alloc num %u "
2673			       "must be non-zero\n", __func__,
2674			       ooa_hdr->alloc_len, ooa_hdr->alloc_num);
2675			retval = EINVAL;
2676			break;
2677		}
2678
2679		if (ooa_hdr->alloc_len != (ooa_hdr->alloc_num *
2680		    sizeof(struct ctl_ooa_entry))) {
2681			printf("%s: CTL_GET_OOA: alloc len %u must be alloc "
2682			       "num %d * sizeof(struct ctl_ooa_entry) %zd\n",
2683			       __func__, ooa_hdr->alloc_len,
2684			       ooa_hdr->alloc_num,sizeof(struct ctl_ooa_entry));
2685			retval = EINVAL;
2686			break;
2687		}
2688
2689		entries = malloc(ooa_hdr->alloc_len, M_CTL, M_WAITOK | M_ZERO);
2690		if (entries == NULL) {
2691			printf("%s: could not allocate %d bytes for OOA "
2692			       "dump\n", __func__, ooa_hdr->alloc_len);
2693			retval = ENOMEM;
2694			break;
2695		}
2696
2697		mtx_lock(&softc->ctl_lock);
2698		if ((ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) == 0 &&
2699		    (ooa_hdr->lun_num >= ctl_max_luns ||
2700		     softc->ctl_luns[ooa_hdr->lun_num] == NULL)) {
2701			mtx_unlock(&softc->ctl_lock);
2702			free(entries, M_CTL);
2703			printf("%s: CTL_GET_OOA: invalid LUN %ju\n",
2704			       __func__, (uintmax_t)ooa_hdr->lun_num);
2705			retval = EINVAL;
2706			break;
2707		}
2708
2709		cur_fill_num = 0;
2710
2711		if (ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) {
2712			STAILQ_FOREACH(lun, &softc->lun_list, links) {
2713				ctl_ioctl_fill_ooa(lun, &cur_fill_num,
2714				    ooa_hdr, entries);
2715			}
2716		} else {
2717			lun = softc->ctl_luns[ooa_hdr->lun_num];
2718			ctl_ioctl_fill_ooa(lun, &cur_fill_num, ooa_hdr,
2719			    entries);
2720		}
2721		mtx_unlock(&softc->ctl_lock);
2722
2723		ooa_hdr->fill_num = min(cur_fill_num, ooa_hdr->alloc_num);
2724		ooa_hdr->fill_len = ooa_hdr->fill_num *
2725			sizeof(struct ctl_ooa_entry);
2726		retval = copyout(entries, ooa_hdr->entries, ooa_hdr->fill_len);
2727		if (retval != 0) {
2728			printf("%s: error copying out %d bytes for OOA dump\n",
2729			       __func__, ooa_hdr->fill_len);
2730		}
2731
2732		getbinuptime(&ooa_hdr->cur_bt);
2733
2734		if (cur_fill_num > ooa_hdr->alloc_num) {
2735			ooa_hdr->dropped_num = cur_fill_num -ooa_hdr->alloc_num;
2736			ooa_hdr->status = CTL_OOA_NEED_MORE_SPACE;
2737		} else {
2738			ooa_hdr->dropped_num = 0;
2739			ooa_hdr->status = CTL_OOA_OK;
2740		}
2741
2742		free(entries, M_CTL);
2743		break;
2744	}
2745	case CTL_DELAY_IO: {
2746		struct ctl_io_delay_info *delay_info;
2747
2748		delay_info = (struct ctl_io_delay_info *)addr;
2749
2750#ifdef CTL_IO_DELAY
2751		mtx_lock(&softc->ctl_lock);
2752		if (delay_info->lun_id >= ctl_max_luns ||
2753		    (lun = softc->ctl_luns[delay_info->lun_id]) == NULL) {
2754			mtx_unlock(&softc->ctl_lock);
2755			delay_info->status = CTL_DELAY_STATUS_INVALID_LUN;
2756			break;
2757		}
2758		mtx_lock(&lun->lun_lock);
2759		mtx_unlock(&softc->ctl_lock);
2760		delay_info->status = CTL_DELAY_STATUS_OK;
2761		switch (delay_info->delay_type) {
2762		case CTL_DELAY_TYPE_CONT:
2763		case CTL_DELAY_TYPE_ONESHOT:
2764			break;
2765		default:
2766			delay_info->status = CTL_DELAY_STATUS_INVALID_TYPE;
2767			break;
2768		}
2769		switch (delay_info->delay_loc) {
2770		case CTL_DELAY_LOC_DATAMOVE:
2771			lun->delay_info.datamove_type = delay_info->delay_type;
2772			lun->delay_info.datamove_delay = delay_info->delay_secs;
2773			break;
2774		case CTL_DELAY_LOC_DONE:
2775			lun->delay_info.done_type = delay_info->delay_type;
2776			lun->delay_info.done_delay = delay_info->delay_secs;
2777			break;
2778		default:
2779			delay_info->status = CTL_DELAY_STATUS_INVALID_LOC;
2780			break;
2781		}
2782		mtx_unlock(&lun->lun_lock);
2783#else
2784		delay_info->status = CTL_DELAY_STATUS_NOT_IMPLEMENTED;
2785#endif /* CTL_IO_DELAY */
2786		break;
2787	}
2788	case CTL_ERROR_INJECT: {
2789		struct ctl_error_desc *err_desc, *new_err_desc;
2790
2791		err_desc = (struct ctl_error_desc *)addr;
2792
2793		new_err_desc = malloc(sizeof(*new_err_desc), M_CTL,
2794				      M_WAITOK | M_ZERO);
2795		bcopy(err_desc, new_err_desc, sizeof(*new_err_desc));
2796
2797		mtx_lock(&softc->ctl_lock);
2798		if (err_desc->lun_id >= ctl_max_luns ||
2799		    (lun = softc->ctl_luns[err_desc->lun_id]) == NULL) {
2800			mtx_unlock(&softc->ctl_lock);
2801			free(new_err_desc, M_CTL);
2802			printf("%s: CTL_ERROR_INJECT: invalid LUN %ju\n",
2803			       __func__, (uintmax_t)err_desc->lun_id);
2804			retval = EINVAL;
2805			break;
2806		}
2807		mtx_lock(&lun->lun_lock);
2808		mtx_unlock(&softc->ctl_lock);
2809
2810		/*
2811		 * We could do some checking here to verify the validity
2812		 * of the request, but given the complexity of error
2813		 * injection requests, the checking logic would be fairly
2814		 * complex.
2815		 *
2816		 * For now, if the request is invalid, it just won't get
2817		 * executed and might get deleted.
2818		 */
2819		STAILQ_INSERT_TAIL(&lun->error_list, new_err_desc, links);
2820
2821		/*
2822		 * XXX KDM check to make sure the serial number is unique,
2823		 * in case we somehow manage to wrap.  That shouldn't
2824		 * happen for a very long time, but it's the right thing to
2825		 * do.
2826		 */
2827		new_err_desc->serial = lun->error_serial;
2828		err_desc->serial = lun->error_serial;
2829		lun->error_serial++;
2830
2831		mtx_unlock(&lun->lun_lock);
2832		break;
2833	}
2834	case CTL_ERROR_INJECT_DELETE: {
2835		struct ctl_error_desc *delete_desc, *desc, *desc2;
2836		int delete_done;
2837
2838		delete_desc = (struct ctl_error_desc *)addr;
2839		delete_done = 0;
2840
2841		mtx_lock(&softc->ctl_lock);
2842		if (delete_desc->lun_id >= ctl_max_luns ||
2843		    (lun = softc->ctl_luns[delete_desc->lun_id]) == NULL) {
2844			mtx_unlock(&softc->ctl_lock);
2845			printf("%s: CTL_ERROR_INJECT_DELETE: invalid LUN %ju\n",
2846			       __func__, (uintmax_t)delete_desc->lun_id);
2847			retval = EINVAL;
2848			break;
2849		}
2850		mtx_lock(&lun->lun_lock);
2851		mtx_unlock(&softc->ctl_lock);
2852		STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) {
2853			if (desc->serial != delete_desc->serial)
2854				continue;
2855
2856			STAILQ_REMOVE(&lun->error_list, desc, ctl_error_desc,
2857				      links);
2858			free(desc, M_CTL);
2859			delete_done = 1;
2860		}
2861		mtx_unlock(&lun->lun_lock);
2862		if (delete_done == 0) {
2863			printf("%s: CTL_ERROR_INJECT_DELETE: can't find "
2864			       "error serial %ju on LUN %u\n", __func__,
2865			       delete_desc->serial, delete_desc->lun_id);
2866			retval = EINVAL;
2867			break;
2868		}
2869		break;
2870	}
2871	case CTL_DUMP_STRUCTS: {
2872		int j, k;
2873		struct ctl_port *port;
2874		struct ctl_frontend *fe;
2875
2876		mtx_lock(&softc->ctl_lock);
2877		printf("CTL Persistent Reservation information start:\n");
2878		STAILQ_FOREACH(lun, &softc->lun_list, links) {
2879			mtx_lock(&lun->lun_lock);
2880			if ((lun->flags & CTL_LUN_DISABLED) != 0) {
2881				mtx_unlock(&lun->lun_lock);
2882				continue;
2883			}
2884
2885			for (j = 0; j < ctl_max_ports; j++) {
2886				if (lun->pr_keys[j] == NULL)
2887					continue;
2888				for (k = 0; k < CTL_MAX_INIT_PER_PORT; k++){
2889					if (lun->pr_keys[j][k] == 0)
2890						continue;
2891					printf("  LUN %ju port %d iid %d key "
2892					       "%#jx\n", lun->lun, j, k,
2893					       (uintmax_t)lun->pr_keys[j][k]);
2894				}
2895			}
2896			mtx_unlock(&lun->lun_lock);
2897		}
2898		printf("CTL Persistent Reservation information end\n");
2899		printf("CTL Ports:\n");
2900		STAILQ_FOREACH(port, &softc->port_list, links) {
2901			printf("  Port %d '%s' Frontend '%s' Type %u pp %d vp %d WWNN "
2902			       "%#jx WWPN %#jx\n", port->targ_port, port->port_name,
2903			       port->frontend->name, port->port_type,
2904			       port->physical_port, port->virtual_port,
2905			       (uintmax_t)port->wwnn, (uintmax_t)port->wwpn);
2906			for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) {
2907				if (port->wwpn_iid[j].in_use == 0 &&
2908				    port->wwpn_iid[j].wwpn == 0 &&
2909				    port->wwpn_iid[j].name == NULL)
2910					continue;
2911
2912				printf("    iid %u use %d WWPN %#jx '%s'\n",
2913				    j, port->wwpn_iid[j].in_use,
2914				    (uintmax_t)port->wwpn_iid[j].wwpn,
2915				    port->wwpn_iid[j].name);
2916			}
2917		}
2918		printf("CTL Port information end\n");
2919		mtx_unlock(&softc->ctl_lock);
2920		/*
2921		 * XXX KDM calling this without a lock.  We'd likely want
2922		 * to drop the lock before calling the frontend's dump
2923		 * routine anyway.
2924		 */
2925		printf("CTL Frontends:\n");
2926		STAILQ_FOREACH(fe, &softc->fe_list, links) {
2927			printf("  Frontend '%s'\n", fe->name);
2928			if (fe->fe_dump != NULL)
2929				fe->fe_dump();
2930		}
2931		printf("CTL Frontend information end\n");
2932		break;
2933	}
2934	case CTL_LUN_REQ: {
2935		struct ctl_lun_req *lun_req;
2936		struct ctl_backend_driver *backend;
2937		void *packed;
2938		nvlist_t *tmp_args_nvl;
2939		size_t packed_len;
2940
2941		lun_req = (struct ctl_lun_req *)addr;
2942		tmp_args_nvl = lun_req->args_nvl;
2943
2944		backend = ctl_backend_find(lun_req->backend);
2945		if (backend == NULL) {
2946			lun_req->status = CTL_LUN_ERROR;
2947			snprintf(lun_req->error_str,
2948				 sizeof(lun_req->error_str),
2949				 "Backend \"%s\" not found.",
2950				 lun_req->backend);
2951			break;
2952		}
2953
2954		if (lun_req->args != NULL) {
2955			if (lun_req->args_len > CTL_MAX_ARGS_LEN) {
2956				lun_req->status = CTL_LUN_ERROR;
2957				snprintf(lun_req->error_str, sizeof(lun_req->error_str),
2958				    "Too big args.");
2959				break;
2960			}
2961			packed = malloc(lun_req->args_len, M_CTL, M_WAITOK);
2962			if (copyin(lun_req->args, packed, lun_req->args_len) != 0) {
2963				free(packed, M_CTL);
2964				lun_req->status = CTL_LUN_ERROR;
2965				snprintf(lun_req->error_str, sizeof(lun_req->error_str),
2966				    "Cannot copyin args.");
2967				break;
2968			}
2969			lun_req->args_nvl = nvlist_unpack(packed,
2970			    lun_req->args_len, 0);
2971			free(packed, M_CTL);
2972
2973			if (lun_req->args_nvl == NULL) {
2974				lun_req->status = CTL_LUN_ERROR;
2975				snprintf(lun_req->error_str, sizeof(lun_req->error_str),
2976				    "Cannot unpack args nvlist.");
2977				break;
2978			}
2979		} else
2980			lun_req->args_nvl = nvlist_create(0);
2981
2982		lun_req->result_nvl = NULL;
2983		retval = backend->ioctl(dev, cmd, addr, flag, td);
2984		nvlist_destroy(lun_req->args_nvl);
2985		lun_req->args_nvl = tmp_args_nvl;
2986
2987		if (lun_req->result_nvl != NULL) {
2988			if (lun_req->result != NULL) {
2989				packed = nvlist_pack(lun_req->result_nvl,
2990				    &packed_len);
2991				if (packed == NULL) {
2992					lun_req->status = CTL_LUN_ERROR;
2993					snprintf(lun_req->error_str,
2994					    sizeof(lun_req->error_str),
2995					    "Cannot pack result nvlist.");
2996					break;
2997				}
2998
2999				if (packed_len > lun_req->result_len) {
3000					lun_req->status = CTL_LUN_ERROR;
3001					snprintf(lun_req->error_str,
3002					    sizeof(lun_req->error_str),
3003					    "Result nvlist too large.");
3004					free(packed, M_NVLIST);
3005					break;
3006				}
3007
3008				if (copyout(packed, lun_req->result, packed_len)) {
3009					lun_req->status = CTL_LUN_ERROR;
3010					snprintf(lun_req->error_str,
3011					    sizeof(lun_req->error_str),
3012					    "Cannot copyout() the result.");
3013					free(packed, M_NVLIST);
3014					break;
3015				}
3016
3017				lun_req->result_len = packed_len;
3018				free(packed, M_NVLIST);
3019			}
3020
3021			nvlist_destroy(lun_req->result_nvl);
3022		}
3023		break;
3024	}
3025	case CTL_LUN_LIST: {
3026		struct sbuf *sb;
3027		struct ctl_lun_list *list;
3028		const char *name, *value;
3029		void *cookie;
3030		int type;
3031
3032		list = (struct ctl_lun_list *)addr;
3033
3034		/*
3035		 * Allocate a fixed length sbuf here, based on the length
3036		 * of the user's buffer.  We could allocate an auto-extending
3037		 * buffer, and then tell the user how much larger our
3038		 * amount of data is than his buffer, but that presents
3039		 * some problems:
3040		 *
3041		 * 1.  The sbuf(9) routines use a blocking malloc, and so
3042		 *     we can't hold a lock while calling them with an
3043		 *     auto-extending buffer.
3044 		 *
3045		 * 2.  There is not currently a LUN reference counting
3046		 *     mechanism, outside of outstanding transactions on
3047		 *     the LUN's OOA queue.  So a LUN could go away on us
3048		 *     while we're getting the LUN number, backend-specific
3049		 *     information, etc.  Thus, given the way things
3050		 *     currently work, we need to hold the CTL lock while
3051		 *     grabbing LUN information.
3052		 *
3053		 * So, from the user's standpoint, the best thing to do is
3054		 * allocate what he thinks is a reasonable buffer length,
3055		 * and then if he gets a CTL_LUN_LIST_NEED_MORE_SPACE error,
3056		 * double the buffer length and try again.  (And repeat
3057		 * that until he succeeds.)
3058		 */
3059		sb = sbuf_new(NULL, NULL, list->alloc_len, SBUF_FIXEDLEN);
3060		if (sb == NULL) {
3061			list->status = CTL_LUN_LIST_ERROR;
3062			snprintf(list->error_str, sizeof(list->error_str),
3063				 "Unable to allocate %d bytes for LUN list",
3064				 list->alloc_len);
3065			break;
3066		}
3067
3068		sbuf_cat(sb, "<ctllunlist>\n");
3069
3070		mtx_lock(&softc->ctl_lock);
3071		STAILQ_FOREACH(lun, &softc->lun_list, links) {
3072			mtx_lock(&lun->lun_lock);
3073			retval = sbuf_printf(sb, "<lun id=\"%ju\">\n",
3074					     (uintmax_t)lun->lun);
3075
3076			/*
3077			 * Bail out as soon as we see that we've overfilled
3078			 * the buffer.
3079			 */
3080			if (retval != 0)
3081				break;
3082
3083			retval = sbuf_printf(sb, "\t<backend_type>%s"
3084					     "</backend_type>\n",
3085					     (lun->backend == NULL) ?  "none" :
3086					     lun->backend->name);
3087
3088			if (retval != 0)
3089				break;
3090
3091			retval = sbuf_printf(sb, "\t<lun_type>%d</lun_type>\n",
3092					     lun->be_lun->lun_type);
3093
3094			if (retval != 0)
3095				break;
3096
3097			if (lun->backend == NULL) {
3098				retval = sbuf_cat(sb, "</lun>\n");
3099				if (retval != 0)
3100					break;
3101				continue;
3102			}
3103
3104			retval = sbuf_printf(sb, "\t<size>%ju</size>\n",
3105					     (lun->be_lun->maxlba > 0) ?
3106					     lun->be_lun->maxlba + 1 : 0);
3107
3108			if (retval != 0)
3109				break;
3110
3111			retval = sbuf_printf(sb, "\t<blocksize>%u</blocksize>\n",
3112					     lun->be_lun->blocksize);
3113
3114			if (retval != 0)
3115				break;
3116
3117			retval = sbuf_cat(sb, "\t<serial_number>");
3118
3119			if (retval != 0)
3120				break;
3121
3122			retval = ctl_sbuf_printf_esc(sb,
3123			    lun->be_lun->serial_num,
3124			    sizeof(lun->be_lun->serial_num));
3125
3126			if (retval != 0)
3127				break;
3128
3129			retval = sbuf_cat(sb, "</serial_number>\n");
3130
3131			if (retval != 0)
3132				break;
3133
3134			retval = sbuf_cat(sb, "\t<device_id>");
3135
3136			if (retval != 0)
3137				break;
3138
3139			retval = ctl_sbuf_printf_esc(sb,
3140			    lun->be_lun->device_id,
3141			    sizeof(lun->be_lun->device_id));
3142
3143			if (retval != 0)
3144				break;
3145
3146			retval = sbuf_cat(sb, "</device_id>\n");
3147
3148			if (retval != 0)
3149				break;
3150
3151			if (lun->backend->lun_info != NULL) {
3152				retval = lun->backend->lun_info(lun->be_lun, sb);
3153				if (retval != 0)
3154					break;
3155			}
3156
3157			cookie = NULL;
3158			while ((name = nvlist_next(lun->be_lun->options, &type,
3159			    &cookie)) != NULL) {
3160				sbuf_printf(sb, "\t<%s>", name);
3161
3162				if (type == NV_TYPE_STRING) {
3163					value = dnvlist_get_string(
3164					    lun->be_lun->options, name, NULL);
3165					if (value != NULL)
3166						sbuf_cat(sb, value);
3167				}
3168
3169				sbuf_printf(sb, "</%s>\n", name);
3170			}
3171
3172			retval = sbuf_cat(sb, "</lun>\n");
3173
3174			if (retval != 0)
3175				break;
3176			mtx_unlock(&lun->lun_lock);
3177		}
3178		if (lun != NULL)
3179			mtx_unlock(&lun->lun_lock);
3180		mtx_unlock(&softc->ctl_lock);
3181
3182		if ((retval != 0)
3183		 || ((retval = sbuf_cat(sb, "</ctllunlist>\n")) != 0)) {
3184			retval = 0;
3185			sbuf_delete(sb);
3186			list->status = CTL_LUN_LIST_NEED_MORE_SPACE;
3187			snprintf(list->error_str, sizeof(list->error_str),
3188				 "Out of space, %d bytes is too small",
3189				 list->alloc_len);
3190			break;
3191		}
3192
3193		sbuf_finish(sb);
3194
3195		retval = copyout(sbuf_data(sb), list->lun_xml,
3196				 sbuf_len(sb) + 1);
3197
3198		list->fill_len = sbuf_len(sb) + 1;
3199		list->status = CTL_LUN_LIST_OK;
3200		sbuf_delete(sb);
3201		break;
3202	}
3203	case CTL_ISCSI: {
3204		struct ctl_iscsi *ci;
3205		struct ctl_frontend *fe;
3206
3207		ci = (struct ctl_iscsi *)addr;
3208
3209		fe = ctl_frontend_find("iscsi");
3210		if (fe == NULL) {
3211			ci->status = CTL_ISCSI_ERROR;
3212			snprintf(ci->error_str, sizeof(ci->error_str),
3213			    "Frontend \"iscsi\" not found.");
3214			break;
3215		}
3216
3217		retval = fe->ioctl(dev, cmd, addr, flag, td);
3218		break;
3219	}
3220	case CTL_NVMF: {
3221		struct ctl_nvmf *cn;
3222		struct ctl_frontend *fe;
3223
3224		cn = (struct ctl_nvmf *)addr;
3225
3226		fe = ctl_frontend_find("nvmf");
3227		if (fe == NULL) {
3228			cn->status = CTL_NVMF_ERROR;
3229			snprintf(cn->error_str, sizeof(cn->error_str),
3230			    "Frontend \"nvmf\" not found.");
3231			break;
3232		}
3233
3234		retval = fe->ioctl(dev, cmd, addr, flag, td);
3235		break;
3236	}
3237	case CTL_PORT_REQ: {
3238		struct ctl_req *req;
3239		struct ctl_frontend *fe;
3240		void *packed;
3241		nvlist_t *tmp_args_nvl;
3242		size_t packed_len;
3243
3244		req = (struct ctl_req *)addr;
3245		tmp_args_nvl = req->args_nvl;
3246
3247		fe = ctl_frontend_find(req->driver);
3248		if (fe == NULL) {
3249			req->status = CTL_LUN_ERROR;
3250			snprintf(req->error_str, sizeof(req->error_str),
3251			    "Frontend \"%s\" not found.", req->driver);
3252			break;
3253		}
3254
3255		if (req->args != NULL) {
3256			if (req->args_len > CTL_MAX_ARGS_LEN) {
3257				req->status = CTL_LUN_ERROR;
3258				snprintf(req->error_str, sizeof(req->error_str),
3259				    "Too big args.");
3260				break;
3261			}
3262			packed = malloc(req->args_len, M_CTL, M_WAITOK);
3263			if (copyin(req->args, packed, req->args_len) != 0) {
3264				free(packed, M_CTL);
3265				req->status = CTL_LUN_ERROR;
3266				snprintf(req->error_str, sizeof(req->error_str),
3267				    "Cannot copyin args.");
3268				break;
3269			}
3270			req->args_nvl = nvlist_unpack(packed,
3271			    req->args_len, 0);
3272			free(packed, M_CTL);
3273
3274			if (req->args_nvl == NULL) {
3275				req->status = CTL_LUN_ERROR;
3276				snprintf(req->error_str, sizeof(req->error_str),
3277				    "Cannot unpack args nvlist.");
3278				break;
3279			}
3280		} else
3281			req->args_nvl = nvlist_create(0);
3282
3283		req->result_nvl = NULL;
3284		if (fe->ioctl)
3285			retval = fe->ioctl(dev, cmd, addr, flag, td);
3286		else
3287			retval = ENODEV;
3288
3289		nvlist_destroy(req->args_nvl);
3290		req->args_nvl = tmp_args_nvl;
3291
3292		if (req->result_nvl != NULL) {
3293			if (req->result != NULL) {
3294				packed = nvlist_pack(req->result_nvl,
3295				    &packed_len);
3296				if (packed == NULL) {
3297					req->status = CTL_LUN_ERROR;
3298					snprintf(req->error_str,
3299					    sizeof(req->error_str),
3300					    "Cannot pack result nvlist.");
3301					break;
3302				}
3303
3304				if (packed_len > req->result_len) {
3305					req->status = CTL_LUN_ERROR;
3306					snprintf(req->error_str,
3307					    sizeof(req->error_str),
3308					    "Result nvlist too large.");
3309					free(packed, M_NVLIST);
3310					break;
3311				}
3312
3313				if (copyout(packed, req->result, packed_len)) {
3314					req->status = CTL_LUN_ERROR;
3315					snprintf(req->error_str,
3316					    sizeof(req->error_str),
3317					    "Cannot copyout() the result.");
3318					free(packed, M_NVLIST);
3319					break;
3320				}
3321
3322				req->result_len = packed_len;
3323				free(packed, M_NVLIST);
3324			}
3325
3326			nvlist_destroy(req->result_nvl);
3327		}
3328		break;
3329	}
3330	case CTL_PORT_LIST: {
3331		struct sbuf *sb;
3332		struct ctl_port *port;
3333		struct ctl_lun_list *list;
3334		const char *name, *value;
3335		void *cookie;
3336		int j, type;
3337		uint32_t plun;
3338
3339		list = (struct ctl_lun_list *)addr;
3340
3341		sb = sbuf_new(NULL, NULL, list->alloc_len, SBUF_FIXEDLEN);
3342		if (sb == NULL) {
3343			list->status = CTL_LUN_LIST_ERROR;
3344			snprintf(list->error_str, sizeof(list->error_str),
3345				 "Unable to allocate %d bytes for LUN list",
3346				 list->alloc_len);
3347			break;
3348		}
3349
3350		sbuf_cat(sb, "<ctlportlist>\n");
3351
3352		mtx_lock(&softc->ctl_lock);
3353		STAILQ_FOREACH(port, &softc->port_list, links) {
3354			retval = sbuf_printf(sb, "<targ_port id=\"%ju\">\n",
3355					     (uintmax_t)port->targ_port);
3356
3357			/*
3358			 * Bail out as soon as we see that we've overfilled
3359			 * the buffer.
3360			 */
3361			if (retval != 0)
3362				break;
3363
3364			retval = sbuf_printf(sb, "\t<frontend_type>%s"
3365			    "</frontend_type>\n", port->frontend->name);
3366			if (retval != 0)
3367				break;
3368
3369			retval = sbuf_printf(sb, "\t<port_type>%d</port_type>\n",
3370					     port->port_type);
3371			if (retval != 0)
3372				break;
3373
3374			retval = sbuf_printf(sb, "\t<online>%s</online>\n",
3375			    (port->status & CTL_PORT_STATUS_ONLINE) ? "YES" : "NO");
3376			if (retval != 0)
3377				break;
3378
3379			retval = sbuf_printf(sb, "\t<port_name>%s</port_name>\n",
3380			    port->port_name);
3381			if (retval != 0)
3382				break;
3383
3384			retval = sbuf_printf(sb, "\t<physical_port>%d</physical_port>\n",
3385			    port->physical_port);
3386			if (retval != 0)
3387				break;
3388
3389			retval = sbuf_printf(sb, "\t<virtual_port>%d</virtual_port>\n",
3390			    port->virtual_port);
3391			if (retval != 0)
3392				break;
3393
3394			if (port->target_devid != NULL) {
3395				sbuf_cat(sb, "\t<target>");
3396				ctl_id_sbuf(port->target_devid, sb);
3397				sbuf_cat(sb, "</target>\n");
3398			}
3399
3400			if (port->port_devid != NULL) {
3401				sbuf_cat(sb, "\t<port>");
3402				ctl_id_sbuf(port->port_devid, sb);
3403				sbuf_cat(sb, "</port>\n");
3404			}
3405
3406			if (port->port_info != NULL) {
3407				retval = port->port_info(port->onoff_arg, sb);
3408				if (retval != 0)
3409					break;
3410			}
3411
3412			cookie = NULL;
3413			while ((name = nvlist_next(port->options, &type,
3414			    &cookie)) != NULL) {
3415				sbuf_printf(sb, "\t<%s>", name);
3416
3417				if (type == NV_TYPE_STRING) {
3418					value = dnvlist_get_string(port->options,
3419					    name, NULL);
3420					if (value != NULL)
3421						sbuf_printf(sb, "%s", value);
3422				}
3423
3424				sbuf_printf(sb, "</%s>\n", name);
3425			}
3426
3427			if (port->lun_map != NULL) {
3428				sbuf_cat(sb, "\t<lun_map>on</lun_map>\n");
3429				for (j = 0; j < port->lun_map_size; j++) {
3430					plun = ctl_lun_map_from_port(port, j);
3431					if (plun == UINT32_MAX)
3432						continue;
3433					sbuf_printf(sb,
3434					    "\t<lun id=\"%u\">%u</lun>\n",
3435					    j, plun);
3436				}
3437			}
3438
3439			for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) {
3440				if (port->wwpn_iid[j].in_use == 0 ||
3441				    (port->wwpn_iid[j].wwpn == 0 &&
3442				     port->wwpn_iid[j].name == NULL))
3443					continue;
3444
3445				if (port->wwpn_iid[j].name != NULL)
3446					retval = sbuf_printf(sb,
3447					    "\t<initiator id=\"%u\">%s</initiator>\n",
3448					    j, port->wwpn_iid[j].name);
3449				else
3450					retval = sbuf_printf(sb,
3451					    "\t<initiator id=\"%u\">naa.%08jx</initiator>\n",
3452					    j, port->wwpn_iid[j].wwpn);
3453				if (retval != 0)
3454					break;
3455			}
3456			if (retval != 0)
3457				break;
3458
3459			retval = sbuf_cat(sb, "</targ_port>\n");
3460			if (retval != 0)
3461				break;
3462		}
3463		mtx_unlock(&softc->ctl_lock);
3464
3465		if ((retval != 0)
3466		 || ((retval = sbuf_cat(sb, "</ctlportlist>\n")) != 0)) {
3467			retval = 0;
3468			sbuf_delete(sb);
3469			list->status = CTL_LUN_LIST_NEED_MORE_SPACE;
3470			snprintf(list->error_str, sizeof(list->error_str),
3471				 "Out of space, %d bytes is too small",
3472				 list->alloc_len);
3473			break;
3474		}
3475
3476		sbuf_finish(sb);
3477
3478		retval = copyout(sbuf_data(sb), list->lun_xml,
3479				 sbuf_len(sb) + 1);
3480
3481		list->fill_len = sbuf_len(sb) + 1;
3482		list->status = CTL_LUN_LIST_OK;
3483		sbuf_delete(sb);
3484		break;
3485	}
3486	case CTL_LUN_MAP: {
3487		struct ctl_lun_map *lm  = (struct ctl_lun_map *)addr;
3488		struct ctl_port *port;
3489
3490		mtx_lock(&softc->ctl_lock);
3491		if (lm->port < softc->port_min ||
3492		    lm->port >= softc->port_max ||
3493		    (port = softc->ctl_ports[lm->port]) == NULL) {
3494			mtx_unlock(&softc->ctl_lock);
3495			return (ENXIO);
3496		}
3497		if (port->status & CTL_PORT_STATUS_ONLINE) {
3498			STAILQ_FOREACH(lun, &softc->lun_list, links) {
3499				if (ctl_lun_map_to_port(port, lun->lun) ==
3500				    UINT32_MAX)
3501					continue;
3502				mtx_lock(&lun->lun_lock);
3503				ctl_est_ua_port(lun, lm->port, -1,
3504				    CTL_UA_LUN_CHANGE);
3505				mtx_unlock(&lun->lun_lock);
3506			}
3507		}
3508		mtx_unlock(&softc->ctl_lock); // XXX: port_enable sleeps
3509		if (lm->plun != UINT32_MAX) {
3510			if (lm->lun == UINT32_MAX)
3511				retval = ctl_lun_map_unset(port, lm->plun);
3512			else if (lm->lun < ctl_max_luns &&
3513			    softc->ctl_luns[lm->lun] != NULL)
3514				retval = ctl_lun_map_set(port, lm->plun, lm->lun);
3515			else
3516				return (ENXIO);
3517		} else {
3518			if (lm->lun == UINT32_MAX)
3519				retval = ctl_lun_map_deinit(port);
3520			else
3521				retval = ctl_lun_map_init(port);
3522		}
3523		if (port->status & CTL_PORT_STATUS_ONLINE)
3524			ctl_isc_announce_port(port);
3525		break;
3526	}
3527	case CTL_GET_LUN_STATS: {
3528		struct ctl_get_io_stats *stats = (struct ctl_get_io_stats *)addr;
3529		int i;
3530
3531		/*
3532		 * XXX KDM no locking here.  If the LUN list changes,
3533		 * things can blow up.
3534		 */
3535		i = 0;
3536		stats->status = CTL_SS_OK;
3537		stats->fill_len = 0;
3538		STAILQ_FOREACH(lun, &softc->lun_list, links) {
3539			if (lun->lun < stats->first_item)
3540				continue;
3541			if (stats->fill_len + sizeof(lun->stats) >
3542			    stats->alloc_len) {
3543				stats->status = CTL_SS_NEED_MORE_SPACE;
3544				break;
3545			}
3546			retval = copyout(&lun->stats, &stats->stats[i++],
3547					 sizeof(lun->stats));
3548			if (retval != 0)
3549				break;
3550			stats->fill_len += sizeof(lun->stats);
3551		}
3552		stats->num_items = softc->num_luns;
3553		stats->flags = CTL_STATS_FLAG_NONE;
3554#ifdef CTL_TIME_IO
3555		stats->flags |= CTL_STATS_FLAG_TIME_VALID;
3556#endif
3557		getnanouptime(&stats->timestamp);
3558		break;
3559	}
3560	case CTL_GET_PORT_STATS: {
3561		struct ctl_get_io_stats *stats = (struct ctl_get_io_stats *)addr;
3562		int i;
3563
3564		/*
3565		 * XXX KDM no locking here.  If the LUN list changes,
3566		 * things can blow up.
3567		 */
3568		i = 0;
3569		stats->status = CTL_SS_OK;
3570		stats->fill_len = 0;
3571		STAILQ_FOREACH(port, &softc->port_list, links) {
3572			if (port->targ_port < stats->first_item)
3573				continue;
3574			if (stats->fill_len + sizeof(port->stats) >
3575			    stats->alloc_len) {
3576				stats->status = CTL_SS_NEED_MORE_SPACE;
3577				break;
3578			}
3579			retval = copyout(&port->stats, &stats->stats[i++],
3580					 sizeof(port->stats));
3581			if (retval != 0)
3582				break;
3583			stats->fill_len += sizeof(port->stats);
3584		}
3585		stats->num_items = softc->num_ports;
3586		stats->flags = CTL_STATS_FLAG_NONE;
3587#ifdef CTL_TIME_IO
3588		stats->flags |= CTL_STATS_FLAG_TIME_VALID;
3589#endif
3590		getnanouptime(&stats->timestamp);
3591		break;
3592	}
3593	default: {
3594		/* XXX KDM should we fix this? */
3595#if 0
3596		struct ctl_backend_driver *backend;
3597		unsigned int type;
3598		int found;
3599
3600		found = 0;
3601
3602		/*
3603		 * We encode the backend type as the ioctl type for backend
3604		 * ioctls.  So parse it out here, and then search for a
3605		 * backend of this type.
3606		 */
3607		type = _IOC_TYPE(cmd);
3608
3609		STAILQ_FOREACH(backend, &softc->be_list, links) {
3610			if (backend->type == type) {
3611				found = 1;
3612				break;
3613			}
3614		}
3615		if (found == 0) {
3616			printf("ctl: unknown ioctl command %#lx or backend "
3617			       "%d\n", cmd, type);
3618			retval = EINVAL;
3619			break;
3620		}
3621		retval = backend->ioctl(dev, cmd, addr, flag, td);
3622#endif
3623		retval = ENOTTY;
3624		break;
3625	}
3626	}
3627	return (retval);
3628}
3629
3630uint32_t
3631ctl_get_initindex(struct ctl_nexus *nexus)
3632{
3633	return (nexus->initid + (nexus->targ_port * CTL_MAX_INIT_PER_PORT));
3634}
3635
3636int
3637ctl_lun_map_init(struct ctl_port *port)
3638{
3639	struct ctl_softc *softc = port->ctl_softc;
3640	struct ctl_lun *lun;
3641	int size = ctl_lun_map_size;
3642	uint32_t i;
3643
3644	if (port->lun_map == NULL || port->lun_map_size < size) {
3645		port->lun_map_size = 0;
3646		free(port->lun_map, M_CTL);
3647		port->lun_map = malloc(size * sizeof(uint32_t),
3648		    M_CTL, M_NOWAIT);
3649	}
3650	if (port->lun_map == NULL)
3651		return (ENOMEM);
3652	for (i = 0; i < size; i++)
3653		port->lun_map[i] = UINT32_MAX;
3654	port->lun_map_size = size;
3655	if (port->status & CTL_PORT_STATUS_ONLINE) {
3656		if (port->lun_disable != NULL) {
3657			STAILQ_FOREACH(lun, &softc->lun_list, links)
3658				port->lun_disable(port->targ_lun_arg, lun->lun);
3659		}
3660		ctl_isc_announce_port(port);
3661	}
3662	return (0);
3663}
3664
3665int
3666ctl_lun_map_deinit(struct ctl_port *port)
3667{
3668	struct ctl_softc *softc = port->ctl_softc;
3669	struct ctl_lun *lun;
3670
3671	if (port->lun_map == NULL)
3672		return (0);
3673	port->lun_map_size = 0;
3674	free(port->lun_map, M_CTL);
3675	port->lun_map = NULL;
3676	if (port->status & CTL_PORT_STATUS_ONLINE) {
3677		if (port->lun_enable != NULL) {
3678			STAILQ_FOREACH(lun, &softc->lun_list, links)
3679				port->lun_enable(port->targ_lun_arg, lun->lun);
3680		}
3681		ctl_isc_announce_port(port);
3682	}
3683	return (0);
3684}
3685
3686int
3687ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun)
3688{
3689	int status;
3690	uint32_t old;
3691
3692	if (port->lun_map == NULL) {
3693		status = ctl_lun_map_init(port);
3694		if (status != 0)
3695			return (status);
3696	}
3697	if (plun >= port->lun_map_size)
3698		return (EINVAL);
3699	old = port->lun_map[plun];
3700	port->lun_map[plun] = glun;
3701	if ((port->status & CTL_PORT_STATUS_ONLINE) && old == UINT32_MAX) {
3702		if (port->lun_enable != NULL)
3703			port->lun_enable(port->targ_lun_arg, plun);
3704		ctl_isc_announce_port(port);
3705	}
3706	return (0);
3707}
3708
3709int
3710ctl_lun_map_unset(struct ctl_port *port, uint32_t plun)
3711{
3712	uint32_t old;
3713
3714	if (port->lun_map == NULL || plun >= port->lun_map_size)
3715		return (0);
3716	old = port->lun_map[plun];
3717	port->lun_map[plun] = UINT32_MAX;
3718	if ((port->status & CTL_PORT_STATUS_ONLINE) && old != UINT32_MAX) {
3719		if (port->lun_disable != NULL)
3720			port->lun_disable(port->targ_lun_arg, plun);
3721		ctl_isc_announce_port(port);
3722	}
3723	return (0);
3724}
3725
3726uint32_t
3727ctl_lun_map_from_port(struct ctl_port *port, uint32_t lun_id)
3728{
3729
3730	if (port == NULL)
3731		return (UINT32_MAX);
3732	if (port->lun_map == NULL)
3733		return (lun_id);
3734	if (lun_id > port->lun_map_size)
3735		return (UINT32_MAX);
3736	return (port->lun_map[lun_id]);
3737}
3738
3739uint32_t
3740ctl_lun_map_to_port(struct ctl_port *port, uint32_t lun_id)
3741{
3742	uint32_t i;
3743
3744	if (port == NULL)
3745		return (UINT32_MAX);
3746	if (port->lun_map == NULL)
3747		return (lun_id);
3748	for (i = 0; i < port->lun_map_size; i++) {
3749		if (port->lun_map[i] == lun_id)
3750			return (i);
3751	}
3752	return (UINT32_MAX);
3753}
3754
3755uint32_t
3756ctl_decode_lun(uint64_t encoded)
3757{
3758	uint8_t lun[8];
3759	uint32_t result = 0xffffffff;
3760
3761	be64enc(lun, encoded);
3762	switch (lun[0] & RPL_LUNDATA_ATYP_MASK) {
3763	case RPL_LUNDATA_ATYP_PERIPH:
3764		if ((lun[0] & 0x3f) == 0 && lun[2] == 0 && lun[3] == 0 &&
3765		    lun[4] == 0 && lun[5] == 0 && lun[6] == 0 && lun[7] == 0)
3766			result = lun[1];
3767		break;
3768	case RPL_LUNDATA_ATYP_FLAT:
3769		if (lun[2] == 0 && lun[3] == 0 && lun[4] == 0 && lun[5] == 0 &&
3770		    lun[6] == 0 && lun[7] == 0)
3771			result = ((lun[0] & 0x3f) << 8) + lun[1];
3772		break;
3773	case RPL_LUNDATA_ATYP_EXTLUN:
3774		switch (lun[0] & RPL_LUNDATA_EXT_EAM_MASK) {
3775		case 0x02:
3776			switch (lun[0] & RPL_LUNDATA_EXT_LEN_MASK) {
3777			case 0x00:
3778				result = lun[1];
3779				break;
3780			case 0x10:
3781				result = (lun[1] << 16) + (lun[2] << 8) +
3782				    lun[3];
3783				break;
3784			case 0x20:
3785				if (lun[1] == 0 && lun[6] == 0 && lun[7] == 0)
3786					result = (lun[2] << 24) +
3787					    (lun[3] << 16) + (lun[4] << 8) +
3788					    lun[5];
3789				break;
3790			}
3791			break;
3792		case RPL_LUNDATA_EXT_EAM_NOT_SPEC:
3793			result = 0xffffffff;
3794			break;
3795		}
3796		break;
3797	}
3798	return (result);
3799}
3800
3801uint64_t
3802ctl_encode_lun(uint32_t decoded)
3803{
3804	uint64_t l = decoded;
3805
3806	if (l <= 0xff)
3807		return (((uint64_t)RPL_LUNDATA_ATYP_PERIPH << 56) | (l << 48));
3808	if (l <= 0x3fff)
3809		return (((uint64_t)RPL_LUNDATA_ATYP_FLAT << 56) | (l << 48));
3810	if (l <= 0xffffff)
3811		return (((uint64_t)(RPL_LUNDATA_ATYP_EXTLUN | 0x12) << 56) |
3812		    (l << 32));
3813	return ((((uint64_t)RPL_LUNDATA_ATYP_EXTLUN | 0x22) << 56) | (l << 16));
3814}
3815
3816int
3817ctl_ffz(uint32_t *mask, uint32_t first, uint32_t last)
3818{
3819	int i;
3820
3821	for (i = first; i < last; i++) {
3822		if ((mask[i / 32] & (1 << (i % 32))) == 0)
3823			return (i);
3824	}
3825	return (-1);
3826}
3827
3828int
3829ctl_set_mask(uint32_t *mask, uint32_t bit)
3830{
3831	uint32_t chunk, piece;
3832
3833	chunk = bit >> 5;
3834	piece = bit % (sizeof(uint32_t) * 8);
3835
3836	if ((mask[chunk] & (1 << piece)) != 0)
3837		return (-1);
3838	else
3839		mask[chunk] |= (1 << piece);
3840
3841	return (0);
3842}
3843
3844int
3845ctl_clear_mask(uint32_t *mask, uint32_t bit)
3846{
3847	uint32_t chunk, piece;
3848
3849	chunk = bit >> 5;
3850	piece = bit % (sizeof(uint32_t) * 8);
3851
3852	if ((mask[chunk] & (1 << piece)) == 0)
3853		return (-1);
3854	else
3855		mask[chunk] &= ~(1 << piece);
3856
3857	return (0);
3858}
3859
3860int
3861ctl_is_set(uint32_t *mask, uint32_t bit)
3862{
3863	uint32_t chunk, piece;
3864
3865	chunk = bit >> 5;
3866	piece = bit % (sizeof(uint32_t) * 8);
3867
3868	if ((mask[chunk] & (1 << piece)) == 0)
3869		return (0);
3870	else
3871		return (1);
3872}
3873
3874static uint64_t
3875ctl_get_prkey(struct ctl_lun *lun, uint32_t residx)
3876{
3877	uint64_t *t;
3878
3879	t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT];
3880	if (t == NULL)
3881		return (0);
3882	return (t[residx % CTL_MAX_INIT_PER_PORT]);
3883}
3884
3885static void
3886ctl_clr_prkey(struct ctl_lun *lun, uint32_t residx)
3887{
3888	uint64_t *t;
3889
3890	t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT];
3891	if (t == NULL)
3892		return;
3893	t[residx % CTL_MAX_INIT_PER_PORT] = 0;
3894}
3895
3896static void
3897ctl_alloc_prkey(struct ctl_lun *lun, uint32_t residx)
3898{
3899	uint64_t *p;
3900	u_int i;
3901
3902	i = residx/CTL_MAX_INIT_PER_PORT;
3903	if (lun->pr_keys[i] != NULL)
3904		return;
3905	mtx_unlock(&lun->lun_lock);
3906	p = malloc(sizeof(uint64_t) * CTL_MAX_INIT_PER_PORT, M_CTL,
3907	    M_WAITOK | M_ZERO);
3908	mtx_lock(&lun->lun_lock);
3909	if (lun->pr_keys[i] == NULL)
3910		lun->pr_keys[i] = p;
3911	else
3912		free(p, M_CTL);
3913}
3914
3915static void
3916ctl_set_prkey(struct ctl_lun *lun, uint32_t residx, uint64_t key)
3917{
3918	uint64_t *t;
3919
3920	t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT];
3921	KASSERT(t != NULL, ("prkey %d is not allocated", residx));
3922	t[residx % CTL_MAX_INIT_PER_PORT] = key;
3923}
3924
3925/*
3926 * ctl_softc, pool_name, total_ctl_io are passed in.
3927 * npool is passed out.
3928 */
3929int
3930ctl_pool_create(struct ctl_softc *ctl_softc, const char *pool_name,
3931		uint32_t total_ctl_io, void **npool)
3932{
3933	struct ctl_io_pool *pool;
3934
3935	pool = (struct ctl_io_pool *)malloc(sizeof(*pool), M_CTL,
3936					    M_NOWAIT | M_ZERO);
3937	if (pool == NULL)
3938		return (ENOMEM);
3939
3940	snprintf(pool->name, sizeof(pool->name), "CTL IO %s", pool_name);
3941	pool->ctl_softc = ctl_softc;
3942#ifdef IO_POOLS
3943	pool->zone = uma_zsecond_create(pool->name, NULL,
3944	    NULL, NULL, NULL, ctl_softc->io_zone);
3945	/* uma_prealloc(pool->zone, total_ctl_io); */
3946#else
3947	pool->zone = ctl_softc->io_zone;
3948#endif
3949
3950	*npool = pool;
3951	return (0);
3952}
3953
3954void
3955ctl_pool_free(struct ctl_io_pool *pool)
3956{
3957
3958	if (pool == NULL)
3959		return;
3960
3961#ifdef IO_POOLS
3962	uma_zdestroy(pool->zone);
3963#endif
3964	free(pool, M_CTL);
3965}
3966
3967union ctl_io *
3968ctl_alloc_io(void *pool_ref)
3969{
3970	struct ctl_io_pool *pool = (struct ctl_io_pool *)pool_ref;
3971	union ctl_io *io;
3972
3973	io = uma_zalloc(pool->zone, M_WAITOK);
3974	if (io != NULL) {
3975		io->io_hdr.pool = pool_ref;
3976		CTL_SOFTC(io) = pool->ctl_softc;
3977		TAILQ_INIT(&io->io_hdr.blocked_queue);
3978	}
3979	return (io);
3980}
3981
3982union ctl_io *
3983ctl_alloc_io_nowait(void *pool_ref)
3984{
3985	struct ctl_io_pool *pool = (struct ctl_io_pool *)pool_ref;
3986	union ctl_io *io;
3987
3988	io = uma_zalloc(pool->zone, M_NOWAIT);
3989	if (io != NULL) {
3990		io->io_hdr.pool = pool_ref;
3991		CTL_SOFTC(io) = pool->ctl_softc;
3992		TAILQ_INIT(&io->io_hdr.blocked_queue);
3993	}
3994	return (io);
3995}
3996
3997void
3998ctl_free_io(union ctl_io *io)
3999{
4000	struct ctl_io_pool *pool;
4001
4002	if (io == NULL)
4003		return;
4004
4005	pool = (struct ctl_io_pool *)io->io_hdr.pool;
4006	uma_zfree(pool->zone, io);
4007}
4008
4009void
4010ctl_zero_io(union ctl_io *io)
4011{
4012	struct ctl_io_pool *pool;
4013
4014	if (io == NULL)
4015		return;
4016
4017	/*
4018	 * May need to preserve linked list pointers at some point too.
4019	 */
4020	pool = io->io_hdr.pool;
4021	memset(io, 0, sizeof(*io));
4022	io->io_hdr.pool = pool;
4023	CTL_SOFTC(io) = pool->ctl_softc;
4024	TAILQ_INIT(&io->io_hdr.blocked_queue);
4025}
4026
4027int
4028ctl_expand_number(const char *buf, uint64_t *num)
4029{
4030	char *endptr;
4031	uint64_t number;
4032	unsigned shift;
4033
4034	number = strtoq(buf, &endptr, 0);
4035
4036	switch (tolower((unsigned char)*endptr)) {
4037	case 'e':
4038		shift = 60;
4039		break;
4040	case 'p':
4041		shift = 50;
4042		break;
4043	case 't':
4044		shift = 40;
4045		break;
4046	case 'g':
4047		shift = 30;
4048		break;
4049	case 'm':
4050		shift = 20;
4051		break;
4052	case 'k':
4053		shift = 10;
4054		break;
4055	case 'b':
4056	case '\0': /* No unit. */
4057		*num = number;
4058		return (0);
4059	default:
4060		/* Unrecognized unit. */
4061		return (-1);
4062	}
4063
4064	if ((number << shift) >> shift != number) {
4065		/* Overflow */
4066		return (-1);
4067	}
4068	*num = number << shift;
4069	return (0);
4070}
4071
4072/*
4073 * This routine could be used in the future to load default and/or saved
4074 * mode page parameters for a particuar lun.
4075 */
4076static int
4077ctl_init_page_index(struct ctl_lun *lun)
4078{
4079	int i, page_code;
4080	struct ctl_page_index *page_index;
4081	const char *value;
4082	uint64_t ival;
4083
4084	memcpy(&lun->mode_pages.index, page_index_template,
4085	       sizeof(page_index_template));
4086
4087	for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
4088		page_index = &lun->mode_pages.index[i];
4089		if (lun->be_lun->lun_type == T_DIRECT &&
4090		    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
4091			continue;
4092		if (lun->be_lun->lun_type == T_PROCESSOR &&
4093		    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
4094			continue;
4095		if (lun->be_lun->lun_type == T_CDROM &&
4096		    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
4097			continue;
4098
4099		page_code = page_index->page_code & SMPH_PC_MASK;
4100		switch (page_code) {
4101		case SMS_RW_ERROR_RECOVERY_PAGE: {
4102			KASSERT(page_index->subpage == SMS_SUBPAGE_PAGE_0,
4103			    ("subpage %#x for page %#x is incorrect!",
4104			    page_index->subpage, page_code));
4105			memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_CURRENT],
4106			       &rw_er_page_default,
4107			       sizeof(rw_er_page_default));
4108			memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_CHANGEABLE],
4109			       &rw_er_page_changeable,
4110			       sizeof(rw_er_page_changeable));
4111			memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_DEFAULT],
4112			       &rw_er_page_default,
4113			       sizeof(rw_er_page_default));
4114			memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_SAVED],
4115			       &rw_er_page_default,
4116			       sizeof(rw_er_page_default));
4117			page_index->page_data =
4118				(uint8_t *)lun->mode_pages.rw_er_page;
4119			break;
4120		}
4121		case SMS_VERIFY_ERROR_RECOVERY_PAGE: {
4122			KASSERT(page_index->subpage == SMS_SUBPAGE_PAGE_0,
4123			    ("subpage %#x for page %#x is incorrect!",
4124			    page_index->subpage, page_code));
4125			memcpy(&lun->mode_pages.verify_er_page[CTL_PAGE_CURRENT],
4126			       &verify_er_page_default,
4127			       sizeof(verify_er_page_default));
4128			memcpy(&lun->mode_pages.verify_er_page[CTL_PAGE_CHANGEABLE],
4129			       &verify_er_page_changeable,
4130			       sizeof(verify_er_page_changeable));
4131			memcpy(&lun->mode_pages.verify_er_page[CTL_PAGE_DEFAULT],
4132			       &verify_er_page_default,
4133			       sizeof(verify_er_page_default));
4134			memcpy(&lun->mode_pages.verify_er_page[CTL_PAGE_SAVED],
4135			       &verify_er_page_default,
4136			       sizeof(verify_er_page_default));
4137			page_index->page_data =
4138				(uint8_t *)lun->mode_pages.verify_er_page;
4139			break;
4140		}
4141		case SMS_CACHING_PAGE: {
4142			struct scsi_caching_page *caching_page;
4143
4144			KASSERT(page_index->subpage == SMS_SUBPAGE_PAGE_0,
4145			    ("subpage %#x for page %#x is incorrect!",
4146			    page_index->subpage, page_code));
4147			memcpy(&lun->mode_pages.caching_page[CTL_PAGE_DEFAULT],
4148			       &caching_page_default,
4149			       sizeof(caching_page_default));
4150			memcpy(&lun->mode_pages.caching_page[
4151			       CTL_PAGE_CHANGEABLE], &caching_page_changeable,
4152			       sizeof(caching_page_changeable));
4153			memcpy(&lun->mode_pages.caching_page[CTL_PAGE_SAVED],
4154			       &caching_page_default,
4155			       sizeof(caching_page_default));
4156			caching_page = &lun->mode_pages.caching_page[
4157			    CTL_PAGE_SAVED];
4158			value = dnvlist_get_string(lun->be_lun->options,
4159			    "writecache", NULL);
4160			if (value != NULL && strcmp(value, "off") == 0)
4161				caching_page->flags1 &= ~SCP_WCE;
4162			value = dnvlist_get_string(lun->be_lun->options,
4163			    "readcache", NULL);
4164			if (value != NULL && strcmp(value, "off") == 0)
4165				caching_page->flags1 |= SCP_RCD;
4166			memcpy(&lun->mode_pages.caching_page[CTL_PAGE_CURRENT],
4167			       &lun->mode_pages.caching_page[CTL_PAGE_SAVED],
4168			       sizeof(caching_page_default));
4169			page_index->page_data =
4170				(uint8_t *)lun->mode_pages.caching_page;
4171			break;
4172		}
4173		case SMS_CONTROL_MODE_PAGE: {
4174			switch (page_index->subpage) {
4175			case SMS_SUBPAGE_PAGE_0: {
4176				struct scsi_control_page *control_page;
4177
4178				memcpy(&lun->mode_pages.control_page[
4179				    CTL_PAGE_DEFAULT],
4180				       &control_page_default,
4181				       sizeof(control_page_default));
4182				memcpy(&lun->mode_pages.control_page[
4183				    CTL_PAGE_CHANGEABLE],
4184				       &control_page_changeable,
4185				       sizeof(control_page_changeable));
4186				memcpy(&lun->mode_pages.control_page[
4187				    CTL_PAGE_SAVED],
4188				       &control_page_default,
4189				       sizeof(control_page_default));
4190				control_page = &lun->mode_pages.control_page[
4191				    CTL_PAGE_SAVED];
4192				value = dnvlist_get_string(lun->be_lun->options,
4193				    "reordering", NULL);
4194				if (value != NULL &&
4195				    strcmp(value, "unrestricted") == 0) {
4196					control_page->queue_flags &=
4197					    ~SCP_QUEUE_ALG_MASK;
4198					control_page->queue_flags |=
4199					    SCP_QUEUE_ALG_UNRESTRICTED;
4200				}
4201				memcpy(&lun->mode_pages.control_page[
4202				    CTL_PAGE_CURRENT],
4203				       &lun->mode_pages.control_page[
4204				    CTL_PAGE_SAVED],
4205				       sizeof(control_page_default));
4206				page_index->page_data =
4207				    (uint8_t *)lun->mode_pages.control_page;
4208				break;
4209			}
4210			case 0x01:
4211				memcpy(&lun->mode_pages.control_ext_page[
4212				    CTL_PAGE_DEFAULT],
4213				       &control_ext_page_default,
4214				       sizeof(control_ext_page_default));
4215				memcpy(&lun->mode_pages.control_ext_page[
4216				    CTL_PAGE_CHANGEABLE],
4217				       &control_ext_page_changeable,
4218				       sizeof(control_ext_page_changeable));
4219				memcpy(&lun->mode_pages.control_ext_page[
4220				    CTL_PAGE_SAVED],
4221				       &control_ext_page_default,
4222				       sizeof(control_ext_page_default));
4223				memcpy(&lun->mode_pages.control_ext_page[
4224				    CTL_PAGE_CURRENT],
4225				       &lun->mode_pages.control_ext_page[
4226				    CTL_PAGE_SAVED],
4227				       sizeof(control_ext_page_default));
4228				page_index->page_data =
4229				    (uint8_t *)lun->mode_pages.control_ext_page;
4230				break;
4231			default:
4232				panic("subpage %#x for page %#x is incorrect!",
4233				      page_index->subpage, page_code);
4234			}
4235			break;
4236		}
4237		case SMS_INFO_EXCEPTIONS_PAGE: {
4238			switch (page_index->subpage) {
4239			case SMS_SUBPAGE_PAGE_0:
4240				memcpy(&lun->mode_pages.ie_page[CTL_PAGE_CURRENT],
4241				       &ie_page_default,
4242				       sizeof(ie_page_default));
4243				memcpy(&lun->mode_pages.ie_page[
4244				       CTL_PAGE_CHANGEABLE], &ie_page_changeable,
4245				       sizeof(ie_page_changeable));
4246				memcpy(&lun->mode_pages.ie_page[CTL_PAGE_DEFAULT],
4247				       &ie_page_default,
4248				       sizeof(ie_page_default));
4249				memcpy(&lun->mode_pages.ie_page[CTL_PAGE_SAVED],
4250				       &ie_page_default,
4251				       sizeof(ie_page_default));
4252				page_index->page_data =
4253					(uint8_t *)lun->mode_pages.ie_page;
4254				break;
4255			case 0x02: {
4256				struct ctl_logical_block_provisioning_page *page;
4257
4258				memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_DEFAULT],
4259				       &lbp_page_default,
4260				       sizeof(lbp_page_default));
4261				memcpy(&lun->mode_pages.lbp_page[
4262				       CTL_PAGE_CHANGEABLE], &lbp_page_changeable,
4263				       sizeof(lbp_page_changeable));
4264				memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_SAVED],
4265				       &lbp_page_default,
4266				       sizeof(lbp_page_default));
4267				page = &lun->mode_pages.lbp_page[CTL_PAGE_SAVED];
4268				value = dnvlist_get_string(lun->be_lun->options,
4269				    "avail-threshold", NULL);
4270				if (value != NULL &&
4271				    ctl_expand_number(value, &ival) == 0) {
4272					page->descr[0].flags |= SLBPPD_ENABLED |
4273					    SLBPPD_ARMING_DEC;
4274					if (lun->be_lun->blocksize)
4275						ival /= lun->be_lun->blocksize;
4276					else
4277						ival /= 512;
4278					scsi_ulto4b(ival >> CTL_LBP_EXPONENT,
4279					    page->descr[0].count);
4280				}
4281				value = dnvlist_get_string(lun->be_lun->options,
4282				    "used-threshold", NULL);
4283				if (value != NULL &&
4284				    ctl_expand_number(value, &ival) == 0) {
4285					page->descr[1].flags |= SLBPPD_ENABLED |
4286					    SLBPPD_ARMING_INC;
4287					if (lun->be_lun->blocksize)
4288						ival /= lun->be_lun->blocksize;
4289					else
4290						ival /= 512;
4291					scsi_ulto4b(ival >> CTL_LBP_EXPONENT,
4292					    page->descr[1].count);
4293				}
4294				value = dnvlist_get_string(lun->be_lun->options,
4295				    "pool-avail-threshold", NULL);
4296				if (value != NULL &&
4297				    ctl_expand_number(value, &ival) == 0) {
4298					page->descr[2].flags |= SLBPPD_ENABLED |
4299					    SLBPPD_ARMING_DEC;
4300					if (lun->be_lun->blocksize)
4301						ival /= lun->be_lun->blocksize;
4302					else
4303						ival /= 512;
4304					scsi_ulto4b(ival >> CTL_LBP_EXPONENT,
4305					    page->descr[2].count);
4306				}
4307				value = dnvlist_get_string(lun->be_lun->options,
4308				    "pool-used-threshold", NULL);
4309				if (value != NULL &&
4310				    ctl_expand_number(value, &ival) == 0) {
4311					page->descr[3].flags |= SLBPPD_ENABLED |
4312					    SLBPPD_ARMING_INC;
4313					if (lun->be_lun->blocksize)
4314						ival /= lun->be_lun->blocksize;
4315					else
4316						ival /= 512;
4317					scsi_ulto4b(ival >> CTL_LBP_EXPONENT,
4318					    page->descr[3].count);
4319				}
4320				memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_CURRENT],
4321				       &lun->mode_pages.lbp_page[CTL_PAGE_SAVED],
4322				       sizeof(lbp_page_default));
4323				page_index->page_data =
4324					(uint8_t *)lun->mode_pages.lbp_page;
4325				break;
4326			}
4327			default:
4328				panic("subpage %#x for page %#x is incorrect!",
4329				      page_index->subpage, page_code);
4330			}
4331			break;
4332		}
4333		case SMS_CDDVD_CAPS_PAGE:{
4334			KASSERT(page_index->subpage == SMS_SUBPAGE_PAGE_0,
4335			    ("subpage %#x for page %#x is incorrect!",
4336			    page_index->subpage, page_code));
4337			memcpy(&lun->mode_pages.cddvd_page[CTL_PAGE_DEFAULT],
4338			       &cddvd_page_default,
4339			       sizeof(cddvd_page_default));
4340			memcpy(&lun->mode_pages.cddvd_page[
4341			       CTL_PAGE_CHANGEABLE], &cddvd_page_changeable,
4342			       sizeof(cddvd_page_changeable));
4343			memcpy(&lun->mode_pages.cddvd_page[CTL_PAGE_SAVED],
4344			       &cddvd_page_default,
4345			       sizeof(cddvd_page_default));
4346			memcpy(&lun->mode_pages.cddvd_page[CTL_PAGE_CURRENT],
4347			       &lun->mode_pages.cddvd_page[CTL_PAGE_SAVED],
4348			       sizeof(cddvd_page_default));
4349			page_index->page_data =
4350				(uint8_t *)lun->mode_pages.cddvd_page;
4351			break;
4352		}
4353		default:
4354			panic("invalid page code value %#x", page_code);
4355		}
4356	}
4357
4358	return (CTL_RETVAL_COMPLETE);
4359}
4360
4361static int
4362ctl_init_log_page_index(struct ctl_lun *lun)
4363{
4364	struct ctl_page_index *page_index;
4365	int i, j, k, prev;
4366
4367	memcpy(&lun->log_pages.index, log_page_index_template,
4368	       sizeof(log_page_index_template));
4369
4370	prev = -1;
4371	for (i = 0, j = 0, k = 0; i < CTL_NUM_LOG_PAGES; i++) {
4372		page_index = &lun->log_pages.index[i];
4373		if (lun->be_lun->lun_type == T_DIRECT &&
4374		    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
4375			continue;
4376		if (lun->be_lun->lun_type == T_PROCESSOR &&
4377		    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
4378			continue;
4379		if (lun->be_lun->lun_type == T_CDROM &&
4380		    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
4381			continue;
4382
4383		if (page_index->page_code == SLS_LOGICAL_BLOCK_PROVISIONING &&
4384		    lun->backend->lun_attr == NULL)
4385			continue;
4386
4387		if (page_index->page_code != prev) {
4388			lun->log_pages.pages_page[j] = page_index->page_code;
4389			prev = page_index->page_code;
4390			j++;
4391		}
4392		lun->log_pages.subpages_page[k*2] = page_index->page_code;
4393		lun->log_pages.subpages_page[k*2+1] = page_index->subpage;
4394		k++;
4395	}
4396	lun->log_pages.index[0].page_data = &lun->log_pages.pages_page[0];
4397	lun->log_pages.index[0].page_len = j;
4398	lun->log_pages.index[1].page_data = &lun->log_pages.subpages_page[0];
4399	lun->log_pages.index[1].page_len = k * 2;
4400	lun->log_pages.index[2].page_data = (uint8_t *)&lun->log_pages.temp_page;
4401	lun->log_pages.index[2].page_len = sizeof(lun->log_pages.temp_page);
4402	lun->log_pages.index[3].page_data = &lun->log_pages.lbp_page[0];
4403	lun->log_pages.index[3].page_len = 12*CTL_NUM_LBP_PARAMS;
4404	lun->log_pages.index[4].page_data = (uint8_t *)&lun->log_pages.stat_page;
4405	lun->log_pages.index[4].page_len = sizeof(lun->log_pages.stat_page);
4406	lun->log_pages.index[5].page_data = (uint8_t *)&lun->log_pages.ie_page;
4407	lun->log_pages.index[5].page_len = sizeof(lun->log_pages.ie_page);
4408
4409	return (CTL_RETVAL_COMPLETE);
4410}
4411
4412static int
4413hex2bin(const char *str, uint8_t *buf, int buf_size)
4414{
4415	int i;
4416	u_char c;
4417
4418	memset(buf, 0, buf_size);
4419	while (isspace(str[0]))
4420		str++;
4421	if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
4422		str += 2;
4423	buf_size *= 2;
4424	for (i = 0; str[i] != 0 && i < buf_size; i++) {
4425		while (str[i] == '-')	/* Skip dashes in UUIDs. */
4426			str++;
4427		c = str[i];
4428		if (isdigit(c))
4429			c -= '0';
4430		else if (isalpha(c))
4431			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
4432		else
4433			break;
4434		if (c >= 16)
4435			break;
4436		if ((i & 1) == 0)
4437			buf[i / 2] |= (c << 4);
4438		else
4439			buf[i / 2] |= c;
4440	}
4441	return ((i + 1) / 2);
4442}
4443
4444/*
4445 * Add LUN.
4446 *
4447 * Returns 0 for success, non-zero (errno) for failure.
4448 */
4449int
4450ctl_add_lun(struct ctl_be_lun *be_lun)
4451{
4452	struct ctl_softc *ctl_softc = control_softc;
4453	struct ctl_lun *nlun, *lun;
4454	struct scsi_vpd_id_descriptor *desc;
4455	struct scsi_vpd_id_t10 *t10id;
4456	const char *eui, *naa, *scsiname, *uuid, *vendor, *value;
4457	int lun_number;
4458	int devidlen, idlen1, idlen2 = 0, len;
4459
4460	/*
4461	 * We support only Direct Access, CD-ROM or Processor LUN types.
4462	 */
4463	switch (be_lun->lun_type) {
4464	case T_DIRECT:
4465	case T_PROCESSOR:
4466	case T_CDROM:
4467		break;
4468	case T_SEQUENTIAL:
4469	case T_CHANGER:
4470	default:
4471		return (EINVAL);
4472	}
4473	lun = malloc(sizeof(*lun), M_CTL, M_WAITOK | M_ZERO);
4474
4475	lun->pending_sense = malloc(sizeof(struct scsi_sense_data *) *
4476	    ctl_max_ports, M_DEVBUF, M_WAITOK | M_ZERO);
4477	lun->pending_ua = malloc(sizeof(ctl_ua_type *) * ctl_max_ports,
4478	    M_DEVBUF, M_WAITOK | M_ZERO);
4479	lun->pr_keys = malloc(sizeof(uint64_t *) * ctl_max_ports,
4480	    M_DEVBUF, M_WAITOK | M_ZERO);
4481
4482	/* Generate LUN ID. */
4483	devidlen = max(CTL_DEVID_MIN_LEN,
4484	    strnlen(be_lun->device_id, CTL_DEVID_LEN));
4485	idlen1 = sizeof(*t10id) + devidlen;
4486	len = sizeof(struct scsi_vpd_id_descriptor) + idlen1;
4487	scsiname = dnvlist_get_string(be_lun->options, "scsiname", NULL);
4488	if (scsiname != NULL) {
4489		idlen2 = roundup2(strlen(scsiname) + 1, 4);
4490		len += sizeof(struct scsi_vpd_id_descriptor) + idlen2;
4491	}
4492	eui = dnvlist_get_string(be_lun->options, "eui", NULL);
4493	if (eui != NULL) {
4494		len += sizeof(struct scsi_vpd_id_descriptor) + 16;
4495	}
4496	naa = dnvlist_get_string(be_lun->options, "naa", NULL);
4497	if (naa != NULL) {
4498		len += sizeof(struct scsi_vpd_id_descriptor) + 16;
4499	}
4500	uuid = dnvlist_get_string(be_lun->options, "uuid", NULL);
4501	if (uuid != NULL) {
4502		len += sizeof(struct scsi_vpd_id_descriptor) + 18;
4503	}
4504	lun->lun_devid = malloc(sizeof(struct ctl_devid) + len,
4505	    M_CTL, M_WAITOK | M_ZERO);
4506	desc = (struct scsi_vpd_id_descriptor *)lun->lun_devid->data;
4507	desc->proto_codeset = SVPD_ID_CODESET_ASCII;
4508	desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_T10;
4509	desc->length = idlen1;
4510	t10id = (struct scsi_vpd_id_t10 *)&desc->identifier[0];
4511	memset(t10id->vendor, ' ', sizeof(t10id->vendor));
4512	if ((vendor = dnvlist_get_string(be_lun->options, "vendor", NULL)) == NULL) {
4513		strncpy((char *)t10id->vendor, CTL_VENDOR, sizeof(t10id->vendor));
4514	} else {
4515		strncpy(t10id->vendor, vendor,
4516		    min(sizeof(t10id->vendor), strlen(vendor)));
4517	}
4518	strncpy((char *)t10id->vendor_spec_id,
4519	    (char *)be_lun->device_id, devidlen);
4520	if (scsiname != NULL) {
4521		desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
4522		    desc->length);
4523		desc->proto_codeset = SVPD_ID_CODESET_UTF8;
4524		desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN |
4525		    SVPD_ID_TYPE_SCSI_NAME;
4526		desc->length = idlen2;
4527		strlcpy(desc->identifier, scsiname, idlen2);
4528	}
4529	if (eui != NULL) {
4530		desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
4531		    desc->length);
4532		desc->proto_codeset = SVPD_ID_CODESET_BINARY;
4533		desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN |
4534		    SVPD_ID_TYPE_EUI64;
4535		desc->length = hex2bin(eui, desc->identifier, 16);
4536		desc->length = desc->length > 12 ? 16 :
4537		    (desc->length > 8 ? 12 : 8);
4538		len -= 16 - desc->length;
4539	}
4540	if (naa != NULL) {
4541		desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
4542		    desc->length);
4543		desc->proto_codeset = SVPD_ID_CODESET_BINARY;
4544		desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN |
4545		    SVPD_ID_TYPE_NAA;
4546		desc->length = hex2bin(naa, desc->identifier, 16);
4547		desc->length = desc->length > 8 ? 16 : 8;
4548		len -= 16 - desc->length;
4549	}
4550	if (uuid != NULL) {
4551		desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
4552		    desc->length);
4553		desc->proto_codeset = SVPD_ID_CODESET_BINARY;
4554		desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN |
4555		    SVPD_ID_TYPE_UUID;
4556		desc->identifier[0] = 0x10;
4557		hex2bin(uuid, &desc->identifier[2], 16);
4558		desc->length = 18;
4559	}
4560	lun->lun_devid->len = len;
4561
4562	mtx_lock(&ctl_softc->ctl_lock);
4563	/*
4564	 * See if the caller requested a particular LUN number.  If so, see
4565	 * if it is available.  Otherwise, allocate the first available LUN.
4566	 */
4567	if (be_lun->flags & CTL_LUN_FLAG_ID_REQ) {
4568		if ((be_lun->req_lun_id > (ctl_max_luns - 1))
4569		 || (ctl_is_set(ctl_softc->ctl_lun_mask, be_lun->req_lun_id))) {
4570			mtx_unlock(&ctl_softc->ctl_lock);
4571			if (be_lun->req_lun_id > (ctl_max_luns - 1)) {
4572				printf("ctl: requested LUN ID %d is higher "
4573				       "than ctl_max_luns - 1 (%d)\n",
4574				       be_lun->req_lun_id, ctl_max_luns - 1);
4575			} else {
4576				/*
4577				 * XXX KDM return an error, or just assign
4578				 * another LUN ID in this case??
4579				 */
4580				printf("ctl: requested LUN ID %d is already "
4581				       "in use\n", be_lun->req_lun_id);
4582			}
4583fail:
4584			free(lun->lun_devid, M_CTL);
4585			free(lun, M_CTL);
4586			return (ENOSPC);
4587		}
4588		lun_number = be_lun->req_lun_id;
4589	} else {
4590		lun_number = ctl_ffz(ctl_softc->ctl_lun_mask, 0, ctl_max_luns);
4591		if (lun_number == -1) {
4592			mtx_unlock(&ctl_softc->ctl_lock);
4593			printf("ctl: can't allocate LUN, out of LUNs\n");
4594			goto fail;
4595		}
4596	}
4597	ctl_set_mask(ctl_softc->ctl_lun_mask, lun_number);
4598	mtx_unlock(&ctl_softc->ctl_lock);
4599
4600	mtx_init(&lun->lun_lock, "CTL LUN", NULL, MTX_DEF);
4601	lun->lun = lun_number;
4602	lun->be_lun = be_lun;
4603	/*
4604	 * The processor LUN is always enabled.  Disk LUNs come on line
4605	 * disabled, and must be enabled by the backend.
4606	 */
4607	lun->flags |= CTL_LUN_DISABLED;
4608	lun->backend = be_lun->be;
4609	be_lun->ctl_lun = lun;
4610	be_lun->lun_id = lun_number;
4611	if (be_lun->flags & CTL_LUN_FLAG_EJECTED)
4612		lun->flags |= CTL_LUN_EJECTED;
4613	if (be_lun->flags & CTL_LUN_FLAG_NO_MEDIA)
4614		lun->flags |= CTL_LUN_NO_MEDIA;
4615	if (be_lun->flags & CTL_LUN_FLAG_STOPPED)
4616		lun->flags |= CTL_LUN_STOPPED;
4617
4618	if (be_lun->flags & CTL_LUN_FLAG_PRIMARY)
4619		lun->flags |= CTL_LUN_PRIMARY_SC;
4620
4621	value = dnvlist_get_string(be_lun->options, "removable", NULL);
4622	if (value != NULL) {
4623		if (strcmp(value, "on") == 0)
4624			lun->flags |= CTL_LUN_REMOVABLE;
4625	} else if (be_lun->lun_type == T_CDROM)
4626		lun->flags |= CTL_LUN_REMOVABLE;
4627
4628	lun->ctl_softc = ctl_softc;
4629#ifdef CTL_TIME_IO
4630	lun->last_busy = getsbinuptime();
4631#endif
4632	LIST_INIT(&lun->ooa_queue);
4633	STAILQ_INIT(&lun->error_list);
4634	lun->ie_reported = 1;
4635	callout_init_mtx(&lun->ie_callout, &lun->lun_lock, 0);
4636	ctl_tpc_lun_init(lun);
4637	if (lun->flags & CTL_LUN_REMOVABLE) {
4638		lun->prevent = malloc((CTL_MAX_INITIATORS + 31) / 32 * 4,
4639		    M_CTL, M_WAITOK);
4640	}
4641
4642	/*
4643	 * Initialize the mode and log page index.
4644	 */
4645	ctl_init_page_index(lun);
4646	ctl_init_log_page_index(lun);
4647
4648	/* Setup statistics gathering */
4649	lun->stats.item = lun_number;
4650
4651	/*
4652	 * Now, before we insert this lun on the lun list, set the lun
4653	 * inventory changed UA for all other luns.
4654	 */
4655	mtx_lock(&ctl_softc->ctl_lock);
4656	STAILQ_FOREACH(nlun, &ctl_softc->lun_list, links) {
4657		mtx_lock(&nlun->lun_lock);
4658		ctl_est_ua_all(nlun, -1, CTL_UA_LUN_CHANGE);
4659		mtx_unlock(&nlun->lun_lock);
4660	}
4661	STAILQ_INSERT_TAIL(&ctl_softc->lun_list, lun, links);
4662	ctl_softc->ctl_luns[lun_number] = lun;
4663	ctl_softc->num_luns++;
4664	mtx_unlock(&ctl_softc->ctl_lock);
4665
4666	/*
4667	 * We successfully added the LUN, attempt to enable it.
4668	 */
4669	if (ctl_enable_lun(lun) != 0) {
4670		printf("%s: ctl_enable_lun() failed!\n", __func__);
4671		mtx_lock(&ctl_softc->ctl_lock);
4672		STAILQ_REMOVE(&ctl_softc->lun_list, lun, ctl_lun, links);
4673		ctl_clear_mask(ctl_softc->ctl_lun_mask, lun_number);
4674		ctl_softc->ctl_luns[lun_number] = NULL;
4675		ctl_softc->num_luns--;
4676		mtx_unlock(&ctl_softc->ctl_lock);
4677		free(lun->lun_devid, M_CTL);
4678		free(lun, M_CTL);
4679		return (EIO);
4680	}
4681
4682	return (0);
4683}
4684
4685/*
4686 * Free LUN that has no active requests.
4687 */
4688static int
4689ctl_free_lun(struct ctl_lun *lun)
4690{
4691	struct ctl_softc *softc = lun->ctl_softc;
4692	struct ctl_lun *nlun;
4693	int i;
4694
4695	KASSERT(LIST_EMPTY(&lun->ooa_queue),
4696	    ("Freeing a LUN %p with outstanding I/O!\n", lun));
4697
4698	mtx_lock(&softc->ctl_lock);
4699	STAILQ_REMOVE(&softc->lun_list, lun, ctl_lun, links);
4700	ctl_clear_mask(softc->ctl_lun_mask, lun->lun);
4701	softc->ctl_luns[lun->lun] = NULL;
4702	softc->num_luns--;
4703	STAILQ_FOREACH(nlun, &softc->lun_list, links) {
4704		mtx_lock(&nlun->lun_lock);
4705		ctl_est_ua_all(nlun, -1, CTL_UA_LUN_CHANGE);
4706		mtx_unlock(&nlun->lun_lock);
4707	}
4708	mtx_unlock(&softc->ctl_lock);
4709
4710	/*
4711	 * Tell the backend to free resources, if this LUN has a backend.
4712	 */
4713	lun->be_lun->lun_shutdown(lun->be_lun);
4714
4715	lun->ie_reportcnt = UINT32_MAX;
4716	callout_drain(&lun->ie_callout);
4717	ctl_tpc_lun_shutdown(lun);
4718	mtx_destroy(&lun->lun_lock);
4719	free(lun->lun_devid, M_CTL);
4720	for (i = 0; i < ctl_max_ports; i++)
4721		free(lun->pending_ua[i], M_CTL);
4722	free(lun->pending_ua, M_DEVBUF);
4723	for (i = 0; i < ctl_max_ports; i++)
4724		free(lun->pr_keys[i], M_CTL);
4725	free(lun->pr_keys, M_DEVBUF);
4726	free(lun->write_buffer, M_CTL);
4727	free(lun->prevent, M_CTL);
4728	free(lun, M_CTL);
4729
4730	return (0);
4731}
4732
4733static int
4734ctl_enable_lun(struct ctl_lun *lun)
4735{
4736	struct ctl_softc *softc;
4737	struct ctl_port *port, *nport;
4738	int retval;
4739
4740	softc = lun->ctl_softc;
4741
4742	mtx_lock(&softc->ctl_lock);
4743	mtx_lock(&lun->lun_lock);
4744	KASSERT((lun->flags & CTL_LUN_DISABLED) != 0,
4745	    ("%s: LUN not disabled", __func__));
4746	lun->flags &= ~CTL_LUN_DISABLED;
4747	mtx_unlock(&lun->lun_lock);
4748
4749	STAILQ_FOREACH_SAFE(port, &softc->port_list, links, nport) {
4750		if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 ||
4751		    port->lun_map != NULL || port->lun_enable == NULL)
4752			continue;
4753
4754		/*
4755		 * Drop the lock while we call the FETD's enable routine.
4756		 * This can lead to a callback into CTL (at least in the
4757		 * case of the internal initiator frontend.
4758		 */
4759		mtx_unlock(&softc->ctl_lock);
4760		retval = port->lun_enable(port->targ_lun_arg, lun->lun);
4761		mtx_lock(&softc->ctl_lock);
4762		if (retval != 0) {
4763			printf("%s: FETD %s port %d returned error "
4764			       "%d for lun_enable on lun %jd\n",
4765			       __func__, port->port_name, port->targ_port,
4766			       retval, (intmax_t)lun->lun);
4767		}
4768	}
4769
4770	mtx_unlock(&softc->ctl_lock);
4771	ctl_isc_announce_lun(lun);
4772
4773	return (0);
4774}
4775
4776static int
4777ctl_disable_lun(struct ctl_lun *lun)
4778{
4779	struct ctl_softc *softc;
4780	struct ctl_port *port;
4781	int retval;
4782
4783	softc = lun->ctl_softc;
4784
4785	mtx_lock(&softc->ctl_lock);
4786	mtx_lock(&lun->lun_lock);
4787	KASSERT((lun->flags & CTL_LUN_DISABLED) == 0,
4788	    ("%s: LUN not enabled", __func__));
4789	lun->flags |= CTL_LUN_DISABLED;
4790	mtx_unlock(&lun->lun_lock);
4791
4792	STAILQ_FOREACH(port, &softc->port_list, links) {
4793		if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 ||
4794		    port->lun_map != NULL || port->lun_disable == NULL)
4795			continue;
4796
4797		/*
4798		 * Drop the lock before we call the frontend's disable
4799		 * routine, to avoid lock order reversals.
4800		 *
4801		 * XXX KDM what happens if the frontend list changes while
4802		 * we're traversing it?  It's unlikely, but should be handled.
4803		 */
4804		mtx_unlock(&softc->ctl_lock);
4805		retval = port->lun_disable(port->targ_lun_arg, lun->lun);
4806		mtx_lock(&softc->ctl_lock);
4807		if (retval != 0) {
4808			printf("%s: FETD %s port %d returned error "
4809			       "%d for lun_disable on lun %jd\n",
4810			       __func__, port->port_name, port->targ_port,
4811			       retval, (intmax_t)lun->lun);
4812		}
4813	}
4814
4815	mtx_unlock(&softc->ctl_lock);
4816	ctl_isc_announce_lun(lun);
4817
4818	return (0);
4819}
4820
4821int
4822ctl_start_lun(struct ctl_be_lun *be_lun)
4823{
4824	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4825
4826	mtx_lock(&lun->lun_lock);
4827	lun->flags &= ~CTL_LUN_STOPPED;
4828	mtx_unlock(&lun->lun_lock);
4829	return (0);
4830}
4831
4832int
4833ctl_stop_lun(struct ctl_be_lun *be_lun)
4834{
4835	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4836
4837	mtx_lock(&lun->lun_lock);
4838	lun->flags |= CTL_LUN_STOPPED;
4839	mtx_unlock(&lun->lun_lock);
4840	return (0);
4841}
4842
4843int
4844ctl_lun_no_media(struct ctl_be_lun *be_lun)
4845{
4846	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4847
4848	mtx_lock(&lun->lun_lock);
4849	lun->flags |= CTL_LUN_NO_MEDIA;
4850	mtx_unlock(&lun->lun_lock);
4851	return (0);
4852}
4853
4854int
4855ctl_lun_has_media(struct ctl_be_lun *be_lun)
4856{
4857	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4858	union ctl_ha_msg msg;
4859
4860	mtx_lock(&lun->lun_lock);
4861	lun->flags &= ~(CTL_LUN_NO_MEDIA | CTL_LUN_EJECTED);
4862	if (lun->flags & CTL_LUN_REMOVABLE)
4863		ctl_est_ua_all(lun, -1, CTL_UA_MEDIUM_CHANGE);
4864	mtx_unlock(&lun->lun_lock);
4865	if ((lun->flags & CTL_LUN_REMOVABLE) &&
4866	    lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) {
4867		bzero(&msg.ua, sizeof(msg.ua));
4868		msg.hdr.msg_type = CTL_MSG_UA;
4869		msg.hdr.nexus.initid = -1;
4870		msg.hdr.nexus.targ_port = -1;
4871		msg.hdr.nexus.targ_lun = lun->lun;
4872		msg.hdr.nexus.targ_mapped_lun = lun->lun;
4873		msg.ua.ua_all = 1;
4874		msg.ua.ua_set = 1;
4875		msg.ua.ua_type = CTL_UA_MEDIUM_CHANGE;
4876		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.ua),
4877		    M_WAITOK);
4878	}
4879	return (0);
4880}
4881
4882int
4883ctl_lun_ejected(struct ctl_be_lun *be_lun)
4884{
4885	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4886
4887	mtx_lock(&lun->lun_lock);
4888	lun->flags |= CTL_LUN_EJECTED;
4889	mtx_unlock(&lun->lun_lock);
4890	return (0);
4891}
4892
4893int
4894ctl_lun_primary(struct ctl_be_lun *be_lun)
4895{
4896	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4897
4898	mtx_lock(&lun->lun_lock);
4899	lun->flags |= CTL_LUN_PRIMARY_SC;
4900	ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
4901	mtx_unlock(&lun->lun_lock);
4902	ctl_isc_announce_lun(lun);
4903	return (0);
4904}
4905
4906int
4907ctl_lun_secondary(struct ctl_be_lun *be_lun)
4908{
4909	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4910
4911	mtx_lock(&lun->lun_lock);
4912	lun->flags &= ~CTL_LUN_PRIMARY_SC;
4913	ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
4914	mtx_unlock(&lun->lun_lock);
4915	ctl_isc_announce_lun(lun);
4916	return (0);
4917}
4918
4919/*
4920 * Remove LUN.  If there are active requests, wait for completion.
4921 *
4922 * Returns 0 for success, non-zero (errno) for failure.
4923 * Completion is reported to backed via the lun_shutdown() method.
4924 */
4925int
4926ctl_remove_lun(struct ctl_be_lun *be_lun)
4927{
4928	struct ctl_lun *lun;
4929
4930	lun = (struct ctl_lun *)be_lun->ctl_lun;
4931
4932	ctl_disable_lun(lun);
4933
4934	mtx_lock(&lun->lun_lock);
4935	lun->flags |= CTL_LUN_INVALID;
4936
4937	/*
4938	 * If there is nothing in the OOA queue, go ahead and free the LUN.
4939	 * If we have something in the OOA queue, we'll free it when the
4940	 * last I/O completes.
4941	 */
4942	if (LIST_EMPTY(&lun->ooa_queue)) {
4943		mtx_unlock(&lun->lun_lock);
4944		ctl_free_lun(lun);
4945	} else
4946		mtx_unlock(&lun->lun_lock);
4947
4948	return (0);
4949}
4950
4951void
4952ctl_lun_capacity_changed(struct ctl_be_lun *be_lun)
4953{
4954	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4955	union ctl_ha_msg msg;
4956
4957	mtx_lock(&lun->lun_lock);
4958	ctl_est_ua_all(lun, -1, CTL_UA_CAPACITY_CHANGE);
4959	mtx_unlock(&lun->lun_lock);
4960	if (lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) {
4961		/* Send msg to other side. */
4962		bzero(&msg.ua, sizeof(msg.ua));
4963		msg.hdr.msg_type = CTL_MSG_UA;
4964		msg.hdr.nexus.initid = -1;
4965		msg.hdr.nexus.targ_port = -1;
4966		msg.hdr.nexus.targ_lun = lun->lun;
4967		msg.hdr.nexus.targ_mapped_lun = lun->lun;
4968		msg.ua.ua_all = 1;
4969		msg.ua.ua_set = 1;
4970		msg.ua.ua_type = CTL_UA_CAPACITY_CHANGE;
4971		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.ua),
4972		    M_WAITOK);
4973	}
4974}
4975
4976void
4977ctl_lun_nsdata_ids(struct ctl_be_lun *be_lun,
4978    struct nvme_namespace_data *nsdata)
4979{
4980	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
4981	struct scsi_vpd_id_descriptor *idd;
4982
4983	if (lun->lun_devid == NULL)
4984		return;
4985
4986	idd = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *)
4987	    lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_naa);
4988	if (idd != NULL) {
4989		if (idd->length == 16) {
4990			memcpy(nsdata->nguid, idd->identifier, 16);
4991			return;
4992		}
4993		if (idd->length == 8) {
4994			memcpy(nsdata->eui64, idd->identifier, 8);
4995			return;
4996		}
4997	}
4998
4999	idd = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *)
5000	    lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_eui64);
5001	if (idd != NULL) {
5002		if (idd->length == 8) {
5003			memcpy(nsdata->eui64, idd->identifier, 8);
5004			return;
5005		}
5006	}
5007}
5008
5009void
5010ctl_lun_nvme_ids(struct ctl_be_lun *be_lun, void *data)
5011{
5012	struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
5013	struct scsi_vpd_id_descriptor *naa, *eui64, *uuid;
5014	char *p;
5015
5016	memset(data, 0, 4096);
5017
5018	if (lun->lun_devid == NULL)
5019		return;
5020
5021	naa = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *)
5022	    lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_naa);
5023	eui64 = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *)
5024	    lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_eui64);
5025	uuid = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *)
5026	    lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_uuid);
5027
5028	p = data;
5029
5030	/* EUI64 */
5031	if ((naa != NULL && naa->length == 8) || eui64 != NULL) {
5032		*p++ = 1;
5033		*p++ = 8;
5034		p += 2;
5035		if (naa != NULL && naa->length == 8)
5036			memcpy(p, naa->identifier, 8);
5037		else
5038			memcpy(p, eui64->identifier, 8);
5039		p += 8;
5040	}
5041
5042	/* NGUID */
5043	if (naa != NULL && naa->length == 16) {
5044		*p++ = 1;
5045		*p++ = 16;
5046		p += 2;
5047		memcpy(p, naa->identifier, 16);
5048		p += 16;
5049	}
5050
5051	/* UUID */
5052	if (uuid != NULL) {
5053		*p++ = 1;
5054		*p++ = uuid->length;
5055		p += 2;
5056		memcpy(p, uuid->identifier, uuid->length);
5057		p += uuid->length;
5058	}
5059}
5060
5061/*
5062 * Backend "memory move is complete" callback for requests that never
5063 * make it down to say RAIDCore's configuration code.
5064 */
5065int
5066ctl_config_move_done(union ctl_io *io, bool samethr)
5067{
5068	int retval;
5069
5070	CTL_DEBUG_PRINT(("ctl_config_move_done\n"));
5071
5072	if (ctl_debug & CTL_DEBUG_CDB_DATA)
5073		ctl_data_print(io);
5074	if (((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) ||
5075	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
5076	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) ||
5077	    ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)) {
5078		/*
5079		 * XXX KDM just assuming a single pointer here, and not a
5080		 * S/G list.  If we start using S/G lists for config data,
5081		 * we'll need to know how to clean them up here as well.
5082		 */
5083		if (io->io_hdr.flags & CTL_FLAG_ALLOCATED)
5084			free(ctl_kern_data_ptr(io), M_CTL);
5085		ctl_done(io);
5086		retval = CTL_RETVAL_COMPLETE;
5087	} else {
5088		/*
5089		 * XXX KDM now we need to continue data movement.  Some
5090		 * options:
5091		 * - call ctl_scsiio() again?  We don't do this for data
5092		 *   writes, because for those at least we know ahead of
5093		 *   time where the write will go and how long it is.  For
5094		 *   config writes, though, that information is largely
5095		 *   contained within the write itself, thus we need to
5096		 *   parse out the data again.
5097		 *
5098		 * - Call some other function once the data is in?
5099		 */
5100
5101		/*
5102		 * XXX KDM call ctl_scsiio() again for now, and check flag
5103		 * bits to see whether we're allocated or not.
5104		 */
5105		switch (io->io_hdr.io_type) {
5106		case CTL_IO_SCSI:
5107			retval = ctl_scsiio(&io->scsiio);
5108			break;
5109		case CTL_IO_NVME:
5110		case CTL_IO_NVME_ADMIN:
5111			retval = ctl_nvmeio(&io->nvmeio);
5112			break;
5113		default:
5114			__assert_unreachable();
5115		}
5116	}
5117	return (retval);
5118}
5119
5120/*
5121 * This gets called by a backend driver when it is done with a
5122 * data_submit method.
5123 */
5124void
5125ctl_data_submit_done(union ctl_io *io)
5126{
5127	/*
5128	 * If the IO_CONT flag is set, we need to call the supplied
5129	 * function to continue processing the I/O, instead of completing
5130	 * the I/O just yet.
5131	 *
5132	 * If there is an error, though, we don't want to keep processing.
5133	 * Instead, just send status back to the initiator.
5134	 */
5135	if ((io->io_hdr.flags & CTL_FLAG_IO_CONT) &&
5136	    (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
5137	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
5138	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
5139		ctl_continue_io(io);
5140		return;
5141	}
5142	ctl_done(io);
5143}
5144
5145/*
5146 * This gets called by a backend driver when it is done with a
5147 * configuration write.
5148 */
5149void
5150ctl_config_write_done(union ctl_io *io)
5151{
5152	uint8_t *buf;
5153
5154	/*
5155	 * If the IO_CONT flag is set, we need to call the supplied
5156	 * function to continue processing the I/O, instead of completing
5157	 * the I/O just yet.
5158	 *
5159	 * If there is an error, though, we don't want to keep processing.
5160	 * Instead, just send status back to the initiator.
5161	 */
5162	if ((io->io_hdr.flags & CTL_FLAG_IO_CONT) &&
5163	    (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
5164	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
5165	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
5166		ctl_continue_io(io);
5167		return;
5168	}
5169	/*
5170	 * Since a configuration write can be done for commands that actually
5171	 * have data allocated, like write buffer, and commands that have
5172	 * no data, like start/stop unit, we need to check here.
5173	 */
5174	if (io->io_hdr.flags & CTL_FLAG_ALLOCATED)
5175		buf = ctl_kern_data_ptr(io);
5176	else
5177		buf = NULL;
5178	ctl_done(io);
5179	if (buf)
5180		free(buf, M_CTL);
5181}
5182
5183void
5184ctl_config_read_done(union ctl_io *io)
5185{
5186	uint8_t *buf;
5187
5188	/*
5189	 * If there is some error -- we are done, skip data transfer.
5190	 */
5191	if ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0 ||
5192	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
5193	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
5194		if (io->io_hdr.flags & CTL_FLAG_ALLOCATED)
5195			buf = ctl_kern_data_ptr(io);
5196		else
5197			buf = NULL;
5198		ctl_done(io);
5199		if (buf)
5200			free(buf, M_CTL);
5201		return;
5202	}
5203
5204	/*
5205	 * If the IO_CONT flag is set, we need to call the supplied
5206	 * function to continue processing the I/O, instead of completing
5207	 * the I/O just yet.
5208	 */
5209	if (io->io_hdr.flags & CTL_FLAG_IO_CONT) {
5210		ctl_continue_io(io);
5211		return;
5212	}
5213
5214	ctl_datamove(io);
5215}
5216
5217/*
5218 * SCSI release command.
5219 */
5220int
5221ctl_scsi_release(struct ctl_scsiio *ctsio)
5222{
5223	struct ctl_lun *lun = CTL_LUN(ctsio);
5224	uint32_t residx;
5225
5226	CTL_DEBUG_PRINT(("ctl_scsi_release\n"));
5227
5228	residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
5229
5230	/*
5231	 * XXX KDM right now, we only support LUN reservation.  We don't
5232	 * support 3rd party reservations, or extent reservations, which
5233	 * might actually need the parameter list.  If we've gotten this
5234	 * far, we've got a LUN reservation.  Anything else got kicked out
5235	 * above.  So, according to SPC, ignore the length.
5236	 */
5237
5238	mtx_lock(&lun->lun_lock);
5239
5240	/*
5241	 * According to SPC, it is not an error for an intiator to attempt
5242	 * to release a reservation on a LUN that isn't reserved, or that
5243	 * is reserved by another initiator.  The reservation can only be
5244	 * released, though, by the initiator who made it or by one of
5245	 * several reset type events.
5246	 */
5247	if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == residx))
5248			lun->flags &= ~CTL_LUN_RESERVED;
5249
5250	mtx_unlock(&lun->lun_lock);
5251
5252	ctl_set_success(ctsio);
5253	ctl_done((union ctl_io *)ctsio);
5254	return (CTL_RETVAL_COMPLETE);
5255}
5256
5257int
5258ctl_scsi_reserve(struct ctl_scsiio *ctsio)
5259{
5260	struct ctl_lun *lun = CTL_LUN(ctsio);
5261	uint32_t residx;
5262
5263	CTL_DEBUG_PRINT(("ctl_reserve\n"));
5264
5265	residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
5266
5267	/*
5268	 * XXX KDM right now, we only support LUN reservation.  We don't
5269	 * support 3rd party reservations, or extent reservations, which
5270	 * might actually need the parameter list.  If we've gotten this
5271	 * far, we've got a LUN reservation.  Anything else got kicked out
5272	 * above.  So, according to SPC, ignore the length.
5273	 */
5274
5275	mtx_lock(&lun->lun_lock);
5276	if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx != residx)) {
5277		ctl_set_reservation_conflict(ctsio);
5278		goto bailout;
5279	}
5280
5281	/* SPC-3 exceptions to SPC-2 RESERVE and RELEASE behavior. */
5282	if (lun->flags & CTL_LUN_PR_RESERVED) {
5283		ctl_set_success(ctsio);
5284		goto bailout;
5285	}
5286
5287	lun->flags |= CTL_LUN_RESERVED;
5288	lun->res_idx = residx;
5289	ctl_set_success(ctsio);
5290
5291bailout:
5292	mtx_unlock(&lun->lun_lock);
5293	ctl_done((union ctl_io *)ctsio);
5294	return (CTL_RETVAL_COMPLETE);
5295}
5296
5297int
5298ctl_start_stop(struct ctl_scsiio *ctsio)
5299{
5300	struct ctl_lun *lun = CTL_LUN(ctsio);
5301	struct scsi_start_stop_unit *cdb;
5302	int retval;
5303
5304	CTL_DEBUG_PRINT(("ctl_start_stop\n"));
5305
5306	cdb = (struct scsi_start_stop_unit *)ctsio->cdb;
5307
5308	if ((cdb->how & SSS_PC_MASK) == 0) {
5309		if ((lun->flags & CTL_LUN_PR_RESERVED) &&
5310		    (cdb->how & SSS_START) == 0) {
5311			uint32_t residx;
5312
5313			residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
5314			if (ctl_get_prkey(lun, residx) == 0 ||
5315			    (lun->pr_res_idx != residx && lun->pr_res_type < 4)) {
5316				ctl_set_reservation_conflict(ctsio);
5317				ctl_done((union ctl_io *)ctsio);
5318				return (CTL_RETVAL_COMPLETE);
5319			}
5320		}
5321
5322		if ((cdb->how & SSS_LOEJ) &&
5323		    (lun->flags & CTL_LUN_REMOVABLE) == 0) {
5324			ctl_set_invalid_field(ctsio,
5325					      /*sks_valid*/ 1,
5326					      /*command*/ 1,
5327					      /*field*/ 4,
5328					      /*bit_valid*/ 1,
5329					      /*bit*/ 1);
5330			ctl_done((union ctl_io *)ctsio);
5331			return (CTL_RETVAL_COMPLETE);
5332		}
5333
5334		if ((cdb->how & SSS_START) == 0 && (cdb->how & SSS_LOEJ) &&
5335		    lun->prevent_count > 0) {
5336			/* "Medium removal prevented" */
5337			ctl_set_sense(ctsio, /*current_error*/ 1,
5338			    /*sense_key*/(lun->flags & CTL_LUN_NO_MEDIA) ?
5339			     SSD_KEY_NOT_READY : SSD_KEY_ILLEGAL_REQUEST,
5340			    /*asc*/ 0x53, /*ascq*/ 0x02, SSD_ELEM_NONE);
5341			ctl_done((union ctl_io *)ctsio);
5342			return (CTL_RETVAL_COMPLETE);
5343		}
5344	}
5345
5346	retval = lun->backend->config_write((union ctl_io *)ctsio);
5347	return (retval);
5348}
5349
5350int
5351ctl_prevent_allow(struct ctl_scsiio *ctsio)
5352{
5353	struct ctl_lun *lun = CTL_LUN(ctsio);
5354	struct scsi_prevent *cdb;
5355	int retval;
5356	uint32_t initidx;
5357
5358	CTL_DEBUG_PRINT(("ctl_prevent_allow\n"));
5359
5360	cdb = (struct scsi_prevent *)ctsio->cdb;
5361
5362	if ((lun->flags & CTL_LUN_REMOVABLE) == 0 || lun->prevent == NULL) {
5363		ctl_set_invalid_opcode(ctsio);
5364		ctl_done((union ctl_io *)ctsio);
5365		return (CTL_RETVAL_COMPLETE);
5366	}
5367
5368	initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
5369	mtx_lock(&lun->lun_lock);
5370	if ((cdb->how & PR_PREVENT) &&
5371	    ctl_is_set(lun->prevent, initidx) == 0) {
5372		ctl_set_mask(lun->prevent, initidx);
5373		lun->prevent_count++;
5374	} else if ((cdb->how & PR_PREVENT) == 0 &&
5375	    ctl_is_set(lun->prevent, initidx)) {
5376		ctl_clear_mask(lun->prevent, initidx);
5377		lun->prevent_count--;
5378	}
5379	mtx_unlock(&lun->lun_lock);
5380	retval = lun->backend->config_write((union ctl_io *)ctsio);
5381	return (retval);
5382}
5383
5384/*
5385 * We support the SYNCHRONIZE CACHE command (10 and 16 byte versions), but
5386 * we don't really do anything with the LBA and length fields if the user
5387 * passes them in.  Instead we'll just flush out the cache for the entire
5388 * LUN.
5389 */
5390int
5391ctl_sync_cache(struct ctl_scsiio *ctsio)
5392{
5393	struct ctl_lun *lun = CTL_LUN(ctsio);
5394	struct ctl_lba_len_flags *lbalen;
5395	uint64_t starting_lba;
5396	uint32_t block_count;
5397	int retval;
5398	uint8_t byte2;
5399
5400	CTL_DEBUG_PRINT(("ctl_sync_cache\n"));
5401
5402	retval = 0;
5403
5404	switch (ctsio->cdb[0]) {
5405	case SYNCHRONIZE_CACHE: {
5406		struct scsi_sync_cache *cdb;
5407		cdb = (struct scsi_sync_cache *)ctsio->cdb;
5408
5409		starting_lba = scsi_4btoul(cdb->begin_lba);
5410		block_count = scsi_2btoul(cdb->lb_count);
5411		byte2 = cdb->byte2;
5412		break;
5413	}
5414	case SYNCHRONIZE_CACHE_16: {
5415		struct scsi_sync_cache_16 *cdb;
5416		cdb = (struct scsi_sync_cache_16 *)ctsio->cdb;
5417
5418		starting_lba = scsi_8btou64(cdb->begin_lba);
5419		block_count = scsi_4btoul(cdb->lb_count);
5420		byte2 = cdb->byte2;
5421		break;
5422	}
5423	default:
5424		ctl_set_invalid_opcode(ctsio);
5425		ctl_done((union ctl_io *)ctsio);
5426		goto bailout;
5427		break; /* NOTREACHED */
5428	}
5429
5430	/*
5431	 * We check the LBA and length, but don't do anything with them.
5432	 * A SYNCHRONIZE CACHE will cause the entire cache for this lun to
5433	 * get flushed.  This check will just help satisfy anyone who wants
5434	 * to see an error for an out of range LBA.
5435	 */
5436	if ((starting_lba + block_count) > (lun->be_lun->maxlba + 1)) {
5437		ctl_set_lba_out_of_range(ctsio,
5438		    MAX(starting_lba, lun->be_lun->maxlba + 1));
5439		ctl_done((union ctl_io *)ctsio);
5440		goto bailout;
5441	}
5442
5443	lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
5444	lbalen->lba = starting_lba;
5445	lbalen->len = block_count;
5446	lbalen->flags = byte2;
5447	retval = lun->backend->config_write((union ctl_io *)ctsio);
5448
5449bailout:
5450	return (retval);
5451}
5452
5453int
5454ctl_format(struct ctl_scsiio *ctsio)
5455{
5456	struct scsi_format *cdb;
5457	int length, defect_list_len;
5458
5459	CTL_DEBUG_PRINT(("ctl_format\n"));
5460
5461	cdb = (struct scsi_format *)ctsio->cdb;
5462
5463	length = 0;
5464	if (cdb->byte2 & SF_FMTDATA) {
5465		if (cdb->byte2 & SF_LONGLIST)
5466			length = sizeof(struct scsi_format_header_long);
5467		else
5468			length = sizeof(struct scsi_format_header_short);
5469	}
5470
5471	if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0)
5472	 && (length > 0)) {
5473		ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK);
5474		ctsio->kern_data_len = length;
5475		ctsio->kern_total_len = length;
5476		ctsio->kern_rel_offset = 0;
5477		ctsio->kern_sg_entries = 0;
5478		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
5479		ctsio->be_move_done = ctl_config_move_done;
5480		ctl_datamove((union ctl_io *)ctsio);
5481
5482		return (CTL_RETVAL_COMPLETE);
5483	}
5484
5485	defect_list_len = 0;
5486
5487	if (cdb->byte2 & SF_FMTDATA) {
5488		if (cdb->byte2 & SF_LONGLIST) {
5489			struct scsi_format_header_long *header;
5490
5491			header = (struct scsi_format_header_long *)
5492				ctsio->kern_data_ptr;
5493
5494			defect_list_len = scsi_4btoul(header->defect_list_len);
5495			if (defect_list_len != 0) {
5496				ctl_set_invalid_field(ctsio,
5497						      /*sks_valid*/ 1,
5498						      /*command*/ 0,
5499						      /*field*/ 2,
5500						      /*bit_valid*/ 0,
5501						      /*bit*/ 0);
5502				goto bailout;
5503			}
5504		} else {
5505			struct scsi_format_header_short *header;
5506
5507			header = (struct scsi_format_header_short *)
5508				ctsio->kern_data_ptr;
5509
5510			defect_list_len = scsi_2btoul(header->defect_list_len);
5511			if (defect_list_len != 0) {
5512				ctl_set_invalid_field(ctsio,
5513						      /*sks_valid*/ 1,
5514						      /*command*/ 0,
5515						      /*field*/ 2,
5516						      /*bit_valid*/ 0,
5517						      /*bit*/ 0);
5518				goto bailout;
5519			}
5520		}
5521	}
5522
5523	ctl_set_success(ctsio);
5524bailout:
5525
5526	if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) {
5527		free(ctsio->kern_data_ptr, M_CTL);
5528		ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED;
5529	}
5530
5531	ctl_done((union ctl_io *)ctsio);
5532	return (CTL_RETVAL_COMPLETE);
5533}
5534
5535int
5536ctl_read_buffer(struct ctl_scsiio *ctsio)
5537{
5538	struct ctl_lun *lun = CTL_LUN(ctsio);
5539	uint64_t buffer_offset;
5540	uint32_t len;
5541	uint8_t byte2;
5542	static uint8_t descr[4];
5543	static uint8_t echo_descr[4] = { 0 };
5544
5545	CTL_DEBUG_PRINT(("ctl_read_buffer\n"));
5546
5547	switch (ctsio->cdb[0]) {
5548	case READ_BUFFER: {
5549		struct scsi_read_buffer *cdb;
5550
5551		cdb = (struct scsi_read_buffer *)ctsio->cdb;
5552		buffer_offset = scsi_3btoul(cdb->offset);
5553		len = scsi_3btoul(cdb->length);
5554		byte2 = cdb->byte2;
5555		break;
5556	}
5557	case READ_BUFFER_16: {
5558		struct scsi_read_buffer_16 *cdb;
5559
5560		cdb = (struct scsi_read_buffer_16 *)ctsio->cdb;
5561		buffer_offset = scsi_8btou64(cdb->offset);
5562		len = scsi_4btoul(cdb->length);
5563		byte2 = cdb->byte2;
5564		break;
5565	}
5566	default: /* This shouldn't happen. */
5567		ctl_set_invalid_opcode(ctsio);
5568		ctl_done((union ctl_io *)ctsio);
5569		return (CTL_RETVAL_COMPLETE);
5570	}
5571
5572	if (buffer_offset > CTL_WRITE_BUFFER_SIZE ||
5573	    buffer_offset + len > CTL_WRITE_BUFFER_SIZE) {
5574		ctl_set_invalid_field(ctsio,
5575				      /*sks_valid*/ 1,
5576				      /*command*/ 1,
5577				      /*field*/ 6,
5578				      /*bit_valid*/ 0,
5579				      /*bit*/ 0);
5580		ctl_done((union ctl_io *)ctsio);
5581		return (CTL_RETVAL_COMPLETE);
5582	}
5583
5584	if ((byte2 & RWB_MODE) == RWB_MODE_DESCR) {
5585		descr[0] = 0;
5586		scsi_ulto3b(CTL_WRITE_BUFFER_SIZE, &descr[1]);
5587		ctsio->kern_data_ptr = descr;
5588		len = min(len, sizeof(descr));
5589	} else if ((byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) {
5590		ctsio->kern_data_ptr = echo_descr;
5591		len = min(len, sizeof(echo_descr));
5592	} else {
5593		if (lun->write_buffer == NULL) {
5594			lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE,
5595			    M_CTL, M_WAITOK);
5596		}
5597		ctsio->kern_data_ptr = lun->write_buffer + buffer_offset;
5598	}
5599	ctsio->kern_data_len = len;
5600	ctsio->kern_total_len = len;
5601	ctsio->kern_rel_offset = 0;
5602	ctsio->kern_sg_entries = 0;
5603	ctl_set_success(ctsio);
5604	ctsio->be_move_done = ctl_config_move_done;
5605	ctl_datamove((union ctl_io *)ctsio);
5606	return (CTL_RETVAL_COMPLETE);
5607}
5608
5609int
5610ctl_write_buffer(struct ctl_scsiio *ctsio)
5611{
5612	struct ctl_lun *lun = CTL_LUN(ctsio);
5613	struct scsi_write_buffer *cdb;
5614	int buffer_offset, len;
5615
5616	CTL_DEBUG_PRINT(("ctl_write_buffer\n"));
5617
5618	cdb = (struct scsi_write_buffer *)ctsio->cdb;
5619
5620	len = scsi_3btoul(cdb->length);
5621	buffer_offset = scsi_3btoul(cdb->offset);
5622
5623	if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) {
5624		ctl_set_invalid_field(ctsio,
5625				      /*sks_valid*/ 1,
5626				      /*command*/ 1,
5627				      /*field*/ 6,
5628				      /*bit_valid*/ 0,
5629				      /*bit*/ 0);
5630		ctl_done((union ctl_io *)ctsio);
5631		return (CTL_RETVAL_COMPLETE);
5632	}
5633
5634	/*
5635	 * If we've got a kernel request that hasn't been malloced yet,
5636	 * malloc it and tell the caller the data buffer is here.
5637	 */
5638	if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
5639		if (lun->write_buffer == NULL) {
5640			lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE,
5641			    M_CTL, M_WAITOK);
5642		}
5643		ctsio->kern_data_ptr = lun->write_buffer + buffer_offset;
5644		ctsio->kern_data_len = len;
5645		ctsio->kern_total_len = len;
5646		ctsio->kern_rel_offset = 0;
5647		ctsio->kern_sg_entries = 0;
5648		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
5649		ctsio->be_move_done = ctl_config_move_done;
5650		ctl_datamove((union ctl_io *)ctsio);
5651
5652		return (CTL_RETVAL_COMPLETE);
5653	}
5654
5655	ctl_set_success(ctsio);
5656	ctl_done((union ctl_io *)ctsio);
5657	return (CTL_RETVAL_COMPLETE);
5658}
5659
5660static int
5661ctl_write_same_cont(union ctl_io *io)
5662{
5663	struct ctl_lun *lun = CTL_LUN(io);
5664	struct ctl_scsiio *ctsio;
5665	struct ctl_lba_len_flags *lbalen;
5666	int retval;
5667
5668	CTL_IO_ASSERT(io, SCSI);
5669
5670	ctsio = &io->scsiio;
5671	ctsio->io_hdr.status = CTL_STATUS_NONE;
5672	lbalen = (struct ctl_lba_len_flags *)
5673	    &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
5674	lbalen->lba += lbalen->len;
5675	if ((lun->be_lun->maxlba + 1) - lbalen->lba <= UINT32_MAX) {
5676		ctsio->io_hdr.flags &= ~CTL_FLAG_IO_CONT;
5677		lbalen->len = (lun->be_lun->maxlba + 1) - lbalen->lba;
5678	}
5679
5680	CTL_DEBUG_PRINT(("ctl_write_same_cont: calling config_write()\n"));
5681	retval = lun->backend->config_write((union ctl_io *)ctsio);
5682	return (retval);
5683}
5684
5685int
5686ctl_write_same(struct ctl_scsiio *ctsio)
5687{
5688	struct ctl_lun *lun = CTL_LUN(ctsio);
5689	struct ctl_lba_len_flags *lbalen;
5690	const char *val;
5691	uint64_t lba, ival;
5692	uint32_t num_blocks;
5693	int len, retval;
5694	uint8_t byte2;
5695
5696	CTL_DEBUG_PRINT(("ctl_write_same\n"));
5697
5698	switch (ctsio->cdb[0]) {
5699	case WRITE_SAME_10: {
5700		struct scsi_write_same_10 *cdb;
5701
5702		cdb = (struct scsi_write_same_10 *)ctsio->cdb;
5703
5704		lba = scsi_4btoul(cdb->addr);
5705		num_blocks = scsi_2btoul(cdb->length);
5706		byte2 = cdb->byte2;
5707		break;
5708	}
5709	case WRITE_SAME_16: {
5710		struct scsi_write_same_16 *cdb;
5711
5712		cdb = (struct scsi_write_same_16 *)ctsio->cdb;
5713
5714		lba = scsi_8btou64(cdb->addr);
5715		num_blocks = scsi_4btoul(cdb->length);
5716		byte2 = cdb->byte2;
5717		break;
5718	}
5719	default:
5720		/*
5721		 * We got a command we don't support.  This shouldn't
5722		 * happen, commands should be filtered out above us.
5723		 */
5724		ctl_set_invalid_opcode(ctsio);
5725		ctl_done((union ctl_io *)ctsio);
5726
5727		return (CTL_RETVAL_COMPLETE);
5728		break; /* NOTREACHED */
5729	}
5730
5731	/* ANCHOR flag can be used only together with UNMAP */
5732	if ((byte2 & SWS_UNMAP) == 0 && (byte2 & SWS_ANCHOR) != 0) {
5733		ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
5734		    /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0);
5735		ctl_done((union ctl_io *)ctsio);
5736		return (CTL_RETVAL_COMPLETE);
5737	}
5738
5739	/*
5740	 * The first check is to make sure we're in bounds, the second
5741	 * check is to catch wrap-around problems.  If the lba + num blocks
5742	 * is less than the lba, then we've wrapped around and the block
5743	 * range is invalid anyway.
5744	 */
5745	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
5746	 || ((lba + num_blocks) < lba)) {
5747		ctl_set_lba_out_of_range(ctsio,
5748		    MAX(lba, lun->be_lun->maxlba + 1));
5749		ctl_done((union ctl_io *)ctsio);
5750		return (CTL_RETVAL_COMPLETE);
5751	}
5752
5753	/* Zero number of blocks means "to the last logical block" */
5754	if (num_blocks == 0) {
5755		ival = UINT64_MAX;
5756		val = dnvlist_get_string(lun->be_lun->options,
5757		    "write_same_max_lba", NULL);
5758		if (val != NULL)
5759			ctl_expand_number(val, &ival);
5760		if ((lun->be_lun->maxlba + 1) - lba > ival) {
5761			ctl_set_invalid_field(ctsio,
5762			    /*sks_valid*/ 1, /*command*/ 1,
5763			    /*field*/ ctsio->cdb[0] == WRITE_SAME_10 ? 7 : 10,
5764			    /*bit_valid*/ 0, /*bit*/ 0);
5765			ctl_done((union ctl_io *)ctsio);
5766			return (CTL_RETVAL_COMPLETE);
5767		}
5768		if ((lun->be_lun->maxlba + 1) - lba > UINT32_MAX) {
5769			ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT;
5770			ctsio->io_cont = ctl_write_same_cont;
5771			num_blocks = 1 << 31;
5772		} else
5773			num_blocks = (lun->be_lun->maxlba + 1) - lba;
5774	}
5775
5776	len = lun->be_lun->blocksize;
5777
5778	/*
5779	 * If we've got a kernel request that hasn't been malloced yet,
5780	 * malloc it and tell the caller the data buffer is here.
5781	 */
5782	if ((byte2 & SWS_NDOB) == 0 &&
5783	    (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
5784		ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
5785		ctsio->kern_data_len = len;
5786		ctsio->kern_total_len = len;
5787		ctsio->kern_rel_offset = 0;
5788		ctsio->kern_sg_entries = 0;
5789		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
5790		ctsio->be_move_done = ctl_config_move_done;
5791		ctl_datamove((union ctl_io *)ctsio);
5792
5793		return (CTL_RETVAL_COMPLETE);
5794	}
5795
5796	lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
5797	lbalen->lba = lba;
5798	lbalen->len = num_blocks;
5799	lbalen->flags = byte2;
5800	retval = lun->backend->config_write((union ctl_io *)ctsio);
5801
5802	return (retval);
5803}
5804
5805int
5806ctl_unmap(struct ctl_scsiio *ctsio)
5807{
5808	struct ctl_lun *lun = CTL_LUN(ctsio);
5809	struct scsi_unmap *cdb;
5810	struct ctl_ptr_len_flags *ptrlen;
5811	struct scsi_unmap_header *hdr;
5812	struct scsi_unmap_desc *buf, *end, *endnz, *range;
5813	uint64_t lba;
5814	uint32_t num_blocks;
5815	int len, retval;
5816	uint8_t byte2;
5817
5818	CTL_DEBUG_PRINT(("ctl_unmap\n"));
5819
5820	cdb = (struct scsi_unmap *)ctsio->cdb;
5821	len = scsi_2btoul(cdb->length);
5822	byte2 = cdb->byte2;
5823
5824	/*
5825	 * If we've got a kernel request that hasn't been malloced yet,
5826	 * malloc it and tell the caller the data buffer is here.
5827	 */
5828	if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
5829		ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
5830		ctsio->kern_data_len = len;
5831		ctsio->kern_total_len = len;
5832		ctsio->kern_rel_offset = 0;
5833		ctsio->kern_sg_entries = 0;
5834		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
5835		ctsio->be_move_done = ctl_config_move_done;
5836		ctl_datamove((union ctl_io *)ctsio);
5837
5838		return (CTL_RETVAL_COMPLETE);
5839	}
5840
5841	len = ctsio->kern_total_len - ctsio->kern_data_resid;
5842	hdr = (struct scsi_unmap_header *)ctsio->kern_data_ptr;
5843	if (len < sizeof (*hdr) ||
5844	    len < (scsi_2btoul(hdr->length) + sizeof(hdr->length)) ||
5845	    len < (scsi_2btoul(hdr->desc_length) + sizeof (*hdr)) ||
5846	    scsi_2btoul(hdr->desc_length) % sizeof(*buf) != 0) {
5847		ctl_set_invalid_field(ctsio,
5848				      /*sks_valid*/ 0,
5849				      /*command*/ 0,
5850				      /*field*/ 0,
5851				      /*bit_valid*/ 0,
5852				      /*bit*/ 0);
5853		goto done;
5854	}
5855	len = scsi_2btoul(hdr->desc_length);
5856	buf = (struct scsi_unmap_desc *)(hdr + 1);
5857	end = buf + len / sizeof(*buf);
5858
5859	endnz = buf;
5860	for (range = buf; range < end; range++) {
5861		lba = scsi_8btou64(range->lba);
5862		num_blocks = scsi_4btoul(range->length);
5863		if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
5864		 || ((lba + num_blocks) < lba)) {
5865			ctl_set_lba_out_of_range(ctsio,
5866			    MAX(lba, lun->be_lun->maxlba + 1));
5867			ctl_done((union ctl_io *)ctsio);
5868			return (CTL_RETVAL_COMPLETE);
5869		}
5870		if (num_blocks != 0)
5871			endnz = range + 1;
5872	}
5873
5874	/*
5875	 * Block backend can not handle zero last range.
5876	 * Filter it out and return if there is nothing left.
5877	 */
5878	len = (uint8_t *)endnz - (uint8_t *)buf;
5879	if (len == 0) {
5880		ctl_set_success(ctsio);
5881		goto done;
5882	}
5883
5884	mtx_lock(&lun->lun_lock);
5885	ptrlen = (struct ctl_ptr_len_flags *)
5886	    &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
5887	ptrlen->ptr = (void *)buf;
5888	ptrlen->len = len;
5889	ptrlen->flags = byte2;
5890	ctl_try_unblock_others(lun, (union ctl_io *)ctsio, FALSE);
5891	mtx_unlock(&lun->lun_lock);
5892
5893	retval = lun->backend->config_write((union ctl_io *)ctsio);
5894	return (retval);
5895
5896done:
5897	if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) {
5898		free(ctsio->kern_data_ptr, M_CTL);
5899		ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED;
5900	}
5901	ctl_done((union ctl_io *)ctsio);
5902	return (CTL_RETVAL_COMPLETE);
5903}
5904
5905int
5906ctl_default_page_handler(struct ctl_scsiio *ctsio,
5907			 struct ctl_page_index *page_index, uint8_t *page_ptr)
5908{
5909	struct ctl_lun *lun = CTL_LUN(ctsio);
5910	uint8_t *current_cp;
5911	int set_ua;
5912	uint32_t initidx;
5913
5914	initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
5915	set_ua = 0;
5916
5917	current_cp = (page_index->page_data + (page_index->page_len *
5918	    CTL_PAGE_CURRENT));
5919
5920	mtx_lock(&lun->lun_lock);
5921	if (memcmp(current_cp, page_ptr, page_index->page_len)) {
5922		memcpy(current_cp, page_ptr, page_index->page_len);
5923		set_ua = 1;
5924	}
5925	if (set_ua != 0)
5926		ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE);
5927	mtx_unlock(&lun->lun_lock);
5928	if (set_ua) {
5929		ctl_isc_announce_mode(lun,
5930		    ctl_get_initindex(&ctsio->io_hdr.nexus),
5931		    page_index->page_code, page_index->subpage);
5932	}
5933	return (CTL_RETVAL_COMPLETE);
5934}
5935
5936static void
5937ctl_ie_timer(void *arg)
5938{
5939	struct ctl_lun *lun = arg;
5940	uint64_t t;
5941
5942	if (lun->ie_asc == 0)
5943		return;
5944
5945	if (lun->MODE_IE.mrie == SIEP_MRIE_UA)
5946		ctl_est_ua_all(lun, -1, CTL_UA_IE);
5947	else
5948		lun->ie_reported = 0;
5949
5950	if (lun->ie_reportcnt < scsi_4btoul(lun->MODE_IE.report_count)) {
5951		lun->ie_reportcnt++;
5952		t = scsi_4btoul(lun->MODE_IE.interval_timer);
5953		if (t == 0 || t == UINT32_MAX)
5954			t = 3000;  /* 5 min */
5955		callout_schedule_sbt(&lun->ie_callout, SBT_1S / 10 * t,
5956		    SBT_1S / 10, 0);
5957	}
5958}
5959
5960int
5961ctl_ie_page_handler(struct ctl_scsiio *ctsio,
5962			 struct ctl_page_index *page_index, uint8_t *page_ptr)
5963{
5964	struct ctl_lun *lun = CTL_LUN(ctsio);
5965	struct scsi_info_exceptions_page *pg;
5966	uint64_t t;
5967
5968	(void)ctl_default_page_handler(ctsio, page_index, page_ptr);
5969
5970	pg = (struct scsi_info_exceptions_page *)page_ptr;
5971	mtx_lock(&lun->lun_lock);
5972	if (pg->info_flags & SIEP_FLAGS_TEST) {
5973		lun->ie_asc = 0x5d;
5974		lun->ie_ascq = 0xff;
5975		if (pg->mrie == SIEP_MRIE_UA) {
5976			ctl_est_ua_all(lun, -1, CTL_UA_IE);
5977			lun->ie_reported = 1;
5978		} else {
5979			ctl_clr_ua_all(lun, -1, CTL_UA_IE);
5980			lun->ie_reported = -1;
5981		}
5982		lun->ie_reportcnt = 1;
5983		if (lun->ie_reportcnt < scsi_4btoul(pg->report_count)) {
5984			lun->ie_reportcnt++;
5985			t = scsi_4btoul(pg->interval_timer);
5986			if (t == 0 || t == UINT32_MAX)
5987				t = 3000;  /* 5 min */
5988			callout_reset_sbt(&lun->ie_callout, SBT_1S / 10 * t,
5989			    SBT_1S / 10, ctl_ie_timer, lun, 0);
5990		}
5991	} else {
5992		lun->ie_asc = 0;
5993		lun->ie_ascq = 0;
5994		lun->ie_reported = 1;
5995		ctl_clr_ua_all(lun, -1, CTL_UA_IE);
5996		lun->ie_reportcnt = UINT32_MAX;
5997		callout_stop(&lun->ie_callout);
5998	}
5999	mtx_unlock(&lun->lun_lock);
6000	return (CTL_RETVAL_COMPLETE);
6001}
6002
6003static int
6004ctl_do_mode_select(union ctl_io *io)
6005{
6006	struct ctl_lun *lun = CTL_LUN(io);
6007	struct scsi_mode_page_header *page_header;
6008	struct ctl_page_index *page_index;
6009	struct ctl_scsiio *ctsio;
6010	int page_len, page_len_offset, page_len_size;
6011	union ctl_modepage_info *modepage_info;
6012	uint16_t *len_left, *len_used;
6013	int retval, i;
6014
6015	CTL_IO_ASSERT(io, SCSI);
6016
6017	ctsio = &io->scsiio;
6018	page_index = NULL;
6019	page_len = 0;
6020
6021	modepage_info = (union ctl_modepage_info *)
6022		ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes;
6023	len_left = &modepage_info->header.len_left;
6024	len_used = &modepage_info->header.len_used;
6025
6026do_next_page:
6027
6028	page_header = (struct scsi_mode_page_header *)
6029		(ctsio->kern_data_ptr + *len_used);
6030
6031	if (*len_left == 0) {
6032		free(ctsio->kern_data_ptr, M_CTL);
6033		ctl_set_success(ctsio);
6034		ctl_done((union ctl_io *)ctsio);
6035		return (CTL_RETVAL_COMPLETE);
6036	} else if (*len_left < sizeof(struct scsi_mode_page_header)) {
6037		free(ctsio->kern_data_ptr, M_CTL);
6038		ctl_set_param_len_error(ctsio);
6039		ctl_done((union ctl_io *)ctsio);
6040		return (CTL_RETVAL_COMPLETE);
6041
6042	} else if ((page_header->page_code & SMPH_SPF)
6043		&& (*len_left < sizeof(struct scsi_mode_page_header_sp))) {
6044		free(ctsio->kern_data_ptr, M_CTL);
6045		ctl_set_param_len_error(ctsio);
6046		ctl_done((union ctl_io *)ctsio);
6047		return (CTL_RETVAL_COMPLETE);
6048	}
6049
6050	/*
6051	 * XXX KDM should we do something with the block descriptor?
6052	 */
6053	for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
6054		page_index = &lun->mode_pages.index[i];
6055		if (lun->be_lun->lun_type == T_DIRECT &&
6056		    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
6057			continue;
6058		if (lun->be_lun->lun_type == T_PROCESSOR &&
6059		    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
6060			continue;
6061		if (lun->be_lun->lun_type == T_CDROM &&
6062		    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
6063			continue;
6064
6065		if ((page_index->page_code & SMPH_PC_MASK) !=
6066		    (page_header->page_code & SMPH_PC_MASK))
6067			continue;
6068
6069		/*
6070		 * If neither page has a subpage code, then we've got a
6071		 * match.
6072		 */
6073		if (((page_index->page_code & SMPH_SPF) == 0)
6074		 && ((page_header->page_code & SMPH_SPF) == 0)) {
6075			page_len = page_header->page_length;
6076			break;
6077		}
6078
6079		/*
6080		 * If both pages have subpages, then the subpage numbers
6081		 * have to match.
6082		 */
6083		if ((page_index->page_code & SMPH_SPF)
6084		  && (page_header->page_code & SMPH_SPF)) {
6085			struct scsi_mode_page_header_sp *sph;
6086
6087			sph = (struct scsi_mode_page_header_sp *)page_header;
6088			if (page_index->subpage == sph->subpage) {
6089				page_len = scsi_2btoul(sph->page_length);
6090				break;
6091			}
6092		}
6093	}
6094
6095	/*
6096	 * If we couldn't find the page, or if we don't have a mode select
6097	 * handler for it, send back an error to the user.
6098	 */
6099	if ((i >= CTL_NUM_MODE_PAGES)
6100	 || (page_index->select_handler == NULL)) {
6101		ctl_set_invalid_field(ctsio,
6102				      /*sks_valid*/ 1,
6103				      /*command*/ 0,
6104				      /*field*/ *len_used,
6105				      /*bit_valid*/ 0,
6106				      /*bit*/ 0);
6107		free(ctsio->kern_data_ptr, M_CTL);
6108		ctl_done((union ctl_io *)ctsio);
6109		return (CTL_RETVAL_COMPLETE);
6110	}
6111
6112	if (page_index->page_code & SMPH_SPF) {
6113		page_len_offset = 2;
6114		page_len_size = 2;
6115	} else {
6116		page_len_size = 1;
6117		page_len_offset = 1;
6118	}
6119
6120	/*
6121	 * If the length the initiator gives us isn't the one we specify in
6122	 * the mode page header, or if they didn't specify enough data in
6123	 * the CDB to avoid truncating this page, kick out the request.
6124	 */
6125	if (page_len != page_index->page_len - page_len_offset - page_len_size) {
6126		ctl_set_invalid_field(ctsio,
6127				      /*sks_valid*/ 1,
6128				      /*command*/ 0,
6129				      /*field*/ *len_used + page_len_offset,
6130				      /*bit_valid*/ 0,
6131				      /*bit*/ 0);
6132		free(ctsio->kern_data_ptr, M_CTL);
6133		ctl_done((union ctl_io *)ctsio);
6134		return (CTL_RETVAL_COMPLETE);
6135	}
6136	if (*len_left < page_index->page_len) {
6137		free(ctsio->kern_data_ptr, M_CTL);
6138		ctl_set_param_len_error(ctsio);
6139		ctl_done((union ctl_io *)ctsio);
6140		return (CTL_RETVAL_COMPLETE);
6141	}
6142
6143	/*
6144	 * Run through the mode page, checking to make sure that the bits
6145	 * the user changed are actually legal for him to change.
6146	 */
6147	for (i = 0; i < page_index->page_len; i++) {
6148		uint8_t *user_byte, *change_mask, *current_byte;
6149		int bad_bit;
6150		int j;
6151
6152		user_byte = (uint8_t *)page_header + i;
6153		change_mask = page_index->page_data +
6154			      (page_index->page_len * CTL_PAGE_CHANGEABLE) + i;
6155		current_byte = page_index->page_data +
6156			       (page_index->page_len * CTL_PAGE_CURRENT) + i;
6157
6158		/*
6159		 * Check to see whether the user set any bits in this byte
6160		 * that he is not allowed to set.
6161		 */
6162		if ((*user_byte & ~(*change_mask)) ==
6163		    (*current_byte & ~(*change_mask)))
6164			continue;
6165
6166		/*
6167		 * Go through bit by bit to determine which one is illegal.
6168		 */
6169		bad_bit = 0;
6170		for (j = 7; j >= 0; j--) {
6171			if ((((1 << i) & ~(*change_mask)) & *user_byte) !=
6172			    (((1 << i) & ~(*change_mask)) & *current_byte)) {
6173				bad_bit = i;
6174				break;
6175			}
6176		}
6177		ctl_set_invalid_field(ctsio,
6178				      /*sks_valid*/ 1,
6179				      /*command*/ 0,
6180				      /*field*/ *len_used + i,
6181				      /*bit_valid*/ 1,
6182				      /*bit*/ bad_bit);
6183		free(ctsio->kern_data_ptr, M_CTL);
6184		ctl_done((union ctl_io *)ctsio);
6185		return (CTL_RETVAL_COMPLETE);
6186	}
6187
6188	/*
6189	 * Decrement these before we call the page handler, since we may
6190	 * end up getting called back one way or another before the handler
6191	 * returns to this context.
6192	 */
6193	*len_left -= page_index->page_len;
6194	*len_used += page_index->page_len;
6195
6196	retval = page_index->select_handler(ctsio, page_index,
6197					    (uint8_t *)page_header);
6198
6199	/*
6200	 * If the page handler returns CTL_RETVAL_QUEUED, then we need to
6201	 * wait until this queued command completes to finish processing
6202	 * the mode page.  If it returns anything other than
6203	 * CTL_RETVAL_COMPLETE (e.g. CTL_RETVAL_ERROR), then it should have
6204	 * already set the sense information, freed the data pointer, and
6205	 * completed the io for us.
6206	 */
6207	if (retval != CTL_RETVAL_COMPLETE)
6208		goto bailout_no_done;
6209
6210	/*
6211	 * If the initiator sent us more than one page, parse the next one.
6212	 */
6213	if (*len_left > 0)
6214		goto do_next_page;
6215
6216	ctl_set_success(ctsio);
6217	free(ctsio->kern_data_ptr, M_CTL);
6218	ctl_done((union ctl_io *)ctsio);
6219
6220bailout_no_done:
6221
6222	return (CTL_RETVAL_COMPLETE);
6223
6224}
6225
6226int
6227ctl_mode_select(struct ctl_scsiio *ctsio)
6228{
6229	struct ctl_lun *lun = CTL_LUN(ctsio);
6230	union ctl_modepage_info *modepage_info;
6231	int bd_len, i, header_size, param_len, rtd;
6232	uint32_t initidx;
6233
6234	initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
6235	switch (ctsio->cdb[0]) {
6236	case MODE_SELECT_6: {
6237		struct scsi_mode_select_6 *cdb;
6238
6239		cdb = (struct scsi_mode_select_6 *)ctsio->cdb;
6240
6241		rtd = (cdb->byte2 & SMS_RTD) ? 1 : 0;
6242		param_len = cdb->length;
6243		header_size = sizeof(struct scsi_mode_header_6);
6244		break;
6245	}
6246	case MODE_SELECT_10: {
6247		struct scsi_mode_select_10 *cdb;
6248
6249		cdb = (struct scsi_mode_select_10 *)ctsio->cdb;
6250
6251		rtd = (cdb->byte2 & SMS_RTD) ? 1 : 0;
6252		param_len = scsi_2btoul(cdb->length);
6253		header_size = sizeof(struct scsi_mode_header_10);
6254		break;
6255	}
6256	default:
6257		ctl_set_invalid_opcode(ctsio);
6258		ctl_done((union ctl_io *)ctsio);
6259		return (CTL_RETVAL_COMPLETE);
6260	}
6261
6262	if (rtd) {
6263		if (param_len != 0) {
6264			ctl_set_invalid_field(ctsio, /*sks_valid*/ 0,
6265			    /*command*/ 1, /*field*/ 0,
6266			    /*bit_valid*/ 0, /*bit*/ 0);
6267			ctl_done((union ctl_io *)ctsio);
6268			return (CTL_RETVAL_COMPLETE);
6269		}
6270
6271		/* Revert to defaults. */
6272		ctl_init_page_index(lun);
6273		mtx_lock(&lun->lun_lock);
6274		ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE);
6275		mtx_unlock(&lun->lun_lock);
6276		for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
6277			ctl_isc_announce_mode(lun, -1,
6278			    lun->mode_pages.index[i].page_code & SMPH_PC_MASK,
6279			    lun->mode_pages.index[i].subpage);
6280		}
6281		ctl_set_success(ctsio);
6282		ctl_done((union ctl_io *)ctsio);
6283		return (CTL_RETVAL_COMPLETE);
6284	}
6285
6286	/*
6287	 * From SPC-3:
6288	 * "A parameter list length of zero indicates that the Data-Out Buffer
6289	 * shall be empty. This condition shall not be considered as an error."
6290	 */
6291	if (param_len == 0) {
6292		ctl_set_success(ctsio);
6293		ctl_done((union ctl_io *)ctsio);
6294		return (CTL_RETVAL_COMPLETE);
6295	}
6296
6297	/*
6298	 * Since we'll hit this the first time through, prior to
6299	 * allocation, we don't need to free a data buffer here.
6300	 */
6301	if (param_len < header_size) {
6302		ctl_set_param_len_error(ctsio);
6303		ctl_done((union ctl_io *)ctsio);
6304		return (CTL_RETVAL_COMPLETE);
6305	}
6306
6307	/*
6308	 * Allocate the data buffer and grab the user's data.  In theory,
6309	 * we shouldn't have to sanity check the parameter list length here
6310	 * because the maximum size is 64K.  We should be able to malloc
6311	 * that much without too many problems.
6312	 */
6313	if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
6314		ctsio->kern_data_ptr = malloc(param_len, M_CTL, M_WAITOK);
6315		ctsio->kern_data_len = param_len;
6316		ctsio->kern_total_len = param_len;
6317		ctsio->kern_rel_offset = 0;
6318		ctsio->kern_sg_entries = 0;
6319		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
6320		ctsio->be_move_done = ctl_config_move_done;
6321		ctl_datamove((union ctl_io *)ctsio);
6322
6323		return (CTL_RETVAL_COMPLETE);
6324	}
6325
6326	switch (ctsio->cdb[0]) {
6327	case MODE_SELECT_6: {
6328		struct scsi_mode_header_6 *mh6;
6329
6330		mh6 = (struct scsi_mode_header_6 *)ctsio->kern_data_ptr;
6331		bd_len = mh6->blk_desc_len;
6332		break;
6333	}
6334	case MODE_SELECT_10: {
6335		struct scsi_mode_header_10 *mh10;
6336
6337		mh10 = (struct scsi_mode_header_10 *)ctsio->kern_data_ptr;
6338		bd_len = scsi_2btoul(mh10->blk_desc_len);
6339		break;
6340	}
6341	default:
6342		panic("%s: Invalid CDB type %#x", __func__, ctsio->cdb[0]);
6343	}
6344
6345	if (param_len < (header_size + bd_len)) {
6346		free(ctsio->kern_data_ptr, M_CTL);
6347		ctl_set_param_len_error(ctsio);
6348		ctl_done((union ctl_io *)ctsio);
6349		return (CTL_RETVAL_COMPLETE);
6350	}
6351
6352	/*
6353	 * Set the IO_CONT flag, so that if this I/O gets passed to
6354	 * ctl_config_write_done(), it'll get passed back to
6355	 * ctl_do_mode_select() for further processing, or completion if
6356	 * we're all done.
6357	 */
6358	ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT;
6359	ctsio->io_cont = ctl_do_mode_select;
6360
6361	modepage_info = (union ctl_modepage_info *)
6362		ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes;
6363	memset(modepage_info, 0, sizeof(*modepage_info));
6364	modepage_info->header.len_left = param_len - header_size - bd_len;
6365	modepage_info->header.len_used = header_size + bd_len;
6366
6367	return (ctl_do_mode_select((union ctl_io *)ctsio));
6368}
6369
6370int
6371ctl_mode_sense(struct ctl_scsiio *ctsio)
6372{
6373	struct ctl_lun *lun = CTL_LUN(ctsio);
6374	int pc, page_code, llba, subpage;
6375	int alloc_len, page_len, header_len, bd_len, total_len;
6376	void *block_desc;
6377	struct ctl_page_index *page_index;
6378
6379	llba = 0;
6380
6381	CTL_DEBUG_PRINT(("ctl_mode_sense\n"));
6382
6383	switch (ctsio->cdb[0]) {
6384	case MODE_SENSE_6: {
6385		struct scsi_mode_sense_6 *cdb;
6386
6387		cdb = (struct scsi_mode_sense_6 *)ctsio->cdb;
6388
6389		header_len = sizeof(struct scsi_mode_hdr_6);
6390		if (cdb->byte2 & SMS_DBD)
6391			bd_len = 0;
6392		else
6393			bd_len = sizeof(struct scsi_mode_block_descr);
6394		header_len += bd_len;
6395
6396		pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6;
6397		page_code = cdb->page & SMS_PAGE_CODE;
6398		subpage = cdb->subpage;
6399		alloc_len = cdb->length;
6400		break;
6401	}
6402	case MODE_SENSE_10: {
6403		struct scsi_mode_sense_10 *cdb;
6404
6405		cdb = (struct scsi_mode_sense_10 *)ctsio->cdb;
6406
6407		header_len = sizeof(struct scsi_mode_hdr_10);
6408		if (cdb->byte2 & SMS_DBD) {
6409			bd_len = 0;
6410		} else if (lun->be_lun->lun_type == T_DIRECT) {
6411			if (cdb->byte2 & SMS10_LLBAA) {
6412				llba = 1;
6413				bd_len = sizeof(struct scsi_mode_block_descr_dlong);
6414			} else
6415				bd_len = sizeof(struct scsi_mode_block_descr_dshort);
6416		} else
6417			bd_len = sizeof(struct scsi_mode_block_descr);
6418		header_len += bd_len;
6419
6420		pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6;
6421		page_code = cdb->page & SMS_PAGE_CODE;
6422		subpage = cdb->subpage;
6423		alloc_len = scsi_2btoul(cdb->length);
6424		break;
6425	}
6426	default:
6427		ctl_set_invalid_opcode(ctsio);
6428		ctl_done((union ctl_io *)ctsio);
6429		return (CTL_RETVAL_COMPLETE);
6430		break; /* NOTREACHED */
6431	}
6432
6433	/*
6434	 * We have to make a first pass through to calculate the size of
6435	 * the pages that match the user's query.  Then we allocate enough
6436	 * memory to hold it, and actually copy the data into the buffer.
6437	 */
6438	switch (page_code) {
6439	case SMS_ALL_PAGES_PAGE: {
6440		u_int i;
6441
6442		page_len = 0;
6443
6444		/*
6445		 * At the moment, values other than 0 and 0xff here are
6446		 * reserved according to SPC-3.
6447		 */
6448		if ((subpage != SMS_SUBPAGE_PAGE_0)
6449		 && (subpage != SMS_SUBPAGE_ALL)) {
6450			ctl_set_invalid_field(ctsio,
6451					      /*sks_valid*/ 1,
6452					      /*command*/ 1,
6453					      /*field*/ 3,
6454					      /*bit_valid*/ 0,
6455					      /*bit*/ 0);
6456			ctl_done((union ctl_io *)ctsio);
6457			return (CTL_RETVAL_COMPLETE);
6458		}
6459
6460		for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
6461			page_index = &lun->mode_pages.index[i];
6462
6463			/* Make sure the page is supported for this dev type */
6464			if (lun->be_lun->lun_type == T_DIRECT &&
6465			    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
6466				continue;
6467			if (lun->be_lun->lun_type == T_PROCESSOR &&
6468			    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
6469				continue;
6470			if (lun->be_lun->lun_type == T_CDROM &&
6471			    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
6472				continue;
6473
6474			/*
6475			 * We don't use this subpage if the user didn't
6476			 * request all subpages.
6477			 */
6478			if ((page_index->subpage != 0)
6479			 && (subpage == SMS_SUBPAGE_PAGE_0))
6480				continue;
6481
6482			page_len += page_index->page_len;
6483		}
6484		break;
6485	}
6486	default: {
6487		u_int i;
6488
6489		page_len = 0;
6490
6491		for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
6492			page_index = &lun->mode_pages.index[i];
6493
6494			/* Make sure the page is supported for this dev type */
6495			if (lun->be_lun->lun_type == T_DIRECT &&
6496			    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
6497				continue;
6498			if (lun->be_lun->lun_type == T_PROCESSOR &&
6499			    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
6500				continue;
6501			if (lun->be_lun->lun_type == T_CDROM &&
6502			    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
6503				continue;
6504
6505			/* Look for the right page code */
6506			if ((page_index->page_code & SMPH_PC_MASK) != page_code)
6507				continue;
6508
6509			/* Look for the right subpage or the subpage wildcard*/
6510			if ((page_index->subpage != subpage)
6511			 && (subpage != SMS_SUBPAGE_ALL))
6512				continue;
6513
6514			page_len += page_index->page_len;
6515		}
6516
6517		if (page_len == 0) {
6518			ctl_set_invalid_field(ctsio,
6519					      /*sks_valid*/ 1,
6520					      /*command*/ 1,
6521					      /*field*/ 2,
6522					      /*bit_valid*/ 1,
6523					      /*bit*/ 5);
6524			ctl_done((union ctl_io *)ctsio);
6525			return (CTL_RETVAL_COMPLETE);
6526		}
6527		break;
6528	}
6529	}
6530
6531	total_len = header_len + page_len;
6532
6533	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
6534	ctsio->kern_sg_entries = 0;
6535	ctsio->kern_rel_offset = 0;
6536	ctsio->kern_data_len = min(total_len, alloc_len);
6537	ctsio->kern_total_len = ctsio->kern_data_len;
6538
6539	switch (ctsio->cdb[0]) {
6540	case MODE_SENSE_6: {
6541		struct scsi_mode_hdr_6 *header;
6542
6543		header = (struct scsi_mode_hdr_6 *)ctsio->kern_data_ptr;
6544
6545		header->datalen = MIN(total_len - 1, 254);
6546		if (lun->be_lun->lun_type == T_DIRECT) {
6547			header->dev_specific = 0x10; /* DPOFUA */
6548			if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) ||
6549			    (lun->MODE_CTRL.eca_and_aen & SCP_SWP) != 0)
6550				header->dev_specific |= 0x80; /* WP */
6551		}
6552		header->block_descr_len = bd_len;
6553		block_desc = &header[1];
6554		break;
6555	}
6556	case MODE_SENSE_10: {
6557		struct scsi_mode_hdr_10 *header;
6558		int datalen;
6559
6560		header = (struct scsi_mode_hdr_10 *)ctsio->kern_data_ptr;
6561
6562		datalen = MIN(total_len - 2, 65533);
6563		scsi_ulto2b(datalen, header->datalen);
6564		if (lun->be_lun->lun_type == T_DIRECT) {
6565			header->dev_specific = 0x10; /* DPOFUA */
6566			if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) ||
6567			    (lun->MODE_CTRL.eca_and_aen & SCP_SWP) != 0)
6568				header->dev_specific |= 0x80; /* WP */
6569		}
6570		if (llba)
6571			header->flags |= SMH_LONGLBA;
6572		scsi_ulto2b(bd_len, header->block_descr_len);
6573		block_desc = &header[1];
6574		break;
6575	}
6576	default:
6577		panic("%s: Invalid CDB type %#x", __func__, ctsio->cdb[0]);
6578	}
6579
6580	/*
6581	 * If we've got a disk, use its blocksize in the block
6582	 * descriptor.  Otherwise, just set it to 0.
6583	 */
6584	if (bd_len > 0) {
6585		if (lun->be_lun->lun_type == T_DIRECT) {
6586			if (llba) {
6587				struct scsi_mode_block_descr_dlong *bd = block_desc;
6588				if (lun->be_lun->maxlba != 0)
6589					scsi_u64to8b(lun->be_lun->maxlba + 1,
6590					    bd->num_blocks);
6591				scsi_ulto4b(lun->be_lun->blocksize,
6592				    bd->block_len);
6593			} else {
6594				struct scsi_mode_block_descr_dshort *bd = block_desc;
6595				if (lun->be_lun->maxlba != 0)
6596					scsi_ulto4b(MIN(lun->be_lun->maxlba+1,
6597					    UINT32_MAX), bd->num_blocks);
6598				scsi_ulto3b(lun->be_lun->blocksize,
6599				    bd->block_len);
6600			}
6601		} else {
6602			struct scsi_mode_block_descr *bd = block_desc;
6603			scsi_ulto3b(0, bd->block_len);
6604		}
6605	}
6606
6607	switch (page_code) {
6608	case SMS_ALL_PAGES_PAGE: {
6609		int i, data_used;
6610
6611		data_used = header_len;
6612		for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
6613			struct ctl_page_index *page_index;
6614
6615			page_index = &lun->mode_pages.index[i];
6616			if (lun->be_lun->lun_type == T_DIRECT &&
6617			    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
6618				continue;
6619			if (lun->be_lun->lun_type == T_PROCESSOR &&
6620			    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
6621				continue;
6622			if (lun->be_lun->lun_type == T_CDROM &&
6623			    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
6624				continue;
6625
6626			/*
6627			 * We don't use this subpage if the user didn't
6628			 * request all subpages.  We already checked (above)
6629			 * to make sure the user only specified a subpage
6630			 * of 0 or 0xff in the SMS_ALL_PAGES_PAGE case.
6631			 */
6632			if ((page_index->subpage != 0)
6633			 && (subpage == SMS_SUBPAGE_PAGE_0))
6634				continue;
6635
6636			/*
6637			 * Call the handler, if it exists, to update the
6638			 * page to the latest values.
6639			 */
6640			if (page_index->sense_handler != NULL)
6641				page_index->sense_handler(ctsio, page_index,pc);
6642
6643			memcpy(ctsio->kern_data_ptr + data_used,
6644			       page_index->page_data +
6645			       (page_index->page_len * pc),
6646			       page_index->page_len);
6647			data_used += page_index->page_len;
6648		}
6649		break;
6650	}
6651	default: {
6652		int i, data_used;
6653
6654		data_used = header_len;
6655
6656		for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
6657			struct ctl_page_index *page_index;
6658
6659			page_index = &lun->mode_pages.index[i];
6660
6661			/* Look for the right page code */
6662			if ((page_index->page_code & SMPH_PC_MASK) != page_code)
6663				continue;
6664
6665			/* Look for the right subpage or the subpage wildcard*/
6666			if ((page_index->subpage != subpage)
6667			 && (subpage != SMS_SUBPAGE_ALL))
6668				continue;
6669
6670			/* Make sure the page is supported for this dev type */
6671			if (lun->be_lun->lun_type == T_DIRECT &&
6672			    (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
6673				continue;
6674			if (lun->be_lun->lun_type == T_PROCESSOR &&
6675			    (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
6676				continue;
6677			if (lun->be_lun->lun_type == T_CDROM &&
6678			    (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
6679				continue;
6680
6681			/*
6682			 * Call the handler, if it exists, to update the
6683			 * page to the latest values.
6684			 */
6685			if (page_index->sense_handler != NULL)
6686				page_index->sense_handler(ctsio, page_index,pc);
6687
6688			memcpy(ctsio->kern_data_ptr + data_used,
6689			       page_index->page_data +
6690			       (page_index->page_len * pc),
6691			       page_index->page_len);
6692			data_used += page_index->page_len;
6693		}
6694		break;
6695	}
6696	}
6697
6698	ctl_set_success(ctsio);
6699	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
6700	ctsio->be_move_done = ctl_config_move_done;
6701	ctl_datamove((union ctl_io *)ctsio);
6702	return (CTL_RETVAL_COMPLETE);
6703}
6704
6705int
6706ctl_temp_log_sense_handler(struct ctl_scsiio *ctsio,
6707			       struct ctl_page_index *page_index,
6708			       int pc)
6709{
6710	struct ctl_lun *lun = CTL_LUN(ctsio);
6711	struct scsi_log_temperature *data;
6712	const char *value;
6713
6714	data = (struct scsi_log_temperature *)page_index->page_data;
6715
6716	scsi_ulto2b(SLP_TEMPERATURE, data->hdr.param_code);
6717	data->hdr.param_control = SLP_LBIN;
6718	data->hdr.param_len = sizeof(struct scsi_log_temperature) -
6719	    sizeof(struct scsi_log_param_header);
6720	if ((value = dnvlist_get_string(lun->be_lun->options, "temperature",
6721	    NULL)) != NULL)
6722		data->temperature = strtol(value, NULL, 0);
6723	else
6724		data->temperature = 0xff;
6725	data++;
6726
6727	scsi_ulto2b(SLP_REFTEMPERATURE, data->hdr.param_code);
6728	data->hdr.param_control = SLP_LBIN;
6729	data->hdr.param_len = sizeof(struct scsi_log_temperature) -
6730	    sizeof(struct scsi_log_param_header);
6731	if ((value = dnvlist_get_string(lun->be_lun->options, "reftemperature",
6732	    NULL)) != NULL)
6733		data->temperature = strtol(value, NULL, 0);
6734	else
6735		data->temperature = 0xff;
6736	return (0);
6737}
6738
6739int
6740ctl_lbp_log_sense_handler(struct ctl_scsiio *ctsio,
6741			       struct ctl_page_index *page_index,
6742			       int pc)
6743{
6744	struct ctl_lun *lun = CTL_LUN(ctsio);
6745	struct scsi_log_param_header *phdr;
6746	uint8_t *data;
6747	uint64_t val;
6748
6749	data = page_index->page_data;
6750
6751	if (lun->backend->lun_attr != NULL &&
6752	    (val = lun->backend->lun_attr(lun->be_lun, "blocksavail"))
6753	     != UINT64_MAX) {
6754		phdr = (struct scsi_log_param_header *)data;
6755		scsi_ulto2b(0x0001, phdr->param_code);
6756		phdr->param_control = SLP_LBIN | SLP_LP;
6757		phdr->param_len = 8;
6758		data = (uint8_t *)(phdr + 1);
6759		scsi_ulto4b(val >> CTL_LBP_EXPONENT, data);
6760		data[4] = 0x02; /* per-pool */
6761		data += phdr->param_len;
6762	}
6763
6764	if (lun->backend->lun_attr != NULL &&
6765	    (val = lun->backend->lun_attr(lun->be_lun, "blocksused"))
6766	     != UINT64_MAX) {
6767		phdr = (struct scsi_log_param_header *)data;
6768		scsi_ulto2b(0x0002, phdr->param_code);
6769		phdr->param_control = SLP_LBIN | SLP_LP;
6770		phdr->param_len = 8;
6771		data = (uint8_t *)(phdr + 1);
6772		scsi_ulto4b(val >> CTL_LBP_EXPONENT, data);
6773		data[4] = 0x01; /* per-LUN */
6774		data += phdr->param_len;
6775	}
6776
6777	if (lun->backend->lun_attr != NULL &&
6778	    (val = lun->backend->lun_attr(lun->be_lun, "poolblocksavail"))
6779	     != UINT64_MAX) {
6780		phdr = (struct scsi_log_param_header *)data;
6781		scsi_ulto2b(0x00f1, phdr->param_code);
6782		phdr->param_control = SLP_LBIN | SLP_LP;
6783		phdr->param_len = 8;
6784		data = (uint8_t *)(phdr + 1);
6785		scsi_ulto4b(val >> CTL_LBP_EXPONENT, data);
6786		data[4] = 0x02; /* per-pool */
6787		data += phdr->param_len;
6788	}
6789
6790	if (lun->backend->lun_attr != NULL &&
6791	    (val = lun->backend->lun_attr(lun->be_lun, "poolblocksused"))
6792	     != UINT64_MAX) {
6793		phdr = (struct scsi_log_param_header *)data;
6794		scsi_ulto2b(0x00f2, phdr->param_code);
6795		phdr->param_control = SLP_LBIN | SLP_LP;
6796		phdr->param_len = 8;
6797		data = (uint8_t *)(phdr + 1);
6798		scsi_ulto4b(val >> CTL_LBP_EXPONENT, data);
6799		data[4] = 0x02; /* per-pool */
6800		data += phdr->param_len;
6801	}
6802
6803	page_index->page_len = data - page_index->page_data;
6804	return (0);
6805}
6806
6807int
6808ctl_sap_log_sense_handler(struct ctl_scsiio *ctsio,
6809			       struct ctl_page_index *page_index,
6810			       int pc)
6811{
6812	struct ctl_lun *lun = CTL_LUN(ctsio);
6813	struct stat_page *data;
6814	struct bintime *t;
6815
6816	data = (struct stat_page *)page_index->page_data;
6817
6818	scsi_ulto2b(SLP_SAP, data->sap.hdr.param_code);
6819	data->sap.hdr.param_control = SLP_LBIN;
6820	data->sap.hdr.param_len = sizeof(struct scsi_log_stat_and_perf) -
6821	    sizeof(struct scsi_log_param_header);
6822	scsi_u64to8b(lun->stats.operations[CTL_STATS_READ],
6823	    data->sap.read_num);
6824	scsi_u64to8b(lun->stats.operations[CTL_STATS_WRITE],
6825	    data->sap.write_num);
6826	if (lun->be_lun->blocksize > 0) {
6827		scsi_u64to8b(lun->stats.bytes[CTL_STATS_WRITE] /
6828		    lun->be_lun->blocksize, data->sap.recvieved_lba);
6829		scsi_u64to8b(lun->stats.bytes[CTL_STATS_READ] /
6830		    lun->be_lun->blocksize, data->sap.transmitted_lba);
6831	}
6832	t = &lun->stats.time[CTL_STATS_READ];
6833	scsi_u64to8b((uint64_t)t->sec * 1000 + t->frac / (UINT64_MAX / 1000),
6834	    data->sap.read_int);
6835	t = &lun->stats.time[CTL_STATS_WRITE];
6836	scsi_u64to8b((uint64_t)t->sec * 1000 + t->frac / (UINT64_MAX / 1000),
6837	    data->sap.write_int);
6838	scsi_u64to8b(0, data->sap.weighted_num);
6839	scsi_u64to8b(0, data->sap.weighted_int);
6840	scsi_ulto2b(SLP_IT, data->it.hdr.param_code);
6841	data->it.hdr.param_control = SLP_LBIN;
6842	data->it.hdr.param_len = sizeof(struct scsi_log_idle_time) -
6843	    sizeof(struct scsi_log_param_header);
6844#ifdef CTL_TIME_IO
6845	scsi_u64to8b(lun->idle_time / SBT_1MS, data->it.idle_int);
6846#endif
6847	scsi_ulto2b(SLP_TI, data->ti.hdr.param_code);
6848	data->it.hdr.param_control = SLP_LBIN;
6849	data->ti.hdr.param_len = sizeof(struct scsi_log_time_interval) -
6850	    sizeof(struct scsi_log_param_header);
6851	scsi_ulto4b(3, data->ti.exponent);
6852	scsi_ulto4b(1, data->ti.integer);
6853	return (0);
6854}
6855
6856int
6857ctl_ie_log_sense_handler(struct ctl_scsiio *ctsio,
6858			       struct ctl_page_index *page_index,
6859			       int pc)
6860{
6861	struct ctl_lun *lun = CTL_LUN(ctsio);
6862	struct scsi_log_informational_exceptions *data;
6863	const char *value;
6864
6865	data = (struct scsi_log_informational_exceptions *)page_index->page_data;
6866
6867	scsi_ulto2b(SLP_IE_GEN, data->hdr.param_code);
6868	data->hdr.param_control = SLP_LBIN;
6869	data->hdr.param_len = sizeof(struct scsi_log_informational_exceptions) -
6870	    sizeof(struct scsi_log_param_header);
6871	data->ie_asc = lun->ie_asc;
6872	data->ie_ascq = lun->ie_ascq;
6873	if ((value = dnvlist_get_string(lun->be_lun->options, "temperature",
6874	    NULL)) != NULL)
6875		data->temperature = strtol(value, NULL, 0);
6876	else
6877		data->temperature = 0xff;
6878	return (0);
6879}
6880
6881int
6882ctl_log_sense(struct ctl_scsiio *ctsio)
6883{
6884	struct ctl_lun *lun = CTL_LUN(ctsio);
6885	int i, pc, page_code, subpage;
6886	int alloc_len, total_len;
6887	struct ctl_page_index *page_index;
6888	struct scsi_log_sense *cdb;
6889	struct scsi_log_header *header;
6890
6891	CTL_DEBUG_PRINT(("ctl_log_sense\n"));
6892
6893	cdb = (struct scsi_log_sense *)ctsio->cdb;
6894	pc = (cdb->page & SLS_PAGE_CTRL_MASK) >> 6;
6895	page_code = cdb->page & SLS_PAGE_CODE;
6896	subpage = cdb->subpage;
6897	alloc_len = scsi_2btoul(cdb->length);
6898
6899	page_index = NULL;
6900	for (i = 0; i < CTL_NUM_LOG_PAGES; i++) {
6901		page_index = &lun->log_pages.index[i];
6902
6903		/* Look for the right page code */
6904		if ((page_index->page_code & SL_PAGE_CODE) != page_code)
6905			continue;
6906
6907		/* Look for the right subpage or the subpage wildcard*/
6908		if (page_index->subpage != subpage)
6909			continue;
6910
6911		break;
6912	}
6913	if (i >= CTL_NUM_LOG_PAGES) {
6914		ctl_set_invalid_field(ctsio,
6915				      /*sks_valid*/ 1,
6916				      /*command*/ 1,
6917				      /*field*/ 2,
6918				      /*bit_valid*/ 0,
6919				      /*bit*/ 0);
6920		ctl_done((union ctl_io *)ctsio);
6921		return (CTL_RETVAL_COMPLETE);
6922	}
6923
6924	total_len = sizeof(struct scsi_log_header) + page_index->page_len;
6925
6926	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
6927	ctsio->kern_sg_entries = 0;
6928	ctsio->kern_rel_offset = 0;
6929	ctsio->kern_data_len = min(total_len, alloc_len);
6930	ctsio->kern_total_len = ctsio->kern_data_len;
6931
6932	header = (struct scsi_log_header *)ctsio->kern_data_ptr;
6933	header->page = page_index->page_code;
6934	if (page_index->page_code == SLS_LOGICAL_BLOCK_PROVISIONING)
6935		header->page |= SL_DS;
6936	if (page_index->subpage) {
6937		header->page |= SL_SPF;
6938		header->subpage = page_index->subpage;
6939	}
6940	scsi_ulto2b(page_index->page_len, header->datalen);
6941
6942	/*
6943	 * Call the handler, if it exists, to update the
6944	 * page to the latest values.
6945	 */
6946	if (page_index->sense_handler != NULL)
6947		page_index->sense_handler(ctsio, page_index, pc);
6948
6949	memcpy(header + 1, page_index->page_data, page_index->page_len);
6950
6951	ctl_set_success(ctsio);
6952	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
6953	ctsio->be_move_done = ctl_config_move_done;
6954	ctl_datamove((union ctl_io *)ctsio);
6955	return (CTL_RETVAL_COMPLETE);
6956}
6957
6958int
6959ctl_read_capacity(struct ctl_scsiio *ctsio)
6960{
6961	struct ctl_lun *lun = CTL_LUN(ctsio);
6962	struct scsi_read_capacity *cdb;
6963	struct scsi_read_capacity_data *data;
6964	uint32_t lba;
6965
6966	CTL_DEBUG_PRINT(("ctl_read_capacity\n"));
6967
6968	cdb = (struct scsi_read_capacity *)ctsio->cdb;
6969
6970	lba = scsi_4btoul(cdb->addr);
6971	if (((cdb->pmi & SRC_PMI) == 0)
6972	 && (lba != 0)) {
6973		ctl_set_invalid_field(/*ctsio*/ ctsio,
6974				      /*sks_valid*/ 1,
6975				      /*command*/ 1,
6976				      /*field*/ 2,
6977				      /*bit_valid*/ 0,
6978				      /*bit*/ 0);
6979		ctl_done((union ctl_io *)ctsio);
6980		return (CTL_RETVAL_COMPLETE);
6981	}
6982
6983	ctsio->kern_data_ptr = malloc(sizeof(*data), M_CTL, M_WAITOK | M_ZERO);
6984	data = (struct scsi_read_capacity_data *)ctsio->kern_data_ptr;
6985	ctsio->kern_data_len = sizeof(*data);
6986	ctsio->kern_total_len = sizeof(*data);
6987	ctsio->kern_rel_offset = 0;
6988	ctsio->kern_sg_entries = 0;
6989
6990	/*
6991	 * If the maximum LBA is greater than 0xfffffffe, the user must
6992	 * issue a SERVICE ACTION IN (16) command, with the read capacity
6993	 * serivce action set.
6994	 */
6995	if (lun->be_lun->maxlba > 0xfffffffe)
6996		scsi_ulto4b(0xffffffff, data->addr);
6997	else
6998		scsi_ulto4b(lun->be_lun->maxlba, data->addr);
6999
7000	/*
7001	 * XXX KDM this may not be 512 bytes...
7002	 */
7003	scsi_ulto4b(lun->be_lun->blocksize, data->length);
7004
7005	ctl_set_success(ctsio);
7006	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7007	ctsio->be_move_done = ctl_config_move_done;
7008	ctl_datamove((union ctl_io *)ctsio);
7009	return (CTL_RETVAL_COMPLETE);
7010}
7011
7012int
7013ctl_read_capacity_16(struct ctl_scsiio *ctsio)
7014{
7015	struct ctl_lun *lun = CTL_LUN(ctsio);
7016	struct scsi_read_capacity_16 *cdb;
7017	struct scsi_read_capacity_data_long *data;
7018	uint64_t lba;
7019	uint32_t alloc_len;
7020
7021	CTL_DEBUG_PRINT(("ctl_read_capacity_16\n"));
7022
7023	cdb = (struct scsi_read_capacity_16 *)ctsio->cdb;
7024
7025	alloc_len = scsi_4btoul(cdb->alloc_len);
7026	lba = scsi_8btou64(cdb->addr);
7027
7028	if ((cdb->reladr & SRC16_PMI)
7029	 && (lba != 0)) {
7030		ctl_set_invalid_field(/*ctsio*/ ctsio,
7031				      /*sks_valid*/ 1,
7032				      /*command*/ 1,
7033				      /*field*/ 2,
7034				      /*bit_valid*/ 0,
7035				      /*bit*/ 0);
7036		ctl_done((union ctl_io *)ctsio);
7037		return (CTL_RETVAL_COMPLETE);
7038	}
7039
7040	ctsio->kern_data_ptr = malloc(sizeof(*data), M_CTL, M_WAITOK | M_ZERO);
7041	data = (struct scsi_read_capacity_data_long *)ctsio->kern_data_ptr;
7042	ctsio->kern_rel_offset = 0;
7043	ctsio->kern_sg_entries = 0;
7044	ctsio->kern_data_len = min(sizeof(*data), alloc_len);
7045	ctsio->kern_total_len = ctsio->kern_data_len;
7046
7047	scsi_u64to8b(lun->be_lun->maxlba, data->addr);
7048	/* XXX KDM this may not be 512 bytes... */
7049	scsi_ulto4b(lun->be_lun->blocksize, data->length);
7050	data->prot_lbppbe = lun->be_lun->pblockexp & SRC16_LBPPBE;
7051	scsi_ulto2b(lun->be_lun->pblockoff & SRC16_LALBA_A, data->lalba_lbp);
7052	if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP)
7053		data->lalba_lbp[0] |= SRC16_LBPME | SRC16_LBPRZ;
7054
7055	ctl_set_success(ctsio);
7056	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7057	ctsio->be_move_done = ctl_config_move_done;
7058	ctl_datamove((union ctl_io *)ctsio);
7059	return (CTL_RETVAL_COMPLETE);
7060}
7061
7062int
7063ctl_get_lba_status(struct ctl_scsiio *ctsio)
7064{
7065	struct ctl_lun *lun = CTL_LUN(ctsio);
7066	struct scsi_get_lba_status *cdb;
7067	struct scsi_get_lba_status_data *data;
7068	struct ctl_lba_len_flags *lbalen;
7069	uint64_t lba;
7070	uint32_t alloc_len, total_len;
7071	int retval;
7072
7073	CTL_DEBUG_PRINT(("ctl_get_lba_status\n"));
7074
7075	cdb = (struct scsi_get_lba_status *)ctsio->cdb;
7076	lba = scsi_8btou64(cdb->addr);
7077	alloc_len = scsi_4btoul(cdb->alloc_len);
7078
7079	if (lba > lun->be_lun->maxlba) {
7080		ctl_set_lba_out_of_range(ctsio, lba);
7081		ctl_done((union ctl_io *)ctsio);
7082		return (CTL_RETVAL_COMPLETE);
7083	}
7084
7085	total_len = sizeof(*data) + sizeof(data->descr[0]);
7086	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7087	data = (struct scsi_get_lba_status_data *)ctsio->kern_data_ptr;
7088	ctsio->kern_rel_offset = 0;
7089	ctsio->kern_sg_entries = 0;
7090	ctsio->kern_data_len = min(total_len, alloc_len);
7091	ctsio->kern_total_len = ctsio->kern_data_len;
7092
7093	/* Fill dummy data in case backend can't tell anything. */
7094	scsi_ulto4b(4 + sizeof(data->descr[0]), data->length);
7095	scsi_u64to8b(lba, data->descr[0].addr);
7096	scsi_ulto4b(MIN(UINT32_MAX, lun->be_lun->maxlba + 1 - lba),
7097	    data->descr[0].length);
7098	data->descr[0].status = 0; /* Mapped or unknown. */
7099
7100	ctl_set_success(ctsio);
7101	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7102	ctsio->be_move_done = ctl_config_move_done;
7103
7104	lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
7105	lbalen->lba = lba;
7106	lbalen->len = total_len;
7107	lbalen->flags = 0;
7108	retval = lun->backend->config_read((union ctl_io *)ctsio);
7109	return (retval);
7110}
7111
7112int
7113ctl_read_defect(struct ctl_scsiio *ctsio)
7114{
7115	struct scsi_read_defect_data_10 *ccb10;
7116	struct scsi_read_defect_data_12 *ccb12;
7117	struct scsi_read_defect_data_hdr_10 *data10;
7118	struct scsi_read_defect_data_hdr_12 *data12;
7119	uint32_t alloc_len, data_len;
7120	uint8_t format;
7121
7122	CTL_DEBUG_PRINT(("ctl_read_defect\n"));
7123
7124	if (ctsio->cdb[0] == READ_DEFECT_DATA_10) {
7125		ccb10 = (struct scsi_read_defect_data_10 *)&ctsio->cdb;
7126		format = ccb10->format;
7127		alloc_len = scsi_2btoul(ccb10->alloc_length);
7128		data_len = sizeof(*data10);
7129	} else {
7130		ccb12 = (struct scsi_read_defect_data_12 *)&ctsio->cdb;
7131		format = ccb12->format;
7132		alloc_len = scsi_4btoul(ccb12->alloc_length);
7133		data_len = sizeof(*data12);
7134	}
7135	if (alloc_len == 0) {
7136		ctl_set_success(ctsio);
7137		ctl_done((union ctl_io *)ctsio);
7138		return (CTL_RETVAL_COMPLETE);
7139	}
7140
7141	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
7142	ctsio->kern_rel_offset = 0;
7143	ctsio->kern_sg_entries = 0;
7144	ctsio->kern_data_len = min(data_len, alloc_len);
7145	ctsio->kern_total_len = ctsio->kern_data_len;
7146
7147	if (ctsio->cdb[0] == READ_DEFECT_DATA_10) {
7148		data10 = (struct scsi_read_defect_data_hdr_10 *)
7149		    ctsio->kern_data_ptr;
7150		data10->format = format;
7151		scsi_ulto2b(0, data10->length);
7152	} else {
7153		data12 = (struct scsi_read_defect_data_hdr_12 *)
7154		    ctsio->kern_data_ptr;
7155		data12->format = format;
7156		scsi_ulto2b(0, data12->generation);
7157		scsi_ulto4b(0, data12->length);
7158	}
7159
7160	ctl_set_success(ctsio);
7161	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7162	ctsio->be_move_done = ctl_config_move_done;
7163	ctl_datamove((union ctl_io *)ctsio);
7164	return (CTL_RETVAL_COMPLETE);
7165}
7166
7167int
7168ctl_report_ident_info(struct ctl_scsiio *ctsio)
7169{
7170	struct ctl_lun *lun = CTL_LUN(ctsio);
7171	struct scsi_report_ident_info *cdb;
7172	struct scsi_report_ident_info_data *rii_ptr;
7173	struct scsi_report_ident_info_descr *riid_ptr;
7174	const char *oii, *otii;
7175	int retval, alloc_len, total_len = 0, len = 0;
7176
7177	CTL_DEBUG_PRINT(("ctl_report_ident_info\n"));
7178
7179	cdb = (struct scsi_report_ident_info *)ctsio->cdb;
7180	retval = CTL_RETVAL_COMPLETE;
7181
7182	total_len = sizeof(struct scsi_report_ident_info_data);
7183	switch (cdb->type) {
7184	case RII_LUII:
7185		oii = dnvlist_get_string(lun->be_lun->options,
7186		    "ident_info", NULL);
7187		if (oii)
7188			len = strlen(oii);	/* Approximately */
7189		break;
7190	case RII_LUTII:
7191		otii = dnvlist_get_string(lun->be_lun->options,
7192		    "text_ident_info", NULL);
7193		if (otii)
7194			len = strlen(otii) + 1;	/* NULL-terminated */
7195		break;
7196	case RII_IIS:
7197		len = 2 * sizeof(struct scsi_report_ident_info_descr);
7198		break;
7199	default:
7200		ctl_set_invalid_field(/*ctsio*/ ctsio,
7201				      /*sks_valid*/ 1,
7202				      /*command*/ 1,
7203				      /*field*/ 11,
7204				      /*bit_valid*/ 1,
7205				      /*bit*/ 2);
7206		ctl_done((union ctl_io *)ctsio);
7207		return(retval);
7208	}
7209	total_len += len;
7210	alloc_len = scsi_4btoul(cdb->length);
7211
7212	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7213	ctsio->kern_sg_entries = 0;
7214	ctsio->kern_rel_offset = 0;
7215	ctsio->kern_data_len = min(total_len, alloc_len);
7216	ctsio->kern_total_len = ctsio->kern_data_len;
7217
7218	rii_ptr = (struct scsi_report_ident_info_data *)ctsio->kern_data_ptr;
7219	switch (cdb->type) {
7220	case RII_LUII:
7221		if (oii) {
7222			if (oii[0] == '0' && oii[1] == 'x')
7223				len = hex2bin(oii, (uint8_t *)(rii_ptr + 1), len);
7224			else
7225				strncpy((uint8_t *)(rii_ptr + 1), oii, len);
7226		}
7227		break;
7228	case RII_LUTII:
7229		if (otii)
7230			strlcpy((uint8_t *)(rii_ptr + 1), otii, len);
7231		break;
7232	case RII_IIS:
7233		riid_ptr = (struct scsi_report_ident_info_descr *)(rii_ptr + 1);
7234		riid_ptr->type = RII_LUII;
7235		scsi_ulto2b(0xffff, riid_ptr->length);
7236		riid_ptr++;
7237		riid_ptr->type = RII_LUTII;
7238		scsi_ulto2b(0xffff, riid_ptr->length);
7239	}
7240	scsi_ulto2b(len, rii_ptr->length);
7241
7242	ctl_set_success(ctsio);
7243	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7244	ctsio->be_move_done = ctl_config_move_done;
7245	ctl_datamove((union ctl_io *)ctsio);
7246	return(retval);
7247}
7248
7249int
7250ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio)
7251{
7252	struct ctl_softc *softc = CTL_SOFTC(ctsio);
7253	struct ctl_lun *lun = CTL_LUN(ctsio);
7254	struct scsi_maintenance_in *cdb;
7255	int retval;
7256	int alloc_len, ext, total_len = 0, g, pc, pg, ts, os;
7257	int num_ha_groups, num_target_ports, shared_group;
7258	struct ctl_port *port;
7259	struct scsi_target_group_data *rtg_ptr;
7260	struct scsi_target_group_data_extended *rtg_ext_ptr;
7261	struct scsi_target_port_group_descriptor *tpg_desc;
7262
7263	CTL_DEBUG_PRINT(("ctl_report_tagret_port_groups\n"));
7264
7265	cdb = (struct scsi_maintenance_in *)ctsio->cdb;
7266	retval = CTL_RETVAL_COMPLETE;
7267
7268	switch (cdb->byte2 & STG_PDF_MASK) {
7269	case STG_PDF_LENGTH:
7270		ext = 0;
7271		break;
7272	case STG_PDF_EXTENDED:
7273		ext = 1;
7274		break;
7275	default:
7276		ctl_set_invalid_field(/*ctsio*/ ctsio,
7277				      /*sks_valid*/ 1,
7278				      /*command*/ 1,
7279				      /*field*/ 2,
7280				      /*bit_valid*/ 1,
7281				      /*bit*/ 5);
7282		ctl_done((union ctl_io *)ctsio);
7283		return(retval);
7284	}
7285
7286	num_target_ports = 0;
7287	shared_group = (softc->is_single != 0);
7288	mtx_lock(&softc->ctl_lock);
7289	STAILQ_FOREACH(port, &softc->port_list, links) {
7290		if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
7291			continue;
7292		if (ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
7293			continue;
7294		num_target_ports++;
7295		if (port->status & CTL_PORT_STATUS_HA_SHARED)
7296			shared_group = 1;
7297	}
7298	mtx_unlock(&softc->ctl_lock);
7299	num_ha_groups = (softc->is_single) ? 0 : NUM_HA_SHELVES;
7300
7301	if (ext)
7302		total_len = sizeof(struct scsi_target_group_data_extended);
7303	else
7304		total_len = sizeof(struct scsi_target_group_data);
7305	total_len += sizeof(struct scsi_target_port_group_descriptor) *
7306		(shared_group + num_ha_groups) +
7307	    sizeof(struct scsi_target_port_descriptor) * num_target_ports;
7308
7309	alloc_len = scsi_4btoul(cdb->length);
7310
7311	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7312	ctsio->kern_sg_entries = 0;
7313	ctsio->kern_rel_offset = 0;
7314	ctsio->kern_data_len = min(total_len, alloc_len);
7315	ctsio->kern_total_len = ctsio->kern_data_len;
7316
7317	if (ext) {
7318		rtg_ext_ptr = (struct scsi_target_group_data_extended *)
7319		    ctsio->kern_data_ptr;
7320		scsi_ulto4b(total_len - 4, rtg_ext_ptr->length);
7321		rtg_ext_ptr->format_type = 0x10;
7322		rtg_ext_ptr->implicit_transition_time = 0;
7323		tpg_desc = &rtg_ext_ptr->groups[0];
7324	} else {
7325		rtg_ptr = (struct scsi_target_group_data *)
7326		    ctsio->kern_data_ptr;
7327		scsi_ulto4b(total_len - 4, rtg_ptr->length);
7328		tpg_desc = &rtg_ptr->groups[0];
7329	}
7330
7331	mtx_lock(&softc->ctl_lock);
7332	pg = softc->port_min / softc->port_cnt;
7333	if (lun->flags & (CTL_LUN_PRIMARY_SC | CTL_LUN_PEER_SC_PRIMARY)) {
7334		/* Some shelf is known to be primary. */
7335		if (softc->ha_link == CTL_HA_LINK_OFFLINE)
7336			os = TPG_ASYMMETRIC_ACCESS_UNAVAILABLE;
7337		else if (softc->ha_link == CTL_HA_LINK_UNKNOWN)
7338			os = TPG_ASYMMETRIC_ACCESS_TRANSITIONING;
7339		else if (softc->ha_mode == CTL_HA_MODE_ACT_STBY)
7340			os = TPG_ASYMMETRIC_ACCESS_STANDBY;
7341		else
7342			os = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED;
7343		if (lun->flags & CTL_LUN_PRIMARY_SC) {
7344			ts = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
7345		} else {
7346			ts = os;
7347			os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
7348		}
7349	} else {
7350		/* No known primary shelf. */
7351		if (softc->ha_link == CTL_HA_LINK_OFFLINE) {
7352			ts = TPG_ASYMMETRIC_ACCESS_UNAVAILABLE;
7353			os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
7354		} else if (softc->ha_link == CTL_HA_LINK_UNKNOWN) {
7355			ts = TPG_ASYMMETRIC_ACCESS_TRANSITIONING;
7356			os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
7357		} else {
7358			ts = os = TPG_ASYMMETRIC_ACCESS_TRANSITIONING;
7359		}
7360	}
7361	if (shared_group) {
7362		tpg_desc->pref_state = ts;
7363		tpg_desc->support = TPG_AO_SUP | TPG_AN_SUP | TPG_S_SUP |
7364		    TPG_U_SUP | TPG_T_SUP;
7365		scsi_ulto2b(1, tpg_desc->target_port_group);
7366		tpg_desc->status = TPG_IMPLICIT;
7367		pc = 0;
7368		STAILQ_FOREACH(port, &softc->port_list, links) {
7369			if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
7370				continue;
7371			if (!softc->is_single &&
7372			    (port->status & CTL_PORT_STATUS_HA_SHARED) == 0)
7373				continue;
7374			if (ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
7375				continue;
7376			scsi_ulto2b(port->targ_port, tpg_desc->descriptors[pc].
7377			    relative_target_port_identifier);
7378			pc++;
7379		}
7380		tpg_desc->target_port_count = pc;
7381		tpg_desc = (struct scsi_target_port_group_descriptor *)
7382		    &tpg_desc->descriptors[pc];
7383	}
7384	for (g = 0; g < num_ha_groups; g++) {
7385		tpg_desc->pref_state = (g == pg) ? ts : os;
7386		tpg_desc->support = TPG_AO_SUP | TPG_AN_SUP | TPG_S_SUP |
7387		    TPG_U_SUP | TPG_T_SUP;
7388		scsi_ulto2b(2 + g, tpg_desc->target_port_group);
7389		tpg_desc->status = TPG_IMPLICIT;
7390		pc = 0;
7391		STAILQ_FOREACH(port, &softc->port_list, links) {
7392			if (port->targ_port < g * softc->port_cnt ||
7393			    port->targ_port >= (g + 1) * softc->port_cnt)
7394				continue;
7395			if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
7396				continue;
7397			if (port->status & CTL_PORT_STATUS_HA_SHARED)
7398				continue;
7399			if (ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
7400				continue;
7401			scsi_ulto2b(port->targ_port, tpg_desc->descriptors[pc].
7402			    relative_target_port_identifier);
7403			pc++;
7404		}
7405		tpg_desc->target_port_count = pc;
7406		tpg_desc = (struct scsi_target_port_group_descriptor *)
7407		    &tpg_desc->descriptors[pc];
7408	}
7409	mtx_unlock(&softc->ctl_lock);
7410
7411	ctl_set_success(ctsio);
7412	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7413	ctsio->be_move_done = ctl_config_move_done;
7414	ctl_datamove((union ctl_io *)ctsio);
7415	return(retval);
7416}
7417
7418int
7419ctl_report_supported_opcodes(struct ctl_scsiio *ctsio)
7420{
7421	struct ctl_lun *lun = CTL_LUN(ctsio);
7422	struct scsi_report_supported_opcodes *cdb;
7423	const struct ctl_cmd_entry *entry, *sentry;
7424	struct scsi_report_supported_opcodes_all *all;
7425	struct scsi_report_supported_opcodes_descr *descr;
7426	struct scsi_report_supported_opcodes_one *one;
7427	int retval;
7428	int alloc_len, total_len;
7429	int opcode, service_action, i, j, num;
7430
7431	CTL_DEBUG_PRINT(("ctl_report_supported_opcodes\n"));
7432
7433	cdb = (struct scsi_report_supported_opcodes *)ctsio->cdb;
7434	retval = CTL_RETVAL_COMPLETE;
7435
7436	opcode = cdb->requested_opcode;
7437	service_action = scsi_2btoul(cdb->requested_service_action);
7438	switch (cdb->options & RSO_OPTIONS_MASK) {
7439	case RSO_OPTIONS_ALL:
7440		num = 0;
7441		for (i = 0; i < 256; i++) {
7442			entry = &ctl_cmd_table[i];
7443			if (entry->flags & CTL_CMD_FLAG_SA5) {
7444				for (j = 0; j < 32; j++) {
7445					sentry = &((const struct ctl_cmd_entry *)
7446					    entry->execute)[j];
7447					if (ctl_cmd_applicable(
7448					    lun->be_lun->lun_type, sentry))
7449						num++;
7450				}
7451			} else {
7452				if (ctl_cmd_applicable(lun->be_lun->lun_type,
7453				    entry))
7454					num++;
7455			}
7456		}
7457		total_len = sizeof(struct scsi_report_supported_opcodes_all) +
7458		    num * sizeof(struct scsi_report_supported_opcodes_descr);
7459		break;
7460	case RSO_OPTIONS_OC:
7461		if (ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) {
7462			ctl_set_invalid_field(/*ctsio*/ ctsio,
7463					      /*sks_valid*/ 1,
7464					      /*command*/ 1,
7465					      /*field*/ 2,
7466					      /*bit_valid*/ 1,
7467					      /*bit*/ 2);
7468			ctl_done((union ctl_io *)ctsio);
7469			return (CTL_RETVAL_COMPLETE);
7470		}
7471		total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32;
7472		break;
7473	case RSO_OPTIONS_OC_SA:
7474		if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) == 0 ||
7475		    service_action >= 32) {
7476			ctl_set_invalid_field(/*ctsio*/ ctsio,
7477					      /*sks_valid*/ 1,
7478					      /*command*/ 1,
7479					      /*field*/ 2,
7480					      /*bit_valid*/ 1,
7481					      /*bit*/ 2);
7482			ctl_done((union ctl_io *)ctsio);
7483			return (CTL_RETVAL_COMPLETE);
7484		}
7485		/* FALLTHROUGH */
7486	case RSO_OPTIONS_OC_ASA:
7487		total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32;
7488		break;
7489	default:
7490		ctl_set_invalid_field(/*ctsio*/ ctsio,
7491				      /*sks_valid*/ 1,
7492				      /*command*/ 1,
7493				      /*field*/ 2,
7494				      /*bit_valid*/ 1,
7495				      /*bit*/ 2);
7496		ctl_done((union ctl_io *)ctsio);
7497		return (CTL_RETVAL_COMPLETE);
7498	}
7499
7500	alloc_len = scsi_4btoul(cdb->length);
7501
7502	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7503	ctsio->kern_sg_entries = 0;
7504	ctsio->kern_rel_offset = 0;
7505	ctsio->kern_data_len = min(total_len, alloc_len);
7506	ctsio->kern_total_len = ctsio->kern_data_len;
7507
7508	switch (cdb->options & RSO_OPTIONS_MASK) {
7509	case RSO_OPTIONS_ALL:
7510		all = (struct scsi_report_supported_opcodes_all *)
7511		    ctsio->kern_data_ptr;
7512		num = 0;
7513		for (i = 0; i < 256; i++) {
7514			entry = &ctl_cmd_table[i];
7515			if (entry->flags & CTL_CMD_FLAG_SA5) {
7516				for (j = 0; j < 32; j++) {
7517					sentry = &((const struct ctl_cmd_entry *)
7518					    entry->execute)[j];
7519					if (!ctl_cmd_applicable(
7520					    lun->be_lun->lun_type, sentry))
7521						continue;
7522					descr = &all->descr[num++];
7523					descr->opcode = i;
7524					scsi_ulto2b(j, descr->service_action);
7525					descr->flags = RSO_SERVACTV;
7526					scsi_ulto2b(sentry->length,
7527					    descr->cdb_length);
7528				}
7529			} else {
7530				if (!ctl_cmd_applicable(lun->be_lun->lun_type,
7531				    entry))
7532					continue;
7533				descr = &all->descr[num++];
7534				descr->opcode = i;
7535				scsi_ulto2b(0, descr->service_action);
7536				descr->flags = 0;
7537				scsi_ulto2b(entry->length, descr->cdb_length);
7538			}
7539		}
7540		scsi_ulto4b(
7541		    num * sizeof(struct scsi_report_supported_opcodes_descr),
7542		    all->length);
7543		break;
7544	case RSO_OPTIONS_OC:
7545		one = (struct scsi_report_supported_opcodes_one *)
7546		    ctsio->kern_data_ptr;
7547		entry = &ctl_cmd_table[opcode];
7548		goto fill_one;
7549	case RSO_OPTIONS_OC_SA:
7550		one = (struct scsi_report_supported_opcodes_one *)
7551		    ctsio->kern_data_ptr;
7552		entry = &ctl_cmd_table[opcode];
7553		entry = &((const struct ctl_cmd_entry *)
7554		    entry->execute)[service_action];
7555fill_one:
7556		if (ctl_cmd_applicable(lun->be_lun->lun_type, entry)) {
7557			one->support = 3;
7558			scsi_ulto2b(entry->length, one->cdb_length);
7559			one->cdb_usage[0] = opcode;
7560			memcpy(&one->cdb_usage[1], entry->usage,
7561			    entry->length - 1);
7562		} else
7563			one->support = 1;
7564		break;
7565	case RSO_OPTIONS_OC_ASA:
7566		one = (struct scsi_report_supported_opcodes_one *)
7567		    ctsio->kern_data_ptr;
7568		entry = &ctl_cmd_table[opcode];
7569		if (entry->flags & CTL_CMD_FLAG_SA5) {
7570			entry = &((const struct ctl_cmd_entry *)
7571			    entry->execute)[service_action];
7572		} else if (service_action != 0) {
7573			one->support = 1;
7574			break;
7575		}
7576		goto fill_one;
7577	}
7578
7579	ctl_set_success(ctsio);
7580	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7581	ctsio->be_move_done = ctl_config_move_done;
7582	ctl_datamove((union ctl_io *)ctsio);
7583	return(retval);
7584}
7585
7586int
7587ctl_report_supported_tmf(struct ctl_scsiio *ctsio)
7588{
7589	struct scsi_report_supported_tmf *cdb;
7590	struct scsi_report_supported_tmf_ext_data *data;
7591	int retval;
7592	int alloc_len, total_len;
7593
7594	CTL_DEBUG_PRINT(("ctl_report_supported_tmf\n"));
7595
7596	cdb = (struct scsi_report_supported_tmf *)ctsio->cdb;
7597
7598	retval = CTL_RETVAL_COMPLETE;
7599
7600	if (cdb->options & RST_REPD)
7601		total_len = sizeof(struct scsi_report_supported_tmf_ext_data);
7602	else
7603		total_len = sizeof(struct scsi_report_supported_tmf_data);
7604	alloc_len = scsi_4btoul(cdb->length);
7605
7606	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7607	ctsio->kern_sg_entries = 0;
7608	ctsio->kern_rel_offset = 0;
7609	ctsio->kern_data_len = min(total_len, alloc_len);
7610	ctsio->kern_total_len = ctsio->kern_data_len;
7611
7612	data = (struct scsi_report_supported_tmf_ext_data *)ctsio->kern_data_ptr;
7613	data->byte1 |= RST_ATS | RST_ATSS | RST_CTSS | RST_LURS | RST_QTS |
7614	    RST_TRS;
7615	data->byte2 |= RST_QAES | RST_QTSS | RST_ITNRS;
7616	data->length = total_len - 4;
7617
7618	ctl_set_success(ctsio);
7619	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7620	ctsio->be_move_done = ctl_config_move_done;
7621	ctl_datamove((union ctl_io *)ctsio);
7622	return (retval);
7623}
7624
7625int
7626ctl_report_timestamp(struct ctl_scsiio *ctsio)
7627{
7628	struct scsi_report_timestamp *cdb;
7629	struct scsi_report_timestamp_data *data;
7630	struct timeval tv;
7631	int64_t timestamp;
7632	int retval;
7633	int alloc_len, total_len;
7634
7635	CTL_DEBUG_PRINT(("ctl_report_timestamp\n"));
7636
7637	cdb = (struct scsi_report_timestamp *)ctsio->cdb;
7638
7639	retval = CTL_RETVAL_COMPLETE;
7640
7641	total_len = sizeof(struct scsi_report_timestamp_data);
7642	alloc_len = scsi_4btoul(cdb->length);
7643
7644	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7645	ctsio->kern_sg_entries = 0;
7646	ctsio->kern_rel_offset = 0;
7647	ctsio->kern_data_len = min(total_len, alloc_len);
7648	ctsio->kern_total_len = ctsio->kern_data_len;
7649
7650	data = (struct scsi_report_timestamp_data *)ctsio->kern_data_ptr;
7651	scsi_ulto2b(sizeof(*data) - 2, data->length);
7652	data->origin = RTS_ORIG_OUTSIDE;
7653	getmicrotime(&tv);
7654	timestamp = (int64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000;
7655	scsi_ulto4b(timestamp >> 16, data->timestamp);
7656	scsi_ulto2b(timestamp & 0xffff, &data->timestamp[4]);
7657
7658	ctl_set_success(ctsio);
7659	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7660	ctsio->be_move_done = ctl_config_move_done;
7661	ctl_datamove((union ctl_io *)ctsio);
7662	return (retval);
7663}
7664
7665int
7666ctl_persistent_reserve_in(struct ctl_scsiio *ctsio)
7667{
7668	struct ctl_softc *softc = CTL_SOFTC(ctsio);
7669	struct ctl_lun *lun = CTL_LUN(ctsio);
7670	struct scsi_per_res_in *cdb;
7671	int alloc_len, total_len = 0;
7672	/* struct scsi_per_res_in_rsrv in_data; */
7673	uint64_t key;
7674
7675	CTL_DEBUG_PRINT(("ctl_persistent_reserve_in\n"));
7676
7677	cdb = (struct scsi_per_res_in *)ctsio->cdb;
7678
7679	alloc_len = scsi_2btoul(cdb->length);
7680
7681retry:
7682	mtx_lock(&lun->lun_lock);
7683	switch (cdb->action) {
7684	case SPRI_RK: /* read keys */
7685		total_len = sizeof(struct scsi_per_res_in_keys) +
7686			lun->pr_key_count *
7687			sizeof(struct scsi_per_res_key);
7688		break;
7689	case SPRI_RR: /* read reservation */
7690		if (lun->flags & CTL_LUN_PR_RESERVED)
7691			total_len = sizeof(struct scsi_per_res_in_rsrv);
7692		else
7693			total_len = sizeof(struct scsi_per_res_in_header);
7694		break;
7695	case SPRI_RC: /* report capabilities */
7696		total_len = sizeof(struct scsi_per_res_cap);
7697		break;
7698	case SPRI_RS: /* read full status */
7699		total_len = sizeof(struct scsi_per_res_in_header) +
7700		    (sizeof(struct scsi_per_res_in_full_desc) + 256) *
7701		    lun->pr_key_count;
7702		break;
7703	default:
7704		panic("%s: Invalid PR type %#x", __func__, cdb->action);
7705	}
7706	mtx_unlock(&lun->lun_lock);
7707
7708	ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO);
7709	ctsio->kern_rel_offset = 0;
7710	ctsio->kern_sg_entries = 0;
7711	ctsio->kern_data_len = min(total_len, alloc_len);
7712	ctsio->kern_total_len = ctsio->kern_data_len;
7713
7714	mtx_lock(&lun->lun_lock);
7715	switch (cdb->action) {
7716	case SPRI_RK: { // read keys
7717        struct scsi_per_res_in_keys *res_keys;
7718		int i, key_count;
7719
7720		res_keys = (struct scsi_per_res_in_keys*)ctsio->kern_data_ptr;
7721
7722		/*
7723		 * We had to drop the lock to allocate our buffer, which
7724		 * leaves time for someone to come in with another
7725		 * persistent reservation.  (That is unlikely, though,
7726		 * since this should be the only persistent reservation
7727		 * command active right now.)
7728		 */
7729		if (total_len != (sizeof(struct scsi_per_res_in_keys) +
7730		    (lun->pr_key_count *
7731		     sizeof(struct scsi_per_res_key)))){
7732			mtx_unlock(&lun->lun_lock);
7733			free(ctsio->kern_data_ptr, M_CTL);
7734			printf("%s: reservation length changed, retrying\n",
7735			       __func__);
7736			goto retry;
7737		}
7738
7739		scsi_ulto4b(lun->pr_generation, res_keys->header.generation);
7740
7741		scsi_ulto4b(sizeof(struct scsi_per_res_key) *
7742			     lun->pr_key_count, res_keys->header.length);
7743
7744		for (i = 0, key_count = 0; i < CTL_MAX_INITIATORS; i++) {
7745			if ((key = ctl_get_prkey(lun, i)) == 0)
7746				continue;
7747
7748			/*
7749			 * We used lun->pr_key_count to calculate the
7750			 * size to allocate.  If it turns out the number of
7751			 * initiators with the registered flag set is
7752			 * larger than that (i.e. they haven't been kept in
7753			 * sync), we've got a problem.
7754			 */
7755			if (key_count >= lun->pr_key_count) {
7756				key_count++;
7757				continue;
7758			}
7759			scsi_u64to8b(key, res_keys->keys[key_count].key);
7760			key_count++;
7761		}
7762		break;
7763	}
7764	case SPRI_RR: { // read reservation
7765		struct scsi_per_res_in_rsrv *res;
7766		int tmp_len, header_only;
7767
7768		res = (struct scsi_per_res_in_rsrv *)ctsio->kern_data_ptr;
7769
7770		scsi_ulto4b(lun->pr_generation, res->header.generation);
7771
7772		if (lun->flags & CTL_LUN_PR_RESERVED)
7773		{
7774			tmp_len = sizeof(struct scsi_per_res_in_rsrv);
7775			scsi_ulto4b(sizeof(struct scsi_per_res_in_rsrv_data),
7776				    res->header.length);
7777			header_only = 0;
7778		} else {
7779			tmp_len = sizeof(struct scsi_per_res_in_header);
7780			scsi_ulto4b(0, res->header.length);
7781			header_only = 1;
7782		}
7783
7784		/*
7785		 * We had to drop the lock to allocate our buffer, which
7786		 * leaves time for someone to come in with another
7787		 * persistent reservation.  (That is unlikely, though,
7788		 * since this should be the only persistent reservation
7789		 * command active right now.)
7790		 */
7791		if (tmp_len != total_len) {
7792			mtx_unlock(&lun->lun_lock);
7793			free(ctsio->kern_data_ptr, M_CTL);
7794			printf("%s: reservation status changed, retrying\n",
7795			       __func__);
7796			goto retry;
7797		}
7798
7799		/*
7800		 * No reservation held, so we're done.
7801		 */
7802		if (header_only != 0)
7803			break;
7804
7805		/*
7806		 * If the registration is an All Registrants type, the key
7807		 * is 0, since it doesn't really matter.
7808		 */
7809		if (lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) {
7810			scsi_u64to8b(ctl_get_prkey(lun, lun->pr_res_idx),
7811			    res->data.reservation);
7812		}
7813		res->data.scopetype = lun->pr_res_type;
7814		break;
7815	}
7816	case SPRI_RC:     //report capabilities
7817	{
7818		struct scsi_per_res_cap *res_cap;
7819		uint16_t type_mask;
7820
7821		res_cap = (struct scsi_per_res_cap *)ctsio->kern_data_ptr;
7822		scsi_ulto2b(sizeof(*res_cap), res_cap->length);
7823		res_cap->flags1 = SPRI_CRH;
7824		res_cap->flags2 = SPRI_TMV | SPRI_ALLOW_5;
7825		type_mask = SPRI_TM_WR_EX_AR |
7826			    SPRI_TM_EX_AC_RO |
7827			    SPRI_TM_WR_EX_RO |
7828			    SPRI_TM_EX_AC |
7829			    SPRI_TM_WR_EX |
7830			    SPRI_TM_EX_AC_AR;
7831		scsi_ulto2b(type_mask, res_cap->type_mask);
7832		break;
7833	}
7834	case SPRI_RS: { // read full status
7835		struct scsi_per_res_in_full *res_status;
7836		struct scsi_per_res_in_full_desc *res_desc;
7837		struct ctl_port *port;
7838		int i, len;
7839
7840		res_status = (struct scsi_per_res_in_full*)ctsio->kern_data_ptr;
7841
7842		/*
7843		 * We had to drop the lock to allocate our buffer, which
7844		 * leaves time for someone to come in with another
7845		 * persistent reservation.  (That is unlikely, though,
7846		 * since this should be the only persistent reservation
7847		 * command active right now.)
7848		 */
7849		if (total_len < (sizeof(struct scsi_per_res_in_header) +
7850		    (sizeof(struct scsi_per_res_in_full_desc) + 256) *
7851		     lun->pr_key_count)){
7852			mtx_unlock(&lun->lun_lock);
7853			free(ctsio->kern_data_ptr, M_CTL);
7854			printf("%s: reservation length changed, retrying\n",
7855			       __func__);
7856			goto retry;
7857		}
7858
7859		scsi_ulto4b(lun->pr_generation, res_status->header.generation);
7860
7861		res_desc = &res_status->desc[0];
7862		for (i = 0; i < CTL_MAX_INITIATORS; i++) {
7863			if ((key = ctl_get_prkey(lun, i)) == 0)
7864				continue;
7865
7866			scsi_u64to8b(key, res_desc->res_key.key);
7867			if ((lun->flags & CTL_LUN_PR_RESERVED) &&
7868			    (lun->pr_res_idx == i ||
7869			     lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS)) {
7870				res_desc->flags = SPRI_FULL_R_HOLDER;
7871				res_desc->scopetype = lun->pr_res_type;
7872			}
7873			scsi_ulto2b(i / CTL_MAX_INIT_PER_PORT,
7874			    res_desc->rel_trgt_port_id);
7875			len = 0;
7876			port = softc->ctl_ports[i / CTL_MAX_INIT_PER_PORT];
7877			if (port != NULL)
7878				len = ctl_create_iid(port,
7879				    i % CTL_MAX_INIT_PER_PORT,
7880				    res_desc->transport_id);
7881			scsi_ulto4b(len, res_desc->additional_length);
7882			res_desc = (struct scsi_per_res_in_full_desc *)
7883			    &res_desc->transport_id[len];
7884		}
7885		scsi_ulto4b((uint8_t *)res_desc - (uint8_t *)&res_status->desc[0],
7886		    res_status->header.length);
7887		break;
7888	}
7889	default:
7890		panic("%s: Invalid PR type %#x", __func__, cdb->action);
7891	}
7892	mtx_unlock(&lun->lun_lock);
7893
7894	ctl_set_success(ctsio);
7895	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
7896	ctsio->be_move_done = ctl_config_move_done;
7897	ctl_datamove((union ctl_io *)ctsio);
7898	return (CTL_RETVAL_COMPLETE);
7899}
7900
7901/*
7902 * Returns 0 if ctl_persistent_reserve_out() should continue, non-zero if
7903 * it should return.
7904 */
7905static int
7906ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
7907		uint64_t sa_res_key, uint8_t type, uint32_t residx,
7908		struct ctl_scsiio *ctsio, struct scsi_per_res_out *cdb,
7909		struct scsi_per_res_out_parms* param)
7910{
7911	union ctl_ha_msg persis_io;
7912	int i;
7913
7914	mtx_lock(&lun->lun_lock);
7915	if (sa_res_key == 0) {
7916		if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) {
7917			/* validate scope and type */
7918			if ((cdb->scope_type & SPR_SCOPE_MASK) !=
7919			     SPR_LU_SCOPE) {
7920				mtx_unlock(&lun->lun_lock);
7921				ctl_set_invalid_field(/*ctsio*/ ctsio,
7922						      /*sks_valid*/ 1,
7923						      /*command*/ 1,
7924						      /*field*/ 2,
7925						      /*bit_valid*/ 1,
7926						      /*bit*/ 4);
7927				ctl_done((union ctl_io *)ctsio);
7928				return (1);
7929			}
7930
7931		        if (type>8 || type==2 || type==4 || type==0) {
7932				mtx_unlock(&lun->lun_lock);
7933				ctl_set_invalid_field(/*ctsio*/ ctsio,
7934       	           				      /*sks_valid*/ 1,
7935						      /*command*/ 1,
7936						      /*field*/ 2,
7937						      /*bit_valid*/ 1,
7938						      /*bit*/ 0);
7939				ctl_done((union ctl_io *)ctsio);
7940				return (1);
7941		        }
7942
7943			/*
7944			 * Unregister everybody else and build UA for
7945			 * them
7946			 */
7947			for(i = 0; i < CTL_MAX_INITIATORS; i++) {
7948				if (i == residx || ctl_get_prkey(lun, i) == 0)
7949					continue;
7950
7951				ctl_clr_prkey(lun, i);
7952				ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
7953			}
7954			lun->pr_key_count = 1;
7955			lun->pr_res_type = type;
7956			if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
7957			    lun->pr_res_type != SPR_TYPE_EX_AC_AR)
7958				lun->pr_res_idx = residx;
7959			lun->pr_generation++;
7960			mtx_unlock(&lun->lun_lock);
7961
7962			/* send msg to other side */
7963			persis_io.hdr.nexus = ctsio->io_hdr.nexus;
7964			persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
7965			persis_io.pr.pr_info.action = CTL_PR_PREEMPT;
7966			persis_io.pr.pr_info.residx = lun->pr_res_idx;
7967			persis_io.pr.pr_info.res_type = type;
7968			memcpy(persis_io.pr.pr_info.sa_res_key,
7969			       param->serv_act_res_key,
7970			       sizeof(param->serv_act_res_key));
7971			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
7972			    sizeof(persis_io.pr), M_WAITOK);
7973		} else {
7974			/* not all registrants */
7975			mtx_unlock(&lun->lun_lock);
7976			free(ctsio->kern_data_ptr, M_CTL);
7977			ctl_set_invalid_field(ctsio,
7978					      /*sks_valid*/ 1,
7979					      /*command*/ 0,
7980					      /*field*/ 8,
7981					      /*bit_valid*/ 0,
7982					      /*bit*/ 0);
7983			ctl_done((union ctl_io *)ctsio);
7984			return (1);
7985		}
7986	} else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS
7987		|| !(lun->flags & CTL_LUN_PR_RESERVED)) {
7988		int found = 0;
7989
7990		if (res_key == sa_res_key) {
7991			/* special case */
7992			/*
7993			 * The spec implies this is not good but doesn't
7994			 * say what to do. There are two choices either
7995			 * generate a res conflict or check condition
7996			 * with illegal field in parameter data. Since
7997			 * that is what is done when the sa_res_key is
7998			 * zero I'll take that approach since this has
7999			 * to do with the sa_res_key.
8000			 */
8001			mtx_unlock(&lun->lun_lock);
8002			free(ctsio->kern_data_ptr, M_CTL);
8003			ctl_set_invalid_field(ctsio,
8004					      /*sks_valid*/ 1,
8005					      /*command*/ 0,
8006					      /*field*/ 8,
8007					      /*bit_valid*/ 0,
8008					      /*bit*/ 0);
8009			ctl_done((union ctl_io *)ctsio);
8010			return (1);
8011		}
8012
8013		for (i = 0; i < CTL_MAX_INITIATORS; i++) {
8014			if (ctl_get_prkey(lun, i) != sa_res_key)
8015				continue;
8016
8017			found = 1;
8018			ctl_clr_prkey(lun, i);
8019			lun->pr_key_count--;
8020			ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8021		}
8022		if (!found) {
8023			mtx_unlock(&lun->lun_lock);
8024			free(ctsio->kern_data_ptr, M_CTL);
8025			ctl_set_reservation_conflict(ctsio);
8026			ctl_done((union ctl_io *)ctsio);
8027			return (CTL_RETVAL_COMPLETE);
8028		}
8029		lun->pr_generation++;
8030		mtx_unlock(&lun->lun_lock);
8031
8032		/* send msg to other side */
8033		persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8034		persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8035		persis_io.pr.pr_info.action = CTL_PR_PREEMPT;
8036		persis_io.pr.pr_info.residx = lun->pr_res_idx;
8037		persis_io.pr.pr_info.res_type = type;
8038		memcpy(persis_io.pr.pr_info.sa_res_key,
8039		       param->serv_act_res_key,
8040		       sizeof(param->serv_act_res_key));
8041		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8042		    sizeof(persis_io.pr), M_WAITOK);
8043	} else {
8044		/* Reserved but not all registrants */
8045		/* sa_res_key is res holder */
8046		if (sa_res_key == ctl_get_prkey(lun, lun->pr_res_idx)) {
8047			/* validate scope and type */
8048			if ((cdb->scope_type & SPR_SCOPE_MASK) !=
8049			     SPR_LU_SCOPE) {
8050				mtx_unlock(&lun->lun_lock);
8051				ctl_set_invalid_field(/*ctsio*/ ctsio,
8052						      /*sks_valid*/ 1,
8053						      /*command*/ 1,
8054						      /*field*/ 2,
8055						      /*bit_valid*/ 1,
8056						      /*bit*/ 4);
8057				ctl_done((union ctl_io *)ctsio);
8058				return (1);
8059			}
8060
8061			if (type>8 || type==2 || type==4 || type==0) {
8062				mtx_unlock(&lun->lun_lock);
8063				ctl_set_invalid_field(/*ctsio*/ ctsio,
8064						      /*sks_valid*/ 1,
8065						      /*command*/ 1,
8066						      /*field*/ 2,
8067						      /*bit_valid*/ 1,
8068						      /*bit*/ 0);
8069				ctl_done((union ctl_io *)ctsio);
8070				return (1);
8071			}
8072
8073			/*
8074			 * Do the following:
8075			 * if sa_res_key != res_key remove all
8076			 * registrants w/sa_res_key and generate UA
8077			 * for these registrants(Registrations
8078			 * Preempted) if it wasn't an exclusive
8079			 * reservation generate UA(Reservations
8080			 * Preempted) for all other registered nexuses
8081			 * if the type has changed. Establish the new
8082			 * reservation and holder. If res_key and
8083			 * sa_res_key are the same do the above
8084			 * except don't unregister the res holder.
8085			 */
8086
8087			for(i = 0; i < CTL_MAX_INITIATORS; i++) {
8088				if (i == residx || ctl_get_prkey(lun, i) == 0)
8089					continue;
8090
8091				if (sa_res_key == ctl_get_prkey(lun, i)) {
8092					ctl_clr_prkey(lun, i);
8093					lun->pr_key_count--;
8094					ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8095				} else if (type != lun->pr_res_type &&
8096				    (lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
8097				     lun->pr_res_type == SPR_TYPE_EX_AC_RO)) {
8098					ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
8099				}
8100			}
8101			lun->pr_res_type = type;
8102			if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
8103			    lun->pr_res_type != SPR_TYPE_EX_AC_AR)
8104				lun->pr_res_idx = residx;
8105			else
8106				lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS;
8107			lun->pr_generation++;
8108			mtx_unlock(&lun->lun_lock);
8109
8110			persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8111			persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8112			persis_io.pr.pr_info.action = CTL_PR_PREEMPT;
8113			persis_io.pr.pr_info.residx = lun->pr_res_idx;
8114			persis_io.pr.pr_info.res_type = type;
8115			memcpy(persis_io.pr.pr_info.sa_res_key,
8116			       param->serv_act_res_key,
8117			       sizeof(param->serv_act_res_key));
8118			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8119			    sizeof(persis_io.pr), M_WAITOK);
8120		} else {
8121			/*
8122			 * sa_res_key is not the res holder just
8123			 * remove registrants
8124			 */
8125			int found=0;
8126
8127			for (i = 0; i < CTL_MAX_INITIATORS; i++) {
8128				if (sa_res_key != ctl_get_prkey(lun, i))
8129					continue;
8130
8131				found = 1;
8132				ctl_clr_prkey(lun, i);
8133				lun->pr_key_count--;
8134				ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8135			}
8136
8137			if (!found) {
8138				mtx_unlock(&lun->lun_lock);
8139				free(ctsio->kern_data_ptr, M_CTL);
8140				ctl_set_reservation_conflict(ctsio);
8141				ctl_done((union ctl_io *)ctsio);
8142		        	return (1);
8143			}
8144			lun->pr_generation++;
8145			mtx_unlock(&lun->lun_lock);
8146
8147			persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8148			persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8149			persis_io.pr.pr_info.action = CTL_PR_PREEMPT;
8150			persis_io.pr.pr_info.residx = lun->pr_res_idx;
8151			persis_io.pr.pr_info.res_type = type;
8152			memcpy(persis_io.pr.pr_info.sa_res_key,
8153			       param->serv_act_res_key,
8154			       sizeof(param->serv_act_res_key));
8155			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8156			    sizeof(persis_io.pr), M_WAITOK);
8157		}
8158	}
8159	return (0);
8160}
8161
8162static void
8163ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg)
8164{
8165	uint64_t sa_res_key;
8166	int i;
8167
8168	sa_res_key = scsi_8btou64(msg->pr.pr_info.sa_res_key);
8169
8170	if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS
8171	 || lun->pr_res_idx == CTL_PR_NO_RESERVATION
8172	 || sa_res_key != ctl_get_prkey(lun, lun->pr_res_idx)) {
8173		if (sa_res_key == 0) {
8174			/*
8175			 * Unregister everybody else and build UA for
8176			 * them
8177			 */
8178			for(i = 0; i < CTL_MAX_INITIATORS; i++) {
8179				if (i == msg->pr.pr_info.residx ||
8180				    ctl_get_prkey(lun, i) == 0)
8181					continue;
8182
8183				ctl_clr_prkey(lun, i);
8184				ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8185			}
8186
8187			lun->pr_key_count = 1;
8188			lun->pr_res_type = msg->pr.pr_info.res_type;
8189			if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
8190			    lun->pr_res_type != SPR_TYPE_EX_AC_AR)
8191				lun->pr_res_idx = msg->pr.pr_info.residx;
8192		} else {
8193		        for (i = 0; i < CTL_MAX_INITIATORS; i++) {
8194				if (sa_res_key == ctl_get_prkey(lun, i))
8195					continue;
8196
8197				ctl_clr_prkey(lun, i);
8198				lun->pr_key_count--;
8199				ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8200			}
8201		}
8202	} else {
8203		for (i = 0; i < CTL_MAX_INITIATORS; i++) {
8204			if (i == msg->pr.pr_info.residx ||
8205			    ctl_get_prkey(lun, i) == 0)
8206				continue;
8207
8208			if (sa_res_key == ctl_get_prkey(lun, i)) {
8209				ctl_clr_prkey(lun, i);
8210				lun->pr_key_count--;
8211				ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8212			} else if (msg->pr.pr_info.res_type != lun->pr_res_type
8213			    && (lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
8214			     lun->pr_res_type == SPR_TYPE_EX_AC_RO)) {
8215				ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
8216			}
8217		}
8218		lun->pr_res_type = msg->pr.pr_info.res_type;
8219		if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
8220		    lun->pr_res_type != SPR_TYPE_EX_AC_AR)
8221			lun->pr_res_idx = msg->pr.pr_info.residx;
8222		else
8223			lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS;
8224	}
8225	lun->pr_generation++;
8226
8227}
8228
8229int
8230ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
8231{
8232	struct ctl_softc *softc = CTL_SOFTC(ctsio);
8233	struct ctl_lun *lun = CTL_LUN(ctsio);
8234	int retval;
8235	uint32_t param_len;
8236	struct scsi_per_res_out *cdb;
8237	struct scsi_per_res_out_parms* param;
8238	uint32_t residx;
8239	uint64_t res_key, sa_res_key, key;
8240	uint8_t type;
8241	union ctl_ha_msg persis_io;
8242	int    i;
8243
8244	CTL_DEBUG_PRINT(("ctl_persistent_reserve_out\n"));
8245
8246	cdb = (struct scsi_per_res_out *)ctsio->cdb;
8247	retval = CTL_RETVAL_COMPLETE;
8248
8249	/*
8250	 * We only support whole-LUN scope.  The scope & type are ignored for
8251	 * register, register and ignore existing key and clear.
8252	 * We sometimes ignore scope and type on preempts too!!
8253	 * Verify reservation type here as well.
8254	 */
8255	type = cdb->scope_type & SPR_TYPE_MASK;
8256	if ((cdb->action == SPRO_RESERVE)
8257	 || (cdb->action == SPRO_RELEASE)) {
8258		if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) {
8259			ctl_set_invalid_field(/*ctsio*/ ctsio,
8260					      /*sks_valid*/ 1,
8261					      /*command*/ 1,
8262					      /*field*/ 2,
8263					      /*bit_valid*/ 1,
8264					      /*bit*/ 4);
8265			ctl_done((union ctl_io *)ctsio);
8266			return (CTL_RETVAL_COMPLETE);
8267		}
8268
8269		if (type>8 || type==2 || type==4 || type==0) {
8270			ctl_set_invalid_field(/*ctsio*/ ctsio,
8271					      /*sks_valid*/ 1,
8272					      /*command*/ 1,
8273					      /*field*/ 2,
8274					      /*bit_valid*/ 1,
8275					      /*bit*/ 0);
8276			ctl_done((union ctl_io *)ctsio);
8277			return (CTL_RETVAL_COMPLETE);
8278		}
8279	}
8280
8281	param_len = scsi_4btoul(cdb->length);
8282
8283	if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
8284		ctsio->kern_data_ptr = malloc(param_len, M_CTL, M_WAITOK);
8285		ctsio->kern_data_len = param_len;
8286		ctsio->kern_total_len = param_len;
8287		ctsio->kern_rel_offset = 0;
8288		ctsio->kern_sg_entries = 0;
8289		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
8290		ctsio->be_move_done = ctl_config_move_done;
8291		ctl_datamove((union ctl_io *)ctsio);
8292
8293		return (CTL_RETVAL_COMPLETE);
8294	}
8295
8296	param = (struct scsi_per_res_out_parms *)ctsio->kern_data_ptr;
8297
8298	residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
8299	res_key = scsi_8btou64(param->res_key.key);
8300	sa_res_key = scsi_8btou64(param->serv_act_res_key);
8301
8302	/*
8303	 * Validate the reservation key here except for SPRO_REG_IGNO
8304	 * This must be done for all other service actions
8305	 */
8306	if ((cdb->action & SPRO_ACTION_MASK) != SPRO_REG_IGNO) {
8307		mtx_lock(&lun->lun_lock);
8308		if ((key = ctl_get_prkey(lun, residx)) != 0) {
8309			if (res_key != key) {
8310				/*
8311				 * The current key passed in doesn't match
8312				 * the one the initiator previously
8313				 * registered.
8314				 */
8315				mtx_unlock(&lun->lun_lock);
8316				free(ctsio->kern_data_ptr, M_CTL);
8317				ctl_set_reservation_conflict(ctsio);
8318				ctl_done((union ctl_io *)ctsio);
8319				return (CTL_RETVAL_COMPLETE);
8320			}
8321		} else if ((cdb->action & SPRO_ACTION_MASK) != SPRO_REGISTER) {
8322			/*
8323			 * We are not registered
8324			 */
8325			mtx_unlock(&lun->lun_lock);
8326			free(ctsio->kern_data_ptr, M_CTL);
8327			ctl_set_reservation_conflict(ctsio);
8328			ctl_done((union ctl_io *)ctsio);
8329			return (CTL_RETVAL_COMPLETE);
8330		} else if (res_key != 0) {
8331			/*
8332			 * We are not registered and trying to register but
8333			 * the register key isn't zero.
8334			 */
8335			mtx_unlock(&lun->lun_lock);
8336			free(ctsio->kern_data_ptr, M_CTL);
8337			ctl_set_reservation_conflict(ctsio);
8338			ctl_done((union ctl_io *)ctsio);
8339			return (CTL_RETVAL_COMPLETE);
8340		}
8341		mtx_unlock(&lun->lun_lock);
8342	}
8343
8344	switch (cdb->action & SPRO_ACTION_MASK) {
8345	case SPRO_REGISTER:
8346	case SPRO_REG_IGNO: {
8347		/*
8348		 * We don't support any of these options, as we report in
8349		 * the read capabilities request (see
8350		 * ctl_persistent_reserve_in(), above).
8351		 */
8352		if ((param->flags & SPR_SPEC_I_PT)
8353		 || (param->flags & SPR_ALL_TG_PT)
8354		 || (param->flags & SPR_APTPL)) {
8355			int bit_ptr;
8356
8357			if (param->flags & SPR_APTPL)
8358				bit_ptr = 0;
8359			else if (param->flags & SPR_ALL_TG_PT)
8360				bit_ptr = 2;
8361			else /* SPR_SPEC_I_PT */
8362				bit_ptr = 3;
8363
8364			free(ctsio->kern_data_ptr, M_CTL);
8365			ctl_set_invalid_field(ctsio,
8366					      /*sks_valid*/ 1,
8367					      /*command*/ 0,
8368					      /*field*/ 20,
8369					      /*bit_valid*/ 1,
8370					      /*bit*/ bit_ptr);
8371			ctl_done((union ctl_io *)ctsio);
8372			return (CTL_RETVAL_COMPLETE);
8373		}
8374
8375		mtx_lock(&lun->lun_lock);
8376
8377		/*
8378		 * The initiator wants to clear the
8379		 * key/unregister.
8380		 */
8381		if (sa_res_key == 0) {
8382			if ((res_key == 0
8383			  && (cdb->action & SPRO_ACTION_MASK) == SPRO_REGISTER)
8384			 || ((cdb->action & SPRO_ACTION_MASK) == SPRO_REG_IGNO
8385			  && ctl_get_prkey(lun, residx) == 0)) {
8386				mtx_unlock(&lun->lun_lock);
8387				goto done;
8388			}
8389
8390			ctl_clr_prkey(lun, residx);
8391			lun->pr_key_count--;
8392
8393			if (residx == lun->pr_res_idx) {
8394				lun->flags &= ~CTL_LUN_PR_RESERVED;
8395				lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8396
8397				if ((lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
8398				     lun->pr_res_type == SPR_TYPE_EX_AC_RO) &&
8399				    lun->pr_key_count) {
8400					/*
8401					 * If the reservation is a registrants
8402					 * only type we need to generate a UA
8403					 * for other registered inits.  The
8404					 * sense code should be RESERVATIONS
8405					 * RELEASED
8406					 */
8407
8408					for (i = softc->init_min; i < softc->init_max; i++){
8409						if (ctl_get_prkey(lun, i) == 0)
8410							continue;
8411						ctl_est_ua(lun, i,
8412						    CTL_UA_RES_RELEASE);
8413					}
8414				}
8415				lun->pr_res_type = 0;
8416			} else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) {
8417				if (lun->pr_key_count==0) {
8418					lun->flags &= ~CTL_LUN_PR_RESERVED;
8419					lun->pr_res_type = 0;
8420					lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8421				}
8422			}
8423			lun->pr_generation++;
8424			mtx_unlock(&lun->lun_lock);
8425
8426			persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8427			persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8428			persis_io.pr.pr_info.action = CTL_PR_UNREG_KEY;
8429			persis_io.pr.pr_info.residx = residx;
8430			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8431			    sizeof(persis_io.pr), M_WAITOK);
8432		} else /* sa_res_key != 0 */ {
8433			/*
8434			 * If we aren't registered currently then increment
8435			 * the key count and set the registered flag.
8436			 */
8437			ctl_alloc_prkey(lun, residx);
8438			if (ctl_get_prkey(lun, residx) == 0)
8439				lun->pr_key_count++;
8440			ctl_set_prkey(lun, residx, sa_res_key);
8441			lun->pr_generation++;
8442			mtx_unlock(&lun->lun_lock);
8443
8444			persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8445			persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8446			persis_io.pr.pr_info.action = CTL_PR_REG_KEY;
8447			persis_io.pr.pr_info.residx = residx;
8448			memcpy(persis_io.pr.pr_info.sa_res_key,
8449			       param->serv_act_res_key,
8450			       sizeof(param->serv_act_res_key));
8451			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8452			    sizeof(persis_io.pr), M_WAITOK);
8453		}
8454
8455		break;
8456	}
8457	case SPRO_RESERVE:
8458		mtx_lock(&lun->lun_lock);
8459		if (lun->flags & CTL_LUN_PR_RESERVED) {
8460			/*
8461			 * if this isn't the reservation holder and it's
8462			 * not a "all registrants" type or if the type is
8463			 * different then we have a conflict
8464			 */
8465			if ((lun->pr_res_idx != residx
8466			  && lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS)
8467			 || lun->pr_res_type != type) {
8468				mtx_unlock(&lun->lun_lock);
8469				free(ctsio->kern_data_ptr, M_CTL);
8470				ctl_set_reservation_conflict(ctsio);
8471				ctl_done((union ctl_io *)ctsio);
8472				return (CTL_RETVAL_COMPLETE);
8473			}
8474			mtx_unlock(&lun->lun_lock);
8475		} else /* create a reservation */ {
8476			/*
8477			 * If it's not an "all registrants" type record
8478			 * reservation holder
8479			 */
8480			if (type != SPR_TYPE_WR_EX_AR
8481			 && type != SPR_TYPE_EX_AC_AR)
8482				lun->pr_res_idx = residx; /* Res holder */
8483			else
8484				lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS;
8485
8486			lun->flags |= CTL_LUN_PR_RESERVED;
8487			lun->pr_res_type = type;
8488
8489			mtx_unlock(&lun->lun_lock);
8490
8491			/* send msg to other side */
8492			persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8493			persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8494			persis_io.pr.pr_info.action = CTL_PR_RESERVE;
8495			persis_io.pr.pr_info.residx = lun->pr_res_idx;
8496			persis_io.pr.pr_info.res_type = type;
8497			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8498			    sizeof(persis_io.pr), M_WAITOK);
8499		}
8500		break;
8501
8502	case SPRO_RELEASE:
8503		mtx_lock(&lun->lun_lock);
8504		if ((lun->flags & CTL_LUN_PR_RESERVED) == 0) {
8505			/* No reservation exists return good status */
8506			mtx_unlock(&lun->lun_lock);
8507			goto done;
8508		}
8509		/*
8510		 * Is this nexus a reservation holder?
8511		 */
8512		if (lun->pr_res_idx != residx
8513		 && lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) {
8514			/*
8515			 * not a res holder return good status but
8516			 * do nothing
8517			 */
8518			mtx_unlock(&lun->lun_lock);
8519			goto done;
8520		}
8521
8522		if (lun->pr_res_type != type) {
8523			mtx_unlock(&lun->lun_lock);
8524			free(ctsio->kern_data_ptr, M_CTL);
8525			ctl_set_illegal_pr_release(ctsio);
8526			ctl_done((union ctl_io *)ctsio);
8527			return (CTL_RETVAL_COMPLETE);
8528		}
8529
8530		/* okay to release */
8531		lun->flags &= ~CTL_LUN_PR_RESERVED;
8532		lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8533		lun->pr_res_type = 0;
8534
8535		/*
8536		 * If this isn't an exclusive access reservation and NUAR
8537		 * is not set, generate UA for all other registrants.
8538		 */
8539		if (type != SPR_TYPE_EX_AC && type != SPR_TYPE_WR_EX &&
8540		    (lun->MODE_CTRL.queue_flags & SCP_NUAR) == 0) {
8541			for (i = softc->init_min; i < softc->init_max; i++) {
8542				if (i == residx || ctl_get_prkey(lun, i) == 0)
8543					continue;
8544				ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
8545			}
8546		}
8547		mtx_unlock(&lun->lun_lock);
8548
8549		/* Send msg to other side */
8550		persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8551		persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8552		persis_io.pr.pr_info.action = CTL_PR_RELEASE;
8553		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8554		     sizeof(persis_io.pr), M_WAITOK);
8555		break;
8556
8557	case SPRO_CLEAR:
8558		/* send msg to other side */
8559
8560		mtx_lock(&lun->lun_lock);
8561		lun->flags &= ~CTL_LUN_PR_RESERVED;
8562		lun->pr_res_type = 0;
8563		lun->pr_key_count = 0;
8564		lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8565
8566		ctl_clr_prkey(lun, residx);
8567		for (i = 0; i < CTL_MAX_INITIATORS; i++)
8568			if (ctl_get_prkey(lun, i) != 0) {
8569				ctl_clr_prkey(lun, i);
8570				ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8571			}
8572		lun->pr_generation++;
8573		mtx_unlock(&lun->lun_lock);
8574
8575		persis_io.hdr.nexus = ctsio->io_hdr.nexus;
8576		persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
8577		persis_io.pr.pr_info.action = CTL_PR_CLEAR;
8578		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
8579		     sizeof(persis_io.pr), M_WAITOK);
8580		break;
8581
8582	case SPRO_PREEMPT:
8583	case SPRO_PRE_ABO: {
8584		int nretval;
8585
8586		nretval = ctl_pro_preempt(softc, lun, res_key, sa_res_key, type,
8587					  residx, ctsio, cdb, param);
8588		if (nretval != 0)
8589			return (CTL_RETVAL_COMPLETE);
8590		break;
8591	}
8592	default:
8593		panic("%s: Invalid PR type %#x", __func__, cdb->action);
8594	}
8595
8596done:
8597	free(ctsio->kern_data_ptr, M_CTL);
8598	ctl_set_success(ctsio);
8599	ctl_done((union ctl_io *)ctsio);
8600
8601	return (retval);
8602}
8603
8604/*
8605 * This routine is for handling a message from the other SC pertaining to
8606 * persistent reserve out. All the error checking will have been done
8607 * so only performing the action need be done here to keep the two
8608 * in sync.
8609 */
8610static void
8611ctl_hndl_per_res_out_on_other_sc(union ctl_io *io)
8612{
8613	struct ctl_softc *softc = CTL_SOFTC(io);
8614	union ctl_ha_msg *msg = (union ctl_ha_msg *)&io->presio.pr_msg;
8615	struct ctl_lun *lun;
8616	int i;
8617	uint32_t residx, targ_lun;
8618
8619	targ_lun = msg->hdr.nexus.targ_mapped_lun;
8620	mtx_lock(&softc->ctl_lock);
8621	if (targ_lun >= ctl_max_luns ||
8622	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
8623		mtx_unlock(&softc->ctl_lock);
8624		return;
8625	}
8626	mtx_lock(&lun->lun_lock);
8627	mtx_unlock(&softc->ctl_lock);
8628	if (lun->flags & CTL_LUN_DISABLED) {
8629		mtx_unlock(&lun->lun_lock);
8630		return;
8631	}
8632	residx = ctl_get_initindex(&msg->hdr.nexus);
8633	switch(msg->pr.pr_info.action) {
8634	case CTL_PR_REG_KEY:
8635		ctl_alloc_prkey(lun, msg->pr.pr_info.residx);
8636		if (ctl_get_prkey(lun, msg->pr.pr_info.residx) == 0)
8637			lun->pr_key_count++;
8638		ctl_set_prkey(lun, msg->pr.pr_info.residx,
8639		    scsi_8btou64(msg->pr.pr_info.sa_res_key));
8640		lun->pr_generation++;
8641		break;
8642
8643	case CTL_PR_UNREG_KEY:
8644		ctl_clr_prkey(lun, msg->pr.pr_info.residx);
8645		lun->pr_key_count--;
8646
8647		/* XXX Need to see if the reservation has been released */
8648		/* if so do we need to generate UA? */
8649		if (msg->pr.pr_info.residx == lun->pr_res_idx) {
8650			lun->flags &= ~CTL_LUN_PR_RESERVED;
8651			lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8652
8653			if ((lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
8654			     lun->pr_res_type == SPR_TYPE_EX_AC_RO) &&
8655			    lun->pr_key_count) {
8656				/*
8657				 * If the reservation is a registrants
8658				 * only type we need to generate a UA
8659				 * for other registered inits.  The
8660				 * sense code should be RESERVATIONS
8661				 * RELEASED
8662				 */
8663
8664				for (i = softc->init_min; i < softc->init_max; i++) {
8665					if (ctl_get_prkey(lun, i) == 0)
8666						continue;
8667
8668					ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
8669				}
8670			}
8671			lun->pr_res_type = 0;
8672		} else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) {
8673			if (lun->pr_key_count==0) {
8674				lun->flags &= ~CTL_LUN_PR_RESERVED;
8675				lun->pr_res_type = 0;
8676				lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8677			}
8678		}
8679		lun->pr_generation++;
8680		break;
8681
8682	case CTL_PR_RESERVE:
8683		lun->flags |= CTL_LUN_PR_RESERVED;
8684		lun->pr_res_type = msg->pr.pr_info.res_type;
8685		lun->pr_res_idx = msg->pr.pr_info.residx;
8686
8687		break;
8688
8689	case CTL_PR_RELEASE:
8690		/*
8691		 * If this isn't an exclusive access reservation and NUAR
8692		 * is not set, generate UA for all other registrants.
8693		 */
8694		if (lun->pr_res_type != SPR_TYPE_EX_AC &&
8695		    lun->pr_res_type != SPR_TYPE_WR_EX &&
8696		    (lun->MODE_CTRL.queue_flags & SCP_NUAR) == 0) {
8697			for (i = softc->init_min; i < softc->init_max; i++) {
8698				if (i == residx || ctl_get_prkey(lun, i) == 0)
8699					continue;
8700				ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
8701			}
8702		}
8703
8704		lun->flags &= ~CTL_LUN_PR_RESERVED;
8705		lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8706		lun->pr_res_type = 0;
8707		break;
8708
8709	case CTL_PR_PREEMPT:
8710		ctl_pro_preempt_other(lun, msg);
8711		break;
8712	case CTL_PR_CLEAR:
8713		lun->flags &= ~CTL_LUN_PR_RESERVED;
8714		lun->pr_res_type = 0;
8715		lun->pr_key_count = 0;
8716		lun->pr_res_idx = CTL_PR_NO_RESERVATION;
8717
8718		for (i=0; i < CTL_MAX_INITIATORS; i++) {
8719			if (ctl_get_prkey(lun, i) == 0)
8720				continue;
8721			ctl_clr_prkey(lun, i);
8722			ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
8723		}
8724		lun->pr_generation++;
8725		break;
8726	}
8727
8728	mtx_unlock(&lun->lun_lock);
8729}
8730
8731int
8732ctl_read_write(struct ctl_scsiio *ctsio)
8733{
8734	struct ctl_lun *lun = CTL_LUN(ctsio);
8735	struct ctl_lba_len_flags *lbalen;
8736	uint64_t lba;
8737	uint32_t num_blocks;
8738	int flags, retval;
8739	int isread;
8740
8741	CTL_DEBUG_PRINT(("ctl_read_write: command: %#x\n", ctsio->cdb[0]));
8742
8743	flags = 0;
8744	isread = ctsio->cdb[0] == READ_6  || ctsio->cdb[0] == READ_10
8745	      || ctsio->cdb[0] == READ_12 || ctsio->cdb[0] == READ_16;
8746	switch (ctsio->cdb[0]) {
8747	case READ_6:
8748	case WRITE_6: {
8749		struct scsi_rw_6 *cdb;
8750
8751		cdb = (struct scsi_rw_6 *)ctsio->cdb;
8752
8753		lba = scsi_3btoul(cdb->addr);
8754		/* only 5 bits are valid in the most significant address byte */
8755		lba &= 0x1fffff;
8756		num_blocks = cdb->length;
8757		/*
8758		 * This is correct according to SBC-2.
8759		 */
8760		if (num_blocks == 0)
8761			num_blocks = 256;
8762		break;
8763	}
8764	case READ_10:
8765	case WRITE_10: {
8766		struct scsi_rw_10 *cdb;
8767
8768		cdb = (struct scsi_rw_10 *)ctsio->cdb;
8769		if (cdb->byte2 & SRW10_FUA)
8770			flags |= CTL_LLF_FUA;
8771		if (cdb->byte2 & SRW10_DPO)
8772			flags |= CTL_LLF_DPO;
8773		lba = scsi_4btoul(cdb->addr);
8774		num_blocks = scsi_2btoul(cdb->length);
8775		break;
8776	}
8777	case WRITE_VERIFY_10: {
8778		struct scsi_write_verify_10 *cdb;
8779
8780		cdb = (struct scsi_write_verify_10 *)ctsio->cdb;
8781		flags |= CTL_LLF_FUA;
8782		if (cdb->byte2 & SWV_DPO)
8783			flags |= CTL_LLF_DPO;
8784		lba = scsi_4btoul(cdb->addr);
8785		num_blocks = scsi_2btoul(cdb->length);
8786		break;
8787	}
8788	case READ_12:
8789	case WRITE_12: {
8790		struct scsi_rw_12 *cdb;
8791
8792		cdb = (struct scsi_rw_12 *)ctsio->cdb;
8793		if (cdb->byte2 & SRW12_FUA)
8794			flags |= CTL_LLF_FUA;
8795		if (cdb->byte2 & SRW12_DPO)
8796			flags |= CTL_LLF_DPO;
8797		lba = scsi_4btoul(cdb->addr);
8798		num_blocks = scsi_4btoul(cdb->length);
8799		break;
8800	}
8801	case WRITE_VERIFY_12: {
8802		struct scsi_write_verify_12 *cdb;
8803
8804		cdb = (struct scsi_write_verify_12 *)ctsio->cdb;
8805		flags |= CTL_LLF_FUA;
8806		if (cdb->byte2 & SWV_DPO)
8807			flags |= CTL_LLF_DPO;
8808		lba = scsi_4btoul(cdb->addr);
8809		num_blocks = scsi_4btoul(cdb->length);
8810		break;
8811	}
8812	case READ_16:
8813	case WRITE_16: {
8814		struct scsi_rw_16 *cdb;
8815
8816		cdb = (struct scsi_rw_16 *)ctsio->cdb;
8817		if (cdb->byte2 & SRW12_FUA)
8818			flags |= CTL_LLF_FUA;
8819		if (cdb->byte2 & SRW12_DPO)
8820			flags |= CTL_LLF_DPO;
8821		lba = scsi_8btou64(cdb->addr);
8822		num_blocks = scsi_4btoul(cdb->length);
8823		break;
8824	}
8825	case WRITE_ATOMIC_16: {
8826		struct scsi_write_atomic_16 *cdb;
8827
8828		if (lun->be_lun->atomicblock == 0) {
8829			ctl_set_invalid_opcode(ctsio);
8830			ctl_done((union ctl_io *)ctsio);
8831			return (CTL_RETVAL_COMPLETE);
8832		}
8833
8834		cdb = (struct scsi_write_atomic_16 *)ctsio->cdb;
8835		if (cdb->byte2 & SRW12_FUA)
8836			flags |= CTL_LLF_FUA;
8837		if (cdb->byte2 & SRW12_DPO)
8838			flags |= CTL_LLF_DPO;
8839		lba = scsi_8btou64(cdb->addr);
8840		num_blocks = scsi_2btoul(cdb->length);
8841		if (num_blocks > lun->be_lun->atomicblock) {
8842			ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
8843			    /*command*/ 1, /*field*/ 12, /*bit_valid*/ 0,
8844			    /*bit*/ 0);
8845			ctl_done((union ctl_io *)ctsio);
8846			return (CTL_RETVAL_COMPLETE);
8847		}
8848		break;
8849	}
8850	case WRITE_VERIFY_16: {
8851		struct scsi_write_verify_16 *cdb;
8852
8853		cdb = (struct scsi_write_verify_16 *)ctsio->cdb;
8854		flags |= CTL_LLF_FUA;
8855		if (cdb->byte2 & SWV_DPO)
8856			flags |= CTL_LLF_DPO;
8857		lba = scsi_8btou64(cdb->addr);
8858		num_blocks = scsi_4btoul(cdb->length);
8859		break;
8860	}
8861	default:
8862		/*
8863		 * We got a command we don't support.  This shouldn't
8864		 * happen, commands should be filtered out above us.
8865		 */
8866		ctl_set_invalid_opcode(ctsio);
8867		ctl_done((union ctl_io *)ctsio);
8868
8869		return (CTL_RETVAL_COMPLETE);
8870		break; /* NOTREACHED */
8871	}
8872
8873	/*
8874	 * The first check is to make sure we're in bounds, the second
8875	 * check is to catch wrap-around problems.  If the lba + num blocks
8876	 * is less than the lba, then we've wrapped around and the block
8877	 * range is invalid anyway.
8878	 */
8879	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
8880	 || ((lba + num_blocks) < lba)) {
8881		ctl_set_lba_out_of_range(ctsio,
8882		    MAX(lba, lun->be_lun->maxlba + 1));
8883		ctl_done((union ctl_io *)ctsio);
8884		return (CTL_RETVAL_COMPLETE);
8885	}
8886
8887	/*
8888	 * According to SBC-3, a transfer length of 0 is not an error.
8889	 * Note that this cannot happen with WRITE(6) or READ(6), since 0
8890	 * translates to 256 blocks for those commands.
8891	 */
8892	if (num_blocks == 0) {
8893		ctl_set_success(ctsio);
8894		ctl_done((union ctl_io *)ctsio);
8895		return (CTL_RETVAL_COMPLETE);
8896	}
8897
8898	/* Set FUA and/or DPO if caches are disabled. */
8899	if (isread) {
8900		if ((lun->MODE_CACHING.flags1 & SCP_RCD) != 0)
8901			flags |= CTL_LLF_FUA | CTL_LLF_DPO;
8902	} else {
8903		if ((lun->MODE_CACHING.flags1 & SCP_WCE) == 0)
8904			flags |= CTL_LLF_FUA;
8905	}
8906
8907	lbalen = (struct ctl_lba_len_flags *)
8908	    &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
8909	lbalen->lba = lba;
8910	lbalen->len = num_blocks;
8911	lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags;
8912
8913	ctsio->kern_total_len = num_blocks * lun->be_lun->blocksize;
8914	ctsio->kern_rel_offset = 0;
8915
8916	CTL_DEBUG_PRINT(("ctl_read_write: calling data_submit()\n"));
8917
8918	retval = lun->backend->data_submit((union ctl_io *)ctsio);
8919	return (retval);
8920}
8921
8922static int
8923ctl_cnw_cont(union ctl_io *io)
8924{
8925	struct ctl_lun *lun = CTL_LUN(io);
8926	struct ctl_scsiio *ctsio;
8927	struct ctl_lba_len_flags *lbalen;
8928	int retval;
8929
8930	CTL_IO_ASSERT(io, SCSI);
8931
8932	ctsio = &io->scsiio;
8933	ctsio->io_hdr.status = CTL_STATUS_NONE;
8934	ctsio->io_hdr.flags &= ~CTL_FLAG_IO_CONT;
8935	lbalen = (struct ctl_lba_len_flags *)
8936	    &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
8937	lbalen->flags &= ~CTL_LLF_COMPARE;
8938	lbalen->flags |= CTL_LLF_WRITE;
8939
8940	CTL_DEBUG_PRINT(("ctl_cnw_cont: calling data_submit()\n"));
8941	retval = lun->backend->data_submit((union ctl_io *)ctsio);
8942	return (retval);
8943}
8944
8945int
8946ctl_cnw(struct ctl_scsiio *ctsio)
8947{
8948	struct ctl_lun *lun = CTL_LUN(ctsio);
8949	struct ctl_lba_len_flags *lbalen;
8950	uint64_t lba;
8951	uint32_t num_blocks;
8952	int flags, retval;
8953
8954	CTL_DEBUG_PRINT(("ctl_cnw: command: %#x\n", ctsio->cdb[0]));
8955
8956	flags = 0;
8957	switch (ctsio->cdb[0]) {
8958	case COMPARE_AND_WRITE: {
8959		struct scsi_compare_and_write *cdb;
8960
8961		cdb = (struct scsi_compare_and_write *)ctsio->cdb;
8962		if (cdb->byte2 & SRW10_FUA)
8963			flags |= CTL_LLF_FUA;
8964		if (cdb->byte2 & SRW10_DPO)
8965			flags |= CTL_LLF_DPO;
8966		lba = scsi_8btou64(cdb->addr);
8967		num_blocks = cdb->length;
8968		break;
8969	}
8970	default:
8971		/*
8972		 * We got a command we don't support.  This shouldn't
8973		 * happen, commands should be filtered out above us.
8974		 */
8975		ctl_set_invalid_opcode(ctsio);
8976		ctl_done((union ctl_io *)ctsio);
8977
8978		return (CTL_RETVAL_COMPLETE);
8979		break; /* NOTREACHED */
8980	}
8981
8982	/*
8983	 * The first check is to make sure we're in bounds, the second
8984	 * check is to catch wrap-around problems.  If the lba + num blocks
8985	 * is less than the lba, then we've wrapped around and the block
8986	 * range is invalid anyway.
8987	 */
8988	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
8989	 || ((lba + num_blocks) < lba)) {
8990		ctl_set_lba_out_of_range(ctsio,
8991		    MAX(lba, lun->be_lun->maxlba + 1));
8992		ctl_done((union ctl_io *)ctsio);
8993		return (CTL_RETVAL_COMPLETE);
8994	}
8995
8996	/*
8997	 * According to SBC-3, a transfer length of 0 is not an error.
8998	 */
8999	if (num_blocks == 0) {
9000		ctl_set_success(ctsio);
9001		ctl_done((union ctl_io *)ctsio);
9002		return (CTL_RETVAL_COMPLETE);
9003	}
9004
9005	/* Set FUA if write cache is disabled. */
9006	if ((lun->MODE_CACHING.flags1 & SCP_WCE) == 0)
9007		flags |= CTL_LLF_FUA;
9008
9009	ctsio->kern_total_len = 2 * num_blocks * lun->be_lun->blocksize;
9010	ctsio->kern_rel_offset = 0;
9011
9012	/*
9013	 * Set the IO_CONT flag, so that if this I/O gets passed to
9014	 * ctl_data_submit_done(), it'll get passed back to
9015	 * ctl_ctl_cnw_cont() for further processing.
9016	 */
9017	ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT;
9018	ctsio->io_cont = ctl_cnw_cont;
9019
9020	lbalen = (struct ctl_lba_len_flags *)
9021	    &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
9022	lbalen->lba = lba;
9023	lbalen->len = num_blocks;
9024	lbalen->flags = CTL_LLF_COMPARE | flags;
9025
9026	CTL_DEBUG_PRINT(("ctl_cnw: calling data_submit()\n"));
9027	retval = lun->backend->data_submit((union ctl_io *)ctsio);
9028	return (retval);
9029}
9030
9031int
9032ctl_verify(struct ctl_scsiio *ctsio)
9033{
9034	struct ctl_lun *lun = CTL_LUN(ctsio);
9035	struct ctl_lba_len_flags *lbalen;
9036	uint64_t lba;
9037	uint32_t num_blocks;
9038	int bytchk, flags;
9039	int retval;
9040
9041	CTL_DEBUG_PRINT(("ctl_verify: command: %#x\n", ctsio->cdb[0]));
9042
9043	bytchk = 0;
9044	flags = CTL_LLF_FUA;
9045	switch (ctsio->cdb[0]) {
9046	case VERIFY_10: {
9047		struct scsi_verify_10 *cdb;
9048
9049		cdb = (struct scsi_verify_10 *)ctsio->cdb;
9050		if (cdb->byte2 & SVFY_BYTCHK)
9051			bytchk = 1;
9052		if (cdb->byte2 & SVFY_DPO)
9053			flags |= CTL_LLF_DPO;
9054		lba = scsi_4btoul(cdb->addr);
9055		num_blocks = scsi_2btoul(cdb->length);
9056		break;
9057	}
9058	case VERIFY_12: {
9059		struct scsi_verify_12 *cdb;
9060
9061		cdb = (struct scsi_verify_12 *)ctsio->cdb;
9062		if (cdb->byte2 & SVFY_BYTCHK)
9063			bytchk = 1;
9064		if (cdb->byte2 & SVFY_DPO)
9065			flags |= CTL_LLF_DPO;
9066		lba = scsi_4btoul(cdb->addr);
9067		num_blocks = scsi_4btoul(cdb->length);
9068		break;
9069	}
9070	case VERIFY_16: {
9071		struct scsi_rw_16 *cdb;
9072
9073		cdb = (struct scsi_rw_16 *)ctsio->cdb;
9074		if (cdb->byte2 & SVFY_BYTCHK)
9075			bytchk = 1;
9076		if (cdb->byte2 & SVFY_DPO)
9077			flags |= CTL_LLF_DPO;
9078		lba = scsi_8btou64(cdb->addr);
9079		num_blocks = scsi_4btoul(cdb->length);
9080		break;
9081	}
9082	default:
9083		/*
9084		 * We got a command we don't support.  This shouldn't
9085		 * happen, commands should be filtered out above us.
9086		 */
9087		ctl_set_invalid_opcode(ctsio);
9088		ctl_done((union ctl_io *)ctsio);
9089		return (CTL_RETVAL_COMPLETE);
9090	}
9091
9092	/*
9093	 * The first check is to make sure we're in bounds, the second
9094	 * check is to catch wrap-around problems.  If the lba + num blocks
9095	 * is less than the lba, then we've wrapped around and the block
9096	 * range is invalid anyway.
9097	 */
9098	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
9099	 || ((lba + num_blocks) < lba)) {
9100		ctl_set_lba_out_of_range(ctsio,
9101		    MAX(lba, lun->be_lun->maxlba + 1));
9102		ctl_done((union ctl_io *)ctsio);
9103		return (CTL_RETVAL_COMPLETE);
9104	}
9105
9106	/*
9107	 * According to SBC-3, a transfer length of 0 is not an error.
9108	 */
9109	if (num_blocks == 0) {
9110		ctl_set_success(ctsio);
9111		ctl_done((union ctl_io *)ctsio);
9112		return (CTL_RETVAL_COMPLETE);
9113	}
9114
9115	lbalen = (struct ctl_lba_len_flags *)
9116	    &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
9117	lbalen->lba = lba;
9118	lbalen->len = num_blocks;
9119	if (bytchk) {
9120		lbalen->flags = CTL_LLF_COMPARE | flags;
9121		ctsio->kern_total_len = num_blocks * lun->be_lun->blocksize;
9122	} else {
9123		lbalen->flags = CTL_LLF_VERIFY | flags;
9124		ctsio->kern_total_len = 0;
9125	}
9126	ctsio->kern_rel_offset = 0;
9127
9128	CTL_DEBUG_PRINT(("ctl_verify: calling data_submit()\n"));
9129	retval = lun->backend->data_submit((union ctl_io *)ctsio);
9130	return (retval);
9131}
9132
9133int
9134ctl_report_luns(struct ctl_scsiio *ctsio)
9135{
9136	struct ctl_softc *softc = CTL_SOFTC(ctsio);
9137	struct ctl_port *port = CTL_PORT(ctsio);
9138	struct ctl_lun *lun, *request_lun = CTL_LUN(ctsio);
9139	struct scsi_report_luns *cdb;
9140	struct scsi_report_luns_data *lun_data;
9141	int num_filled, num_luns, num_port_luns, retval;
9142	uint32_t alloc_len, lun_datalen;
9143	uint32_t initidx, targ_lun_id, lun_id;
9144
9145	retval = CTL_RETVAL_COMPLETE;
9146	cdb = (struct scsi_report_luns *)ctsio->cdb;
9147
9148	CTL_DEBUG_PRINT(("ctl_report_luns\n"));
9149
9150	num_luns = 0;
9151	num_port_luns = port->lun_map ? port->lun_map_size : ctl_max_luns;
9152	mtx_lock(&softc->ctl_lock);
9153	for (targ_lun_id = 0; targ_lun_id < num_port_luns; targ_lun_id++) {
9154		if (ctl_lun_map_from_port(port, targ_lun_id) != UINT32_MAX)
9155			num_luns++;
9156	}
9157	mtx_unlock(&softc->ctl_lock);
9158
9159	switch (cdb->select_report) {
9160	case RPL_REPORT_DEFAULT:
9161	case RPL_REPORT_ALL:
9162	case RPL_REPORT_NONSUBSID:
9163		break;
9164	case RPL_REPORT_WELLKNOWN:
9165	case RPL_REPORT_ADMIN:
9166	case RPL_REPORT_CONGLOM:
9167		num_luns = 0;
9168		break;
9169	default:
9170		ctl_set_invalid_field(ctsio,
9171				      /*sks_valid*/ 1,
9172				      /*command*/ 1,
9173				      /*field*/ 2,
9174				      /*bit_valid*/ 0,
9175				      /*bit*/ 0);
9176		ctl_done((union ctl_io *)ctsio);
9177		return (retval);
9178		break; /* NOTREACHED */
9179	}
9180
9181	alloc_len = scsi_4btoul(cdb->length);
9182	/*
9183	 * The initiator has to allocate at least 16 bytes for this request,
9184	 * so he can at least get the header and the first LUN.  Otherwise
9185	 * we reject the request (per SPC-3 rev 14, section 6.21).
9186	 */
9187	if (alloc_len < (sizeof(struct scsi_report_luns_data) +
9188	    sizeof(struct scsi_report_luns_lundata))) {
9189		ctl_set_invalid_field(ctsio,
9190				      /*sks_valid*/ 1,
9191				      /*command*/ 1,
9192				      /*field*/ 6,
9193				      /*bit_valid*/ 0,
9194				      /*bit*/ 0);
9195		ctl_done((union ctl_io *)ctsio);
9196		return (retval);
9197	}
9198
9199	lun_datalen = sizeof(*lun_data) +
9200		(num_luns * sizeof(struct scsi_report_luns_lundata));
9201
9202	ctsio->kern_data_ptr = malloc(lun_datalen, M_CTL, M_WAITOK | M_ZERO);
9203	lun_data = (struct scsi_report_luns_data *)ctsio->kern_data_ptr;
9204	ctsio->kern_sg_entries = 0;
9205
9206	initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
9207
9208	mtx_lock(&softc->ctl_lock);
9209	for (targ_lun_id = 0, num_filled = 0;
9210	    targ_lun_id < num_port_luns && num_filled < num_luns;
9211	    targ_lun_id++) {
9212		lun_id = ctl_lun_map_from_port(port, targ_lun_id);
9213		if (lun_id == UINT32_MAX)
9214			continue;
9215		lun = softc->ctl_luns[lun_id];
9216		if (lun == NULL)
9217			continue;
9218
9219		be64enc(lun_data->luns[num_filled++].lundata,
9220		    ctl_encode_lun(targ_lun_id));
9221
9222		/*
9223		 * According to SPC-3, rev 14 section 6.21:
9224		 *
9225		 * "The execution of a REPORT LUNS command to any valid and
9226		 * installed logical unit shall clear the REPORTED LUNS DATA
9227		 * HAS CHANGED unit attention condition for all logical
9228		 * units of that target with respect to the requesting
9229		 * initiator. A valid and installed logical unit is one
9230		 * having a PERIPHERAL QUALIFIER of 000b in the standard
9231		 * INQUIRY data (see 6.4.2)."
9232		 *
9233		 * If request_lun is NULL, the LUN this report luns command
9234		 * was issued to is either disabled or doesn't exist. In that
9235		 * case, we shouldn't clear any pending lun change unit
9236		 * attention.
9237		 */
9238		if (request_lun != NULL) {
9239			mtx_lock(&lun->lun_lock);
9240			ctl_clr_ua(lun, initidx, CTL_UA_LUN_CHANGE);
9241			mtx_unlock(&lun->lun_lock);
9242		}
9243	}
9244	mtx_unlock(&softc->ctl_lock);
9245
9246	/*
9247	 * It's quite possible that we've returned fewer LUNs than we allocated
9248	 * space for.  Trim it.
9249	 */
9250	lun_datalen = sizeof(*lun_data) +
9251		(num_filled * sizeof(struct scsi_report_luns_lundata));
9252	ctsio->kern_rel_offset = 0;
9253	ctsio->kern_sg_entries = 0;
9254	ctsio->kern_data_len = min(lun_datalen, alloc_len);
9255	ctsio->kern_total_len = ctsio->kern_data_len;
9256
9257	/*
9258	 * We set this to the actual data length, regardless of how much
9259	 * space we actually have to return results.  If the user looks at
9260	 * this value, he'll know whether or not he allocated enough space
9261	 * and reissue the command if necessary.  We don't support well
9262	 * known logical units, so if the user asks for that, return none.
9263	 */
9264	scsi_ulto4b(lun_datalen - 8, lun_data->length);
9265
9266	/*
9267	 * We can only return SCSI_STATUS_CHECK_COND when we can't satisfy
9268	 * this request.
9269	 */
9270	ctl_set_success(ctsio);
9271	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9272	ctsio->be_move_done = ctl_config_move_done;
9273	ctl_datamove((union ctl_io *)ctsio);
9274	return (retval);
9275}
9276
9277int
9278ctl_request_sense(struct ctl_scsiio *ctsio)
9279{
9280	struct ctl_softc *softc = CTL_SOFTC(ctsio);
9281	struct ctl_lun *lun = CTL_LUN(ctsio);
9282	struct scsi_request_sense *cdb;
9283	struct scsi_sense_data *sense_ptr, *ps;
9284	uint32_t initidx;
9285	int have_error;
9286	u_int sense_len = SSD_FULL_SIZE;
9287	scsi_sense_data_type sense_format;
9288	ctl_ua_type ua_type;
9289	uint8_t asc = 0, ascq = 0;
9290
9291	cdb = (struct scsi_request_sense *)ctsio->cdb;
9292
9293	CTL_DEBUG_PRINT(("ctl_request_sense\n"));
9294
9295	/*
9296	 * Determine which sense format the user wants.
9297	 */
9298	if (cdb->byte2 & SRS_DESC)
9299		sense_format = SSD_TYPE_DESC;
9300	else
9301		sense_format = SSD_TYPE_FIXED;
9302
9303	ctsio->kern_data_ptr = malloc(sizeof(*sense_ptr), M_CTL, M_WAITOK);
9304	sense_ptr = (struct scsi_sense_data *)ctsio->kern_data_ptr;
9305	ctsio->kern_sg_entries = 0;
9306	ctsio->kern_rel_offset = 0;
9307
9308	/*
9309	 * struct scsi_sense_data, which is currently set to 256 bytes, is
9310	 * larger than the largest allowed value for the length field in the
9311	 * REQUEST SENSE CDB, which is 252 bytes as of SPC-4.
9312	 */
9313	ctsio->kern_data_len = cdb->length;
9314	ctsio->kern_total_len = cdb->length;
9315
9316	/*
9317	 * If we don't have a LUN, we don't have any pending sense.
9318	 */
9319	if (lun == NULL ||
9320	    ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
9321	     softc->ha_link < CTL_HA_LINK_UNKNOWN)) {
9322		/* "Logical unit not supported" */
9323		ctl_set_sense_data(sense_ptr, &sense_len, NULL, sense_format,
9324		    /*current_error*/ 1,
9325		    /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
9326		    /*asc*/ 0x25,
9327		    /*ascq*/ 0x00,
9328		    SSD_ELEM_NONE);
9329		goto send;
9330	}
9331
9332	have_error = 0;
9333	initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
9334	/*
9335	 * Check for pending sense, and then for pending unit attentions.
9336	 * Pending sense gets returned first, then pending unit attentions.
9337	 */
9338	mtx_lock(&lun->lun_lock);
9339	ps = lun->pending_sense[initidx / CTL_MAX_INIT_PER_PORT];
9340	if (ps != NULL)
9341		ps += initidx % CTL_MAX_INIT_PER_PORT;
9342	if (ps != NULL && ps->error_code != 0) {
9343		scsi_sense_data_type stored_format;
9344
9345		/*
9346		 * Check to see which sense format was used for the stored
9347		 * sense data.
9348		 */
9349		stored_format = scsi_sense_type(ps);
9350
9351		/*
9352		 * If the user requested a different sense format than the
9353		 * one we stored, then we need to convert it to the other
9354		 * format.  If we're going from descriptor to fixed format
9355		 * sense data, we may lose things in translation, depending
9356		 * on what options were used.
9357		 *
9358		 * If the stored format is SSD_TYPE_NONE (i.e. invalid),
9359		 * for some reason we'll just copy it out as-is.
9360		 */
9361		if ((stored_format == SSD_TYPE_FIXED)
9362		 && (sense_format == SSD_TYPE_DESC))
9363			ctl_sense_to_desc((struct scsi_sense_data_fixed *)
9364			    ps, (struct scsi_sense_data_desc *)sense_ptr);
9365		else if ((stored_format == SSD_TYPE_DESC)
9366		      && (sense_format == SSD_TYPE_FIXED))
9367			ctl_sense_to_fixed((struct scsi_sense_data_desc *)
9368			    ps, (struct scsi_sense_data_fixed *)sense_ptr);
9369		else
9370			memcpy(sense_ptr, ps, sizeof(*sense_ptr));
9371
9372		ps->error_code = 0;
9373		have_error = 1;
9374	} else {
9375		ua_type = ctl_build_ua(lun, initidx, sense_ptr, &sense_len,
9376		    sense_format);
9377		if (ua_type != CTL_UA_NONE)
9378			have_error = 1;
9379	}
9380	if (have_error == 0) {
9381		/*
9382		 * Report informational exception if have one and allowed.
9383		 */
9384		if (lun->MODE_IE.mrie != SIEP_MRIE_NO) {
9385			asc = lun->ie_asc;
9386			ascq = lun->ie_ascq;
9387		}
9388		ctl_set_sense_data(sense_ptr, &sense_len, lun, sense_format,
9389		    /*current_error*/ 1,
9390		    /*sense_key*/ SSD_KEY_NO_SENSE,
9391		    /*asc*/ asc,
9392		    /*ascq*/ ascq,
9393		    SSD_ELEM_NONE);
9394	}
9395	mtx_unlock(&lun->lun_lock);
9396
9397send:
9398	/*
9399	 * We report the SCSI status as OK, since the status of the command
9400	 * itself is OK.  We're reporting sense as parameter data.
9401	 */
9402	ctl_set_success(ctsio);
9403	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9404	ctsio->be_move_done = ctl_config_move_done;
9405	ctl_datamove((union ctl_io *)ctsio);
9406	return (CTL_RETVAL_COMPLETE);
9407}
9408
9409int
9410ctl_tur(struct ctl_scsiio *ctsio)
9411{
9412
9413	CTL_DEBUG_PRINT(("ctl_tur\n"));
9414
9415	ctl_set_success(ctsio);
9416	ctl_done((union ctl_io *)ctsio);
9417
9418	return (CTL_RETVAL_COMPLETE);
9419}
9420
9421/*
9422 * SCSI VPD page 0x00, the Supported VPD Pages page.
9423 */
9424static int
9425ctl_inquiry_evpd_supported(struct ctl_scsiio *ctsio, int alloc_len)
9426{
9427	struct ctl_lun *lun = CTL_LUN(ctsio);
9428	struct scsi_vpd_supported_pages *pages;
9429	int sup_page_size;
9430	int p;
9431
9432	sup_page_size = sizeof(struct scsi_vpd_supported_pages) *
9433	    SCSI_EVPD_NUM_SUPPORTED_PAGES;
9434	ctsio->kern_data_ptr = malloc(sup_page_size, M_CTL, M_WAITOK | M_ZERO);
9435	pages = (struct scsi_vpd_supported_pages *)ctsio->kern_data_ptr;
9436	ctsio->kern_rel_offset = 0;
9437	ctsio->kern_sg_entries = 0;
9438	ctsio->kern_data_len = min(sup_page_size, alloc_len);
9439	ctsio->kern_total_len = ctsio->kern_data_len;
9440
9441	/*
9442	 * The control device is always connected.  The disk device, on the
9443	 * other hand, may not be online all the time.  Need to change this
9444	 * to figure out whether the disk device is actually online or not.
9445	 */
9446	if (lun != NULL)
9447		pages->device = (SID_QUAL_LU_CONNECTED << 5) |
9448				lun->be_lun->lun_type;
9449	else
9450		pages->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9451
9452	p = 0;
9453	/* Supported VPD pages */
9454	pages->page_list[p++] = SVPD_SUPPORTED_PAGES;
9455	/* Serial Number */
9456	pages->page_list[p++] = SVPD_UNIT_SERIAL_NUMBER;
9457	/* Device Identification */
9458	pages->page_list[p++] = SVPD_DEVICE_ID;
9459	/* Extended INQUIRY Data */
9460	pages->page_list[p++] = SVPD_EXTENDED_INQUIRY_DATA;
9461	/* Mode Page Policy */
9462	pages->page_list[p++] = SVPD_MODE_PAGE_POLICY;
9463	/* SCSI Ports */
9464	pages->page_list[p++] = SVPD_SCSI_PORTS;
9465	/* Third-party Copy */
9466	pages->page_list[p++] = SVPD_SCSI_TPC;
9467	/* SCSI Feature Sets */
9468	pages->page_list[p++] = SVPD_SCSI_SFS;
9469	if (lun != NULL && lun->be_lun->lun_type == T_DIRECT) {
9470		/* Block limits */
9471		pages->page_list[p++] = SVPD_BLOCK_LIMITS;
9472		/* Block Device Characteristics */
9473		pages->page_list[p++] = SVPD_BDC;
9474		/* Logical Block Provisioning */
9475		pages->page_list[p++] = SVPD_LBP;
9476	}
9477	pages->length = p;
9478
9479	ctl_set_success(ctsio);
9480	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9481	ctsio->be_move_done = ctl_config_move_done;
9482	ctl_datamove((union ctl_io *)ctsio);
9483	return (CTL_RETVAL_COMPLETE);
9484}
9485
9486/*
9487 * SCSI VPD page 0x80, the Unit Serial Number page.
9488 */
9489static int
9490ctl_inquiry_evpd_serial(struct ctl_scsiio *ctsio, int alloc_len)
9491{
9492	struct ctl_lun *lun = CTL_LUN(ctsio);
9493	struct scsi_vpd_unit_serial_number *sn_ptr;
9494	int data_len;
9495
9496	data_len = 4 + CTL_SN_LEN;
9497	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
9498	sn_ptr = (struct scsi_vpd_unit_serial_number *)ctsio->kern_data_ptr;
9499	ctsio->kern_rel_offset = 0;
9500	ctsio->kern_sg_entries = 0;
9501	ctsio->kern_data_len = min(data_len, alloc_len);
9502	ctsio->kern_total_len = ctsio->kern_data_len;
9503
9504	/*
9505	 * The control device is always connected.  The disk device, on the
9506	 * other hand, may not be online all the time.  Need to change this
9507	 * to figure out whether the disk device is actually online or not.
9508	 */
9509	if (lun != NULL)
9510		sn_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9511				  lun->be_lun->lun_type;
9512	else
9513		sn_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9514
9515	sn_ptr->page_code = SVPD_UNIT_SERIAL_NUMBER;
9516	sn_ptr->length = CTL_SN_LEN;
9517	/*
9518	 * If we don't have a LUN, we just leave the serial number as
9519	 * all spaces.
9520	 */
9521	if (lun != NULL) {
9522		strncpy((char *)sn_ptr->serial_num,
9523			(char *)lun->be_lun->serial_num, CTL_SN_LEN);
9524	} else
9525		memset(sn_ptr->serial_num, 0x20, CTL_SN_LEN);
9526
9527	ctl_set_success(ctsio);
9528	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9529	ctsio->be_move_done = ctl_config_move_done;
9530	ctl_datamove((union ctl_io *)ctsio);
9531	return (CTL_RETVAL_COMPLETE);
9532}
9533
9534/*
9535 * SCSI VPD page 0x86, the Extended INQUIRY Data page.
9536 */
9537static int
9538ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len)
9539{
9540	struct ctl_lun *lun = CTL_LUN(ctsio);
9541	struct scsi_vpd_extended_inquiry_data *eid_ptr;
9542	int data_len;
9543
9544	data_len = sizeof(struct scsi_vpd_extended_inquiry_data);
9545	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
9546	eid_ptr = (struct scsi_vpd_extended_inquiry_data *)ctsio->kern_data_ptr;
9547	ctsio->kern_sg_entries = 0;
9548	ctsio->kern_rel_offset = 0;
9549	ctsio->kern_data_len = min(data_len, alloc_len);
9550	ctsio->kern_total_len = ctsio->kern_data_len;
9551
9552	/*
9553	 * The control device is always connected.  The disk device, on the
9554	 * other hand, may not be online all the time.
9555	 */
9556	if (lun != NULL)
9557		eid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9558				     lun->be_lun->lun_type;
9559	else
9560		eid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9561	eid_ptr->page_code = SVPD_EXTENDED_INQUIRY_DATA;
9562	scsi_ulto2b(data_len - 4, eid_ptr->page_length);
9563	/*
9564	 * We support head of queue, ordered and simple tags.
9565	 */
9566	eid_ptr->flags2 = SVPD_EID_HEADSUP | SVPD_EID_ORDSUP | SVPD_EID_SIMPSUP;
9567	/*
9568	 * Volatile cache supported.
9569	 */
9570	eid_ptr->flags3 = SVPD_EID_V_SUP;
9571
9572	/*
9573	 * This means that we clear the REPORTED LUNS DATA HAS CHANGED unit
9574	 * attention for a particular IT nexus on all LUNs once we report
9575	 * it to that nexus once.  This bit is required as of SPC-4.
9576	 */
9577	eid_ptr->flags4 = SVPD_EID_LUICLR;
9578
9579	/*
9580	 * We support revert to defaults (RTD) bit in MODE SELECT.
9581	 */
9582	eid_ptr->flags5 = SVPD_EID_RTD_SUP;
9583
9584	/*
9585	 * XXX KDM in order to correctly answer this, we would need
9586	 * information from the SIM to determine how much sense data it
9587	 * can send.  So this would really be a path inquiry field, most
9588	 * likely.  This can be set to a maximum of 252 according to SPC-4,
9589	 * but the hardware may or may not be able to support that much.
9590	 * 0 just means that the maximum sense data length is not reported.
9591	 */
9592	eid_ptr->max_sense_length = 0;
9593
9594	ctl_set_success(ctsio);
9595	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9596	ctsio->be_move_done = ctl_config_move_done;
9597	ctl_datamove((union ctl_io *)ctsio);
9598	return (CTL_RETVAL_COMPLETE);
9599}
9600
9601static int
9602ctl_inquiry_evpd_mpp(struct ctl_scsiio *ctsio, int alloc_len)
9603{
9604	struct ctl_lun *lun = CTL_LUN(ctsio);
9605	struct scsi_vpd_mode_page_policy *mpp_ptr;
9606	int data_len;
9607
9608	data_len = sizeof(struct scsi_vpd_mode_page_policy) +
9609	    sizeof(struct scsi_vpd_mode_page_policy_descr);
9610
9611	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
9612	mpp_ptr = (struct scsi_vpd_mode_page_policy *)ctsio->kern_data_ptr;
9613	ctsio->kern_rel_offset = 0;
9614	ctsio->kern_sg_entries = 0;
9615	ctsio->kern_data_len = min(data_len, alloc_len);
9616	ctsio->kern_total_len = ctsio->kern_data_len;
9617
9618	/*
9619	 * The control device is always connected.  The disk device, on the
9620	 * other hand, may not be online all the time.
9621	 */
9622	if (lun != NULL)
9623		mpp_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9624				     lun->be_lun->lun_type;
9625	else
9626		mpp_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9627	mpp_ptr->page_code = SVPD_MODE_PAGE_POLICY;
9628	scsi_ulto2b(data_len - 4, mpp_ptr->page_length);
9629	mpp_ptr->descr[0].page_code = 0x3f;
9630	mpp_ptr->descr[0].subpage_code = 0xff;
9631	mpp_ptr->descr[0].policy = SVPD_MPP_SHARED;
9632
9633	ctl_set_success(ctsio);
9634	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9635	ctsio->be_move_done = ctl_config_move_done;
9636	ctl_datamove((union ctl_io *)ctsio);
9637	return (CTL_RETVAL_COMPLETE);
9638}
9639
9640/*
9641 * SCSI VPD page 0x83, the Device Identification page.
9642 */
9643static int
9644ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
9645{
9646	struct ctl_softc *softc = CTL_SOFTC(ctsio);
9647	struct ctl_port *port = CTL_PORT(ctsio);
9648	struct ctl_lun *lun = CTL_LUN(ctsio);
9649	struct scsi_vpd_device_id *devid_ptr;
9650	struct scsi_vpd_id_descriptor *desc;
9651	int data_len, g;
9652	uint8_t proto;
9653
9654	data_len = sizeof(struct scsi_vpd_device_id) +
9655	    sizeof(struct scsi_vpd_id_descriptor) +
9656		sizeof(struct scsi_vpd_id_rel_trgt_port_id) +
9657	    sizeof(struct scsi_vpd_id_descriptor) +
9658		sizeof(struct scsi_vpd_id_trgt_port_grp_id);
9659	if (lun && lun->lun_devid)
9660		data_len += lun->lun_devid->len;
9661	if (port && port->port_devid)
9662		data_len += port->port_devid->len;
9663	if (port && port->target_devid)
9664		data_len += port->target_devid->len;
9665
9666	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
9667	devid_ptr = (struct scsi_vpd_device_id *)ctsio->kern_data_ptr;
9668	ctsio->kern_sg_entries = 0;
9669	ctsio->kern_rel_offset = 0;
9670	ctsio->kern_sg_entries = 0;
9671	ctsio->kern_data_len = min(data_len, alloc_len);
9672	ctsio->kern_total_len = ctsio->kern_data_len;
9673
9674	/*
9675	 * The control device is always connected.  The disk device, on the
9676	 * other hand, may not be online all the time.
9677	 */
9678	if (lun != NULL)
9679		devid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9680				     lun->be_lun->lun_type;
9681	else
9682		devid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9683	devid_ptr->page_code = SVPD_DEVICE_ID;
9684	scsi_ulto2b(data_len - 4, devid_ptr->length);
9685
9686	if (port && port->port_type == CTL_PORT_FC)
9687		proto = SCSI_PROTO_FC << 4;
9688	else if (port && port->port_type == CTL_PORT_SAS)
9689		proto = SCSI_PROTO_SAS << 4;
9690	else if (port && port->port_type == CTL_PORT_ISCSI)
9691		proto = SCSI_PROTO_ISCSI << 4;
9692	else
9693		proto = SCSI_PROTO_SPI << 4;
9694	desc = (struct scsi_vpd_id_descriptor *)devid_ptr->desc_list;
9695
9696	/*
9697	 * We're using a LUN association here.  i.e., this device ID is a
9698	 * per-LUN identifier.
9699	 */
9700	if (lun && lun->lun_devid) {
9701		memcpy(desc, lun->lun_devid->data, lun->lun_devid->len);
9702		desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc +
9703		    lun->lun_devid->len);
9704	}
9705
9706	/*
9707	 * This is for the WWPN which is a port association.
9708	 */
9709	if (port && port->port_devid) {
9710		memcpy(desc, port->port_devid->data, port->port_devid->len);
9711		desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc +
9712		    port->port_devid->len);
9713	}
9714
9715	/*
9716	 * This is for the Relative Target Port(type 4h) identifier
9717	 */
9718	desc->proto_codeset = proto | SVPD_ID_CODESET_BINARY;
9719	desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT |
9720	    SVPD_ID_TYPE_RELTARG;
9721	desc->length = 4;
9722	scsi_ulto2b(ctsio->io_hdr.nexus.targ_port, &desc->identifier[2]);
9723	desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
9724	    sizeof(struct scsi_vpd_id_rel_trgt_port_id));
9725
9726	/*
9727	 * This is for the Target Port Group(type 5h) identifier
9728	 */
9729	desc->proto_codeset = proto | SVPD_ID_CODESET_BINARY;
9730	desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT |
9731	    SVPD_ID_TYPE_TPORTGRP;
9732	desc->length = 4;
9733	if (softc->is_single ||
9734	    (port && port->status & CTL_PORT_STATUS_HA_SHARED))
9735		g = 1;
9736	else
9737		g = 2 + ctsio->io_hdr.nexus.targ_port / softc->port_cnt;
9738	scsi_ulto2b(g, &desc->identifier[2]);
9739	desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
9740	    sizeof(struct scsi_vpd_id_trgt_port_grp_id));
9741
9742	/*
9743	 * This is for the Target identifier
9744	 */
9745	if (port && port->target_devid) {
9746		memcpy(desc, port->target_devid->data, port->target_devid->len);
9747	}
9748
9749	ctl_set_success(ctsio);
9750	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9751	ctsio->be_move_done = ctl_config_move_done;
9752	ctl_datamove((union ctl_io *)ctsio);
9753	return (CTL_RETVAL_COMPLETE);
9754}
9755
9756static int
9757ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len)
9758{
9759	struct ctl_softc *softc = CTL_SOFTC(ctsio);
9760	struct ctl_lun *lun = CTL_LUN(ctsio);
9761	struct scsi_vpd_scsi_ports *sp;
9762	struct scsi_vpd_port_designation *pd;
9763	struct scsi_vpd_port_designation_cont *pdc;
9764	struct ctl_port *port;
9765	int data_len, num_target_ports, iid_len, id_len;
9766
9767	num_target_ports = 0;
9768	iid_len = 0;
9769	id_len = 0;
9770	mtx_lock(&softc->ctl_lock);
9771	STAILQ_FOREACH(port, &softc->port_list, links) {
9772		if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
9773			continue;
9774		if (lun != NULL &&
9775		    ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
9776			continue;
9777		num_target_ports++;
9778		if (port->init_devid)
9779			iid_len += port->init_devid->len;
9780		if (port->port_devid)
9781			id_len += port->port_devid->len;
9782	}
9783	mtx_unlock(&softc->ctl_lock);
9784
9785	data_len = sizeof(struct scsi_vpd_scsi_ports) +
9786	    num_target_ports * (sizeof(struct scsi_vpd_port_designation) +
9787	     sizeof(struct scsi_vpd_port_designation_cont)) + iid_len + id_len;
9788	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
9789	sp = (struct scsi_vpd_scsi_ports *)ctsio->kern_data_ptr;
9790	ctsio->kern_sg_entries = 0;
9791	ctsio->kern_rel_offset = 0;
9792	ctsio->kern_sg_entries = 0;
9793	ctsio->kern_data_len = min(data_len, alloc_len);
9794	ctsio->kern_total_len = ctsio->kern_data_len;
9795
9796	/*
9797	 * The control device is always connected.  The disk device, on the
9798	 * other hand, may not be online all the time.  Need to change this
9799	 * to figure out whether the disk device is actually online or not.
9800	 */
9801	if (lun != NULL)
9802		sp->device = (SID_QUAL_LU_CONNECTED << 5) |
9803				  lun->be_lun->lun_type;
9804	else
9805		sp->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9806
9807	sp->page_code = SVPD_SCSI_PORTS;
9808	scsi_ulto2b(data_len - sizeof(struct scsi_vpd_scsi_ports),
9809	    sp->page_length);
9810	pd = &sp->design[0];
9811
9812	mtx_lock(&softc->ctl_lock);
9813	STAILQ_FOREACH(port, &softc->port_list, links) {
9814		if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
9815			continue;
9816		if (lun != NULL &&
9817		    ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
9818			continue;
9819		scsi_ulto2b(port->targ_port, pd->relative_port_id);
9820		if (port->init_devid) {
9821			iid_len = port->init_devid->len;
9822			memcpy(pd->initiator_transportid,
9823			    port->init_devid->data, port->init_devid->len);
9824		} else
9825			iid_len = 0;
9826		scsi_ulto2b(iid_len, pd->initiator_transportid_length);
9827		pdc = (struct scsi_vpd_port_designation_cont *)
9828		    (&pd->initiator_transportid[iid_len]);
9829		if (port->port_devid) {
9830			id_len = port->port_devid->len;
9831			memcpy(pdc->target_port_descriptors,
9832			    port->port_devid->data, port->port_devid->len);
9833		} else
9834			id_len = 0;
9835		scsi_ulto2b(id_len, pdc->target_port_descriptors_length);
9836		pd = (struct scsi_vpd_port_designation *)
9837		    ((uint8_t *)pdc->target_port_descriptors + id_len);
9838	}
9839	mtx_unlock(&softc->ctl_lock);
9840
9841	ctl_set_success(ctsio);
9842	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9843	ctsio->be_move_done = ctl_config_move_done;
9844	ctl_datamove((union ctl_io *)ctsio);
9845	return (CTL_RETVAL_COMPLETE);
9846}
9847
9848static int
9849ctl_inquiry_evpd_sfs(struct ctl_scsiio *ctsio, int alloc_len)
9850{
9851	struct ctl_lun *lun = CTL_LUN(ctsio);
9852	struct scsi_vpd_sfs *sfs_ptr;
9853	int sfs_page_size, n;
9854
9855	sfs_page_size = sizeof(*sfs_ptr) + 5 * 2;
9856	ctsio->kern_data_ptr = malloc(sfs_page_size, M_CTL, M_WAITOK | M_ZERO);
9857	sfs_ptr = (struct scsi_vpd_sfs *)ctsio->kern_data_ptr;
9858	ctsio->kern_sg_entries = 0;
9859	ctsio->kern_rel_offset = 0;
9860	ctsio->kern_sg_entries = 0;
9861	ctsio->kern_data_len = min(sfs_page_size, alloc_len);
9862	ctsio->kern_total_len = ctsio->kern_data_len;
9863
9864	/*
9865	 * The control device is always connected.  The disk device, on the
9866	 * other hand, may not be online all the time.  Need to change this
9867	 * to figure out whether the disk device is actually online or not.
9868	 */
9869	if (lun != NULL)
9870		sfs_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9871				  lun->be_lun->lun_type;
9872	else
9873		sfs_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9874
9875	sfs_ptr->page_code = SVPD_SCSI_SFS;
9876	n = 0;
9877	/* Discovery 2016 */
9878	scsi_ulto2b(0x0001, &sfs_ptr->codes[2 * n++]);
9879	if (lun != NULL && lun->be_lun->lun_type == T_DIRECT) {
9880		 /* SBC Base 2016 */
9881		scsi_ulto2b(0x0101, &sfs_ptr->codes[2 * n++]);
9882		 /* SBC Base 2010 */
9883		scsi_ulto2b(0x0102, &sfs_ptr->codes[2 * n++]);
9884		if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) {
9885			/* Basic Provisioning 2016 */
9886			scsi_ulto2b(0x0103, &sfs_ptr->codes[2 * n++]);
9887		}
9888		/* Drive Maintenance 2016 */
9889		//scsi_ulto2b(0x0104, &sfs_ptr->codes[2 * n++]);
9890	}
9891	scsi_ulto2b(4 + 2 * n, sfs_ptr->page_length);
9892
9893	ctl_set_success(ctsio);
9894	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9895	ctsio->be_move_done = ctl_config_move_done;
9896	ctl_datamove((union ctl_io *)ctsio);
9897	return (CTL_RETVAL_COMPLETE);
9898}
9899
9900static int
9901ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len)
9902{
9903	struct ctl_lun *lun = CTL_LUN(ctsio);
9904	struct scsi_vpd_block_limits *bl_ptr;
9905	const char *val;
9906	uint64_t ival;
9907
9908	ctsio->kern_data_ptr = malloc(sizeof(*bl_ptr), M_CTL, M_WAITOK | M_ZERO);
9909	bl_ptr = (struct scsi_vpd_block_limits *)ctsio->kern_data_ptr;
9910	ctsio->kern_sg_entries = 0;
9911	ctsio->kern_rel_offset = 0;
9912	ctsio->kern_sg_entries = 0;
9913	ctsio->kern_data_len = min(sizeof(*bl_ptr), alloc_len);
9914	ctsio->kern_total_len = ctsio->kern_data_len;
9915
9916	/*
9917	 * The control device is always connected.  The disk device, on the
9918	 * other hand, may not be online all the time.  Need to change this
9919	 * to figure out whether the disk device is actually online or not.
9920	 */
9921	if (lun != NULL)
9922		bl_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9923				  lun->be_lun->lun_type;
9924	else
9925		bl_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
9926
9927	bl_ptr->page_code = SVPD_BLOCK_LIMITS;
9928	scsi_ulto2b(sizeof(*bl_ptr) - 4, bl_ptr->page_length);
9929	bl_ptr->max_cmp_write_len = 0xff;
9930	scsi_ulto4b(0xffffffff, bl_ptr->max_txfer_len);
9931	if (lun != NULL) {
9932		scsi_ulto4b(lun->be_lun->opttxferlen, bl_ptr->opt_txfer_len);
9933		if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) {
9934			ival = 0xffffffff;
9935			val = dnvlist_get_string(lun->be_lun->options,
9936			    "unmap_max_lba", NULL);
9937			if (val != NULL)
9938				ctl_expand_number(val, &ival);
9939			scsi_ulto4b(ival, bl_ptr->max_unmap_lba_cnt);
9940			ival = 0xffffffff;
9941			val = dnvlist_get_string(lun->be_lun->options,
9942			    "unmap_max_descr", NULL);
9943			if (val != NULL)
9944				ctl_expand_number(val, &ival);
9945			scsi_ulto4b(ival, bl_ptr->max_unmap_blk_cnt);
9946			if (lun->be_lun->ublockexp != 0) {
9947				scsi_ulto4b((1 << lun->be_lun->ublockexp),
9948				    bl_ptr->opt_unmap_grain);
9949				scsi_ulto4b(0x80000000 | lun->be_lun->ublockoff,
9950				    bl_ptr->unmap_grain_align);
9951			}
9952		}
9953		scsi_ulto4b(lun->be_lun->atomicblock,
9954		    bl_ptr->max_atomic_transfer_length);
9955		scsi_ulto4b(0, bl_ptr->atomic_alignment);
9956		scsi_ulto4b(0, bl_ptr->atomic_transfer_length_granularity);
9957		scsi_ulto4b(0, bl_ptr->max_atomic_transfer_length_with_atomic_boundary);
9958		scsi_ulto4b(0, bl_ptr->max_atomic_boundary_size);
9959		ival = UINT64_MAX;
9960		val = dnvlist_get_string(lun->be_lun->options,
9961		    "write_same_max_lba", NULL);
9962		if (val != NULL)
9963			ctl_expand_number(val, &ival);
9964		scsi_u64to8b(ival, bl_ptr->max_write_same_length);
9965		if (lun->be_lun->maxlba + 1 > ival)
9966			bl_ptr->flags |= SVPD_BL_WSNZ;
9967	}
9968
9969	ctl_set_success(ctsio);
9970	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
9971	ctsio->be_move_done = ctl_config_move_done;
9972	ctl_datamove((union ctl_io *)ctsio);
9973	return (CTL_RETVAL_COMPLETE);
9974}
9975
9976static int
9977ctl_inquiry_evpd_bdc(struct ctl_scsiio *ctsio, int alloc_len)
9978{
9979	struct ctl_lun *lun = CTL_LUN(ctsio);
9980	struct scsi_vpd_block_device_characteristics *bdc_ptr;
9981	const char *value;
9982	u_int i;
9983
9984	ctsio->kern_data_ptr = malloc(sizeof(*bdc_ptr), M_CTL, M_WAITOK | M_ZERO);
9985	bdc_ptr = (struct scsi_vpd_block_device_characteristics *)ctsio->kern_data_ptr;
9986	ctsio->kern_sg_entries = 0;
9987	ctsio->kern_rel_offset = 0;
9988	ctsio->kern_data_len = min(sizeof(*bdc_ptr), alloc_len);
9989	ctsio->kern_total_len = ctsio->kern_data_len;
9990
9991	/*
9992	 * The control device is always connected.  The disk device, on the
9993	 * other hand, may not be online all the time.  Need to change this
9994	 * to figure out whether the disk device is actually online or not.
9995	 */
9996	if (lun != NULL)
9997		bdc_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
9998				  lun->be_lun->lun_type;
9999	else
10000		bdc_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
10001	bdc_ptr->page_code = SVPD_BDC;
10002	scsi_ulto2b(sizeof(*bdc_ptr) - 4, bdc_ptr->page_length);
10003	if (lun != NULL &&
10004	    (value = dnvlist_get_string(lun->be_lun->options, "rpm", NULL)) != NULL)
10005		i = strtol(value, NULL, 0);
10006	else
10007		i = CTL_DEFAULT_ROTATION_RATE;
10008	scsi_ulto2b(i, bdc_ptr->medium_rotation_rate);
10009	if (lun != NULL &&
10010	    (value = dnvlist_get_string(lun->be_lun->options, "formfactor", NULL)) != NULL)
10011		i = strtol(value, NULL, 0);
10012	else
10013		i = 0;
10014	bdc_ptr->wab_wac_ff = (i & 0x0f);
10015	bdc_ptr->flags = SVPD_RBWZ | SVPD_FUAB | SVPD_VBULS;
10016
10017	ctl_set_success(ctsio);
10018	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10019	ctsio->be_move_done = ctl_config_move_done;
10020	ctl_datamove((union ctl_io *)ctsio);
10021	return (CTL_RETVAL_COMPLETE);
10022}
10023
10024static int
10025ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len)
10026{
10027	struct ctl_lun *lun = CTL_LUN(ctsio);
10028	struct scsi_vpd_logical_block_prov *lbp_ptr;
10029	const char *value;
10030
10031	ctsio->kern_data_ptr = malloc(sizeof(*lbp_ptr), M_CTL, M_WAITOK | M_ZERO);
10032	lbp_ptr = (struct scsi_vpd_logical_block_prov *)ctsio->kern_data_ptr;
10033	ctsio->kern_sg_entries = 0;
10034	ctsio->kern_rel_offset = 0;
10035	ctsio->kern_data_len = min(sizeof(*lbp_ptr), alloc_len);
10036	ctsio->kern_total_len = ctsio->kern_data_len;
10037
10038	/*
10039	 * The control device is always connected.  The disk device, on the
10040	 * other hand, may not be online all the time.  Need to change this
10041	 * to figure out whether the disk device is actually online or not.
10042	 */
10043	if (lun != NULL)
10044		lbp_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
10045				  lun->be_lun->lun_type;
10046	else
10047		lbp_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
10048
10049	lbp_ptr->page_code = SVPD_LBP;
10050	scsi_ulto2b(sizeof(*lbp_ptr) - 4, lbp_ptr->page_length);
10051	lbp_ptr->threshold_exponent = CTL_LBP_EXPONENT;
10052	if (lun != NULL && lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) {
10053		lbp_ptr->flags = SVPD_LBP_UNMAP | SVPD_LBP_WS16 |
10054		    SVPD_LBP_WS10 | SVPD_LBP_RZ | SVPD_LBP_ANC_SUP;
10055		value = dnvlist_get_string(lun->be_lun->options,
10056		    "provisioning_type", NULL);
10057		if (value != NULL) {
10058			if (strcmp(value, "resource") == 0)
10059				lbp_ptr->prov_type = SVPD_LBP_RESOURCE;
10060			else if (strcmp(value, "thin") == 0)
10061				lbp_ptr->prov_type = SVPD_LBP_THIN;
10062		} else
10063			lbp_ptr->prov_type = SVPD_LBP_THIN;
10064	}
10065
10066	ctl_set_success(ctsio);
10067	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10068	ctsio->be_move_done = ctl_config_move_done;
10069	ctl_datamove((union ctl_io *)ctsio);
10070	return (CTL_RETVAL_COMPLETE);
10071}
10072
10073/*
10074 * INQUIRY with the EVPD bit set.
10075 */
10076static int
10077ctl_inquiry_evpd(struct ctl_scsiio *ctsio)
10078{
10079	struct ctl_lun *lun = CTL_LUN(ctsio);
10080	struct scsi_inquiry *cdb;
10081	int alloc_len, retval;
10082
10083	cdb = (struct scsi_inquiry *)ctsio->cdb;
10084	alloc_len = scsi_2btoul(cdb->length);
10085
10086	switch (cdb->page_code) {
10087	case SVPD_SUPPORTED_PAGES:
10088		retval = ctl_inquiry_evpd_supported(ctsio, alloc_len);
10089		break;
10090	case SVPD_UNIT_SERIAL_NUMBER:
10091		retval = ctl_inquiry_evpd_serial(ctsio, alloc_len);
10092		break;
10093	case SVPD_DEVICE_ID:
10094		retval = ctl_inquiry_evpd_devid(ctsio, alloc_len);
10095		break;
10096	case SVPD_EXTENDED_INQUIRY_DATA:
10097		retval = ctl_inquiry_evpd_eid(ctsio, alloc_len);
10098		break;
10099	case SVPD_MODE_PAGE_POLICY:
10100		retval = ctl_inquiry_evpd_mpp(ctsio, alloc_len);
10101		break;
10102	case SVPD_SCSI_PORTS:
10103		retval = ctl_inquiry_evpd_scsi_ports(ctsio, alloc_len);
10104		break;
10105	case SVPD_SCSI_TPC:
10106		retval = ctl_inquiry_evpd_tpc(ctsio, alloc_len);
10107		break;
10108	case SVPD_SCSI_SFS:
10109		retval = ctl_inquiry_evpd_sfs(ctsio, alloc_len);
10110		break;
10111	case SVPD_BLOCK_LIMITS:
10112		if (lun == NULL || lun->be_lun->lun_type != T_DIRECT)
10113			goto err;
10114		retval = ctl_inquiry_evpd_block_limits(ctsio, alloc_len);
10115		break;
10116	case SVPD_BDC:
10117		if (lun == NULL || lun->be_lun->lun_type != T_DIRECT)
10118			goto err;
10119		retval = ctl_inquiry_evpd_bdc(ctsio, alloc_len);
10120		break;
10121	case SVPD_LBP:
10122		if (lun == NULL || lun->be_lun->lun_type != T_DIRECT)
10123			goto err;
10124		retval = ctl_inquiry_evpd_lbp(ctsio, alloc_len);
10125		break;
10126	default:
10127err:
10128		ctl_set_invalid_field(ctsio,
10129				      /*sks_valid*/ 1,
10130				      /*command*/ 1,
10131				      /*field*/ 2,
10132				      /*bit_valid*/ 0,
10133				      /*bit*/ 0);
10134		ctl_done((union ctl_io *)ctsio);
10135		retval = CTL_RETVAL_COMPLETE;
10136		break;
10137	}
10138
10139	return (retval);
10140}
10141
10142/*
10143 * Standard INQUIRY data.
10144 */
10145static int
10146ctl_inquiry_std(struct ctl_scsiio *ctsio)
10147{
10148	struct ctl_softc *softc = CTL_SOFTC(ctsio);
10149	struct ctl_port *port = CTL_PORT(ctsio);
10150	struct ctl_lun *lun = CTL_LUN(ctsio);
10151	struct scsi_inquiry_data *inq_ptr;
10152	struct scsi_inquiry *cdb;
10153	const char *val;
10154	uint32_t alloc_len, data_len;
10155	ctl_port_type port_type;
10156
10157	port_type = port->port_type;
10158	if (port_type == CTL_PORT_IOCTL || port_type == CTL_PORT_INTERNAL)
10159		port_type = CTL_PORT_SCSI;
10160
10161	cdb = (struct scsi_inquiry *)ctsio->cdb;
10162	alloc_len = scsi_2btoul(cdb->length);
10163
10164	/*
10165	 * We malloc the full inquiry data size here and fill it
10166	 * in.  If the user only asks for less, we'll give him
10167	 * that much.
10168	 */
10169	data_len = offsetof(struct scsi_inquiry_data, vendor_specific1);
10170	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
10171	inq_ptr = (struct scsi_inquiry_data *)ctsio->kern_data_ptr;
10172	ctsio->kern_sg_entries = 0;
10173	ctsio->kern_rel_offset = 0;
10174	ctsio->kern_data_len = min(data_len, alloc_len);
10175	ctsio->kern_total_len = ctsio->kern_data_len;
10176
10177	if (lun != NULL) {
10178		if ((lun->flags & CTL_LUN_PRIMARY_SC) ||
10179		    softc->ha_link >= CTL_HA_LINK_UNKNOWN) {
10180			inq_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
10181			    lun->be_lun->lun_type;
10182		} else {
10183			inq_ptr->device = (SID_QUAL_LU_OFFLINE << 5) |
10184			    lun->be_lun->lun_type;
10185		}
10186		if (lun->flags & CTL_LUN_REMOVABLE)
10187			inq_ptr->dev_qual2 |= SID_RMB;
10188	} else
10189		inq_ptr->device = (SID_QUAL_BAD_LU << 5) | T_NODEVICE;
10190
10191	/* RMB in byte 2 is 0 */
10192	inq_ptr->version = SCSI_REV_SPC5;
10193
10194	/*
10195	 * According to SAM-3, even if a device only supports a single
10196	 * level of LUN addressing, it should still set the HISUP bit:
10197	 *
10198	 * 4.9.1 Logical unit numbers overview
10199	 *
10200	 * All logical unit number formats described in this standard are
10201	 * hierarchical in structure even when only a single level in that
10202	 * hierarchy is used. The HISUP bit shall be set to one in the
10203	 * standard INQUIRY data (see SPC-2) when any logical unit number
10204	 * format described in this standard is used.  Non-hierarchical
10205	 * formats are outside the scope of this standard.
10206	 *
10207	 * Therefore we set the HiSup bit here.
10208	 *
10209	 * The response format is 2, per SPC-3.
10210	 */
10211	inq_ptr->response_format = SID_HiSup | 2;
10212
10213	inq_ptr->additional_length = data_len -
10214	    (offsetof(struct scsi_inquiry_data, additional_length) + 1);
10215	CTL_DEBUG_PRINT(("additional_length = %d\n",
10216			 inq_ptr->additional_length));
10217
10218	inq_ptr->spc3_flags = SPC3_SID_3PC | SPC3_SID_TPGS_IMPLICIT;
10219	if (port_type == CTL_PORT_SCSI)
10220		inq_ptr->spc2_flags = SPC2_SID_ADDR16;
10221	inq_ptr->spc2_flags |= SPC2_SID_MultiP;
10222	inq_ptr->flags = SID_CmdQue;
10223	if (port_type == CTL_PORT_SCSI)
10224		inq_ptr->flags |= SID_WBus16 | SID_Sync;
10225
10226	/*
10227	 * Per SPC-3, unused bytes in ASCII strings are filled with spaces.
10228	 * We have 8 bytes for the vendor name, and 16 bytes for the device
10229	 * name and 4 bytes for the revision.
10230	 */
10231	if (lun == NULL || (val = dnvlist_get_string(lun->be_lun->options,
10232	    "vendor", NULL)) == NULL) {
10233		strncpy(inq_ptr->vendor, CTL_VENDOR, sizeof(inq_ptr->vendor));
10234	} else {
10235		memset(inq_ptr->vendor, ' ', sizeof(inq_ptr->vendor));
10236		strncpy(inq_ptr->vendor, val,
10237		    min(sizeof(inq_ptr->vendor), strlen(val)));
10238	}
10239	if (lun == NULL) {
10240		strncpy(inq_ptr->product, CTL_DIRECT_PRODUCT,
10241		    sizeof(inq_ptr->product));
10242	} else if ((val = dnvlist_get_string(lun->be_lun->options, "product",
10243	    NULL)) == NULL) {
10244		switch (lun->be_lun->lun_type) {
10245		case T_DIRECT:
10246			strncpy(inq_ptr->product, CTL_DIRECT_PRODUCT,
10247			    sizeof(inq_ptr->product));
10248			break;
10249		case T_PROCESSOR:
10250			strncpy(inq_ptr->product, CTL_PROCESSOR_PRODUCT,
10251			    sizeof(inq_ptr->product));
10252			break;
10253		case T_CDROM:
10254			strncpy(inq_ptr->product, CTL_CDROM_PRODUCT,
10255			    sizeof(inq_ptr->product));
10256			break;
10257		default:
10258			strncpy(inq_ptr->product, CTL_UNKNOWN_PRODUCT,
10259			    sizeof(inq_ptr->product));
10260			break;
10261		}
10262	} else {
10263		memset(inq_ptr->product, ' ', sizeof(inq_ptr->product));
10264		strncpy(inq_ptr->product, val,
10265		    min(sizeof(inq_ptr->product), strlen(val)));
10266	}
10267
10268	/*
10269	 * XXX make this a macro somewhere so it automatically gets
10270	 * incremented when we make changes.
10271	 */
10272	if (lun == NULL || (val = dnvlist_get_string(lun->be_lun->options,
10273	    "revision", NULL)) == NULL) {
10274		strncpy(inq_ptr->revision, "0001", sizeof(inq_ptr->revision));
10275	} else {
10276		memset(inq_ptr->revision, ' ', sizeof(inq_ptr->revision));
10277		strncpy(inq_ptr->revision, val,
10278		    min(sizeof(inq_ptr->revision), strlen(val)));
10279	}
10280
10281	/*
10282	 * For parallel SCSI, we support double transition and single
10283	 * transition clocking.  We also support QAS (Quick Arbitration
10284	 * and Selection) and Information Unit transfers on both the
10285	 * control and array devices.
10286	 */
10287	if (port_type == CTL_PORT_SCSI)
10288		inq_ptr->spi3data = SID_SPI_CLOCK_DT_ST | SID_SPI_QAS |
10289				    SID_SPI_IUS;
10290
10291	/* SAM-6 (no version claimed) */
10292	scsi_ulto2b(0x00C0, inq_ptr->version1);
10293	/* SPC-5 (no version claimed) */
10294	scsi_ulto2b(0x05C0, inq_ptr->version2);
10295	if (port_type == CTL_PORT_FC) {
10296		/* FCP-2 ANSI INCITS.350:2003 */
10297		scsi_ulto2b(0x0917, inq_ptr->version3);
10298	} else if (port_type == CTL_PORT_SCSI) {
10299		/* SPI-4 ANSI INCITS.362:200x */
10300		scsi_ulto2b(0x0B56, inq_ptr->version3);
10301	} else if (port_type == CTL_PORT_ISCSI) {
10302		/* iSCSI (no version claimed) */
10303		scsi_ulto2b(0x0960, inq_ptr->version3);
10304	} else if (port_type == CTL_PORT_SAS) {
10305		/* SAS (no version claimed) */
10306		scsi_ulto2b(0x0BE0, inq_ptr->version3);
10307	} else if (port_type == CTL_PORT_UMASS) {
10308		/* USB Mass Storage Class Bulk-Only Transport, Revision 1.0 */
10309		scsi_ulto2b(0x1730, inq_ptr->version3);
10310	}
10311
10312	if (lun == NULL) {
10313		/* SBC-4 (no version claimed) */
10314		scsi_ulto2b(0x0600, inq_ptr->version4);
10315	} else {
10316		switch (lun->be_lun->lun_type) {
10317		case T_DIRECT:
10318			/* SBC-4 (no version claimed) */
10319			scsi_ulto2b(0x0600, inq_ptr->version4);
10320			break;
10321		case T_PROCESSOR:
10322			break;
10323		case T_CDROM:
10324			/* MMC-6 (no version claimed) */
10325			scsi_ulto2b(0x04E0, inq_ptr->version4);
10326			break;
10327		default:
10328			break;
10329		}
10330	}
10331
10332	ctl_set_success(ctsio);
10333	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10334	ctsio->be_move_done = ctl_config_move_done;
10335	ctl_datamove((union ctl_io *)ctsio);
10336	return (CTL_RETVAL_COMPLETE);
10337}
10338
10339int
10340ctl_inquiry(struct ctl_scsiio *ctsio)
10341{
10342	struct scsi_inquiry *cdb;
10343	int retval;
10344
10345	CTL_DEBUG_PRINT(("ctl_inquiry\n"));
10346
10347	cdb = (struct scsi_inquiry *)ctsio->cdb;
10348	if (cdb->byte2 & SI_EVPD)
10349		retval = ctl_inquiry_evpd(ctsio);
10350	else if (cdb->page_code == 0)
10351		retval = ctl_inquiry_std(ctsio);
10352	else {
10353		ctl_set_invalid_field(ctsio,
10354				      /*sks_valid*/ 1,
10355				      /*command*/ 1,
10356				      /*field*/ 2,
10357				      /*bit_valid*/ 0,
10358				      /*bit*/ 0);
10359		ctl_done((union ctl_io *)ctsio);
10360		return (CTL_RETVAL_COMPLETE);
10361	}
10362
10363	return (retval);
10364}
10365
10366int
10367ctl_get_config(struct ctl_scsiio *ctsio)
10368{
10369	struct ctl_lun *lun = CTL_LUN(ctsio);
10370	struct scsi_get_config_header *hdr;
10371	struct scsi_get_config_feature *feature;
10372	struct scsi_get_config *cdb;
10373	uint32_t alloc_len, data_len;
10374	int rt, starting;
10375
10376	cdb = (struct scsi_get_config *)ctsio->cdb;
10377	rt = (cdb->rt & SGC_RT_MASK);
10378	starting = scsi_2btoul(cdb->starting_feature);
10379	alloc_len = scsi_2btoul(cdb->length);
10380
10381	data_len = sizeof(struct scsi_get_config_header) +
10382	    sizeof(struct scsi_get_config_feature) + 8 +
10383	    sizeof(struct scsi_get_config_feature) + 8 +
10384	    sizeof(struct scsi_get_config_feature) + 4 +
10385	    sizeof(struct scsi_get_config_feature) + 4 +
10386	    sizeof(struct scsi_get_config_feature) + 8 +
10387	    sizeof(struct scsi_get_config_feature) +
10388	    sizeof(struct scsi_get_config_feature) + 4 +
10389	    sizeof(struct scsi_get_config_feature) + 4 +
10390	    sizeof(struct scsi_get_config_feature) + 4 +
10391	    sizeof(struct scsi_get_config_feature) + 4 +
10392	    sizeof(struct scsi_get_config_feature) + 4 +
10393	    sizeof(struct scsi_get_config_feature) + 4;
10394	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
10395	ctsio->kern_sg_entries = 0;
10396	ctsio->kern_rel_offset = 0;
10397
10398	hdr = (struct scsi_get_config_header *)ctsio->kern_data_ptr;
10399	if (lun->flags & CTL_LUN_NO_MEDIA)
10400		scsi_ulto2b(0x0000, hdr->current_profile);
10401	else
10402		scsi_ulto2b(0x0010, hdr->current_profile);
10403	feature = (struct scsi_get_config_feature *)(hdr + 1);
10404
10405	if (starting > 0x003b)
10406		goto done;
10407	if (starting > 0x003a)
10408		goto f3b;
10409	if (starting > 0x002b)
10410		goto f3a;
10411	if (starting > 0x002a)
10412		goto f2b;
10413	if (starting > 0x001f)
10414		goto f2a;
10415	if (starting > 0x001e)
10416		goto f1f;
10417	if (starting > 0x001d)
10418		goto f1e;
10419	if (starting > 0x0010)
10420		goto f1d;
10421	if (starting > 0x0003)
10422		goto f10;
10423	if (starting > 0x0002)
10424		goto f3;
10425	if (starting > 0x0001)
10426		goto f2;
10427	if (starting > 0x0000)
10428		goto f1;
10429
10430	/* Profile List */
10431	scsi_ulto2b(0x0000, feature->feature_code);
10432	feature->flags = SGC_F_PERSISTENT | SGC_F_CURRENT;
10433	feature->add_length = 8;
10434	scsi_ulto2b(0x0008, &feature->feature_data[0]);	/* CD-ROM */
10435	feature->feature_data[2] = 0x00;
10436	scsi_ulto2b(0x0010, &feature->feature_data[4]);	/* DVD-ROM */
10437	feature->feature_data[6] = 0x01;
10438	feature = (struct scsi_get_config_feature *)
10439	    &feature->feature_data[feature->add_length];
10440
10441f1:	/* Core */
10442	scsi_ulto2b(0x0001, feature->feature_code);
10443	feature->flags = 0x08 | SGC_F_PERSISTENT | SGC_F_CURRENT;
10444	feature->add_length = 8;
10445	scsi_ulto4b(0x00000000, &feature->feature_data[0]);
10446	feature->feature_data[4] = 0x03;
10447	feature = (struct scsi_get_config_feature *)
10448	    &feature->feature_data[feature->add_length];
10449
10450f2:	/* Morphing */
10451	scsi_ulto2b(0x0002, feature->feature_code);
10452	feature->flags = 0x04 | SGC_F_PERSISTENT | SGC_F_CURRENT;
10453	feature->add_length = 4;
10454	feature->feature_data[0] = 0x02;
10455	feature = (struct scsi_get_config_feature *)
10456	    &feature->feature_data[feature->add_length];
10457
10458f3:	/* Removable Medium */
10459	scsi_ulto2b(0x0003, feature->feature_code);
10460	feature->flags = 0x04 | SGC_F_PERSISTENT | SGC_F_CURRENT;
10461	feature->add_length = 4;
10462	feature->feature_data[0] = 0x39;
10463	feature = (struct scsi_get_config_feature *)
10464	    &feature->feature_data[feature->add_length];
10465
10466	if (rt == SGC_RT_CURRENT && (lun->flags & CTL_LUN_NO_MEDIA))
10467		goto done;
10468
10469f10:	/* Random Read */
10470	scsi_ulto2b(0x0010, feature->feature_code);
10471	feature->flags = 0x00;
10472	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10473		feature->flags |= SGC_F_CURRENT;
10474	feature->add_length = 8;
10475	scsi_ulto4b(lun->be_lun->blocksize, &feature->feature_data[0]);
10476	scsi_ulto2b(1, &feature->feature_data[4]);
10477	feature->feature_data[6] = 0x00;
10478	feature = (struct scsi_get_config_feature *)
10479	    &feature->feature_data[feature->add_length];
10480
10481f1d:	/* Multi-Read */
10482	scsi_ulto2b(0x001D, feature->feature_code);
10483	feature->flags = 0x00;
10484	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10485		feature->flags |= SGC_F_CURRENT;
10486	feature->add_length = 0;
10487	feature = (struct scsi_get_config_feature *)
10488	    &feature->feature_data[feature->add_length];
10489
10490f1e:	/* CD Read */
10491	scsi_ulto2b(0x001E, feature->feature_code);
10492	feature->flags = 0x00;
10493	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10494		feature->flags |= SGC_F_CURRENT;
10495	feature->add_length = 4;
10496	feature->feature_data[0] = 0x00;
10497	feature = (struct scsi_get_config_feature *)
10498	    &feature->feature_data[feature->add_length];
10499
10500f1f:	/* DVD Read */
10501	scsi_ulto2b(0x001F, feature->feature_code);
10502	feature->flags = 0x08;
10503	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10504		feature->flags |= SGC_F_CURRENT;
10505	feature->add_length = 4;
10506	feature->feature_data[0] = 0x01;
10507	feature->feature_data[2] = 0x03;
10508	feature = (struct scsi_get_config_feature *)
10509	    &feature->feature_data[feature->add_length];
10510
10511f2a:	/* DVD+RW */
10512	scsi_ulto2b(0x002A, feature->feature_code);
10513	feature->flags = 0x04;
10514	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10515		feature->flags |= SGC_F_CURRENT;
10516	feature->add_length = 4;
10517	feature->feature_data[0] = 0x00;
10518	feature->feature_data[1] = 0x00;
10519	feature = (struct scsi_get_config_feature *)
10520	    &feature->feature_data[feature->add_length];
10521
10522f2b:	/* DVD+R */
10523	scsi_ulto2b(0x002B, feature->feature_code);
10524	feature->flags = 0x00;
10525	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10526		feature->flags |= SGC_F_CURRENT;
10527	feature->add_length = 4;
10528	feature->feature_data[0] = 0x00;
10529	feature = (struct scsi_get_config_feature *)
10530	    &feature->feature_data[feature->add_length];
10531
10532f3a:	/* DVD+RW Dual Layer */
10533	scsi_ulto2b(0x003A, feature->feature_code);
10534	feature->flags = 0x00;
10535	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10536		feature->flags |= SGC_F_CURRENT;
10537	feature->add_length = 4;
10538	feature->feature_data[0] = 0x00;
10539	feature->feature_data[1] = 0x00;
10540	feature = (struct scsi_get_config_feature *)
10541	    &feature->feature_data[feature->add_length];
10542
10543f3b:	/* DVD+R Dual Layer */
10544	scsi_ulto2b(0x003B, feature->feature_code);
10545	feature->flags = 0x00;
10546	if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
10547		feature->flags |= SGC_F_CURRENT;
10548	feature->add_length = 4;
10549	feature->feature_data[0] = 0x00;
10550	feature = (struct scsi_get_config_feature *)
10551	    &feature->feature_data[feature->add_length];
10552
10553done:
10554	data_len = (uint8_t *)feature - (uint8_t *)hdr;
10555	if (rt == SGC_RT_SPECIFIC && data_len > 4) {
10556		feature = (struct scsi_get_config_feature *)(hdr + 1);
10557		if (scsi_2btoul(feature->feature_code) == starting)
10558			feature = (struct scsi_get_config_feature *)
10559			    &feature->feature_data[feature->add_length];
10560		data_len = (uint8_t *)feature - (uint8_t *)hdr;
10561	}
10562	scsi_ulto4b(data_len - 4, hdr->data_length);
10563	ctsio->kern_data_len = min(data_len, alloc_len);
10564	ctsio->kern_total_len = ctsio->kern_data_len;
10565
10566	ctl_set_success(ctsio);
10567	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10568	ctsio->be_move_done = ctl_config_move_done;
10569	ctl_datamove((union ctl_io *)ctsio);
10570	return (CTL_RETVAL_COMPLETE);
10571}
10572
10573int
10574ctl_get_event_status(struct ctl_scsiio *ctsio)
10575{
10576	struct scsi_get_event_status_header *hdr;
10577	struct scsi_get_event_status *cdb;
10578	uint32_t alloc_len, data_len;
10579
10580	cdb = (struct scsi_get_event_status *)ctsio->cdb;
10581	if ((cdb->byte2 & SGESN_POLLED) == 0) {
10582		ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1,
10583		    /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0);
10584		ctl_done((union ctl_io *)ctsio);
10585		return (CTL_RETVAL_COMPLETE);
10586	}
10587	alloc_len = scsi_2btoul(cdb->length);
10588
10589	data_len = sizeof(struct scsi_get_event_status_header);
10590	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
10591	ctsio->kern_sg_entries = 0;
10592	ctsio->kern_rel_offset = 0;
10593	ctsio->kern_data_len = min(data_len, alloc_len);
10594	ctsio->kern_total_len = ctsio->kern_data_len;
10595
10596	hdr = (struct scsi_get_event_status_header *)ctsio->kern_data_ptr;
10597	scsi_ulto2b(0, hdr->descr_length);
10598	hdr->nea_class = SGESN_NEA;
10599	hdr->supported_class = 0;
10600
10601	ctl_set_success(ctsio);
10602	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10603	ctsio->be_move_done = ctl_config_move_done;
10604	ctl_datamove((union ctl_io *)ctsio);
10605	return (CTL_RETVAL_COMPLETE);
10606}
10607
10608int
10609ctl_mechanism_status(struct ctl_scsiio *ctsio)
10610{
10611	struct scsi_mechanism_status_header *hdr;
10612	struct scsi_mechanism_status *cdb;
10613	uint32_t alloc_len, data_len;
10614
10615	cdb = (struct scsi_mechanism_status *)ctsio->cdb;
10616	alloc_len = scsi_2btoul(cdb->length);
10617
10618	data_len = sizeof(struct scsi_mechanism_status_header);
10619	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
10620	ctsio->kern_sg_entries = 0;
10621	ctsio->kern_rel_offset = 0;
10622	ctsio->kern_data_len = min(data_len, alloc_len);
10623	ctsio->kern_total_len = ctsio->kern_data_len;
10624
10625	hdr = (struct scsi_mechanism_status_header *)ctsio->kern_data_ptr;
10626	hdr->state1 = 0x00;
10627	hdr->state2 = 0xe0;
10628	scsi_ulto3b(0, hdr->lba);
10629	hdr->slots_num = 0;
10630	scsi_ulto2b(0, hdr->slots_length);
10631
10632	ctl_set_success(ctsio);
10633	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10634	ctsio->be_move_done = ctl_config_move_done;
10635	ctl_datamove((union ctl_io *)ctsio);
10636	return (CTL_RETVAL_COMPLETE);
10637}
10638
10639static void
10640ctl_ultomsf(uint32_t lba, uint8_t *buf)
10641{
10642
10643	lba += 150;
10644	buf[0] = 0;
10645	buf[1] = bin2bcd((lba / 75) / 60);
10646	buf[2] = bin2bcd((lba / 75) % 60);
10647	buf[3] = bin2bcd(lba % 75);
10648}
10649
10650int
10651ctl_read_toc(struct ctl_scsiio *ctsio)
10652{
10653	struct ctl_lun *lun = CTL_LUN(ctsio);
10654	struct scsi_read_toc_hdr *hdr;
10655	struct scsi_read_toc_type01_descr *descr;
10656	struct scsi_read_toc *cdb;
10657	uint32_t alloc_len, data_len;
10658	int format, msf;
10659
10660	cdb = (struct scsi_read_toc *)ctsio->cdb;
10661	msf = (cdb->byte2 & CD_MSF) != 0;
10662	format = cdb->format;
10663	alloc_len = scsi_2btoul(cdb->data_len);
10664
10665	data_len = sizeof(struct scsi_read_toc_hdr);
10666	if (format == 0)
10667		data_len += 2 * sizeof(struct scsi_read_toc_type01_descr);
10668	else
10669		data_len += sizeof(struct scsi_read_toc_type01_descr);
10670	ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
10671	ctsio->kern_sg_entries = 0;
10672	ctsio->kern_rel_offset = 0;
10673	ctsio->kern_data_len = min(data_len, alloc_len);
10674	ctsio->kern_total_len = ctsio->kern_data_len;
10675
10676	hdr = (struct scsi_read_toc_hdr *)ctsio->kern_data_ptr;
10677	if (format == 0) {
10678		scsi_ulto2b(0x12, hdr->data_length);
10679		hdr->first = 1;
10680		hdr->last = 1;
10681		descr = (struct scsi_read_toc_type01_descr *)(hdr + 1);
10682		descr->addr_ctl = 0x14;
10683		descr->track_number = 1;
10684		if (msf)
10685			ctl_ultomsf(0, descr->track_start);
10686		else
10687			scsi_ulto4b(0, descr->track_start);
10688		descr++;
10689		descr->addr_ctl = 0x14;
10690		descr->track_number = 0xaa;
10691		if (msf)
10692			ctl_ultomsf(lun->be_lun->maxlba+1, descr->track_start);
10693		else
10694			scsi_ulto4b(lun->be_lun->maxlba+1, descr->track_start);
10695	} else {
10696		scsi_ulto2b(0x0a, hdr->data_length);
10697		hdr->first = 1;
10698		hdr->last = 1;
10699		descr = (struct scsi_read_toc_type01_descr *)(hdr + 1);
10700		descr->addr_ctl = 0x14;
10701		descr->track_number = 1;
10702		if (msf)
10703			ctl_ultomsf(0, descr->track_start);
10704		else
10705			scsi_ulto4b(0, descr->track_start);
10706	}
10707
10708	ctl_set_success(ctsio);
10709	ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10710	ctsio->be_move_done = ctl_config_move_done;
10711	ctl_datamove((union ctl_io *)ctsio);
10712	return (CTL_RETVAL_COMPLETE);
10713}
10714
10715/*
10716 * For NVMe commands, parse the LBA and length.
10717 */
10718static bool
10719ctl_nvme_get_lba_len(struct ctl_nvmeio *ctnio, uint64_t *lba, uint32_t *len)
10720{
10721	CTL_IO_ASSERT(ctnio, NVME);
10722
10723	switch (ctnio->cmd.opc) {
10724	case NVME_OPC_WRITE:
10725	case NVME_OPC_READ:
10726	case NVME_OPC_WRITE_UNCORRECTABLE:
10727	case NVME_OPC_COMPARE:
10728	case NVME_OPC_WRITE_ZEROES:
10729	case NVME_OPC_VERIFY:
10730		*lba = (uint64_t)le32toh(ctnio->cmd.cdw11) << 32 |
10731		    le32toh(ctnio->cmd.cdw10);
10732		*len = (le32toh(ctnio->cmd.cdw12) & 0xffff) + 1;
10733		return (true);
10734	default:
10735		*lba = 0;
10736		*len = 0;
10737		return (false);
10738	}
10739}
10740
10741static bool
10742ctl_nvme_fua(struct ctl_nvmeio *ctnio)
10743{
10744	return ((le32toh(ctnio->cmd.cdw12) & (1U << 30)) != 0);
10745}
10746
10747int
10748ctl_nvme_identify(struct ctl_nvmeio *ctnio)
10749{
10750	struct ctl_lun *lun = CTL_LUN(ctnio);
10751	size_t len;
10752	int retval;
10753	uint8_t cns;
10754
10755	CTL_DEBUG_PRINT(("ctl_nvme_identify\n"));
10756
10757	CTL_IO_ASSERT(ctnio, NVME_ADMIN);
10758	MPASS(ctnio->cmd.opc == NVME_OPC_IDENTIFY);
10759
10760	/*
10761	 * The data buffer for Identify is always 4096 bytes, see
10762	 * 5.51.1 in NVMe base specification 1.4.
10763	 */
10764	len = 4096;
10765
10766	ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
10767	ctnio->kern_data_len = len;
10768	ctnio->kern_total_len = len;
10769	ctnio->kern_rel_offset = 0;
10770	ctnio->kern_sg_entries = 0;
10771
10772	ctl_nvme_set_success(ctnio);
10773	ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
10774	ctnio->be_move_done = ctl_config_move_done;
10775
10776	/*
10777	 * If we don't have a LUN, return an empty result for CNS == 0.
10778	 */
10779	if (lun == NULL) {
10780		cns = le32toh(ctnio->cmd.cdw10) & 0xff;
10781		switch (cns) {
10782		case 0:
10783			memset(ctnio->kern_data_ptr, 0, len);
10784			ctl_datamove((union ctl_io *)ctnio);
10785			break;
10786		default:
10787			ctl_nvme_set_invalid_field(ctnio);
10788			break;
10789		}
10790		return (CTL_RETVAL_COMPLETE);
10791	}
10792
10793	retval = lun->backend->config_read((union ctl_io *)ctnio);
10794	return (retval);
10795}
10796
10797int
10798ctl_nvme_flush(struct ctl_nvmeio *ctnio)
10799{
10800	struct ctl_lun *lun = CTL_LUN(ctnio);
10801	int retval;
10802
10803	CTL_DEBUG_PRINT(("ctl_nvme_flush\n"));
10804
10805	CTL_IO_ASSERT(ctnio, NVME);
10806	MPASS(ctnio->cmd.opc == NVME_OPC_FLUSH);
10807
10808	/*
10809	 * NVMe flushes always flush the entire namespace, not an LBA
10810	 * range.
10811	 */
10812	retval = lun->backend->config_write((union ctl_io *)ctnio);
10813
10814	return (retval);
10815}
10816
10817int
10818ctl_nvme_read_write(struct ctl_nvmeio *ctnio)
10819{
10820	struct ctl_lun *lun = CTL_LUN(ctnio);
10821	struct ctl_lba_len_flags *lbalen;
10822	uint64_t lba;
10823	uint32_t num_blocks;
10824	int flags, retval;
10825	bool isread;
10826
10827	CTL_DEBUG_PRINT(("ctl_nvme_read_write: command: %#x\n",
10828	    ctnio->cmd.opc));
10829
10830	CTL_IO_ASSERT(ctnio, NVME);
10831	MPASS(ctnio->cmd.opc == NVME_OPC_WRITE ||
10832	    ctnio->cmd.opc == NVME_OPC_READ);
10833
10834	flags = 0;
10835	isread = ctnio->cmd.opc == NVME_OPC_READ;
10836	ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
10837
10838	/*
10839	 * The first check is to make sure we're in bounds, the second
10840	 * check is to catch wrap-around problems.  If the lba + num blocks
10841	 * is less than the lba, then we've wrapped around and the block
10842	 * range is invalid anyway.
10843	 */
10844	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
10845	 || ((lba + num_blocks) < lba)) {
10846		ctl_nvme_set_lba_out_of_range(ctnio);
10847		ctl_done((union ctl_io *)ctnio);
10848		return (CTL_RETVAL_COMPLETE);
10849	}
10850
10851	/*
10852	 * Set FUA and/or DPO if caches are disabled.
10853	 *
10854	 * For a read this may not be quite correct for the block
10855	 * backend as any earlier writes to the LBA range should be
10856	 * flushed to backing store as part of the read.
10857	 */
10858	if (ctl_nvme_fua(ctnio)) {
10859		flags |= CTL_LLF_FUA;
10860		if (isread)
10861			flags |= CTL_LLF_DPO;
10862	}
10863
10864	lbalen = (struct ctl_lba_len_flags *)
10865	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
10866	lbalen->lba = lba;
10867	lbalen->len = num_blocks;
10868	lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags;
10869
10870	ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize;
10871	ctnio->kern_rel_offset = 0;
10872
10873	CTL_DEBUG_PRINT(("ctl_nvme_read_write: calling data_submit()\n"));
10874
10875	retval = lun->backend->data_submit((union ctl_io *)ctnio);
10876	return (retval);
10877}
10878
10879int
10880ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio)
10881{
10882	struct ctl_lun *lun = CTL_LUN(ctnio);
10883	struct ctl_lba_len_flags *lbalen;
10884	uint64_t lba;
10885	uint32_t num_blocks;
10886	int retval;
10887
10888	CTL_DEBUG_PRINT(("ctl_nvme_write_uncorrectable\n"));
10889
10890	CTL_IO_ASSERT(ctnio, NVME);
10891	MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_UNCORRECTABLE);
10892
10893	ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
10894
10895	/*
10896	 * The first check is to make sure we're in bounds, the second
10897	 * check is to catch wrap-around problems.  If the lba + num blocks
10898	 * is less than the lba, then we've wrapped around and the block
10899	 * range is invalid anyway.
10900	 */
10901	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
10902	 || ((lba + num_blocks) < lba)) {
10903		ctl_nvme_set_lba_out_of_range(ctnio);
10904		ctl_done((union ctl_io *)ctnio);
10905		return (CTL_RETVAL_COMPLETE);
10906	}
10907
10908	lbalen = (struct ctl_lba_len_flags *)
10909	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
10910	lbalen->lba = lba;
10911	lbalen->len = num_blocks;
10912	lbalen->flags = 0;
10913	retval = lun->backend->config_write((union ctl_io *)ctnio);
10914
10915	return (retval);
10916}
10917
10918int
10919ctl_nvme_compare(struct ctl_nvmeio *ctnio)
10920{
10921	struct ctl_lun *lun = CTL_LUN(ctnio);
10922	struct ctl_lba_len_flags *lbalen;
10923	uint64_t lba;
10924	uint32_t num_blocks;
10925	int flags;
10926	int retval;
10927
10928	CTL_DEBUG_PRINT(("ctl_nvme_compare\n"));
10929
10930	CTL_IO_ASSERT(ctnio, NVME);
10931	MPASS(ctnio->cmd.opc == NVME_OPC_COMPARE);
10932
10933	flags = 0;
10934	ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
10935	if (ctl_nvme_fua(ctnio))
10936		flags |= CTL_LLF_FUA;
10937
10938	/*
10939	 * The first check is to make sure we're in bounds, the second
10940	 * check is to catch wrap-around problems.  If the lba + num blocks
10941	 * is less than the lba, then we've wrapped around and the block
10942	 * range is invalid anyway.
10943	 */
10944	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
10945	 || ((lba + num_blocks) < lba)) {
10946		ctl_nvme_set_lba_out_of_range(ctnio);
10947		ctl_done((union ctl_io *)ctnio);
10948		return (CTL_RETVAL_COMPLETE);
10949	}
10950
10951	lbalen = (struct ctl_lba_len_flags *)
10952	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
10953	lbalen->lba = lba;
10954	lbalen->len = num_blocks;
10955	lbalen->flags = CTL_LLF_COMPARE | flags;
10956	ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize;
10957	ctnio->kern_rel_offset = 0;
10958
10959	CTL_DEBUG_PRINT(("ctl_nvme_compare: calling data_submit()\n"));
10960	retval = lun->backend->data_submit((union ctl_io *)ctnio);
10961	return (retval);
10962}
10963
10964int
10965ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio)
10966{
10967	struct ctl_lun *lun = CTL_LUN(ctnio);
10968	struct ctl_lba_len_flags *lbalen;
10969	uint64_t lba;
10970	uint32_t num_blocks;
10971	int retval;
10972
10973	CTL_DEBUG_PRINT(("ctl_nvme_write_zeroes\n"));
10974
10975	CTL_IO_ASSERT(ctnio, NVME);
10976	MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_ZEROES);
10977
10978	ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
10979
10980	/*
10981	 * The first check is to make sure we're in bounds, the second
10982	 * check is to catch wrap-around problems.  If the lba + num blocks
10983	 * is less than the lba, then we've wrapped around and the block
10984	 * range is invalid anyway.
10985	 */
10986	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
10987	 || ((lba + num_blocks) < lba)) {
10988		ctl_nvme_set_lba_out_of_range(ctnio);
10989		ctl_done((union ctl_io *)ctnio);
10990		return (CTL_RETVAL_COMPLETE);
10991	}
10992
10993	lbalen = (struct ctl_lba_len_flags *)
10994	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
10995	lbalen->lba = lba;
10996	lbalen->len = num_blocks;
10997	lbalen->flags = 0;
10998	retval = lun->backend->config_write((union ctl_io *)ctnio);
10999
11000	return (retval);
11001}
11002
11003int
11004ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio)
11005{
11006	struct ctl_lun *lun = CTL_LUN(ctnio);
11007	struct nvme_dsm_range *r;
11008	uint64_t lba;
11009	uint32_t len, num_blocks;
11010	u_int i, ranges;
11011	int retval;
11012
11013	CTL_DEBUG_PRINT(("ctl_nvme_dataset_management\n"));
11014
11015	CTL_IO_ASSERT(ctnio, NVME);
11016	MPASS(ctnio->cmd.opc == NVME_OPC_DATASET_MANAGEMENT);
11017
11018	ranges = le32toh(ctnio->cmd.cdw10) & 0xff;
11019	len = ranges * sizeof(struct nvme_dsm_range);
11020
11021	/*
11022	 * If we've got a kernel request that hasn't been malloced yet,
11023	 * malloc it and tell the caller the data buffer is here.
11024	 */
11025	if ((ctnio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
11026		ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
11027		ctnio->kern_data_len = len;
11028		ctnio->kern_total_len = len;
11029		ctnio->kern_rel_offset = 0;
11030		ctnio->kern_sg_entries = 0;
11031		ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
11032		ctnio->be_move_done = ctl_config_move_done;
11033		ctl_datamove((union ctl_io *)ctnio);
11034
11035		return (CTL_RETVAL_COMPLETE);
11036	}
11037
11038	/*
11039	 * Require a flat buffer of the correct size.
11040	 */
11041	if (ctnio->kern_sg_entries > 0 ||
11042	    ctnio->kern_total_len - ctnio->kern_data_resid != len)
11043		return (CTL_RETVAL_ERROR);
11044
11045	/*
11046	 * Verify that none of the ranges are out of bounds.
11047	 */
11048	r = (struct nvme_dsm_range *)ctnio->kern_data_ptr;
11049	for (i = 0; i < ranges; i++) {
11050		lba = le64toh(r[i].starting_lba);
11051		num_blocks = le32toh(r[i].length);
11052		if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
11053		    || ((lba + num_blocks) < lba)) {
11054			ctl_nvme_set_lba_out_of_range(ctnio);
11055			ctl_done((union ctl_io *)ctnio);
11056			return (CTL_RETVAL_COMPLETE);
11057		}
11058	}
11059
11060	CTL_DEBUG_PRINT(("ctl_nvme_dataset_management: calling config_write()\n"));
11061	retval = lun->backend->config_write((union ctl_io *)ctnio);
11062	return (retval);
11063}
11064
11065int
11066ctl_nvme_verify(struct ctl_nvmeio *ctnio)
11067{
11068	struct ctl_lun *lun = CTL_LUN(ctnio);
11069	struct ctl_lba_len_flags *lbalen;
11070	uint64_t lba;
11071	uint32_t num_blocks;
11072	int flags;
11073	int retval;
11074
11075	CTL_DEBUG_PRINT(("ctl_nvme_verify\n"));
11076
11077	CTL_IO_ASSERT(ctnio, NVME);
11078	MPASS(ctnio->cmd.opc == NVME_OPC_VERIFY);
11079
11080	flags = 0;
11081	ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
11082	if (ctl_nvme_fua(ctnio))
11083		flags |= CTL_LLF_FUA;
11084
11085	/*
11086	 * The first check is to make sure we're in bounds, the second
11087	 * check is to catch wrap-around problems.  If the lba + num blocks
11088	 * is less than the lba, then we've wrapped around and the block
11089	 * range is invalid anyway.
11090	 */
11091	if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
11092	 || ((lba + num_blocks) < lba)) {
11093		ctl_nvme_set_lba_out_of_range(ctnio);
11094		ctl_done((union ctl_io *)ctnio);
11095		return (CTL_RETVAL_COMPLETE);
11096	}
11097
11098	lbalen = (struct ctl_lba_len_flags *)
11099	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
11100	lbalen->lba = lba;
11101	lbalen->len = num_blocks;
11102	lbalen->flags = CTL_LLF_VERIFY | flags;
11103	ctnio->kern_total_len = 0;
11104	ctnio->kern_rel_offset = 0;
11105
11106	CTL_DEBUG_PRINT(("ctl_nvme_verify: calling data_submit()\n"));
11107	retval = lun->backend->data_submit((union ctl_io *)ctnio);
11108	return (retval);
11109}
11110
11111static const struct ctl_nvme_cmd_entry *
11112ctl_nvme_get_cmd_entry(struct ctl_nvmeio *ctnio)
11113{
11114	const struct ctl_nvme_cmd_entry *entry;
11115
11116	switch (ctnio->io_hdr.io_type) {
11117	case CTL_IO_NVME:
11118		entry = &nvme_nvm_cmd_table[ctnio->cmd.opc];
11119		break;
11120	case CTL_IO_NVME_ADMIN:
11121		entry = &nvme_admin_cmd_table[ctnio->cmd.opc];
11122		break;
11123	default:
11124		__assert_unreachable();
11125	}
11126	return (entry);
11127}
11128
11129static const struct ctl_nvme_cmd_entry *
11130ctl_nvme_validate_command(struct ctl_nvmeio *ctnio)
11131{
11132	const struct ctl_nvme_cmd_entry *entry;
11133
11134	entry = ctl_nvme_get_cmd_entry(ctnio);
11135	if (entry->execute == NULL) {
11136		ctl_nvme_set_invalid_opcode(ctnio);
11137		ctl_done((union ctl_io *)ctnio);
11138		return (NULL);
11139	}
11140
11141	/* Validate fused commands. */
11142	switch (NVMEV(NVME_CMD_FUSE, ctnio->cmd.fuse)) {
11143	case NVME_FUSE_NORMAL:
11144		break;
11145	case NVME_FUSE_FIRST:
11146		if (ctnio->io_hdr.io_type != CTL_IO_NVME ||
11147		    ctnio->cmd.opc != NVME_OPC_COMPARE) {
11148			ctl_nvme_set_invalid_field(ctnio);
11149			ctl_done((union ctl_io *)ctnio);
11150			return (NULL);
11151		}
11152		break;
11153	case NVME_FUSE_SECOND:
11154		if (ctnio->io_hdr.io_type != CTL_IO_NVME ||
11155		    ctnio->cmd.opc != NVME_OPC_COMPARE) {
11156			ctl_nvme_set_invalid_field(ctnio);
11157			ctl_done((union ctl_io *)ctnio);
11158			return (NULL);
11159		}
11160		break;
11161	default:
11162		ctl_nvme_set_invalid_field(ctnio);
11163		ctl_done((union ctl_io *)ctnio);
11164		return (NULL);
11165	}
11166
11167	return (entry);
11168}
11169
11170/*
11171 * This is a simpler version of ctl_scsiio_lun_check that fails
11172 * requests on a LUN without active media.
11173 *
11174 * Returns true if the command has been completed with an error.
11175 */
11176static bool
11177ctl_nvmeio_lun_check(struct ctl_lun *lun,
11178    const struct ctl_nvme_cmd_entry *entry, struct ctl_nvmeio *ctnio)
11179{
11180	mtx_assert(&lun->lun_lock, MA_OWNED);
11181
11182	if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) == 0) {
11183		if ((lun->flags & (CTL_LUN_EJECTED | CTL_LUN_NO_MEDIA |
11184		    CTL_LUN_STOPPED)) != 0) {
11185			ctl_nvme_set_namespace_not_ready(ctnio);
11186			return (true);
11187		}
11188	}
11189
11190	return (false);
11191}
11192
11193/*
11194 * Check for blockage against the OOA (Order Of Arrival) queue.
11195 * Assumptions:
11196 * - pending_io is generally either incoming, or on the blocked queue
11197 * - starting I/O is the I/O we want to start the check with.
11198 */
11199static ctl_action
11200ctl_nvme_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io,
11201    union ctl_io **starting_io, union ctl_io **aborted_io)
11202{
11203	union ctl_io *ooa_io = *starting_io;
11204
11205	CTL_IO_ASSERT(pending_io, NVME, NVME_ADMIN);
11206
11207	mtx_assert(&lun->lun_lock, MA_OWNED);
11208
11209	*aborted_io = NULL;
11210
11211	/*
11212	 * Aborted commands are not going to be executed and may even
11213	 * not report completion, so we don't care about their order.
11214	 * Let them complete ASAP to clean the OOA queue.
11215	 */
11216	if (__predict_false(pending_io->io_hdr.flags & CTL_FLAG_ABORT))
11217		return (CTL_ACTION_PASS);
11218
11219	/*
11220	 * NVMe has rather simple command ordering requirements.  In
11221	 * particular, there is no requirement on the controller to
11222	 * enforce a specific order for overlapping LBAs.  The only
11223	 * constraint is that fused operations (Compare and Write),
11224	 * must be completed as a unit.
11225	 *
11226	 * To support fused operations, the following strategy is used:
11227	 * - the first half of a fused command is not enqueued to rtr
11228	 *   until the second half is enqueued
11229	 * - the second half of a fused command blocks on the first
11230	 *   half of a fuse command
11231	 * - subsequent commands block on the second half of the
11232	 *   fused command
11233	 */
11234
11235	/*
11236	 * Is the previously submitted command the first half of a
11237	 * fused operation?
11238	 */
11239	if (ooa_io != NULL &&
11240	    NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == NVME_FUSE_FIRST) {
11241		/*
11242		 * If this is the second half, enqueue the first half
11243		 * and block the second half on the first half.
11244		 */
11245		if (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse) ==
11246		    NVME_FUSE_SECOND) {
11247			/*
11248			 * XXX: Do we need to wait for other rtr requests
11249			 * to drain so this is truly atomic?
11250			 */
11251			return (CTL_ACTION_FUSED);
11252		}
11253
11254		/* Abort the first half. */
11255		ctl_nvme_set_missing_fused_command(&ooa_io->nvmeio);
11256		*aborted_io = ooa_io;
11257	} else {
11258		switch (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse)) {
11259		case NVME_FUSE_FIRST:
11260			/* First half, wait for the second half. */
11261			return (CTL_ACTION_SKIP);
11262		case NVME_FUSE_SECOND:
11263			/* Second half without a matching first half, abort. */
11264			ctl_nvme_set_missing_fused_command(&pending_io->nvmeio);
11265			*aborted_io = pending_io;
11266			return (CTL_ACTION_SKIP);
11267		}
11268	}
11269
11270	/*
11271	 * Scan the OOA queue looking for the most recent second half
11272	 * of a fused op.
11273	 */
11274	for (; ooa_io != NULL;
11275	     ooa_io = (union ctl_io *)LIST_NEXT(&ooa_io->io_hdr, ooa_links)) {
11276		if (NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) ==
11277		    NVME_FUSE_SECOND) {
11278			*starting_io = ooa_io;
11279			return (CTL_ACTION_BLOCK);
11280		}
11281	}
11282
11283	*starting_io = NULL;
11284	return (CTL_ACTION_PASS);
11285}
11286
11287static void
11288ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio)
11289{
11290	struct ctl_softc *softc = CTL_SOFTC(ctnio);
11291	struct ctl_lun *lun;
11292	const struct ctl_nvme_cmd_entry *entry;
11293	union ctl_io *bio, *aborted_io;
11294	uint32_t targ_lun;
11295
11296	lun = NULL;
11297	targ_lun = ctnio->io_hdr.nexus.targ_mapped_lun;
11298	if (targ_lun < ctl_max_luns)
11299		lun = softc->ctl_luns[targ_lun];
11300	if (lun != NULL) {
11301		/*
11302		 * If the LUN is invalid, pretend that it doesn't exist.
11303		 * It will go away as soon as all pending I/O has been
11304		 * completed.
11305		 */
11306		mtx_lock(&lun->lun_lock);
11307		if (lun->flags & CTL_LUN_DISABLED) {
11308			mtx_unlock(&lun->lun_lock);
11309			lun = NULL;
11310		}
11311	}
11312	CTL_LUN(ctnio) = lun;
11313	if (lun != NULL) {
11314		CTL_BACKEND_LUN(ctnio) = lun->be_lun;
11315
11316		/*
11317		 * Every I/O goes into the OOA queue for a particular LUN,
11318		 * and stays there until completion.
11319		 */
11320#ifdef CTL_TIME_IO
11321		if (LIST_EMPTY(&lun->ooa_queue))
11322			lun->idle_time += getsbinuptime() - lun->last_busy;
11323#endif
11324		LIST_INSERT_HEAD(&lun->ooa_queue, &ctnio->io_hdr, ooa_links);
11325	}
11326
11327	/* Get command entry and return error if it is unsupported. */
11328	entry = ctl_nvme_validate_command(ctnio);
11329	if (entry == NULL) {
11330		if (lun)
11331			mtx_unlock(&lun->lun_lock);
11332		return;
11333	}
11334
11335	ctnio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK;
11336	ctnio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK;
11337
11338	/* All NVMe commands other than IDENTIFY require a LUN. */
11339	if (lun == NULL) {
11340		if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) {
11341			ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
11342			ctl_enqueue_rtr((union ctl_io *)ctnio);
11343			return;
11344		}
11345
11346		ctl_nvme_set_invalid_namespace(ctnio);
11347		ctl_done((union ctl_io *)ctnio);
11348		CTL_DEBUG_PRINT(("ctl_nvmeio_precheck: bailing out due to invalid LUN\n"));
11349		return;
11350	} else {
11351		/*
11352		 * NVMe namespaces can only be backed by T_DIRECT LUNs.
11353		 */
11354		if (lun->be_lun->lun_type != T_DIRECT) {
11355			mtx_unlock(&lun->lun_lock);
11356			ctl_nvme_set_invalid_namespace(ctnio);
11357			ctl_done((union ctl_io *)ctnio);
11358			return;
11359		}
11360	}
11361
11362	if (ctl_nvmeio_lun_check(lun, entry, ctnio) != 0) {
11363		mtx_unlock(&lun->lun_lock);
11364		ctl_done((union ctl_io *)ctnio);
11365		return;
11366	}
11367
11368	bio = (union ctl_io *)LIST_NEXT(&ctnio->io_hdr, ooa_links);
11369	switch (ctl_nvme_check_ooa(lun, (union ctl_io *)ctnio, &bio,
11370	    &aborted_io)) {
11371	case CTL_ACTION_PASS:
11372		ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
11373		mtx_unlock(&lun->lun_lock);
11374		ctl_enqueue_rtr((union ctl_io *)ctnio);
11375		break;
11376	case CTL_ACTION_FUSED:
11377		/* Block the second half on the first half. */
11378		ctnio->io_hdr.blocker = bio;
11379		TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr,
11380				  blocked_links);
11381
11382		/* Pass the first half. */
11383		bio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
11384		mtx_unlock(&lun->lun_lock);
11385		ctl_enqueue_rtr(bio);
11386		break;
11387	case CTL_ACTION_SKIP:
11388		mtx_unlock(&lun->lun_lock);
11389		break;
11390	case CTL_ACTION_BLOCK:
11391		ctnio->io_hdr.blocker = bio;
11392		TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr,
11393				  blocked_links);
11394		mtx_unlock(&lun->lun_lock);
11395		break;
11396	default:
11397		__assert_unreachable();
11398	}
11399	if (aborted_io != NULL)
11400		ctl_done(aborted_io);
11401}
11402
11403static int
11404ctl_nvmeio(struct ctl_nvmeio *ctnio)
11405{
11406	const struct ctl_nvme_cmd_entry *entry;
11407	int retval;
11408
11409	CTL_DEBUG_PRINT(("ctl_nvmeio %s opc=%02X\n",
11410	    ctnio->io_hdr.io_type == CTL_IO_NVME ? "nvm" : "admin",
11411	    ctnio->cmd.opc));
11412
11413	entry = ctl_nvme_get_cmd_entry(ctnio);
11414	MPASS(entry != NULL);
11415
11416	/*
11417	 * If this I/O has been aborted, just send it straight to
11418	 * ctl_done() without executing it.
11419	 */
11420	if (ctnio->io_hdr.flags & CTL_FLAG_ABORT) {
11421		ctl_done((union ctl_io *)ctnio);
11422		return (CTL_RETVAL_COMPLETE);
11423	}
11424
11425	/*
11426	 * All the checks should have been handled by ctl_nvmeio_precheck().
11427	 * We should be clear now to just execute the I/O.
11428	 */
11429	retval = entry->execute(ctnio);
11430
11431	return (retval);
11432}
11433
11434/*
11435 * For known CDB types, parse the LBA and length.
11436 */
11437static int
11438ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len)
11439{
11440
11441	CTL_IO_ASSERT(io, SCSI);
11442
11443	switch (io->scsiio.cdb[0]) {
11444	case COMPARE_AND_WRITE: {
11445		struct scsi_compare_and_write *cdb;
11446
11447		cdb = (struct scsi_compare_and_write *)io->scsiio.cdb;
11448
11449		*lba = scsi_8btou64(cdb->addr);
11450		*len = cdb->length;
11451		break;
11452	}
11453	case READ_6:
11454	case WRITE_6: {
11455		struct scsi_rw_6 *cdb;
11456
11457		cdb = (struct scsi_rw_6 *)io->scsiio.cdb;
11458
11459		*lba = scsi_3btoul(cdb->addr);
11460		/* only 5 bits are valid in the most significant address byte */
11461		*lba &= 0x1fffff;
11462		*len = cdb->length;
11463		break;
11464	}
11465	case READ_10:
11466	case WRITE_10: {
11467		struct scsi_rw_10 *cdb;
11468
11469		cdb = (struct scsi_rw_10 *)io->scsiio.cdb;
11470
11471		*lba = scsi_4btoul(cdb->addr);
11472		*len = scsi_2btoul(cdb->length);
11473		break;
11474	}
11475	case WRITE_VERIFY_10: {
11476		struct scsi_write_verify_10 *cdb;
11477
11478		cdb = (struct scsi_write_verify_10 *)io->scsiio.cdb;
11479
11480		*lba = scsi_4btoul(cdb->addr);
11481		*len = scsi_2btoul(cdb->length);
11482		break;
11483	}
11484	case READ_12:
11485	case WRITE_12: {
11486		struct scsi_rw_12 *cdb;
11487
11488		cdb = (struct scsi_rw_12 *)io->scsiio.cdb;
11489
11490		*lba = scsi_4btoul(cdb->addr);
11491		*len = scsi_4btoul(cdb->length);
11492		break;
11493	}
11494	case WRITE_VERIFY_12: {
11495		struct scsi_write_verify_12 *cdb;
11496
11497		cdb = (struct scsi_write_verify_12 *)io->scsiio.cdb;
11498
11499		*lba = scsi_4btoul(cdb->addr);
11500		*len = scsi_4btoul(cdb->length);
11501		break;
11502	}
11503	case READ_16:
11504	case WRITE_16: {
11505		struct scsi_rw_16 *cdb;
11506
11507		cdb = (struct scsi_rw_16 *)io->scsiio.cdb;
11508
11509		*lba = scsi_8btou64(cdb->addr);
11510		*len = scsi_4btoul(cdb->length);
11511		break;
11512	}
11513	case WRITE_ATOMIC_16: {
11514		struct scsi_write_atomic_16 *cdb;
11515
11516		cdb = (struct scsi_write_atomic_16 *)io->scsiio.cdb;
11517
11518		*lba = scsi_8btou64(cdb->addr);
11519		*len = scsi_2btoul(cdb->length);
11520		break;
11521	}
11522	case WRITE_VERIFY_16: {
11523		struct scsi_write_verify_16 *cdb;
11524
11525		cdb = (struct scsi_write_verify_16 *)io->scsiio.cdb;
11526
11527		*lba = scsi_8btou64(cdb->addr);
11528		*len = scsi_4btoul(cdb->length);
11529		break;
11530	}
11531	case WRITE_SAME_10: {
11532		struct scsi_write_same_10 *cdb;
11533
11534		cdb = (struct scsi_write_same_10 *)io->scsiio.cdb;
11535
11536		*lba = scsi_4btoul(cdb->addr);
11537		*len = scsi_2btoul(cdb->length);
11538		break;
11539	}
11540	case WRITE_SAME_16: {
11541		struct scsi_write_same_16 *cdb;
11542
11543		cdb = (struct scsi_write_same_16 *)io->scsiio.cdb;
11544
11545		*lba = scsi_8btou64(cdb->addr);
11546		*len = scsi_4btoul(cdb->length);
11547		break;
11548	}
11549	case VERIFY_10: {
11550		struct scsi_verify_10 *cdb;
11551
11552		cdb = (struct scsi_verify_10 *)io->scsiio.cdb;
11553
11554		*lba = scsi_4btoul(cdb->addr);
11555		*len = scsi_2btoul(cdb->length);
11556		break;
11557	}
11558	case VERIFY_12: {
11559		struct scsi_verify_12 *cdb;
11560
11561		cdb = (struct scsi_verify_12 *)io->scsiio.cdb;
11562
11563		*lba = scsi_4btoul(cdb->addr);
11564		*len = scsi_4btoul(cdb->length);
11565		break;
11566	}
11567	case VERIFY_16: {
11568		struct scsi_verify_16 *cdb;
11569
11570		cdb = (struct scsi_verify_16 *)io->scsiio.cdb;
11571
11572		*lba = scsi_8btou64(cdb->addr);
11573		*len = scsi_4btoul(cdb->length);
11574		break;
11575	}
11576	case UNMAP: {
11577		*lba = 0;
11578		*len = UINT64_MAX;
11579		break;
11580	}
11581	case SERVICE_ACTION_IN: {	/* GET LBA STATUS */
11582		struct scsi_get_lba_status *cdb;
11583
11584		cdb = (struct scsi_get_lba_status *)io->scsiio.cdb;
11585		*lba = scsi_8btou64(cdb->addr);
11586		*len = UINT32_MAX;
11587		break;
11588	}
11589	default:
11590		*lba = 0;
11591		*len = UINT64_MAX;
11592		return (1);
11593	}
11594
11595	return (0);
11596}
11597
11598static ctl_action
11599ctl_extent_check_lba(uint64_t lba1, uint64_t len1, uint64_t lba2, uint64_t len2,
11600    bool seq)
11601{
11602	uint64_t endlba1, endlba2;
11603
11604	endlba1 = lba1 + len1 - (seq ? 0 : 1);
11605	endlba2 = lba2 + len2 - 1;
11606
11607	if ((endlba1 < lba2) || (endlba2 < lba1))
11608		return (CTL_ACTION_PASS);
11609	else
11610		return (CTL_ACTION_BLOCK);
11611}
11612
11613static int
11614ctl_extent_check_unmap(union ctl_io *io, uint64_t lba2, uint64_t len2)
11615{
11616	struct ctl_ptr_len_flags *ptrlen;
11617	struct scsi_unmap_desc *buf, *end, *range;
11618	uint64_t lba;
11619	uint32_t len;
11620
11621	CTL_IO_ASSERT(io, SCSI);
11622
11623	/* If not UNMAP -- go other way. */
11624	if (io->scsiio.cdb[0] != UNMAP)
11625		return (CTL_ACTION_SKIP);
11626
11627	/* If UNMAP without data -- block and wait for data. */
11628	ptrlen = (struct ctl_ptr_len_flags *)
11629	    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
11630	if ((io->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0 ||
11631	    ptrlen->ptr == NULL)
11632		return (CTL_ACTION_BLOCK);
11633
11634	/* UNMAP with data -- check for collision. */
11635	buf = (struct scsi_unmap_desc *)ptrlen->ptr;
11636	end = buf + ptrlen->len / sizeof(*buf);
11637	for (range = buf; range < end; range++) {
11638		lba = scsi_8btou64(range->lba);
11639		len = scsi_4btoul(range->length);
11640		if ((lba < lba2 + len2) && (lba + len > lba2))
11641			return (CTL_ACTION_BLOCK);
11642	}
11643	return (CTL_ACTION_PASS);
11644}
11645
11646static ctl_action
11647ctl_extent_check(union ctl_io *io1, union ctl_io *io2, bool seq)
11648{
11649	uint64_t lba1, lba2;
11650	uint64_t len1, len2;
11651	int retval;
11652
11653	retval = ctl_get_lba_len(io2, &lba2, &len2);
11654	KASSERT(retval == 0, ("ctl_get_lba_len() error"));
11655
11656	retval = ctl_extent_check_unmap(io1, lba2, len2);
11657	if (retval != CTL_ACTION_SKIP)
11658		return (retval);
11659
11660	retval = ctl_get_lba_len(io1, &lba1, &len1);
11661	KASSERT(retval == 0, ("ctl_get_lba_len() error"));
11662
11663	if (seq && (io1->io_hdr.flags & CTL_FLAG_SERSEQ_DONE))
11664		seq = FALSE;
11665	return (ctl_extent_check_lba(lba1, len1, lba2, len2, seq));
11666}
11667
11668static ctl_action
11669ctl_seq_check(union ctl_io *io1, union ctl_io *io2)
11670{
11671	uint64_t lba1, lba2;
11672	uint64_t len1, len2;
11673	int retval __diagused;
11674
11675	if (io1->io_hdr.flags & CTL_FLAG_SERSEQ_DONE)
11676		return (CTL_ACTION_PASS);
11677	retval = ctl_get_lba_len(io1, &lba1, &len1);
11678	KASSERT(retval == 0, ("ctl_get_lba_len() error"));
11679	retval = ctl_get_lba_len(io2, &lba2, &len2);
11680	KASSERT(retval == 0, ("ctl_get_lba_len() error"));
11681
11682	if (lba1 + len1 == lba2)
11683		return (CTL_ACTION_BLOCK);
11684	return (CTL_ACTION_PASS);
11685}
11686
11687static ctl_action
11688ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io,
11689    const uint8_t *serialize_row, union ctl_io *ooa_io)
11690{
11691	CTL_IO_ASSERT(pending_io, SCSI);
11692	CTL_IO_ASSERT(ooa_io, SCSI);
11693
11694	/*
11695	 * The initiator attempted multiple untagged commands at the same
11696	 * time.  Can't do that.
11697	 */
11698	if (__predict_false(pending_io->scsiio.tag_type == CTL_TAG_UNTAGGED)
11699	 && __predict_false(ooa_io->scsiio.tag_type == CTL_TAG_UNTAGGED)
11700	 && ((pending_io->io_hdr.nexus.targ_port ==
11701	      ooa_io->io_hdr.nexus.targ_port)
11702	  && (pending_io->io_hdr.nexus.initid ==
11703	      ooa_io->io_hdr.nexus.initid))
11704	 && ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT |
11705	      CTL_FLAG_STATUS_SENT)) == 0))
11706		return (CTL_ACTION_OVERLAP);
11707
11708	/*
11709	 * The initiator attempted to send multiple tagged commands with
11710	 * the same ID.  (It's fine if different initiators have the same
11711	 * tag ID.)
11712	 *
11713	 * Even if all of those conditions are true, we don't kill the I/O
11714	 * if the command ahead of us has been aborted.  We won't end up
11715	 * sending it to the FETD, and it's perfectly legal to resend a
11716	 * command with the same tag number as long as the previous
11717	 * instance of this tag number has been aborted somehow.
11718	 */
11719	if (__predict_true(pending_io->scsiio.tag_type != CTL_TAG_UNTAGGED)
11720	 && __predict_true(ooa_io->scsiio.tag_type != CTL_TAG_UNTAGGED)
11721	 && __predict_false(pending_io->scsiio.tag_num == ooa_io->scsiio.tag_num)
11722	 && ((pending_io->io_hdr.nexus.targ_port ==
11723	      ooa_io->io_hdr.nexus.targ_port)
11724	  && (pending_io->io_hdr.nexus.initid ==
11725	      ooa_io->io_hdr.nexus.initid))
11726	 && ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT |
11727	      CTL_FLAG_STATUS_SENT)) == 0))
11728		return (CTL_ACTION_OVERLAP_TAG);
11729
11730	/*
11731	 * If we get a head of queue tag, SAM-3 says that we should
11732	 * immediately execute it.
11733	 *
11734	 * What happens if this command would normally block for some other
11735	 * reason?  e.g. a request sense with a head of queue tag
11736	 * immediately after a write.  Normally that would block, but this
11737	 * will result in its getting executed immediately...
11738	 *
11739	 * We currently return "pass" instead of "skip", so we'll end up
11740	 * going through the rest of the queue to check for overlapped tags.
11741	 *
11742	 * XXX KDM check for other types of blockage first??
11743	 */
11744	if (__predict_false(pending_io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
11745		return (CTL_ACTION_PASS);
11746
11747	/*
11748	 * Simple tags get blocked until all head of queue and ordered tags
11749	 * ahead of them have completed.  I'm lumping untagged commands in
11750	 * with simple tags here.  XXX KDM is that the right thing to do?
11751	 */
11752	if (__predict_false(ooa_io->scsiio.tag_type == CTL_TAG_ORDERED) ||
11753	    __predict_false(ooa_io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
11754		return (CTL_ACTION_BLOCK);
11755
11756	/* Unsupported command in OOA queue. */
11757	if (__predict_false(ooa_io->scsiio.seridx == CTL_SERIDX_INVLD))
11758		return (CTL_ACTION_PASS);
11759
11760	switch (serialize_row[ooa_io->scsiio.seridx]) {
11761	case CTL_SER_SEQ:
11762		if (lun->be_lun->serseq != CTL_LUN_SERSEQ_OFF)
11763			return (ctl_seq_check(ooa_io, pending_io));
11764		/* FALLTHROUGH */
11765	case CTL_SER_PASS:
11766		return (CTL_ACTION_PASS);
11767	case CTL_SER_EXTENTOPT:
11768		if ((lun->MODE_CTRL.queue_flags & SCP_QUEUE_ALG_MASK) ==
11769		    SCP_QUEUE_ALG_UNRESTRICTED)
11770			return (CTL_ACTION_PASS);
11771		/* FALLTHROUGH */
11772	case CTL_SER_EXTENT:
11773		return (ctl_extent_check(ooa_io, pending_io,
11774		    (lun->be_lun->serseq == CTL_LUN_SERSEQ_ON)));
11775	case CTL_SER_BLOCKOPT:
11776		if ((lun->MODE_CTRL.queue_flags & SCP_QUEUE_ALG_MASK) ==
11777		    SCP_QUEUE_ALG_UNRESTRICTED)
11778			return (CTL_ACTION_PASS);
11779		/* FALLTHROUGH */
11780	case CTL_SER_BLOCK:
11781		return (CTL_ACTION_BLOCK);
11782	default:
11783		__assert_unreachable();
11784	}
11785}
11786
11787/*
11788 * Check for blockage or overlaps against the OOA (Order Of Arrival) queue.
11789 * Assumptions:
11790 * - pending_io is generally either incoming, or on the blocked queue
11791 * - starting I/O is the I/O we want to start the check with.
11792 */
11793static ctl_action
11794ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io,
11795	      union ctl_io **starting_io)
11796{
11797	union ctl_io *ooa_io = *starting_io;
11798	const uint8_t *serialize_row;
11799	ctl_action action;
11800
11801	CTL_IO_ASSERT(pending_io, SCSI);
11802
11803	mtx_assert(&lun->lun_lock, MA_OWNED);
11804
11805	/*
11806	 * Aborted commands are not going to be executed and may even
11807	 * not report completion, so we don't care about their order.
11808	 * Let them complete ASAP to clean the OOA queue.
11809	 */
11810	if (__predict_false(pending_io->io_hdr.flags & CTL_FLAG_ABORT))
11811		return (CTL_ACTION_SKIP);
11812
11813	/*
11814	 * Ordered tags have to block until all items ahead of them have
11815	 * completed.  If we get called with an ordered tag, we always
11816	 * block, if something else is ahead of us in the queue.
11817	 */
11818	if ((pending_io->scsiio.tag_type == CTL_TAG_ORDERED) &&
11819	    (ooa_io != NULL))
11820		return (CTL_ACTION_BLOCK);
11821
11822	serialize_row = ctl_serialize_table[pending_io->scsiio.seridx];
11823
11824	/*
11825	 * Run back along the OOA queue, starting with the current
11826	 * blocked I/O and going through every I/O before it on the
11827	 * queue.  If starting_io is NULL, we'll just end up returning
11828	 * CTL_ACTION_PASS.
11829	 */
11830	for (; ooa_io != NULL;
11831	     ooa_io = (union ctl_io *)LIST_NEXT(&ooa_io->io_hdr, ooa_links)) {
11832		action = ctl_check_for_blockage(lun, pending_io, serialize_row,
11833		    ooa_io);
11834		if (action != CTL_ACTION_PASS) {
11835			*starting_io = ooa_io;
11836			return (action);
11837		}
11838	}
11839
11840	*starting_io = NULL;
11841	return (CTL_ACTION_PASS);
11842}
11843
11844/*
11845 * Try to unblock the specified I/O.
11846 *
11847 * skip parameter allows explicitly skip present blocker of the I/O,
11848 * starting from the previous one on OOA queue.  It can be used when
11849 * we know for sure that the blocker I/O does no longer count.
11850 */
11851static void
11852ctl_scsi_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
11853{
11854	struct ctl_softc *softc = lun->ctl_softc;
11855	union ctl_io *bio, *obio;
11856	const struct ctl_cmd_entry *entry;
11857	union ctl_ha_msg msg_info;
11858	ctl_action action;
11859
11860	CTL_IO_ASSERT(io, SCSI);
11861
11862	mtx_assert(&lun->lun_lock, MA_OWNED);
11863
11864	if (io->io_hdr.blocker == NULL)
11865		return;
11866
11867	obio = bio = io->io_hdr.blocker;
11868	if (skip)
11869		bio = (union ctl_io *)LIST_NEXT(&bio->io_hdr, ooa_links);
11870	action = ctl_check_ooa(lun, io, &bio);
11871	if (action == CTL_ACTION_BLOCK) {
11872		/* Still blocked, but may be by different I/O now. */
11873		if (bio != obio) {
11874			TAILQ_REMOVE(&obio->io_hdr.blocked_queue,
11875			    &io->io_hdr, blocked_links);
11876			TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue,
11877			    &io->io_hdr, blocked_links);
11878			io->io_hdr.blocker = bio;
11879		}
11880		return;
11881	}
11882
11883	/* No longer blocked, one way or another. */
11884	TAILQ_REMOVE(&obio->io_hdr.blocked_queue, &io->io_hdr, blocked_links);
11885	io->io_hdr.blocker = NULL;
11886
11887	switch (action) {
11888	case CTL_ACTION_PASS:
11889	case CTL_ACTION_SKIP:
11890
11891		/* Serializing commands from the other SC retire there. */
11892		if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) &&
11893		    (softc->ha_mode != CTL_HA_MODE_XFER)) {
11894			io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
11895			msg_info.hdr.original_sc = io->io_hdr.remote_io;
11896			msg_info.hdr.serializing_sc = io;
11897			msg_info.hdr.msg_type = CTL_MSG_R2R;
11898			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
11899			    sizeof(msg_info.hdr), M_NOWAIT);
11900			break;
11901		}
11902
11903		/*
11904		 * Check this I/O for LUN state changes that may have happened
11905		 * while this command was blocked. The LUN state may have been
11906		 * changed by a command ahead of us in the queue.
11907		 */
11908		entry = ctl_get_cmd_entry(&io->scsiio, NULL);
11909		if (ctl_scsiio_lun_check(lun, entry, &io->scsiio) != 0) {
11910			ctl_done(io);
11911			break;
11912		}
11913
11914		io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
11915		ctl_enqueue_rtr(io);
11916		break;
11917	default:
11918		__assert_unreachable();
11919	case CTL_ACTION_OVERLAP:
11920		ctl_set_overlapped_cmd(&io->scsiio);
11921		goto error;
11922	case CTL_ACTION_OVERLAP_TAG:
11923		ctl_set_overlapped_tag(&io->scsiio,
11924		    io->scsiio.tag_num & 0xff);
11925error:
11926		/* Serializing commands from the other SC are done here. */
11927		if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) &&
11928		    (softc->ha_mode != CTL_HA_MODE_XFER)) {
11929			ctl_try_unblock_others(lun, io, TRUE);
11930			LIST_REMOVE(&io->io_hdr, ooa_links);
11931
11932			ctl_copy_sense_data_back(io, &msg_info);
11933			msg_info.hdr.original_sc = io->io_hdr.remote_io;
11934			msg_info.hdr.serializing_sc = NULL;
11935			msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
11936			ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
11937			    sizeof(msg_info.scsi), M_WAITOK);
11938			ctl_free_io(io);
11939			break;
11940		}
11941
11942		ctl_done(io);
11943		break;
11944	}
11945}
11946
11947static void
11948ctl_nvme_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
11949{
11950	union ctl_io *bio;
11951	const struct ctl_nvme_cmd_entry *entry;
11952
11953	CTL_IO_ASSERT(io, NVME, NVME_ADMIN);
11954
11955	mtx_assert(&lun->lun_lock, MA_OWNED);
11956
11957	if (io->io_hdr.blocker == NULL)
11958		return;
11959
11960	/*
11961	 * If this is the second half of a fused operation, it should
11962	 * be the only io on the blocked list.  If the first half
11963	 * failed, complete the second half with an appropriate error.
11964	 */
11965	bio = io->io_hdr.blocker;
11966	if (NVMEV(NVME_CMD_FUSE, io->nvmeio.cmd.fuse) == NVME_FUSE_SECOND) {
11967		MPASS(io ==
11968		    (union ctl_io *)TAILQ_FIRST(&bio->io_hdr.blocked_queue));
11969		MPASS(TAILQ_NEXT(&io->io_hdr, blocked_links) == NULL);
11970
11971		TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr,
11972		    blocked_links);
11973		if (bio->io_hdr.status != CTL_SUCCESS) {
11974			ctl_nvme_set_failed_fused_command(&io->nvmeio);
11975			ctl_done(io);
11976			return;
11977		}
11978	} else {
11979		/*
11980		 * This must be a command that was blocked on the
11981		 * second half of a fused operation.
11982		 */
11983		MPASS(NVMEV(NVME_CMD_FUSE, bio->nvmeio.cmd.fuse) ==
11984		    NVME_FUSE_SECOND);
11985		TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr,
11986		    blocked_links);
11987	}
11988
11989	entry = ctl_nvme_get_cmd_entry(&io->nvmeio);
11990	if (ctl_nvmeio_lun_check(lun, entry, &io->nvmeio) != 0) {
11991		ctl_done(io);
11992		return;
11993	}
11994
11995	io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
11996	ctl_enqueue_rtr(io);
11997}
11998
11999static void
12000ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
12001{
12002	switch (io->io_hdr.io_type) {
12003	case CTL_IO_SCSI:
12004		return (ctl_scsi_try_unblock_io(lun, io, skip));
12005	case CTL_IO_NVME:
12006	case CTL_IO_NVME_ADMIN:
12007		return (ctl_nvme_try_unblock_io(lun, io, skip));
12008	default:
12009		__assert_unreachable();
12010	}
12011}
12012
12013/*
12014 * Try to unblock I/Os blocked by the specified I/O.
12015 *
12016 * skip parameter allows explicitly skip the specified I/O as blocker,
12017 * starting from the previous one on the OOA queue.  It can be used when
12018 * we know for sure that the specified I/O does no longer count (done).
12019 * It has to be still on OOA queue though so that we know where to start.
12020 */
12021static void
12022ctl_try_unblock_others(struct ctl_lun *lun, union ctl_io *bio, bool skip)
12023{
12024	union ctl_io *io, *next_io;
12025
12026	mtx_assert(&lun->lun_lock, MA_OWNED);
12027
12028	for (io = (union ctl_io *)TAILQ_FIRST(&bio->io_hdr.blocked_queue);
12029	     io != NULL; io = next_io) {
12030		next_io = (union ctl_io *)TAILQ_NEXT(&io->io_hdr, blocked_links);
12031
12032		KASSERT(io->io_hdr.blocker != NULL,
12033		    ("I/O %p on blocked list without blocker", io));
12034		ctl_try_unblock_io(lun, io, skip);
12035	}
12036	KASSERT(!skip || TAILQ_EMPTY(&bio->io_hdr.blocked_queue),
12037	    ("blocked_queue is not empty after skipping %p", bio));
12038}
12039
12040/*
12041 * This routine (with one exception) checks LUN flags that can be set by
12042 * commands ahead of us in the OOA queue.  These flags have to be checked
12043 * when a command initially comes in, and when we pull a command off the
12044 * blocked queue and are preparing to execute it.  The reason we have to
12045 * check these flags for commands on the blocked queue is that the LUN
12046 * state may have been changed by a command ahead of us while we're on the
12047 * blocked queue.
12048 *
12049 * Ordering is somewhat important with these checks, so please pay
12050 * careful attention to the placement of any new checks.
12051 */
12052static int
12053ctl_scsiio_lun_check(struct ctl_lun *lun,
12054    const struct ctl_cmd_entry *entry, struct ctl_scsiio *ctsio)
12055{
12056	struct ctl_softc *softc = lun->ctl_softc;
12057	int retval;
12058	uint32_t residx;
12059
12060	retval = 0;
12061
12062	mtx_assert(&lun->lun_lock, MA_OWNED);
12063
12064	/*
12065	 * If this shelf is a secondary shelf controller, we may have to
12066	 * reject some commands disallowed by HA mode and link state.
12067	 */
12068	if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0) {
12069		if (softc->ha_link == CTL_HA_LINK_OFFLINE &&
12070		    (entry->flags & CTL_CMD_FLAG_OK_ON_UNAVAIL) == 0) {
12071			ctl_set_lun_unavail(ctsio);
12072			retval = 1;
12073			goto bailout;
12074		}
12075		if ((lun->flags & CTL_LUN_PEER_SC_PRIMARY) == 0 &&
12076		    (entry->flags & CTL_CMD_FLAG_OK_ON_UNAVAIL) == 0) {
12077			ctl_set_lun_transit(ctsio);
12078			retval = 1;
12079			goto bailout;
12080		}
12081		if (softc->ha_mode == CTL_HA_MODE_ACT_STBY &&
12082		    (entry->flags & CTL_CMD_FLAG_OK_ON_STANDBY) == 0) {
12083			ctl_set_lun_standby(ctsio);
12084			retval = 1;
12085			goto bailout;
12086		}
12087
12088		/* The rest of checks are only done on executing side */
12089		if (softc->ha_mode == CTL_HA_MODE_XFER)
12090			goto bailout;
12091	}
12092
12093	if (entry->pattern & CTL_LUN_PAT_WRITE) {
12094		if (lun->be_lun->flags & CTL_LUN_FLAG_READONLY) {
12095			ctl_set_hw_write_protected(ctsio);
12096			retval = 1;
12097			goto bailout;
12098		}
12099		if ((lun->MODE_CTRL.eca_and_aen & SCP_SWP) != 0) {
12100			ctl_set_sense(ctsio, /*current_error*/ 1,
12101			    /*sense_key*/ SSD_KEY_DATA_PROTECT,
12102			    /*asc*/ 0x27, /*ascq*/ 0x02, SSD_ELEM_NONE);
12103			retval = 1;
12104			goto bailout;
12105		}
12106	}
12107
12108	/*
12109	 * Check for a reservation conflict.  If this command isn't allowed
12110	 * even on reserved LUNs, and if this initiator isn't the one who
12111	 * reserved us, reject the command with a reservation conflict.
12112	 */
12113	residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
12114	if ((lun->flags & CTL_LUN_RESERVED)
12115	 && ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_RESV) == 0)) {
12116		if (lun->res_idx != residx) {
12117			ctl_set_reservation_conflict(ctsio);
12118			retval = 1;
12119			goto bailout;
12120		}
12121	}
12122
12123	if ((lun->flags & CTL_LUN_PR_RESERVED) == 0 ||
12124	    (entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_RESV)) {
12125		/* No reservation or command is allowed. */;
12126	} else if ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_WRESV) &&
12127	    (lun->pr_res_type == SPR_TYPE_WR_EX ||
12128	     lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
12129	     lun->pr_res_type == SPR_TYPE_WR_EX_AR)) {
12130		/* The command is allowed for Write Exclusive resv. */;
12131	} else {
12132		/*
12133		 * if we aren't registered or it's a res holder type
12134		 * reservation and this isn't the res holder then set a
12135		 * conflict.
12136		 */
12137		if (ctl_get_prkey(lun, residx) == 0 ||
12138		    (residx != lun->pr_res_idx && lun->pr_res_type < 4)) {
12139			ctl_set_reservation_conflict(ctsio);
12140			retval = 1;
12141			goto bailout;
12142		}
12143	}
12144
12145	if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) == 0) {
12146		if (lun->flags & CTL_LUN_EJECTED)
12147			ctl_set_lun_ejected(ctsio);
12148		else if (lun->flags & CTL_LUN_NO_MEDIA) {
12149			if (lun->flags & CTL_LUN_REMOVABLE)
12150				ctl_set_lun_no_media(ctsio);
12151			else
12152				ctl_set_lun_int_reqd(ctsio);
12153		} else if (lun->flags & CTL_LUN_STOPPED)
12154			ctl_set_lun_stopped(ctsio);
12155		else
12156			goto bailout;
12157		retval = 1;
12158		goto bailout;
12159	}
12160
12161bailout:
12162	return (retval);
12163}
12164
12165static void
12166ctl_failover_io(union ctl_io *io, int have_lock)
12167{
12168	CTL_IO_ASSERT(io, SCSI);
12169
12170	ctl_set_busy(&io->scsiio);
12171	ctl_done(io);
12172}
12173
12174static void
12175ctl_failover_lun(union ctl_io *rio)
12176{
12177	struct ctl_softc *softc = CTL_SOFTC(rio);
12178	struct ctl_lun *lun;
12179	struct ctl_io_hdr *io, *next_io;
12180	uint32_t targ_lun;
12181
12182	targ_lun = rio->io_hdr.nexus.targ_mapped_lun;
12183	CTL_DEBUG_PRINT(("FAILOVER for lun %u\n", targ_lun));
12184
12185	/* Find and lock the LUN. */
12186	mtx_lock(&softc->ctl_lock);
12187	if (targ_lun > ctl_max_luns ||
12188	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
12189		mtx_unlock(&softc->ctl_lock);
12190		return;
12191	}
12192	mtx_lock(&lun->lun_lock);
12193	mtx_unlock(&softc->ctl_lock);
12194	if (lun->flags & CTL_LUN_DISABLED) {
12195		mtx_unlock(&lun->lun_lock);
12196		return;
12197	}
12198
12199	if (softc->ha_mode == CTL_HA_MODE_XFER) {
12200		LIST_FOREACH_SAFE(io, &lun->ooa_queue, ooa_links, next_io) {
12201			/* We are master */
12202			if (io->flags & CTL_FLAG_FROM_OTHER_SC) {
12203				if (io->flags & CTL_FLAG_IO_ACTIVE) {
12204					io->flags |= CTL_FLAG_ABORT |
12205					    CTL_FLAG_FAILOVER;
12206					ctl_try_unblock_io(lun,
12207					    (union ctl_io *)io, FALSE);
12208				} else { /* This can be only due to DATAMOVE */
12209					io->msg_type = CTL_MSG_DATAMOVE_DONE;
12210					io->flags &= ~CTL_FLAG_DMA_INPROG;
12211					io->flags |= CTL_FLAG_IO_ACTIVE;
12212					io->port_status = 31340;
12213					ctl_enqueue_isc((union ctl_io *)io);
12214				}
12215			} else
12216			/* We are slave */
12217			if (io->flags & CTL_FLAG_SENT_2OTHER_SC) {
12218				io->flags &= ~CTL_FLAG_SENT_2OTHER_SC;
12219				if (io->flags & CTL_FLAG_IO_ACTIVE) {
12220					io->flags |= CTL_FLAG_FAILOVER;
12221				} else {
12222					ctl_set_busy(&((union ctl_io *)io)->
12223					    scsiio);
12224					ctl_done((union ctl_io *)io);
12225				}
12226			}
12227		}
12228	} else { /* SERIALIZE modes */
12229		LIST_FOREACH_SAFE(io, &lun->ooa_queue, ooa_links, next_io) {
12230			/* We are master */
12231			if (io->flags & CTL_FLAG_FROM_OTHER_SC) {
12232				if (io->blocker != NULL) {
12233					TAILQ_REMOVE(&io->blocker->io_hdr.blocked_queue,
12234					    io, blocked_links);
12235					io->blocker = NULL;
12236				}
12237				ctl_try_unblock_others(lun, (union ctl_io *)io,
12238				    TRUE);
12239				LIST_REMOVE(io, ooa_links);
12240				ctl_free_io((union ctl_io *)io);
12241			} else
12242			/* We are slave */
12243			if (io->flags & CTL_FLAG_SENT_2OTHER_SC) {
12244				io->flags &= ~CTL_FLAG_SENT_2OTHER_SC;
12245				if (!(io->flags & CTL_FLAG_IO_ACTIVE)) {
12246					ctl_set_busy(&((union ctl_io *)io)->
12247					    scsiio);
12248					ctl_done((union ctl_io *)io);
12249				}
12250			}
12251		}
12252	}
12253	mtx_unlock(&lun->lun_lock);
12254}
12255
12256static void
12257ctl_scsiio_precheck(struct ctl_scsiio *ctsio)
12258{
12259	struct ctl_softc *softc = CTL_SOFTC(ctsio);
12260	struct ctl_lun *lun;
12261	const struct ctl_cmd_entry *entry;
12262	union ctl_io *bio;
12263	uint32_t initidx, targ_lun;
12264
12265	lun = NULL;
12266	targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun;
12267	if (targ_lun < ctl_max_luns)
12268		lun = softc->ctl_luns[targ_lun];
12269	if (lun) {
12270		/*
12271		 * If the LUN is invalid, pretend that it doesn't exist.
12272		 * It will go away as soon as all pending I/O has been
12273		 * completed.
12274		 */
12275		mtx_lock(&lun->lun_lock);
12276		if (lun->flags & CTL_LUN_DISABLED) {
12277			mtx_unlock(&lun->lun_lock);
12278			lun = NULL;
12279		}
12280	}
12281	CTL_LUN(ctsio) = lun;
12282	if (lun) {
12283		CTL_BACKEND_LUN(ctsio) = lun->be_lun;
12284
12285		/*
12286		 * Every I/O goes into the OOA queue for a particular LUN,
12287		 * and stays there until completion.
12288		 */
12289#ifdef CTL_TIME_IO
12290		if (LIST_EMPTY(&lun->ooa_queue))
12291			lun->idle_time += getsbinuptime() - lun->last_busy;
12292#endif
12293		LIST_INSERT_HEAD(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
12294	}
12295
12296	/* Get command entry and return error if it is unsuppotyed. */
12297	entry = ctl_validate_command(ctsio);
12298	if (entry == NULL) {
12299		if (lun)
12300			mtx_unlock(&lun->lun_lock);
12301		return;
12302	}
12303
12304	ctsio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK;
12305	ctsio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK;
12306
12307	/*
12308	 * Check to see whether we can send this command to LUNs that don't
12309	 * exist.  This should pretty much only be the case for inquiry
12310	 * and request sense.  Further checks, below, really require having
12311	 * a LUN, so we can't really check the command anymore.  Just put
12312	 * it on the rtr queue.
12313	 */
12314	if (lun == NULL) {
12315		if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) {
12316			ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
12317			ctl_enqueue_rtr((union ctl_io *)ctsio);
12318			return;
12319		}
12320
12321		ctl_set_unsupported_lun(ctsio);
12322		ctl_done((union ctl_io *)ctsio);
12323		CTL_DEBUG_PRINT(("ctl_scsiio_precheck: bailing out due to invalid LUN\n"));
12324		return;
12325	} else {
12326		/*
12327		 * Make sure we support this particular command on this LUN.
12328		 * e.g., we don't support writes to the control LUN.
12329		 */
12330		if (!ctl_cmd_applicable(lun->be_lun->lun_type, entry)) {
12331			mtx_unlock(&lun->lun_lock);
12332			ctl_set_invalid_opcode(ctsio);
12333			ctl_done((union ctl_io *)ctsio);
12334			return;
12335		}
12336	}
12337
12338	initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
12339
12340	/*
12341	 * If we've got a request sense, it'll clear the contingent
12342	 * allegiance condition.  Otherwise, if we have a CA condition for
12343	 * this initiator, clear it, because it sent down a command other
12344	 * than request sense.
12345	 */
12346	if (ctsio->cdb[0] != REQUEST_SENSE) {
12347		struct scsi_sense_data *ps;
12348
12349		ps = lun->pending_sense[initidx / CTL_MAX_INIT_PER_PORT];
12350		if (ps != NULL)
12351			ps[initidx % CTL_MAX_INIT_PER_PORT].error_code = 0;
12352	}
12353
12354	/*
12355	 * If the command has this flag set, it handles its own unit
12356	 * attention reporting, we shouldn't do anything.  Otherwise we
12357	 * check for any pending unit attentions, and send them back to the
12358	 * initiator.  We only do this when a command initially comes in,
12359	 * not when we pull it off the blocked queue.
12360	 *
12361	 * According to SAM-3, section 5.3.2, the order that things get
12362	 * presented back to the host is basically unit attentions caused
12363	 * by some sort of reset event, busy status, reservation conflicts
12364	 * or task set full, and finally any other status.
12365	 *
12366	 * One issue here is that some of the unit attentions we report
12367	 * don't fall into the "reset" category (e.g. "reported luns data
12368	 * has changed").  So reporting it here, before the reservation
12369	 * check, may be technically wrong.  I guess the only thing to do
12370	 * would be to check for and report the reset events here, and then
12371	 * check for the other unit attention types after we check for a
12372	 * reservation conflict.
12373	 *
12374	 * XXX KDM need to fix this
12375	 */
12376	if ((entry->flags & CTL_CMD_FLAG_NO_SENSE) == 0) {
12377		ctl_ua_type ua_type;
12378		u_int sense_len = 0;
12379
12380		ua_type = ctl_build_ua(lun, initidx, &ctsio->sense_data,
12381		    &sense_len, SSD_TYPE_NONE);
12382		if (ua_type != CTL_UA_NONE) {
12383			mtx_unlock(&lun->lun_lock);
12384			ctsio->scsi_status = SCSI_STATUS_CHECK_COND;
12385			ctsio->io_hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE;
12386			ctsio->sense_len = sense_len;
12387			ctl_done((union ctl_io *)ctsio);
12388			return;
12389		}
12390	}
12391
12392	if (ctl_scsiio_lun_check(lun, entry, ctsio) != 0) {
12393		mtx_unlock(&lun->lun_lock);
12394		ctl_done((union ctl_io *)ctsio);
12395		return;
12396	}
12397
12398	/*
12399	 * XXX CHD this is where we want to send IO to other side if
12400	 * this LUN is secondary on this SC. We will need to make a copy
12401	 * of the IO and flag the IO on this side as SENT_2OTHER and the flag
12402	 * the copy we send as FROM_OTHER.
12403	 * We also need to stuff the address of the original IO so we can
12404	 * find it easily. Something similar will need be done on the other
12405	 * side so when we are done we can find the copy.
12406	 */
12407	if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
12408	    (lun->flags & CTL_LUN_PEER_SC_PRIMARY) != 0 &&
12409	    (entry->flags & CTL_CMD_FLAG_RUN_HERE) == 0) {
12410		union ctl_ha_msg msg_info;
12411		int isc_retval;
12412
12413		ctsio->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC;
12414		ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
12415		mtx_unlock(&lun->lun_lock);
12416
12417		msg_info.hdr.msg_type = CTL_MSG_SERIALIZE;
12418		msg_info.hdr.original_sc = (union ctl_io *)ctsio;
12419		msg_info.hdr.serializing_sc = NULL;
12420		msg_info.hdr.nexus = ctsio->io_hdr.nexus;
12421		msg_info.scsi.tag_num = ctsio->tag_num;
12422		msg_info.scsi.tag_type = ctsio->tag_type;
12423		memcpy(msg_info.scsi.cdb, ctsio->cdb, CTL_MAX_CDBLEN);
12424		msg_info.scsi.cdb_len = ctsio->cdb_len;
12425		msg_info.scsi.priority = ctsio->priority;
12426
12427		if ((isc_retval = ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
12428		    sizeof(msg_info.scsi) - sizeof(msg_info.scsi.sense_data),
12429		    M_WAITOK)) > CTL_HA_STATUS_SUCCESS) {
12430			ctsio->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC;
12431			ctsio->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
12432			ctl_set_busy(ctsio);
12433			ctl_done((union ctl_io *)ctsio);
12434			return;
12435		}
12436		return;
12437	}
12438
12439	bio = (union ctl_io *)LIST_NEXT(&ctsio->io_hdr, ooa_links);
12440	switch (ctl_check_ooa(lun, (union ctl_io *)ctsio, &bio)) {
12441	case CTL_ACTION_PASS:
12442	case CTL_ACTION_SKIP:
12443		ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
12444		mtx_unlock(&lun->lun_lock);
12445		ctl_enqueue_rtr((union ctl_io *)ctsio);
12446		break;
12447	case CTL_ACTION_BLOCK:
12448		ctsio->io_hdr.blocker = bio;
12449		TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctsio->io_hdr,
12450				  blocked_links);
12451		mtx_unlock(&lun->lun_lock);
12452		break;
12453	case CTL_ACTION_OVERLAP:
12454		mtx_unlock(&lun->lun_lock);
12455		ctl_set_overlapped_cmd(ctsio);
12456		ctl_done((union ctl_io *)ctsio);
12457		break;
12458	case CTL_ACTION_OVERLAP_TAG:
12459		mtx_unlock(&lun->lun_lock);
12460		ctl_set_overlapped_tag(ctsio, ctsio->tag_num & 0xff);
12461		ctl_done((union ctl_io *)ctsio);
12462		break;
12463	default:
12464		__assert_unreachable();
12465	}
12466}
12467
12468const struct ctl_cmd_entry *
12469ctl_get_cmd_entry(struct ctl_scsiio *ctsio, int *sa)
12470{
12471	const struct ctl_cmd_entry *entry;
12472	int service_action;
12473
12474	entry = &ctl_cmd_table[ctsio->cdb[0]];
12475	if (sa)
12476		*sa = ((entry->flags & CTL_CMD_FLAG_SA5) != 0);
12477	if (entry->flags & CTL_CMD_FLAG_SA5) {
12478		service_action = ctsio->cdb[1] & SERVICE_ACTION_MASK;
12479		entry = &((const struct ctl_cmd_entry *)
12480		    entry->execute)[service_action];
12481	}
12482	return (entry);
12483}
12484
12485const struct ctl_cmd_entry *
12486ctl_validate_command(struct ctl_scsiio *ctsio)
12487{
12488	const struct ctl_cmd_entry *entry;
12489	int i, sa;
12490	uint8_t diff;
12491
12492	entry = ctl_get_cmd_entry(ctsio, &sa);
12493	ctsio->seridx = entry->seridx;
12494	if (entry->execute == NULL) {
12495		if (sa)
12496			ctl_set_invalid_field(ctsio,
12497					      /*sks_valid*/ 1,
12498					      /*command*/ 1,
12499					      /*field*/ 1,
12500					      /*bit_valid*/ 1,
12501					      /*bit*/ 4);
12502		else
12503			ctl_set_invalid_opcode(ctsio);
12504		ctl_done((union ctl_io *)ctsio);
12505		return (NULL);
12506	}
12507	KASSERT(entry->length > 0,
12508	    ("Not defined length for command 0x%02x/0x%02x",
12509	     ctsio->cdb[0], ctsio->cdb[1]));
12510	for (i = 1; i < entry->length; i++) {
12511		diff = ctsio->cdb[i] & ~entry->usage[i - 1];
12512		if (diff == 0)
12513			continue;
12514		ctl_set_invalid_field(ctsio,
12515				      /*sks_valid*/ 1,
12516				      /*command*/ 1,
12517				      /*field*/ i,
12518				      /*bit_valid*/ 1,
12519				      /*bit*/ fls(diff) - 1);
12520		ctl_done((union ctl_io *)ctsio);
12521		return (NULL);
12522	}
12523	return (entry);
12524}
12525
12526static int
12527ctl_cmd_applicable(uint8_t lun_type, const struct ctl_cmd_entry *entry)
12528{
12529
12530	switch (lun_type) {
12531	case T_DIRECT:
12532		if ((entry->flags & CTL_CMD_FLAG_OK_ON_DIRECT) == 0)
12533			return (0);
12534		break;
12535	case T_PROCESSOR:
12536		if ((entry->flags & CTL_CMD_FLAG_OK_ON_PROC) == 0)
12537			return (0);
12538		break;
12539	case T_CDROM:
12540		if ((entry->flags & CTL_CMD_FLAG_OK_ON_CDROM) == 0)
12541			return (0);
12542		break;
12543	default:
12544		return (0);
12545	}
12546	return (1);
12547}
12548
12549static int
12550ctl_scsiio(struct ctl_scsiio *ctsio)
12551{
12552	int retval;
12553	const struct ctl_cmd_entry *entry;
12554
12555	retval = CTL_RETVAL_COMPLETE;
12556
12557	CTL_DEBUG_PRINT(("ctl_scsiio cdb[0]=%02X\n", ctsio->cdb[0]));
12558
12559	entry = ctl_get_cmd_entry(ctsio, NULL);
12560
12561	/*
12562	 * If this I/O has been aborted, just send it straight to
12563	 * ctl_done() without executing it.
12564	 */
12565	if (ctsio->io_hdr.flags & CTL_FLAG_ABORT) {
12566		ctl_done((union ctl_io *)ctsio);
12567		goto bailout;
12568	}
12569
12570	/*
12571	 * All the checks should have been handled by ctl_scsiio_precheck().
12572	 * We should be clear now to just execute the I/O.
12573	 */
12574	retval = entry->execute(ctsio);
12575
12576bailout:
12577	return (retval);
12578}
12579
12580static int
12581ctl_target_reset(union ctl_io *io)
12582{
12583	struct ctl_softc *softc = CTL_SOFTC(io);
12584	struct ctl_port *port = CTL_PORT(io);
12585	struct ctl_lun *lun;
12586	uint32_t initidx;
12587	ctl_ua_type ua_type;
12588
12589	if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
12590		union ctl_ha_msg msg_info;
12591
12592		msg_info.hdr.nexus = io->io_hdr.nexus;
12593		msg_info.task.task_action = io->taskio.task_action;
12594		msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
12595		msg_info.hdr.original_sc = NULL;
12596		msg_info.hdr.serializing_sc = NULL;
12597		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
12598		    sizeof(msg_info.task), M_WAITOK);
12599	}
12600
12601	initidx = ctl_get_initindex(&io->io_hdr.nexus);
12602	if (io->taskio.task_action == CTL_TASK_TARGET_RESET)
12603		ua_type = CTL_UA_TARG_RESET;
12604	else
12605		ua_type = CTL_UA_BUS_RESET;
12606	mtx_lock(&softc->ctl_lock);
12607	STAILQ_FOREACH(lun, &softc->lun_list, links) {
12608		if (port != NULL &&
12609		    ctl_lun_map_to_port(port, lun->lun) == UINT32_MAX)
12610			continue;
12611		ctl_do_lun_reset(lun, initidx, ua_type);
12612	}
12613	mtx_unlock(&softc->ctl_lock);
12614	io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12615	return (0);
12616}
12617
12618/*
12619 * The LUN should always be set.  The I/O is optional, and is used to
12620 * distinguish between I/Os sent by this initiator, and by other
12621 * initiators.  We set unit attention for initiators other than this one.
12622 * SAM-3 is vague on this point.  It does say that a unit attention should
12623 * be established for other initiators when a LUN is reset (see section
12624 * 5.7.3), but it doesn't specifically say that the unit attention should
12625 * be established for this particular initiator when a LUN is reset.  Here
12626 * is the relevant text, from SAM-3 rev 8:
12627 *
12628 * 5.7.2 When a SCSI initiator port aborts its own tasks
12629 *
12630 * When a SCSI initiator port causes its own task(s) to be aborted, no
12631 * notification that the task(s) have been aborted shall be returned to
12632 * the SCSI initiator port other than the completion response for the
12633 * command or task management function action that caused the task(s) to
12634 * be aborted and notification(s) associated with related effects of the
12635 * action (e.g., a reset unit attention condition).
12636 *
12637 * XXX KDM for now, we're setting unit attention for all initiators.
12638 */
12639static void
12640ctl_do_lun_reset(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua_type)
12641{
12642	struct ctl_io_hdr *xioh;
12643	int i;
12644
12645	mtx_lock(&lun->lun_lock);
12646	/* Abort tasks. */
12647	LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) {
12648		xioh->flags |= CTL_FLAG_ABORT | CTL_FLAG_ABORT_STATUS;
12649		ctl_try_unblock_io(lun, (union ctl_io *)xioh, FALSE);
12650	}
12651	/* Clear CA. */
12652	for (i = 0; i < ctl_max_ports; i++) {
12653		free(lun->pending_sense[i], M_CTL);
12654		lun->pending_sense[i] = NULL;
12655	}
12656	/* Clear reservation. */
12657	lun->flags &= ~CTL_LUN_RESERVED;
12658	/* Clear prevent media removal. */
12659	if (lun->prevent) {
12660		for (i = 0; i < CTL_MAX_INITIATORS; i++)
12661			ctl_clear_mask(lun->prevent, i);
12662		lun->prevent_count = 0;
12663	}
12664	/* Clear TPC status */
12665	ctl_tpc_lun_clear(lun, -1);
12666	/* Establish UA. */
12667#if 0
12668	ctl_est_ua_all(lun, initidx, ua_type);
12669#else
12670	ctl_est_ua_all(lun, -1, ua_type);
12671#endif
12672	mtx_unlock(&lun->lun_lock);
12673}
12674
12675static int
12676ctl_lun_reset(union ctl_io *io)
12677{
12678	struct ctl_softc *softc = CTL_SOFTC(io);
12679	struct ctl_lun *lun;
12680	uint32_t targ_lun, initidx;
12681
12682	targ_lun = io->io_hdr.nexus.targ_mapped_lun;
12683	initidx = ctl_get_initindex(&io->io_hdr.nexus);
12684	mtx_lock(&softc->ctl_lock);
12685	if (targ_lun >= ctl_max_luns ||
12686	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
12687		mtx_unlock(&softc->ctl_lock);
12688		io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
12689		return (1);
12690	}
12691	ctl_do_lun_reset(lun, initidx, CTL_UA_LUN_RESET);
12692	mtx_unlock(&softc->ctl_lock);
12693	io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12694
12695	if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0) {
12696		union ctl_ha_msg msg_info;
12697
12698		msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
12699		msg_info.hdr.nexus = io->io_hdr.nexus;
12700		msg_info.task.task_action = CTL_TASK_LUN_RESET;
12701		msg_info.hdr.original_sc = NULL;
12702		msg_info.hdr.serializing_sc = NULL;
12703		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
12704		    sizeof(msg_info.task), M_WAITOK);
12705	}
12706	return (0);
12707}
12708
12709static void
12710ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id,
12711    int other_sc)
12712{
12713	struct ctl_io_hdr *xioh;
12714
12715	mtx_assert(&lun->lun_lock, MA_OWNED);
12716
12717	/*
12718	 * Run through the OOA queue and attempt to find the given I/O.
12719	 * The target port, initiator ID, tag type and tag number have to
12720	 * match the values that we got from the initiator.  If we have an
12721	 * untagged command to abort, simply abort the first untagged command
12722	 * we come to.  We only allow one untagged command at a time of course.
12723	 */
12724	LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) {
12725		union ctl_io *xio = (union ctl_io *)xioh;
12726
12727		if ((targ_port == UINT32_MAX ||
12728		     targ_port == xioh->nexus.targ_port) &&
12729		    (init_id == UINT32_MAX ||
12730		     init_id == xioh->nexus.initid)) {
12731			if (targ_port != xioh->nexus.targ_port ||
12732			    init_id != xioh->nexus.initid)
12733				xioh->flags |= CTL_FLAG_ABORT_STATUS;
12734			xioh->flags |= CTL_FLAG_ABORT;
12735			if (!other_sc && !(lun->flags & CTL_LUN_PRIMARY_SC)) {
12736				union ctl_ha_msg msg_info;
12737
12738				CTL_IO_ASSERT(xio, SCSI);
12739				msg_info.hdr.nexus = xioh->nexus;
12740				msg_info.task.task_action = CTL_TASK_ABORT_TASK;
12741				msg_info.task.tag_num = xio->scsiio.tag_num;
12742				msg_info.task.tag_type = xio->scsiio.tag_type;
12743				msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
12744				msg_info.hdr.original_sc = NULL;
12745				msg_info.hdr.serializing_sc = NULL;
12746				ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
12747				    sizeof(msg_info.task), M_NOWAIT);
12748			}
12749			ctl_try_unblock_io(lun, xio, FALSE);
12750		}
12751	}
12752}
12753
12754static int
12755ctl_abort_task_set(union ctl_io *io)
12756{
12757	struct ctl_softc *softc = CTL_SOFTC(io);
12758	struct ctl_lun *lun;
12759	uint32_t targ_lun;
12760
12761	/*
12762	 * Look up the LUN.
12763	 */
12764	targ_lun = io->io_hdr.nexus.targ_mapped_lun;
12765	mtx_lock(&softc->ctl_lock);
12766	if (targ_lun >= ctl_max_luns ||
12767	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
12768		mtx_unlock(&softc->ctl_lock);
12769		io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
12770		return (1);
12771	}
12772
12773	mtx_lock(&lun->lun_lock);
12774	mtx_unlock(&softc->ctl_lock);
12775	if (io->taskio.task_action == CTL_TASK_ABORT_TASK_SET) {
12776		ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port,
12777		    io->io_hdr.nexus.initid,
12778		    (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
12779	} else { /* CTL_TASK_CLEAR_TASK_SET */
12780		ctl_abort_tasks_lun(lun, UINT32_MAX, UINT32_MAX,
12781		    (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
12782	}
12783	mtx_unlock(&lun->lun_lock);
12784	io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12785	return (0);
12786}
12787
12788static void
12789ctl_i_t_nexus_loss(struct ctl_softc *softc, uint32_t initidx,
12790    ctl_ua_type ua_type)
12791{
12792	struct ctl_lun *lun;
12793	struct scsi_sense_data *ps;
12794	uint32_t p, i;
12795
12796	p = initidx / CTL_MAX_INIT_PER_PORT;
12797	i = initidx % CTL_MAX_INIT_PER_PORT;
12798	mtx_lock(&softc->ctl_lock);
12799	STAILQ_FOREACH(lun, &softc->lun_list, links) {
12800		mtx_lock(&lun->lun_lock);
12801		/* Abort tasks. */
12802		ctl_abort_tasks_lun(lun, p, i, 1);
12803		/* Clear CA. */
12804		ps = lun->pending_sense[p];
12805		if (ps != NULL)
12806			ps[i].error_code = 0;
12807		/* Clear reservation. */
12808		if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == initidx))
12809			lun->flags &= ~CTL_LUN_RESERVED;
12810		/* Clear prevent media removal. */
12811		if (lun->prevent && ctl_is_set(lun->prevent, initidx)) {
12812			ctl_clear_mask(lun->prevent, initidx);
12813			lun->prevent_count--;
12814		}
12815		/* Clear TPC status */
12816		ctl_tpc_lun_clear(lun, initidx);
12817		/* Establish UA. */
12818		ctl_est_ua(lun, initidx, ua_type);
12819		mtx_unlock(&lun->lun_lock);
12820	}
12821	mtx_unlock(&softc->ctl_lock);
12822}
12823
12824static int
12825ctl_i_t_nexus_reset(union ctl_io *io)
12826{
12827	struct ctl_softc *softc = CTL_SOFTC(io);
12828	uint32_t initidx;
12829
12830	if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
12831		union ctl_ha_msg msg_info;
12832
12833		msg_info.hdr.nexus = io->io_hdr.nexus;
12834		msg_info.task.task_action = CTL_TASK_I_T_NEXUS_RESET;
12835		msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
12836		msg_info.hdr.original_sc = NULL;
12837		msg_info.hdr.serializing_sc = NULL;
12838		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
12839		    sizeof(msg_info.task), M_WAITOK);
12840	}
12841
12842	initidx = ctl_get_initindex(&io->io_hdr.nexus);
12843	ctl_i_t_nexus_loss(softc, initidx, CTL_UA_I_T_NEXUS_LOSS);
12844	io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12845	return (0);
12846}
12847
12848static int
12849ctl_abort_task(union ctl_io *io)
12850{
12851	struct ctl_softc *softc = CTL_SOFTC(io);
12852	struct ctl_io_hdr *xioh;
12853	struct ctl_lun *lun;
12854	uint32_t targ_lun;
12855
12856	/*
12857	 * Look up the LUN.
12858	 */
12859	targ_lun = io->io_hdr.nexus.targ_mapped_lun;
12860	mtx_lock(&softc->ctl_lock);
12861	if (targ_lun >= ctl_max_luns ||
12862	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
12863		mtx_unlock(&softc->ctl_lock);
12864		io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
12865		return (1);
12866	}
12867
12868	mtx_lock(&lun->lun_lock);
12869	mtx_unlock(&softc->ctl_lock);
12870	/*
12871	 * Run through the OOA queue and attempt to find the given I/O.
12872	 * The target port, initiator ID, tag type and tag number have to
12873	 * match the values that we got from the initiator.  If we have an
12874	 * untagged command to abort, simply abort the first untagged command
12875	 * we come to.  We only allow one untagged command at a time of course.
12876	 */
12877	LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) {
12878		union ctl_io *xio = (union ctl_io *)xioh;
12879
12880		CTL_IO_ASSERT(xio, SCSI);
12881		if ((xioh->nexus.targ_port != io->io_hdr.nexus.targ_port)
12882		 || (xioh->nexus.initid != io->io_hdr.nexus.initid)
12883		 || (xioh->flags & CTL_FLAG_ABORT))
12884			continue;
12885
12886		/*
12887		 * If the abort says that the task is untagged, the
12888		 * task in the queue must be untagged.  Otherwise,
12889		 * we just check to see whether the tag numbers
12890		 * match.  This is because the QLogic firmware
12891		 * doesn't pass back the tag type in an abort
12892		 * request.
12893		 */
12894#if 0
12895		if (((xio->scsiio.tag_type == CTL_TAG_UNTAGGED)
12896		  && (io->taskio.tag_type == CTL_TAG_UNTAGGED))
12897		 || (xio->scsiio.tag_num == io->taskio.tag_num)) {
12898#else
12899		/*
12900		 * XXX KDM we've got problems with FC, because it
12901		 * doesn't send down a tag type with aborts.  So we
12902		 * can only really go by the tag number...
12903		 * This may cause problems with parallel SCSI.
12904		 * Need to figure that out!!
12905		 */
12906		if (xio->scsiio.tag_num == io->taskio.tag_num) {
12907#endif
12908			xioh->flags |= CTL_FLAG_ABORT;
12909			if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0 &&
12910			    !(lun->flags & CTL_LUN_PRIMARY_SC)) {
12911				union ctl_ha_msg msg_info;
12912
12913				msg_info.hdr.nexus = io->io_hdr.nexus;
12914				msg_info.task.task_action = CTL_TASK_ABORT_TASK;
12915				msg_info.task.tag_num = io->taskio.tag_num;
12916				msg_info.task.tag_type = io->taskio.tag_type;
12917				msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
12918				msg_info.hdr.original_sc = NULL;
12919				msg_info.hdr.serializing_sc = NULL;
12920				ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
12921				    sizeof(msg_info.task), M_NOWAIT);
12922			}
12923			ctl_try_unblock_io(lun, xio, FALSE);
12924		}
12925	}
12926	mtx_unlock(&lun->lun_lock);
12927	io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12928	return (0);
12929}
12930
12931static int
12932ctl_query_task(union ctl_io *io, int task_set)
12933{
12934	struct ctl_softc *softc = CTL_SOFTC(io);
12935	struct ctl_io_hdr *xioh;
12936	struct ctl_lun *lun;
12937	int found = 0;
12938	uint32_t targ_lun;
12939
12940	targ_lun = io->io_hdr.nexus.targ_mapped_lun;
12941	mtx_lock(&softc->ctl_lock);
12942	if (targ_lun >= ctl_max_luns ||
12943	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
12944		mtx_unlock(&softc->ctl_lock);
12945		io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
12946		return (1);
12947	}
12948	mtx_lock(&lun->lun_lock);
12949	mtx_unlock(&softc->ctl_lock);
12950	LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) {
12951		union ctl_io *xio = (union ctl_io *)xioh;
12952
12953		CTL_IO_ASSERT(xio, SCSI);
12954		if ((xioh->nexus.targ_port != io->io_hdr.nexus.targ_port)
12955		 || (xioh->nexus.initid != io->io_hdr.nexus.initid)
12956		 || (xioh->flags & CTL_FLAG_ABORT))
12957			continue;
12958
12959		if (task_set || xio->scsiio.tag_num == io->taskio.tag_num) {
12960			found = 1;
12961			break;
12962		}
12963	}
12964	mtx_unlock(&lun->lun_lock);
12965	if (found)
12966		io->taskio.task_status = CTL_TASK_FUNCTION_SUCCEEDED;
12967	else
12968		io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12969	return (0);
12970}
12971
12972static int
12973ctl_query_async_event(union ctl_io *io)
12974{
12975	struct ctl_softc *softc = CTL_SOFTC(io);
12976	struct ctl_lun *lun;
12977	ctl_ua_type ua;
12978	uint32_t targ_lun, initidx;
12979
12980	targ_lun = io->io_hdr.nexus.targ_mapped_lun;
12981	mtx_lock(&softc->ctl_lock);
12982	if (targ_lun >= ctl_max_luns ||
12983	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
12984		mtx_unlock(&softc->ctl_lock);
12985		io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
12986		return (1);
12987	}
12988	mtx_lock(&lun->lun_lock);
12989	mtx_unlock(&softc->ctl_lock);
12990	initidx = ctl_get_initindex(&io->io_hdr.nexus);
12991	ua = ctl_build_qae(lun, initidx, io->taskio.task_resp);
12992	mtx_unlock(&lun->lun_lock);
12993	if (ua != CTL_UA_NONE)
12994		io->taskio.task_status = CTL_TASK_FUNCTION_SUCCEEDED;
12995	else
12996		io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
12997	return (0);
12998}
12999
13000static void
13001ctl_run_task(union ctl_io *io)
13002{
13003	int retval = 1;
13004
13005	CTL_DEBUG_PRINT(("ctl_run_task\n"));
13006	KASSERT(io->io_hdr.io_type == CTL_IO_TASK,
13007	    ("ctl_run_task: Unextected io_type %d\n", io->io_hdr.io_type));
13008	io->taskio.task_status = CTL_TASK_FUNCTION_NOT_SUPPORTED;
13009	bzero(io->taskio.task_resp, sizeof(io->taskio.task_resp));
13010	switch (io->taskio.task_action) {
13011	case CTL_TASK_ABORT_TASK:
13012		retval = ctl_abort_task(io);
13013		break;
13014	case CTL_TASK_ABORT_TASK_SET:
13015	case CTL_TASK_CLEAR_TASK_SET:
13016		retval = ctl_abort_task_set(io);
13017		break;
13018	case CTL_TASK_CLEAR_ACA:
13019		break;
13020	case CTL_TASK_I_T_NEXUS_RESET:
13021		retval = ctl_i_t_nexus_reset(io);
13022		break;
13023	case CTL_TASK_LUN_RESET:
13024		retval = ctl_lun_reset(io);
13025		break;
13026	case CTL_TASK_TARGET_RESET:
13027	case CTL_TASK_BUS_RESET:
13028		retval = ctl_target_reset(io);
13029		break;
13030	case CTL_TASK_PORT_LOGIN:
13031		break;
13032	case CTL_TASK_PORT_LOGOUT:
13033		break;
13034	case CTL_TASK_QUERY_TASK:
13035		retval = ctl_query_task(io, 0);
13036		break;
13037	case CTL_TASK_QUERY_TASK_SET:
13038		retval = ctl_query_task(io, 1);
13039		break;
13040	case CTL_TASK_QUERY_ASYNC_EVENT:
13041		retval = ctl_query_async_event(io);
13042		break;
13043	default:
13044		printf("%s: got unknown task management event %d\n",
13045		       __func__, io->taskio.task_action);
13046		break;
13047	}
13048	if (retval == 0)
13049		io->io_hdr.status = CTL_SUCCESS;
13050	else
13051		io->io_hdr.status = CTL_ERROR;
13052	ctl_done(io);
13053}
13054
13055/*
13056 * For HA operation.  Handle commands that come in from the other
13057 * controller.
13058 */
13059static void
13060ctl_handle_isc(union ctl_io *io)
13061{
13062	struct ctl_softc *softc = CTL_SOFTC(io);
13063	struct ctl_lun *lun;
13064	const struct ctl_cmd_entry *entry;
13065	uint32_t targ_lun;
13066
13067	CTL_IO_ASSERT(io, SCSI);
13068
13069	targ_lun = io->io_hdr.nexus.targ_mapped_lun;
13070	switch (io->io_hdr.msg_type) {
13071	case CTL_MSG_SERIALIZE:
13072		ctl_serialize_other_sc_cmd(&io->scsiio);
13073		break;
13074	case CTL_MSG_R2R:		/* Only used in SER_ONLY mode. */
13075		entry = ctl_get_cmd_entry(&io->scsiio, NULL);
13076		if (targ_lun >= ctl_max_luns ||
13077		    (lun = softc->ctl_luns[targ_lun]) == NULL) {
13078			ctl_done(io);
13079			break;
13080		}
13081		mtx_lock(&lun->lun_lock);
13082		if (ctl_scsiio_lun_check(lun, entry, &io->scsiio) != 0) {
13083			mtx_unlock(&lun->lun_lock);
13084			ctl_done(io);
13085			break;
13086		}
13087		io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
13088		mtx_unlock(&lun->lun_lock);
13089		ctl_enqueue_rtr(io);
13090		break;
13091	case CTL_MSG_FINISH_IO:
13092		if (softc->ha_mode == CTL_HA_MODE_XFER) {
13093			ctl_done(io);
13094			break;
13095		}
13096		if (targ_lun >= ctl_max_luns ||
13097		    (lun = softc->ctl_luns[targ_lun]) == NULL) {
13098			ctl_free_io(io);
13099			break;
13100		}
13101		mtx_lock(&lun->lun_lock);
13102		ctl_try_unblock_others(lun, io, TRUE);
13103		LIST_REMOVE(&io->io_hdr, ooa_links);
13104		mtx_unlock(&lun->lun_lock);
13105		ctl_free_io(io);
13106		break;
13107	case CTL_MSG_PERS_ACTION:
13108		ctl_hndl_per_res_out_on_other_sc(io);
13109		ctl_free_io(io);
13110		break;
13111	case CTL_MSG_BAD_JUJU:
13112		ctl_done(io);
13113		break;
13114	case CTL_MSG_DATAMOVE:		/* Only used in XFER mode */
13115		ctl_datamove_remote(io);
13116		break;
13117	case CTL_MSG_DATAMOVE_DONE:	/* Only used in XFER mode */
13118		ctl_datamove_done(io, false);
13119		break;
13120	case CTL_MSG_FAILOVER:
13121		ctl_failover_lun(io);
13122		ctl_free_io(io);
13123		break;
13124	default:
13125		printf("%s: Invalid message type %d\n",
13126		       __func__, io->io_hdr.msg_type);
13127		ctl_free_io(io);
13128		break;
13129	}
13130
13131}
13132
13133/*
13134 * Returns the match type in the case of a match, or CTL_LUN_PAT_NONE if
13135 * there is no match.
13136 */
13137static ctl_lun_error_pattern
13138ctl_cmd_pattern_match(struct ctl_scsiio *ctsio, struct ctl_error_desc *desc)
13139{
13140	const struct ctl_cmd_entry *entry;
13141	ctl_lun_error_pattern filtered_pattern, pattern;
13142
13143	pattern = desc->error_pattern;
13144
13145	/*
13146	 * XXX KDM we need more data passed into this function to match a
13147	 * custom pattern, and we actually need to implement custom pattern
13148	 * matching.
13149	 */
13150	if (pattern & CTL_LUN_PAT_CMD)
13151		return (CTL_LUN_PAT_CMD);
13152
13153	if ((pattern & CTL_LUN_PAT_MASK) == CTL_LUN_PAT_ANY)
13154		return (CTL_LUN_PAT_ANY);
13155
13156	entry = ctl_get_cmd_entry(ctsio, NULL);
13157
13158	filtered_pattern = entry->pattern & pattern;
13159
13160	/*
13161	 * If the user requested specific flags in the pattern (e.g.
13162	 * CTL_LUN_PAT_RANGE), make sure the command supports all of those
13163	 * flags.
13164	 *
13165	 * If the user did not specify any flags, it doesn't matter whether
13166	 * or not the command supports the flags.
13167	 */
13168	if ((filtered_pattern & ~CTL_LUN_PAT_MASK) !=
13169	     (pattern & ~CTL_LUN_PAT_MASK))
13170		return (CTL_LUN_PAT_NONE);
13171
13172	/*
13173	 * If the user asked for a range check, see if the requested LBA
13174	 * range overlaps with this command's LBA range.
13175	 */
13176	if (filtered_pattern & CTL_LUN_PAT_RANGE) {
13177		uint64_t lba1;
13178		uint64_t len1;
13179		ctl_action action;
13180		int retval;
13181
13182		retval = ctl_get_lba_len((union ctl_io *)ctsio, &lba1, &len1);
13183		if (retval != 0)
13184			return (CTL_LUN_PAT_NONE);
13185
13186		action = ctl_extent_check_lba(lba1, len1, desc->lba_range.lba,
13187					      desc->lba_range.len, FALSE);
13188		/*
13189		 * A "pass" means that the LBA ranges don't overlap, so
13190		 * this doesn't match the user's range criteria.
13191		 */
13192		if (action == CTL_ACTION_PASS)
13193			return (CTL_LUN_PAT_NONE);
13194	}
13195
13196	return (filtered_pattern);
13197}
13198
13199static void
13200ctl_inject_error(struct ctl_lun *lun, union ctl_io *io)
13201{
13202	struct ctl_error_desc *desc, *desc2;
13203
13204	CTL_IO_ASSERT(io, SCSI);
13205
13206	mtx_assert(&lun->lun_lock, MA_OWNED);
13207
13208	STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) {
13209		ctl_lun_error_pattern pattern;
13210		/*
13211		 * Check to see whether this particular command matches
13212		 * the pattern in the descriptor.
13213		 */
13214		pattern = ctl_cmd_pattern_match(&io->scsiio, desc);
13215		if ((pattern & CTL_LUN_PAT_MASK) == CTL_LUN_PAT_NONE)
13216			continue;
13217
13218		switch (desc->lun_error & CTL_LUN_INJ_TYPE) {
13219		case CTL_LUN_INJ_ABORTED:
13220			ctl_set_aborted(&io->scsiio);
13221			break;
13222		case CTL_LUN_INJ_MEDIUM_ERR:
13223			ctl_set_medium_error(&io->scsiio,
13224			    (io->io_hdr.flags & CTL_FLAG_DATA_MASK) !=
13225			     CTL_FLAG_DATA_OUT);
13226			break;
13227		case CTL_LUN_INJ_UA:
13228			/* 29h/00h  POWER ON, RESET, OR BUS DEVICE RESET
13229			 * OCCURRED */
13230			ctl_set_ua(&io->scsiio, 0x29, 0x00);
13231			break;
13232		case CTL_LUN_INJ_CUSTOM:
13233			/*
13234			 * We're assuming the user knows what he is doing.
13235			 * Just copy the sense information without doing
13236			 * checks.
13237			 */
13238			bcopy(&desc->custom_sense, &io->scsiio.sense_data,
13239			      MIN(sizeof(desc->custom_sense),
13240				  sizeof(io->scsiio.sense_data)));
13241			io->scsiio.scsi_status = SCSI_STATUS_CHECK_COND;
13242			io->scsiio.sense_len = SSD_FULL_SIZE;
13243			io->io_hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE;
13244			break;
13245		case CTL_LUN_INJ_NONE:
13246		default:
13247			/*
13248			 * If this is an error injection type we don't know
13249			 * about, clear the continuous flag (if it is set)
13250			 * so it will get deleted below.
13251			 */
13252			desc->lun_error &= ~CTL_LUN_INJ_CONTINUOUS;
13253			break;
13254		}
13255		/*
13256		 * By default, each error injection action is a one-shot
13257		 */
13258		if (desc->lun_error & CTL_LUN_INJ_CONTINUOUS)
13259			continue;
13260
13261		STAILQ_REMOVE(&lun->error_list, desc, ctl_error_desc, links);
13262
13263		free(desc, M_CTL);
13264	}
13265}
13266
13267#ifdef CTL_IO_DELAY
13268static void
13269ctl_datamove_timer_wakeup(void *arg)
13270{
13271	union ctl_io *io;
13272
13273	io = (union ctl_io *)arg;
13274
13275	ctl_datamove(io);
13276}
13277#endif /* CTL_IO_DELAY */
13278
13279static void
13280ctl_datamove_done_process(union ctl_io *io)
13281{
13282#ifdef CTL_TIME_IO
13283	struct bintime cur_bt;
13284
13285	getbinuptime(&cur_bt);
13286	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
13287	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
13288#endif
13289	io->io_hdr.num_dmas++;
13290
13291	if ((io->io_hdr.port_status != 0) &&
13292	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
13293	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
13294		switch (io->io_hdr.io_type) {
13295		case CTL_IO_SCSI:
13296			ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1,
13297			    /*retry_count*/ io->io_hdr.port_status);
13298			break;
13299		case CTL_IO_NVME:
13300		case CTL_IO_NVME_ADMIN:
13301			if (io->io_hdr.flags & CTL_FLAG_ABORT)
13302				ctl_nvme_set_command_aborted(&io->nvmeio);
13303			else
13304				ctl_nvme_set_data_transfer_error(&io->nvmeio);
13305			break;
13306		default:
13307			__assert_unreachable();
13308		}
13309	} else if (ctl_kern_data_resid(io) != 0 &&
13310	    (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT &&
13311	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
13312	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
13313		switch (io->io_hdr.io_type) {
13314		case CTL_IO_SCSI:
13315			ctl_set_invalid_field_ciu(&io->scsiio);
13316			break;
13317		case CTL_IO_NVME:
13318		case CTL_IO_NVME_ADMIN:
13319			ctl_nvme_set_data_transfer_error(&io->nvmeio);
13320			break;
13321		default:
13322			__assert_unreachable();
13323		}
13324	} else if (ctl_debug & CTL_DEBUG_CDB_DATA)
13325		ctl_data_print(io);
13326}
13327
13328void
13329ctl_datamove_done(union ctl_io *io, bool samethr)
13330{
13331
13332	ctl_datamove_done_process(io);
13333	ctl_be_move_done(io, samethr);
13334}
13335
13336void
13337ctl_datamove(union ctl_io *io)
13338{
13339	void (*fe_datamove)(union ctl_io *io);
13340
13341	mtx_assert(&((struct ctl_softc *)CTL_SOFTC(io))->ctl_lock, MA_NOTOWNED);
13342
13343	CTL_DEBUG_PRINT(("ctl_datamove\n"));
13344
13345	/* No data transferred yet.  Frontend must update this when done. */
13346	ctl_set_kern_data_resid(io, ctl_kern_data_len(io));
13347
13348#ifdef CTL_TIME_IO
13349	getbinuptime(&io->io_hdr.dma_start_bt);
13350#endif /* CTL_TIME_IO */
13351
13352#ifdef CTL_IO_DELAY
13353	if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) {
13354		io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE;
13355	} else {
13356		struct ctl_lun *lun;
13357
13358		lun = CTL_LUN(io);
13359		if ((lun != NULL)
13360		 && (lun->delay_info.datamove_delay > 0)) {
13361			callout_init(&io->io_hdr.delay_callout, /*mpsafe*/ 1);
13362			io->io_hdr.flags |= CTL_FLAG_DELAY_DONE;
13363			callout_reset(&io->io_hdr.delay_callout,
13364				      lun->delay_info.datamove_delay * hz,
13365				      ctl_datamove_timer_wakeup, io);
13366			if (lun->delay_info.datamove_type ==
13367			    CTL_DELAY_TYPE_ONESHOT)
13368				lun->delay_info.datamove_delay = 0;
13369			return;
13370		}
13371	}
13372#endif
13373
13374	/*
13375	 * This command has been aborted.  Set the port status, so we fail
13376	 * the data move.
13377	 */
13378	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
13379		switch (io->io_hdr.io_type) {
13380		case CTL_IO_SCSI:
13381			printf("ctl_datamove: tag 0x%jx on (%u:%u:%u) aborted\n",
13382			    io->scsiio.tag_num, io->io_hdr.nexus.initid,
13383			    io->io_hdr.nexus.targ_port,
13384			    io->io_hdr.nexus.targ_lun);
13385			break;
13386		case CTL_IO_NVME:
13387		case CTL_IO_NVME_ADMIN:
13388			printf("ctl_datamove: CID 0x%x on (%u:%u:%u) aborted\n",
13389			    le16toh(io->nvmeio.cmd.cid),
13390			    io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port,
13391			    io->io_hdr.nexus.targ_lun);
13392			break;
13393		default:
13394			__assert_unreachable();
13395		}
13396		io->io_hdr.port_status = 31337;
13397		ctl_datamove_done_process(io);
13398		ctl_be_move_done(io, true);
13399		return;
13400	}
13401
13402	/* Don't confuse frontend with zero length data move. */
13403	if (ctl_kern_data_len(io) == 0) {
13404		ctl_datamove_done_process(io);
13405		ctl_be_move_done(io, true);
13406		return;
13407	}
13408
13409	fe_datamove = CTL_PORT(io)->fe_datamove;
13410	fe_datamove(io);
13411}
13412
13413static void
13414ctl_send_datamove_done(union ctl_io *io, int have_lock)
13415{
13416	union ctl_ha_msg msg;
13417#ifdef CTL_TIME_IO
13418	struct bintime cur_bt;
13419#endif
13420
13421	CTL_IO_ASSERT(io, SCSI);
13422
13423	memset(&msg, 0, sizeof(msg));
13424	msg.hdr.msg_type = CTL_MSG_DATAMOVE_DONE;
13425	msg.hdr.original_sc = io;
13426	msg.hdr.serializing_sc = io->io_hdr.remote_io;
13427	msg.hdr.nexus = io->io_hdr.nexus;
13428	msg.hdr.status = io->io_hdr.status;
13429	msg.scsi.kern_data_resid = io->scsiio.kern_data_resid;
13430	msg.scsi.tag_num = io->scsiio.tag_num;
13431	msg.scsi.tag_type = io->scsiio.tag_type;
13432	msg.scsi.scsi_status = io->scsiio.scsi_status;
13433	memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data,
13434	       io->scsiio.sense_len);
13435	msg.scsi.sense_len = io->scsiio.sense_len;
13436	msg.scsi.port_status = io->io_hdr.port_status;
13437	io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
13438	if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
13439		ctl_failover_io(io, /*have_lock*/ have_lock);
13440		return;
13441	}
13442	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
13443	    sizeof(msg.scsi) - sizeof(msg.scsi.sense_data) +
13444	    msg.scsi.sense_len, M_WAITOK);
13445
13446#ifdef CTL_TIME_IO
13447	getbinuptime(&cur_bt);
13448	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
13449	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
13450#endif
13451	io->io_hdr.num_dmas++;
13452}
13453
13454/*
13455 * The DMA to the remote side is done, now we need to tell the other side
13456 * we're done so it can continue with its data movement.
13457 */
13458static void
13459ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq)
13460{
13461	union ctl_io *io;
13462	uint32_t i;
13463
13464	io = rq->context;
13465	CTL_IO_ASSERT(io, SCSI);
13466
13467	if (rq->ret != CTL_HA_STATUS_SUCCESS) {
13468		printf("%s: ISC DMA write failed with error %d", __func__,
13469		       rq->ret);
13470		ctl_set_internal_failure(&io->scsiio,
13471					 /*sks_valid*/ 1,
13472					 /*retry_count*/ rq->ret);
13473	}
13474
13475	ctl_dt_req_free(rq);
13476
13477	for (i = 0; i < io->scsiio.kern_sg_entries; i++)
13478		free(CTL_LSGLT(io)[i].addr, M_CTL);
13479	free(CTL_RSGL(io), M_CTL);
13480	CTL_RSGL(io) = NULL;
13481	CTL_LSGL(io) = NULL;
13482
13483	/*
13484	 * The data is in local and remote memory, so now we need to send
13485	 * status (good or back) back to the other side.
13486	 */
13487	ctl_send_datamove_done(io, /*have_lock*/ 0);
13488}
13489
13490/*
13491 * We've moved the data from the host/controller into local memory.  Now we
13492 * need to push it over to the remote controller's memory.
13493 */
13494static int
13495ctl_datamove_remote_dm_write_cb(union ctl_io *io, bool samethr)
13496{
13497	int retval;
13498
13499	retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_WRITE,
13500					  ctl_datamove_remote_write_cb);
13501	return (retval);
13502}
13503
13504static void
13505ctl_datamove_remote_write(union ctl_io *io)
13506{
13507	int retval;
13508	void (*fe_datamove)(union ctl_io *io);
13509
13510	CTL_IO_ASSERT(io, SCSI);
13511
13512	/*
13513	 * - Get the data from the host/HBA into local memory.
13514	 * - DMA memory from the local controller to the remote controller.
13515	 * - Send status back to the remote controller.
13516	 */
13517
13518	retval = ctl_datamove_remote_sgl_setup(io);
13519	if (retval != 0)
13520		return;
13521
13522	/* Switch the pointer over so the FETD knows what to do */
13523	io->scsiio.kern_data_ptr = (uint8_t *)CTL_LSGL(io);
13524
13525	/*
13526	 * Use a custom move done callback, since we need to send completion
13527	 * back to the other controller, not to the backend on this side.
13528	 */
13529	io->scsiio.be_move_done = ctl_datamove_remote_dm_write_cb;
13530
13531	fe_datamove = CTL_PORT(io)->fe_datamove;
13532	fe_datamove(io);
13533}
13534
13535static int
13536ctl_datamove_remote_dm_read_cb(union ctl_io *io, bool samethr)
13537{
13538	uint32_t i;
13539
13540	CTL_IO_ASSERT(io, SCSI);
13541
13542	for (i = 0; i < io->scsiio.kern_sg_entries; i++)
13543		free(CTL_LSGLT(io)[i].addr, M_CTL);
13544	free(CTL_RSGL(io), M_CTL);
13545	CTL_RSGL(io) = NULL;
13546	CTL_LSGL(io) = NULL;
13547
13548	/*
13549	 * The read is done, now we need to send status (good or bad) back
13550	 * to the other side.
13551	 */
13552	ctl_send_datamove_done(io, /*have_lock*/ 0);
13553
13554	return (0);
13555}
13556
13557static void
13558ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq)
13559{
13560	union ctl_io *io;
13561	void (*fe_datamove)(union ctl_io *io);
13562
13563	io = rq->context;
13564	CTL_IO_ASSERT(io, SCSI);
13565
13566	if (rq->ret != CTL_HA_STATUS_SUCCESS) {
13567		printf("%s: ISC DMA read failed with error %d\n", __func__,
13568		       rq->ret);
13569		ctl_set_internal_failure(&io->scsiio,
13570					 /*sks_valid*/ 1,
13571					 /*retry_count*/ rq->ret);
13572	}
13573
13574	ctl_dt_req_free(rq);
13575
13576	/* Switch the pointer over so the FETD knows what to do */
13577	io->scsiio.kern_data_ptr = (uint8_t *)CTL_LSGL(io);
13578
13579	/*
13580	 * Use a custom move done callback, since we need to send completion
13581	 * back to the other controller, not to the backend on this side.
13582	 */
13583	io->scsiio.be_move_done = ctl_datamove_remote_dm_read_cb;
13584
13585	/* XXX KDM add checks like the ones in ctl_datamove? */
13586
13587	fe_datamove = CTL_PORT(io)->fe_datamove;
13588	fe_datamove(io);
13589}
13590
13591static int
13592ctl_datamove_remote_sgl_setup(union ctl_io *io)
13593{
13594	struct ctl_sg_entry *local_sglist;
13595	uint32_t len_to_go;
13596	int retval;
13597	int i;
13598
13599	CTL_IO_ASSERT(io, SCSI);
13600
13601	retval = 0;
13602	local_sglist = CTL_LSGL(io);
13603	len_to_go = io->scsiio.kern_data_len;
13604
13605	/*
13606	 * The difficult thing here is that the size of the various
13607	 * S/G segments may be different than the size from the
13608	 * remote controller.  That'll make it harder when DMAing
13609	 * the data back to the other side.
13610	 */
13611	for (i = 0; len_to_go > 0; i++) {
13612		local_sglist[i].len = MIN(len_to_go, CTL_HA_DATAMOVE_SEGMENT);
13613		local_sglist[i].addr =
13614		    malloc(local_sglist[i].len, M_CTL, M_WAITOK);
13615
13616		len_to_go -= local_sglist[i].len;
13617	}
13618	/*
13619	 * Reset the number of S/G entries accordingly.  The original
13620	 * number of S/G entries is available in rem_sg_entries.
13621	 */
13622	io->scsiio.kern_sg_entries = i;
13623
13624	return (retval);
13625}
13626
13627static int
13628ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
13629			 ctl_ha_dt_cb callback)
13630{
13631	struct ctl_ha_dt_req *rq;
13632	struct ctl_sg_entry *remote_sglist, *local_sglist;
13633	uint32_t local_used, remote_used, total_used;
13634	int i, j, isc_ret;
13635
13636	rq = ctl_dt_req_alloc();
13637
13638	CTL_IO_ASSERT(io, SCSI);
13639
13640	/*
13641	 * If we failed to allocate the request, and if the DMA didn't fail
13642	 * anyway, set busy status.  This is just a resource allocation
13643	 * failure.
13644	 */
13645	if ((rq == NULL)
13646	 && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
13647	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS))
13648		ctl_set_busy(&io->scsiio);
13649
13650	if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
13651	    (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) {
13652		if (rq != NULL)
13653			ctl_dt_req_free(rq);
13654
13655		/*
13656		 * The data move failed.  We need to return status back
13657		 * to the other controller.  No point in trying to DMA
13658		 * data to the remote controller.
13659		 */
13660
13661		ctl_send_datamove_done(io, /*have_lock*/ 0);
13662
13663		return (1);
13664	}
13665
13666	local_sglist = CTL_LSGL(io);
13667	remote_sglist = CTL_RSGL(io);
13668	local_used = 0;
13669	remote_used = 0;
13670	total_used = 0;
13671
13672	/*
13673	 * Pull/push the data over the wire from/to the other controller.
13674	 * This takes into account the possibility that the local and
13675	 * remote sglists may not be identical in terms of the size of
13676	 * the elements and the number of elements.
13677	 *
13678	 * One fundamental assumption here is that the length allocated for
13679	 * both the local and remote sglists is identical.  Otherwise, we've
13680	 * essentially got a coding error of some sort.
13681	 */
13682	isc_ret = CTL_HA_STATUS_SUCCESS;
13683	for (i = 0, j = 0; total_used < io->scsiio.kern_data_len; ) {
13684		uint32_t cur_len;
13685		uint8_t *tmp_ptr;
13686
13687		rq->command = command;
13688		rq->context = io;
13689
13690		/*
13691		 * Both pointers should be aligned.  But it is possible
13692		 * that the allocation length is not.  They should both
13693		 * also have enough slack left over at the end, though,
13694		 * to round up to the next 8 byte boundary.
13695		 */
13696		cur_len = MIN(local_sglist[i].len - local_used,
13697			      remote_sglist[j].len - remote_used);
13698		rq->size = cur_len;
13699
13700		tmp_ptr = (uint8_t *)local_sglist[i].addr;
13701		tmp_ptr += local_used;
13702
13703#if 0
13704		/* Use physical addresses when talking to ISC hardware */
13705		if ((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0) {
13706			/* XXX KDM use busdma */
13707			rq->local = vtophys(tmp_ptr);
13708		} else
13709			rq->local = tmp_ptr;
13710#else
13711		KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0,
13712		    ("HA does not support BUS_ADDR"));
13713		rq->local = tmp_ptr;
13714#endif
13715
13716		tmp_ptr = (uint8_t *)remote_sglist[j].addr;
13717		tmp_ptr += remote_used;
13718		rq->remote = tmp_ptr;
13719
13720		rq->callback = NULL;
13721
13722		local_used += cur_len;
13723		if (local_used >= local_sglist[i].len) {
13724			i++;
13725			local_used = 0;
13726		}
13727
13728		remote_used += cur_len;
13729		if (remote_used >= remote_sglist[j].len) {
13730			j++;
13731			remote_used = 0;
13732		}
13733		total_used += cur_len;
13734
13735		if (total_used >= io->scsiio.kern_data_len)
13736			rq->callback = callback;
13737
13738		isc_ret = ctl_dt_single(rq);
13739		if (isc_ret > CTL_HA_STATUS_SUCCESS)
13740			break;
13741	}
13742	if (isc_ret != CTL_HA_STATUS_WAIT) {
13743		rq->ret = isc_ret;
13744		callback(rq);
13745	}
13746
13747	return (0);
13748}
13749
13750static void
13751ctl_datamove_remote_read(union ctl_io *io)
13752{
13753	int retval;
13754	uint32_t i;
13755
13756	/*
13757	 * This will send an error to the other controller in the case of a
13758	 * failure.
13759	 */
13760	retval = ctl_datamove_remote_sgl_setup(io);
13761	if (retval != 0)
13762		return;
13763
13764	retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_READ,
13765					  ctl_datamove_remote_read_cb);
13766	if (retval != 0) {
13767		/*
13768		 * Make sure we free memory if there was an error..  The
13769		 * ctl_datamove_remote_xfer() function will send the
13770		 * datamove done message, or call the callback with an
13771		 * error if there is a problem.
13772		 */
13773		for (i = 0; i < ctl_kern_sg_entries(io); i++)
13774			free(CTL_LSGLT(io)[i].addr, M_CTL);
13775		free(CTL_RSGL(io), M_CTL);
13776		CTL_RSGL(io) = NULL;
13777		CTL_LSGL(io) = NULL;
13778	}
13779}
13780
13781/*
13782 * Process a datamove request from the other controller.  This is used for
13783 * XFER mode only, not SER_ONLY mode.  For writes, we DMA into local memory
13784 * first.  Once that is complete, the data gets DMAed into the remote
13785 * controller's memory.  For reads, we DMA from the remote controller's
13786 * memory into our memory first, and then move it out to the FETD.
13787 */
13788static void
13789ctl_datamove_remote(union ctl_io *io)
13790{
13791	CTL_IO_ASSERT(io, SCSI);
13792
13793	mtx_assert(&((struct ctl_softc *)CTL_SOFTC(io))->ctl_lock, MA_NOTOWNED);
13794
13795	if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
13796		ctl_failover_io(io, /*have_lock*/ 0);
13797		return;
13798	}
13799
13800	/*
13801	 * Note that we look for an aborted I/O here, but don't do some of
13802	 * the other checks that ctl_datamove() normally does.
13803	 * We don't need to run the datamove delay code, since that should
13804	 * have been done if need be on the other controller.
13805	 */
13806	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
13807		printf("%s: tag 0x%jx on (%u:%u:%u) aborted\n", __func__,
13808		       io->scsiio.tag_num, io->io_hdr.nexus.initid,
13809		       io->io_hdr.nexus.targ_port,
13810		       io->io_hdr.nexus.targ_lun);
13811		io->io_hdr.port_status = 31338;
13812		ctl_send_datamove_done(io, /*have_lock*/ 0);
13813		return;
13814	}
13815
13816	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT)
13817		ctl_datamove_remote_write(io);
13818	else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
13819		ctl_datamove_remote_read(io);
13820	else {
13821		io->io_hdr.port_status = 31339;
13822		ctl_send_datamove_done(io, /*have_lock*/ 0);
13823	}
13824}
13825
13826static void
13827ctl_process_done(union ctl_io *io)
13828{
13829	struct ctl_softc *softc = CTL_SOFTC(io);
13830	struct ctl_port *port = CTL_PORT(io);
13831	struct ctl_lun *lun = CTL_LUN(io);
13832	void (*fe_done)(union ctl_io *io);
13833	union ctl_ha_msg msg;
13834
13835	CTL_DEBUG_PRINT(("ctl_process_done\n"));
13836	fe_done = port->fe_done;
13837
13838#ifdef CTL_TIME_IO
13839	if ((time_uptime - io->io_hdr.start_time) > ctl_time_io_secs) {
13840		char str[256];
13841		char path_str[64];
13842		struct sbuf sb;
13843
13844		ctl_scsi_path_string(&io->io_hdr, path_str, sizeof(path_str));
13845		sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN);
13846
13847		ctl_io_sbuf(io, &sb);
13848		sbuf_cat(&sb, path_str);
13849		sbuf_printf(&sb, "ctl_process_done: %jd seconds\n",
13850			    (intmax_t)time_uptime - io->io_hdr.start_time);
13851		sbuf_finish(&sb);
13852		printf("%s", sbuf_data(&sb));
13853	}
13854#endif /* CTL_TIME_IO */
13855
13856	switch (io->io_hdr.io_type) {
13857	case CTL_IO_SCSI:
13858	case CTL_IO_NVME:
13859	case CTL_IO_NVME_ADMIN:
13860		break;
13861	case CTL_IO_TASK:
13862		if (ctl_debug & CTL_DEBUG_INFO)
13863			ctl_io_error_print(io, NULL);
13864		fe_done(io);
13865		return;
13866	default:
13867		panic("%s: Invalid CTL I/O type %d\n",
13868		    __func__, io->io_hdr.io_type);
13869	}
13870
13871	if (lun == NULL) {
13872		CTL_DEBUG_PRINT(("NULL LUN for lun %d\n",
13873				 io->io_hdr.nexus.targ_mapped_lun));
13874		goto bailout;
13875	}
13876
13877	mtx_lock(&lun->lun_lock);
13878
13879	/*
13880	 * Check to see if we have any informational exception and status
13881	 * of this command can be modified to report it in form of either
13882	 * RECOVERED ERROR or NO SENSE, depending on MRIE mode page field.
13883	 */
13884	if (lun->ie_reported == 0 && lun->ie_asc != 0 &&
13885	    io->io_hdr.status == CTL_SUCCESS &&
13886	    (io->io_hdr.flags & CTL_FLAG_STATUS_SENT) == 0) {
13887		uint8_t mrie = lun->MODE_IE.mrie;
13888		uint8_t per = ((lun->MODE_RWER.byte3 & SMS_RWER_PER) ||
13889		    (lun->MODE_VER.byte3 & SMS_VER_PER));
13890
13891		CTL_IO_ASSERT(io, SCSI);
13892		if (((mrie == SIEP_MRIE_REC_COND && per) ||
13893		     mrie == SIEP_MRIE_REC_UNCOND ||
13894		     mrie == SIEP_MRIE_NO_SENSE) &&
13895		    (ctl_get_cmd_entry(&io->scsiio, NULL)->flags &
13896		     CTL_CMD_FLAG_NO_SENSE) == 0) {
13897			ctl_set_sense(&io->scsiio,
13898			      /*current_error*/ 1,
13899			      /*sense_key*/ (mrie == SIEP_MRIE_NO_SENSE) ?
13900			        SSD_KEY_NO_SENSE : SSD_KEY_RECOVERED_ERROR,
13901			      /*asc*/ lun->ie_asc,
13902			      /*ascq*/ lun->ie_ascq,
13903			      SSD_ELEM_NONE);
13904			lun->ie_reported = 1;
13905		}
13906	} else if (lun->ie_reported < 0)
13907		lun->ie_reported = 0;
13908
13909	/*
13910	 * Check to see if we have any errors to inject here.  We only
13911	 * inject errors for commands that don't already have errors set.
13912	 */
13913	if (!STAILQ_EMPTY(&lun->error_list) &&
13914	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) &&
13915	    ((io->io_hdr.flags & CTL_FLAG_STATUS_SENT) == 0))
13916		ctl_inject_error(lun, io);
13917
13918	/*
13919	 * XXX KDM how do we treat commands that aren't completed
13920	 * successfully?
13921	 *
13922	 * XXX KDM should we also track I/O latency?
13923	 */
13924	if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS &&
13925	    (io->io_hdr.io_type == CTL_IO_SCSI ||
13926	    io->io_hdr.io_type == CTL_IO_NVME ||
13927	    io->io_hdr.io_type == CTL_IO_NVME_ADMIN)) {
13928		int type;
13929#ifdef CTL_TIME_IO
13930		struct bintime bt;
13931
13932		getbinuptime(&bt);
13933		bintime_sub(&bt, &io->io_hdr.start_bt);
13934#endif
13935		if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
13936		    CTL_FLAG_DATA_IN)
13937			type = CTL_STATS_READ;
13938		else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
13939		    CTL_FLAG_DATA_OUT)
13940			type = CTL_STATS_WRITE;
13941		else
13942			type = CTL_STATS_NO_IO;
13943
13944		lun->stats.bytes[type] += ctl_kern_total_len(io);
13945		lun->stats.operations[type] ++;
13946		lun->stats.dmas[type] += io->io_hdr.num_dmas;
13947#ifdef CTL_TIME_IO
13948		bintime_add(&lun->stats.dma_time[type], &io->io_hdr.dma_bt);
13949		bintime_add(&lun->stats.time[type], &bt);
13950#endif
13951
13952		mtx_lock(&port->port_lock);
13953		port->stats.bytes[type] += ctl_kern_total_len(io);
13954		port->stats.operations[type] ++;
13955		port->stats.dmas[type] += io->io_hdr.num_dmas;
13956#ifdef CTL_TIME_IO
13957		bintime_add(&port->stats.dma_time[type], &io->io_hdr.dma_bt);
13958		bintime_add(&port->stats.time[type], &bt);
13959#endif
13960		mtx_unlock(&port->port_lock);
13961	}
13962
13963	/*
13964	 * Run through the blocked queue of this I/O and see if anything
13965	 * can be unblocked, now that this I/O is done and will be removed.
13966	 * We need to do it before removal to have OOA position to start.
13967	 */
13968	ctl_try_unblock_others(lun, io, TRUE);
13969
13970	/*
13971	 * Remove this from the OOA queue.
13972	 */
13973	LIST_REMOVE(&io->io_hdr, ooa_links);
13974#ifdef CTL_TIME_IO
13975	if (LIST_EMPTY(&lun->ooa_queue))
13976		lun->last_busy = getsbinuptime();
13977#endif
13978
13979	/*
13980	 * If the LUN has been invalidated, free it if there is nothing
13981	 * left on its OOA queue.
13982	 */
13983	if ((lun->flags & CTL_LUN_INVALID)
13984	 && LIST_EMPTY(&lun->ooa_queue)) {
13985		mtx_unlock(&lun->lun_lock);
13986		ctl_free_lun(lun);
13987	} else
13988		mtx_unlock(&lun->lun_lock);
13989
13990bailout:
13991
13992	/*
13993	 * If this command has been aborted, make sure we set the status
13994	 * properly.  The FETD is responsible for freeing the I/O and doing
13995	 * whatever it needs to do to clean up its state.
13996	 */
13997	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
13998		switch (io->io_hdr.io_type) {
13999		case CTL_IO_SCSI:
14000			ctl_set_task_aborted(&io->scsiio);
14001			break;
14002		case CTL_IO_NVME:
14003		case CTL_IO_NVME_ADMIN:
14004			ctl_nvme_set_command_aborted(&io->nvmeio);
14005			break;
14006		default:
14007			__assert_unreachable();
14008		}
14009	}
14010
14011	/*
14012	 * If enabled, print command error status.
14013	 */
14014	if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS &&
14015	    (ctl_debug & CTL_DEBUG_INFO) != 0)
14016		ctl_io_error_print(io, NULL);
14017
14018	/*
14019	 * Tell the FETD or the other shelf controller we're done with this
14020	 * command.  Note that only SCSI commands get to this point.  Task
14021	 * management commands are completed above.
14022	 */
14023	if ((softc->ha_mode != CTL_HA_MODE_XFER) &&
14024	    (io->io_hdr.flags & CTL_FLAG_SENT_2OTHER_SC)) {
14025		memset(&msg, 0, sizeof(msg));
14026		msg.hdr.msg_type = CTL_MSG_FINISH_IO;
14027		msg.hdr.serializing_sc = io->io_hdr.remote_io;
14028		msg.hdr.nexus = io->io_hdr.nexus;
14029		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
14030		    sizeof(msg.scsi) - sizeof(msg.scsi.sense_data),
14031		    M_WAITOK);
14032	}
14033
14034	fe_done(io);
14035}
14036
14037/*
14038 * Front end should call this if it doesn't do autosense.  When the request
14039 * sense comes back in from the initiator, we'll dequeue this and send it.
14040 */
14041int
14042ctl_queue_sense(union ctl_io *io)
14043{
14044	struct ctl_softc *softc = CTL_SOFTC(io);
14045	struct ctl_port *port = CTL_PORT(io);
14046	struct ctl_lun *lun;
14047	struct scsi_sense_data *ps;
14048	uint32_t initidx, p, targ_lun;
14049
14050	CTL_DEBUG_PRINT(("ctl_queue_sense\n"));
14051	CTL_IO_ASSERT(io, SCSI);
14052
14053	targ_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun);
14054
14055	/*
14056	 * LUN lookup will likely move to the ctl_work_thread() once we
14057	 * have our new queueing infrastructure (that doesn't put things on
14058	 * a per-LUN queue initially).  That is so that we can handle
14059	 * things like an INQUIRY to a LUN that we don't have enabled.  We
14060	 * can't deal with that right now.
14061	 * If we don't have a LUN for this, just toss the sense information.
14062	 */
14063	mtx_lock(&softc->ctl_lock);
14064	if (targ_lun >= ctl_max_luns ||
14065	    (lun = softc->ctl_luns[targ_lun]) == NULL) {
14066		mtx_unlock(&softc->ctl_lock);
14067		goto bailout;
14068	}
14069	mtx_lock(&lun->lun_lock);
14070	mtx_unlock(&softc->ctl_lock);
14071
14072	initidx = ctl_get_initindex(&io->io_hdr.nexus);
14073	p = initidx / CTL_MAX_INIT_PER_PORT;
14074	if (lun->pending_sense[p] == NULL) {
14075		lun->pending_sense[p] = malloc(sizeof(*ps) * CTL_MAX_INIT_PER_PORT,
14076		    M_CTL, M_NOWAIT | M_ZERO);
14077	}
14078	if ((ps = lun->pending_sense[p]) != NULL) {
14079		ps += initidx % CTL_MAX_INIT_PER_PORT;
14080		memset(ps, 0, sizeof(*ps));
14081		memcpy(ps, &io->scsiio.sense_data, io->scsiio.sense_len);
14082	}
14083	mtx_unlock(&lun->lun_lock);
14084
14085bailout:
14086	ctl_free_io(io);
14087	return (CTL_RETVAL_COMPLETE);
14088}
14089
14090/*
14091 * Primary command inlet from frontend ports.  All SCSI and task I/O
14092 * requests must go through this function.
14093 */
14094int
14095ctl_queue(union ctl_io *io)
14096{
14097	struct ctl_port *port = CTL_PORT(io);
14098
14099	switch (io->io_hdr.io_type) {
14100	case CTL_IO_SCSI:
14101	case CTL_IO_TASK:
14102		CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0]));
14103		break;
14104	case CTL_IO_NVME:
14105		CTL_DEBUG_PRINT(("ctl_queue nvme nvm cmd=%02X\n",
14106		    io->nvmeio.cmd.opc));
14107		break;
14108	case CTL_IO_NVME_ADMIN:
14109		CTL_DEBUG_PRINT(("ctl_queue nvme admin cmd=%02X\n",
14110		    io->nvmeio.cmd.opc));
14111		break;
14112	default:
14113		break;
14114	}
14115
14116#ifdef CTL_TIME_IO
14117	io->io_hdr.start_time = time_uptime;
14118	getbinuptime(&io->io_hdr.start_bt);
14119#endif /* CTL_TIME_IO */
14120
14121	/* Map FE-specific LUN ID into global one. */
14122	io->io_hdr.nexus.targ_mapped_lun =
14123	    ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun);
14124
14125	switch (io->io_hdr.io_type) {
14126	case CTL_IO_SCSI:
14127	case CTL_IO_TASK:
14128	case CTL_IO_NVME:
14129	case CTL_IO_NVME_ADMIN:
14130		if (ctl_debug & CTL_DEBUG_CDB)
14131			ctl_io_print(io);
14132		ctl_enqueue_incoming(io);
14133		break;
14134	default:
14135		printf("ctl_queue: unknown I/O type %d\n", io->io_hdr.io_type);
14136		return (EINVAL);
14137	}
14138
14139	return (CTL_RETVAL_COMPLETE);
14140}
14141
14142int
14143ctl_run(union ctl_io *io)
14144{
14145	struct ctl_port *port = CTL_PORT(io);
14146
14147	CTL_DEBUG_PRINT(("ctl_run cdb[0]=%02X\n", io->scsiio.cdb[0]));
14148
14149#ifdef CTL_TIME_IO
14150	io->io_hdr.start_time = time_uptime;
14151	getbinuptime(&io->io_hdr.start_bt);
14152#endif /* CTL_TIME_IO */
14153
14154	/* Map FE-specific LUN ID into global one. */
14155	io->io_hdr.nexus.targ_mapped_lun =
14156	    ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun);
14157
14158	switch (io->io_hdr.io_type) {
14159	case CTL_IO_SCSI:
14160		if (ctl_debug & CTL_DEBUG_CDB)
14161			ctl_io_print(io);
14162		ctl_scsiio_precheck(&io->scsiio);
14163		break;
14164	case CTL_IO_TASK:
14165		if (ctl_debug & CTL_DEBUG_CDB)
14166			ctl_io_print(io);
14167		ctl_run_task(io);
14168		break;
14169	case CTL_IO_NVME:
14170	case CTL_IO_NVME_ADMIN:
14171		if (ctl_debug & CTL_DEBUG_CDB)
14172			ctl_io_print(io);
14173		ctl_nvmeio_precheck(&io->nvmeio);
14174		break;
14175	default:
14176		printf("ctl_run: unknown I/O type %d\n", io->io_hdr.io_type);
14177		return (EINVAL);
14178	}
14179
14180	return (CTL_RETVAL_COMPLETE);
14181}
14182
14183#ifdef CTL_IO_DELAY
14184static void
14185ctl_done_timer_wakeup(void *arg)
14186{
14187	union ctl_io *io;
14188
14189	io = (union ctl_io *)arg;
14190	ctl_done(io);
14191}
14192#endif /* CTL_IO_DELAY */
14193
14194void
14195ctl_serseq_done(union ctl_io *io)
14196{
14197	struct ctl_lun *lun = CTL_LUN(io);
14198
14199	/* This is racy, but should not be a problem. */
14200	if (!TAILQ_EMPTY(&io->io_hdr.blocked_queue)) {
14201		mtx_lock(&lun->lun_lock);
14202		io->io_hdr.flags |= CTL_FLAG_SERSEQ_DONE;
14203		ctl_try_unblock_others(lun, io, FALSE);
14204		mtx_unlock(&lun->lun_lock);
14205	} else
14206		io->io_hdr.flags |= CTL_FLAG_SERSEQ_DONE;
14207}
14208
14209void
14210ctl_done(union ctl_io *io)
14211{
14212
14213	/*
14214	 * Enable this to catch duplicate completion issues.
14215	 */
14216#if 0
14217	if (io->io_hdr.flags & CTL_FLAG_ALREADY_DONE) {
14218		switch (io->io_hdr.io_type) {
14219		case CTL_IO_SCSI:
14220		case CTL_IO_TASK:
14221			printf("%s: type %d msg %d cdb %x iptl: "
14222			    "%u:%u:%u tag 0x%04lx "
14223			    "flag %#x status %x\n",
14224			    __func__,
14225			    io->io_hdr.io_type,
14226			    io->io_hdr.msg_type,
14227			    io->scsiio.cdb[0],
14228			    io->io_hdr.nexus.initid,
14229			    io->io_hdr.nexus.targ_port,
14230			    io->io_hdr.nexus.targ_lun,
14231			    (io->io_hdr.io_type == CTL_IO_TASK) ?
14232			    io->taskio.tag_num :
14233			    io->scsiio.tag_num,
14234			    io->io_hdr.flags,
14235			    io->io_hdr.status);
14236			break;
14237		case CTL_IO_NVME:
14238		case CTL_IO_NVME_ADMIN:
14239			printf("%s: type %d msg %d opc %x iptl: "
14240			    "%u:%u:%u cid 0x%04x "
14241			    "flag %#x status %x\n",
14242			    __func__,
14243			    io->io_hdr.io_type,
14244			    io->io_hdr.msg_type,
14245			    io->nvmeio.cmd.opc,
14246			    io->io_hdr.nexus.initid,
14247			    io->io_hdr.nexus.targ_port,
14248			    io->io_hdr.nexus.targ_lun,
14249			    io->nvmeio.cmd.cid,
14250			    io->io_hdr.flags,
14251			    io->io_hdr.status);
14252			break;
14253		default:
14254			printf("%s: type %d msg %d iptl: "
14255			    "%u:%u:%u flag %#x status %x\n",
14256			    __func__,
14257			    io->io_hdr.io_type,
14258			    io->io_hdr.msg_type,
14259			    io->io_hdr.nexus.initid,
14260			    io->io_hdr.nexus.targ_port,
14261			    io->io_hdr.nexus.targ_lun,
14262			    io->io_hdr.flags,
14263			    io->io_hdr.status);
14264			break;
14265		}
14266	} else
14267		io->io_hdr.flags |= CTL_FLAG_ALREADY_DONE;
14268#endif
14269
14270	/*
14271	 * This is an internal copy of an I/O, and should not go through
14272	 * the normal done processing logic.
14273	 */
14274	if (io->io_hdr.flags & CTL_FLAG_INT_COPY)
14275		return;
14276
14277#ifdef CTL_IO_DELAY
14278	if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) {
14279		io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE;
14280	} else {
14281		struct ctl_lun *lun = CTL_LUN(io);
14282
14283		if ((lun != NULL)
14284		 && (lun->delay_info.done_delay > 0)) {
14285			callout_init(&io->io_hdr.delay_callout, /*mpsafe*/ 1);
14286			io->io_hdr.flags |= CTL_FLAG_DELAY_DONE;
14287			callout_reset(&io->io_hdr.delay_callout,
14288				      lun->delay_info.done_delay * hz,
14289				      ctl_done_timer_wakeup, io);
14290			if (lun->delay_info.done_type == CTL_DELAY_TYPE_ONESHOT)
14291				lun->delay_info.done_delay = 0;
14292			return;
14293		}
14294	}
14295#endif /* CTL_IO_DELAY */
14296
14297	ctl_enqueue_done(io);
14298}
14299
14300static void
14301ctl_work_thread(void *arg)
14302{
14303	struct ctl_thread *thr = (struct ctl_thread *)arg;
14304	struct ctl_softc *softc = thr->ctl_softc;
14305	union ctl_io *io;
14306	int retval;
14307
14308	CTL_DEBUG_PRINT(("ctl_work_thread starting\n"));
14309	thread_lock(curthread);
14310	sched_prio(curthread, PUSER - 1);
14311	thread_unlock(curthread);
14312
14313	while (!softc->shutdown) {
14314		/*
14315		 * We handle the queues in this order:
14316		 * - ISC
14317		 * - done queue (to free up resources, unblock other commands)
14318		 * - incoming queue
14319		 * - RtR queue
14320		 *
14321		 * If those queues are empty, we break out of the loop and
14322		 * go to sleep.
14323		 */
14324		mtx_lock(&thr->queue_lock);
14325		io = (union ctl_io *)STAILQ_FIRST(&thr->isc_queue);
14326		if (io != NULL) {
14327			STAILQ_REMOVE_HEAD(&thr->isc_queue, links);
14328			mtx_unlock(&thr->queue_lock);
14329			ctl_handle_isc(io);
14330			continue;
14331		}
14332		io = (union ctl_io *)STAILQ_FIRST(&thr->done_queue);
14333		if (io != NULL) {
14334			STAILQ_REMOVE_HEAD(&thr->done_queue, links);
14335			/* clear any blocked commands, call fe_done */
14336			mtx_unlock(&thr->queue_lock);
14337			ctl_process_done(io);
14338			continue;
14339		}
14340		io = (union ctl_io *)STAILQ_FIRST(&thr->incoming_queue);
14341		if (io != NULL) {
14342			STAILQ_REMOVE_HEAD(&thr->incoming_queue, links);
14343			mtx_unlock(&thr->queue_lock);
14344			switch (io->io_hdr.io_type) {
14345			case CTL_IO_TASK:
14346				ctl_run_task(io);
14347				break;
14348			case CTL_IO_SCSI:
14349				ctl_scsiio_precheck(&io->scsiio);
14350				break;
14351			case CTL_IO_NVME:
14352			case CTL_IO_NVME_ADMIN:
14353				ctl_nvmeio_precheck(&io->nvmeio);
14354				break;
14355			default:
14356				__assert_unreachable();
14357			}
14358			continue;
14359		}
14360		io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue);
14361		if (io != NULL) {
14362			STAILQ_REMOVE_HEAD(&thr->rtr_queue, links);
14363			mtx_unlock(&thr->queue_lock);
14364			switch (io->io_hdr.io_type) {
14365			case CTL_IO_SCSI:
14366				retval = ctl_scsiio(&io->scsiio);
14367				if (retval != CTL_RETVAL_COMPLETE)
14368					CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
14369				break;
14370			case CTL_IO_NVME:
14371			case CTL_IO_NVME_ADMIN:
14372				retval = ctl_nvmeio(&io->nvmeio);
14373				if (retval != CTL_RETVAL_COMPLETE)
14374					CTL_DEBUG_PRINT(("ctl_nvmeio failed\n"));
14375				break;
14376			default:
14377				__assert_unreachable();
14378			}
14379			continue;
14380		}
14381
14382		/* Sleep until we have something to do. */
14383		mtx_sleep(thr, &thr->queue_lock, PDROP, "-", 0);
14384	}
14385	thr->thread = NULL;
14386	kthread_exit();
14387}
14388
14389static void
14390ctl_thresh_thread(void *arg)
14391{
14392	struct ctl_softc *softc = (struct ctl_softc *)arg;
14393	struct ctl_lun *lun;
14394	struct ctl_logical_block_provisioning_page *page;
14395	const char *attr;
14396	union ctl_ha_msg msg;
14397	uint64_t thres, val;
14398	int i, e, set;
14399
14400	CTL_DEBUG_PRINT(("ctl_thresh_thread starting\n"));
14401	thread_lock(curthread);
14402	sched_prio(curthread, PUSER - 1);
14403	thread_unlock(curthread);
14404
14405	while (!softc->shutdown) {
14406		mtx_lock(&softc->ctl_lock);
14407		STAILQ_FOREACH(lun, &softc->lun_list, links) {
14408			if ((lun->flags & CTL_LUN_DISABLED) ||
14409			    (lun->flags & CTL_LUN_NO_MEDIA) ||
14410			    lun->backend->lun_attr == NULL)
14411				continue;
14412			if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
14413			    softc->ha_mode == CTL_HA_MODE_XFER)
14414				continue;
14415			if ((lun->MODE_RWER.byte8 & SMS_RWER_LBPERE) == 0)
14416				continue;
14417			e = 0;
14418			page = &lun->MODE_LBP;
14419			for (i = 0; i < CTL_NUM_LBP_THRESH; i++) {
14420				if ((page->descr[i].flags & SLBPPD_ENABLED) == 0)
14421					continue;
14422				thres = scsi_4btoul(page->descr[i].count);
14423				thres <<= CTL_LBP_EXPONENT;
14424				switch (page->descr[i].resource) {
14425				case 0x01:
14426					attr = "blocksavail";
14427					break;
14428				case 0x02:
14429					attr = "blocksused";
14430					break;
14431				case 0xf1:
14432					attr = "poolblocksavail";
14433					break;
14434				case 0xf2:
14435					attr = "poolblocksused";
14436					break;
14437				default:
14438					continue;
14439				}
14440				mtx_unlock(&softc->ctl_lock); // XXX
14441				val = lun->backend->lun_attr(lun->be_lun, attr);
14442				mtx_lock(&softc->ctl_lock);
14443				if (val == UINT64_MAX)
14444					continue;
14445				if ((page->descr[i].flags & SLBPPD_ARMING_MASK)
14446				    == SLBPPD_ARMING_INC)
14447					e = (val >= thres);
14448				else
14449					e = (val <= thres);
14450				if (e)
14451					break;
14452			}
14453			mtx_lock(&lun->lun_lock);
14454			if (e) {
14455				scsi_u64to8b((uint8_t *)&page->descr[i] -
14456				    (uint8_t *)page, lun->ua_tpt_info);
14457				if (lun->lasttpt == 0 ||
14458				    time_uptime - lun->lasttpt >= CTL_LBP_UA_PERIOD) {
14459					lun->lasttpt = time_uptime;
14460					ctl_est_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES);
14461					set = 1;
14462				} else
14463					set = 0;
14464			} else {
14465				lun->lasttpt = 0;
14466				ctl_clr_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES);
14467				set = -1;
14468			}
14469			mtx_unlock(&lun->lun_lock);
14470			if (set != 0 &&
14471			    lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) {
14472				/* Send msg to other side. */
14473				bzero(&msg.ua, sizeof(msg.ua));
14474				msg.hdr.msg_type = CTL_MSG_UA;
14475				msg.hdr.nexus.initid = -1;
14476				msg.hdr.nexus.targ_port = -1;
14477				msg.hdr.nexus.targ_lun = lun->lun;
14478				msg.hdr.nexus.targ_mapped_lun = lun->lun;
14479				msg.ua.ua_all = 1;
14480				msg.ua.ua_set = (set > 0);
14481				msg.ua.ua_type = CTL_UA_THIN_PROV_THRES;
14482				memcpy(msg.ua.ua_info, lun->ua_tpt_info, 8);
14483				mtx_unlock(&softc->ctl_lock); // XXX
14484				ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
14485				    sizeof(msg.ua), M_WAITOK);
14486				mtx_lock(&softc->ctl_lock);
14487			}
14488		}
14489		mtx_sleep(&softc->thresh_thread, &softc->ctl_lock,
14490		    PDROP, "-", CTL_LBP_PERIOD * hz);
14491	}
14492	softc->thresh_thread = NULL;
14493	kthread_exit();
14494}
14495
14496static void
14497ctl_enqueue_incoming(union ctl_io *io)
14498{
14499	struct ctl_softc *softc = CTL_SOFTC(io);
14500	struct ctl_thread *thr;
14501	u_int idx;
14502
14503	idx = (io->io_hdr.nexus.targ_port * 127 +
14504	       io->io_hdr.nexus.initid) % worker_threads;
14505	thr = &softc->threads[idx];
14506	mtx_lock(&thr->queue_lock);
14507	STAILQ_INSERT_TAIL(&thr->incoming_queue, &io->io_hdr, links);
14508	mtx_unlock(&thr->queue_lock);
14509	wakeup(thr);
14510}
14511
14512static void
14513ctl_enqueue_rtr(union ctl_io *io)
14514{
14515	struct ctl_softc *softc = CTL_SOFTC(io);
14516	struct ctl_thread *thr;
14517
14518	thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads];
14519	mtx_lock(&thr->queue_lock);
14520	STAILQ_INSERT_TAIL(&thr->rtr_queue, &io->io_hdr, links);
14521	mtx_unlock(&thr->queue_lock);
14522	wakeup(thr);
14523}
14524
14525static void
14526ctl_enqueue_done(union ctl_io *io)
14527{
14528	struct ctl_softc *softc = CTL_SOFTC(io);
14529	struct ctl_thread *thr;
14530
14531	thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads];
14532	mtx_lock(&thr->queue_lock);
14533	STAILQ_INSERT_TAIL(&thr->done_queue, &io->io_hdr, links);
14534	mtx_unlock(&thr->queue_lock);
14535	wakeup(thr);
14536}
14537
14538static void
14539ctl_enqueue_isc(union ctl_io *io)
14540{
14541	struct ctl_softc *softc = CTL_SOFTC(io);
14542	struct ctl_thread *thr;
14543
14544	thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads];
14545	mtx_lock(&thr->queue_lock);
14546	STAILQ_INSERT_TAIL(&thr->isc_queue, &io->io_hdr, links);
14547	mtx_unlock(&thr->queue_lock);
14548	wakeup(thr);
14549}
14550
14551/*
14552 *  vim: ts=8
14553 */
14554