iscsi.c revision 265496
1/*-
2 * Copyright (c) 2012 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: stable/10/sys/dev/iscsi/iscsi.c 265496 2014-05-07 06:31:45Z trasz $
30 */
31
32#include <sys/param.h>
33#include <sys/condvar.h>
34#include <sys/conf.h>
35#include <sys/eventhandler.h>
36#include <sys/file.h>
37#include <sys/kernel.h>
38#include <sys/kthread.h>
39#include <sys/lock.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/module.h>
43#include <sys/sysctl.h>
44#include <sys/systm.h>
45#include <sys/sx.h>
46#include <vm/uma.h>
47
48#include <cam/cam.h>
49#include <cam/cam_ccb.h>
50#include <cam/cam_xpt.h>
51#include <cam/cam_debug.h>
52#include <cam/cam_sim.h>
53#include <cam/cam_xpt_sim.h>
54#include <cam/cam_xpt_periph.h>
55#include <cam/cam_periph.h>
56#include <cam/scsi/scsi_all.h>
57#include <cam/scsi/scsi_message.h>
58
59#include "iscsi_ioctl.h"
60#include "iscsi.h"
61#include "icl.h"
62#include "iscsi_proto.h"
63
64#ifdef ICL_KERNEL_PROXY
65#include <sys/socketvar.h>
66#endif
67
/*
 * File-scope pointer to the driver's softc.
 *
 * XXX: This is global so that iscsi_unload() can access it.
 * 	Think about how to do this properly.
 */
static struct iscsi_softc	*sc;
73
/*
 * The kern.iscsi sysctl node.  Every knob below is registered both as a
 * loader tunable (TUNABLE_INT) and as a read-write sysctl (CTLFLAG_RW).
 */
SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD, 0, "iSCSI initiator");
/*
 * Verbosity level: the *_WARN macros print when it is above 0, the
 * *_DEBUG macros when it is above 1.
 */
static int debug = 1;
TUNABLE_INT("kern.iscsi.debug", &debug);
SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RW,
    &debug, 2, "Enable debug messages");
/*
 * iscsi_callout() requests a reconnect once the per-session timeout
 * counter, which is zeroed on every received PDU, reaches this value.
 */
static int ping_timeout = 5;
TUNABLE_INT("kern.iscsi.ping_timeout", &ping_timeout);
SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RW, &ping_timeout,
    5, "Timeout for ping (NOP-Out) requests, in seconds");
/*
 * Upper bound checked by iscsi_callout() while the session has
 * is_waiting_for_iscsid set.
 */
static int iscsid_timeout = 60;
TUNABLE_INT("kern.iscsi.iscsid_timeout", &iscsid_timeout);
SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RW, &iscsid_timeout,
    60, "Time to wait for iscsid(8) to handle reconnection, in seconds");
/*
 * Upper bound checked by iscsi_callout() while the session has
 * is_login_phase set.
 */
static int login_timeout = 60;
TUNABLE_INT("kern.iscsi.login_timeout", &login_timeout);
SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RW, &login_timeout,
    60, "Time to wait for iscsid(8) to finish Login Phase, in seconds");
/*
 * NOTE(review): not referenced in this part of the file; per its
 * description it limits the number of queued I/O requests -- confirm
 * against the code that registers the SIM.
 */
static int maxtags = 255;
TUNABLE_INT("kern.iscsi.maxtags", &maxtags);
SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RW, &maxtags,
    255, "Max number of IO requests queued");
95
/* malloc(9) type used for this driver's allocations (sessions, NOP data). */
static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator");
/*
 * UMA zone handle.
 * NOTE(review): presumably backs struct iscsi_outstanding allocations;
 * its creation and use are outside this part of the file -- confirm.
 */
static uma_zone_t iscsi_outstanding_zone;
98
/*
 * Map an ICL connection, or a PDU belonging to one, back to the iSCSI
 * session stored in the connection's ic_prv0 field.
 *
 * The argument is parenthesized so that any pointer expression (for
 * example "&conn" or "p + 1") expands correctly.
 */
#define	CONN_SESSION(X)	((struct iscsi_session *)(X)->ic_prv0)
#define	PDU_SESSION(X)	(CONN_SESSION((X)->ip_conn))
101
/*
 * Print a debug message, prefixed with the calling function's name,
 * when the kern.iscsi.debug level is above 1.
 *
 * The body is wrapped in do { } while (0) so the macro is exactly one
 * statement; a bare "if (...) { } while (0)" expands to two statements
 * and breaks when used as the body of an unbraced if/else.
 */
#define	ISCSI_DEBUG(X, ...)					\
	do {							\
		if (debug > 1) {				\
			printf("%s: " X "\n",			\
			    __func__, ## __VA_ARGS__);		\
		}						\
	} while (0)
106
/*
 * Print a warning, tagged with "WARNING:" and the calling function's
 * name, when the kern.iscsi.debug level is above 0.
 *
 * The body is wrapped in do { } while (0) so the macro is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define	ISCSI_WARN(X, ...)					\
	do {							\
		if (debug > 0) {				\
			printf("WARNING: %s: " X "\n",		\
			    __func__, ## __VA_ARGS__);		\
		}						\
	} while (0)
112
/*
 * Print a per-session debug message when the kern.iscsi.debug level is
 * above 1.  The message is prefixed with the calling function's name and
 * the session's target address and target name.
 *
 * The body is wrapped in do { } while (0) so the macro is exactly one
 * statement, and S is parenthesized so any pointer expression works.
 */
#define	ISCSI_SESSION_DEBUG(S, X, ...)				\
	do {							\
		if (debug > 1) {				\
			printf("%s: %s (%s): " X "\n",		\
			    __func__,				\
			    (S)->is_conf.isc_target_addr,	\
			    (S)->is_conf.isc_target,		\
			    ## __VA_ARGS__);			\
		}						\
	} while (0)
119
/*
 * Print a per-session warning when the kern.iscsi.debug level is above 0.
 * The message is tagged with "WARNING:" and the session's target address
 * and target name.
 *
 * The body is wrapped in do { } while (0) so the macro is exactly one
 * statement, and S is parenthesized so any pointer expression works.
 */
#define	ISCSI_SESSION_WARN(S, X, ...)				\
	do {							\
		if (debug > 0) {				\
			printf("WARNING: %s (%s): " X "\n",	\
			    (S)->is_conf.isc_target_addr,	\
			    (S)->is_conf.isc_target,		\
			    ## __VA_ARGS__);			\
		}						\
	} while (0)
126
/*
 * Session mutex helpers.  X is a pointer to the session; it is
 * parenthesized so that any pointer expression expands correctly.
 */
#define ISCSI_SESSION_LOCK(X)		mtx_lock(&(X)->is_lock)
#define ISCSI_SESSION_UNLOCK(X)		mtx_unlock(&(X)->is_lock)
#define ISCSI_SESSION_LOCK_ASSERT(X)	mtx_assert(&(X)->is_lock, MA_OWNED)
130
/* ioctl entry point; declared here because iscsi_cdevsw refers to it. */
static int	iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg,
		    int mode, struct thread *td);

/*
 * Character device switch for the "iscsi" device.  Only the ioctl method
 * is provided.
 */
static struct cdevsw iscsi_cdevsw = {
     .d_version = D_VERSION,
     .d_ioctl   = iscsi_ioctl,
     .d_name    = "iscsi",
};
139
/*
 * Forward declarations of file-local functions that are referenced
 * before their definitions.
 */

/* PDU submission; the _locked variant expects the session lock held. */
static void	iscsi_pdu_queue_locked(struct icl_pdu *request);
static void	iscsi_pdu_queue(struct icl_pdu *request);
/* Per-opcode handlers dispatched from iscsi_receive_callback(). */
static void	iscsi_pdu_update_statsn(const struct icl_pdu *response);
static void	iscsi_pdu_handle_nop_in(struct icl_pdu *response);
static void	iscsi_pdu_handle_scsi_response(struct icl_pdu *response);
static void	iscsi_pdu_handle_data_in(struct icl_pdu *response);
static void	iscsi_pdu_handle_logout_response(struct icl_pdu *response);
static void	iscsi_pdu_handle_r2t(struct icl_pdu *response);
static void	iscsi_pdu_handle_async_message(struct icl_pdu *response);
static void	iscsi_pdu_handle_reject(struct icl_pdu *response);
/* Session state changes carried out by the maintenance thread. */
static void	iscsi_session_reconnect(struct iscsi_session *is);
static void	iscsi_session_terminate(struct iscsi_session *is);
/* CAM SIM entry points. */
static void	iscsi_action(struct cam_sim *sim, union ccb *ccb);
static void	iscsi_poll(struct cam_sim *sim);
/* Bookkeeping of outstanding tasks, keyed by Initiator Task Tag. */
static struct iscsi_outstanding	*iscsi_outstanding_find(struct iscsi_session *is,
		    uint32_t initiator_task_tag);
static int	iscsi_outstanding_add(struct iscsi_session *is,
		    uint32_t initiator_task_tag, union ccb *ccb);
static void	iscsi_outstanding_remove(struct iscsi_session *is,
		    struct iscsi_outstanding *io);
160
161static bool
162iscsi_pdu_prepare(struct icl_pdu *request)
163{
164	struct iscsi_session *is;
165	struct iscsi_bhs_scsi_command *bhssc;
166
167	is = PDU_SESSION(request);
168
169	ISCSI_SESSION_LOCK_ASSERT(is);
170
171	/*
172	 * We're only using fields common for all the request
173	 * (initiator -> target) PDUs.
174	 */
175	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;
176
177	/*
178	 * Data-Out PDU does not contain CmdSN.
179	 */
180	if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) {
181		if (is->is_cmdsn > is->is_maxcmdsn &&
182		    (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) {
183			/*
184			 * Current MaxCmdSN prevents us from sending any more
185			 * SCSI Command PDUs to the target; postpone the PDU.
186			 * It will get resent by either iscsi_pdu_queue(),
187			 * or by maintenance thread.
188			 */
189#if 0
190			ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %d, ExpCmdSN %d, MaxCmdSN %d, opcode 0x%x",
191			    is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn, bhssc->bhssc_opcode);
192#endif
193			return (true);
194		}
195		bhssc->bhssc_cmdsn = htonl(is->is_cmdsn);
196		if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0)
197			is->is_cmdsn++;
198	}
199	bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1);
200
201	return (false);
202}
203
204static void
205iscsi_session_send_postponed(struct iscsi_session *is)
206{
207	struct icl_pdu *request;
208	bool postpone;
209
210	ISCSI_SESSION_LOCK_ASSERT(is);
211
212	while (!TAILQ_EMPTY(&is->is_postponed)) {
213		request = TAILQ_FIRST(&is->is_postponed);
214		postpone = iscsi_pdu_prepare(request);
215		if (postpone)
216			break;
217		TAILQ_REMOVE(&is->is_postponed, request, ip_next);
218		icl_pdu_queue(request);
219	}
220}
221
222static void
223iscsi_pdu_queue_locked(struct icl_pdu *request)
224{
225	struct iscsi_session *is;
226	bool postpone;
227
228	is = PDU_SESSION(request);
229	ISCSI_SESSION_LOCK_ASSERT(is);
230	iscsi_session_send_postponed(is);
231	postpone = iscsi_pdu_prepare(request);
232	if (postpone) {
233		TAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next);
234		return;
235	}
236	icl_pdu_queue(request);
237}
238
/*
 * Unlocked wrapper around iscsi_pdu_queue_locked(): takes the session
 * lock for the duration of the call.
 */
static void
iscsi_pdu_queue(struct icl_pdu *request)
{
	struct iscsi_session *session = PDU_SESSION(request);

	ISCSI_SESSION_LOCK(session);
	iscsi_pdu_queue_locked(request);
	ISCSI_SESSION_UNLOCK(session);
}
249
250static void
251iscsi_session_logout(struct iscsi_session *is)
252{
253	struct icl_pdu *request;
254	struct iscsi_bhs_logout_request *bhslr;
255
256	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
257	if (request == NULL)
258		return;
259
260	bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs;
261	bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST;
262	bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION;
263	iscsi_pdu_queue_locked(request);
264}
265
266static void
267iscsi_session_terminate_tasks(struct iscsi_session *is, bool requeue)
268{
269	struct iscsi_outstanding *io, *tmp;
270
271	ISCSI_SESSION_LOCK_ASSERT(is);
272
273	TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) {
274		if (requeue) {
275			io->io_ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
276			io->io_ccb->ccb_h.status |= CAM_REQUEUE_REQ;
277		} else {
278			io->io_ccb->ccb_h.status = CAM_REQ_ABORTED;
279		}
280
281		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
282			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
283			ISCSI_SESSION_DEBUG(is, "freezing devq");
284		}
285		io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN;
286		xpt_done(io->io_ccb);
287		iscsi_outstanding_remove(is, io);
288	}
289}
290
291static void
292iscsi_maintenance_thread_reconnect(struct iscsi_session *is)
293{
294	struct icl_pdu *pdu;
295
296	icl_conn_shutdown(is->is_conn);
297	icl_conn_close(is->is_conn);
298
299	ISCSI_SESSION_LOCK(is);
300
301#ifdef ICL_KERNEL_PROXY
302	if (is->is_login_pdu != NULL) {
303		icl_pdu_free(is->is_login_pdu);
304		is->is_login_pdu = NULL;
305	}
306	cv_signal(&is->is_login_cv);
307#endif
308
309	/*
310	 * Don't queue any new PDUs.
311	 */
312	if (is->is_sim != NULL && is->is_simq_frozen == false) {
313		ISCSI_SESSION_DEBUG(is, "freezing");
314		xpt_freeze_simq(is->is_sim, 1);
315		is->is_simq_frozen = true;
316	}
317
318	/*
319	 * Remove postponed PDUs.
320	 */
321	while (!TAILQ_EMPTY(&is->is_postponed)) {
322		pdu = TAILQ_FIRST(&is->is_postponed);
323		TAILQ_REMOVE(&is->is_postponed, pdu, ip_next);
324		icl_pdu_free(pdu);
325	}
326
327	/*
328	 * Terminate SCSI tasks, asking CAM to requeue them.
329	 */
330	//ISCSI_SESSION_DEBUG(is, "terminating tasks");
331	iscsi_session_terminate_tasks(is, true);
332
333	KASSERT(TAILQ_EMPTY(&is->is_outstanding),
334	    ("destroying session with active tasks"));
335	KASSERT(TAILQ_EMPTY(&is->is_postponed),
336	    ("destroying session with postponed PDUs"));
337
338	/*
339	 * Request immediate reconnection from iscsid(8).
340	 */
341	//ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)");
342	is->is_connected = false;
343	is->is_reconnecting = false;
344	is->is_login_phase = false;
345	is->is_waiting_for_iscsid = true;
346	strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason));
347	is->is_timeout = 0;
348	ISCSI_SESSION_UNLOCK(is);
349	cv_signal(&is->is_softc->sc_cv);
350}
351
352static void
353iscsi_maintenance_thread_terminate(struct iscsi_session *is)
354{
355	struct iscsi_softc *sc;
356	struct icl_pdu *pdu;
357
358	sc = is->is_softc;
359	sx_xlock(&sc->sc_lock);
360	TAILQ_REMOVE(&sc->sc_sessions, is, is_next);
361	sx_xunlock(&sc->sc_lock);
362
363	icl_conn_close(is->is_conn);
364
365	ISCSI_SESSION_LOCK(is);
366
367	KASSERT(is->is_terminating, ("is_terminating == false"));
368
369#ifdef ICL_KERNEL_PROXY
370	if (is->is_login_pdu != NULL) {
371		icl_pdu_free(is->is_login_pdu);
372		is->is_login_pdu = NULL;
373	}
374	cv_signal(&is->is_login_cv);
375#endif
376
377	/*
378	 * Don't queue any new PDUs.
379	 */
380	callout_drain(&is->is_callout);
381	if (is->is_sim != NULL && is->is_simq_frozen == false) {
382		ISCSI_SESSION_DEBUG(is, "freezing");
383		xpt_freeze_simq(is->is_sim, 1);
384		is->is_simq_frozen = true;
385	}
386
387	/*
388	 * Remove postponed PDUs.
389	 */
390	while (!TAILQ_EMPTY(&is->is_postponed)) {
391		pdu = TAILQ_FIRST(&is->is_postponed);
392		TAILQ_REMOVE(&is->is_postponed, pdu, ip_next);
393		icl_pdu_free(pdu);
394	}
395
396	/*
397	 * Forcibly terminate SCSI tasks.
398	 */
399	ISCSI_SESSION_DEBUG(is, "terminating tasks");
400	iscsi_session_terminate_tasks(is, false);
401
402	/*
403	 * Deregister CAM.
404	 */
405	if (is->is_sim != NULL) {
406		ISCSI_SESSION_DEBUG(is, "deregistering SIM");
407		xpt_async(AC_LOST_DEVICE, is->is_path, NULL);
408
409		if (is->is_simq_frozen) {
410			xpt_release_simq(is->is_sim, 1);
411			is->is_simq_frozen = false;
412		}
413
414		xpt_free_path(is->is_path);
415		xpt_bus_deregister(cam_sim_path(is->is_sim));
416		cam_sim_free(is->is_sim, TRUE /*free_devq*/);
417		is->is_sim = NULL;
418	}
419
420	KASSERT(TAILQ_EMPTY(&is->is_outstanding),
421	    ("destroying session with active tasks"));
422	KASSERT(TAILQ_EMPTY(&is->is_postponed),
423	    ("destroying session with postponed PDUs"));
424
425	ISCSI_SESSION_UNLOCK(is);
426
427	icl_conn_free(is->is_conn);
428	mtx_destroy(&is->is_lock);
429	cv_destroy(&is->is_maintenance_cv);
430#ifdef ICL_KERNEL_PROXY
431	cv_destroy(&is->is_login_cv);
432#endif
433	ISCSI_SESSION_DEBUG(is, "terminated");
434	free(is, M_ISCSI);
435
436	/*
437	 * The iscsi_unload() routine might be waiting.
438	 */
439	cv_signal(&sc->sc_cv);
440}
441
442static void
443iscsi_maintenance_thread(void *arg)
444{
445	struct iscsi_session *is;
446
447	is = arg;
448
449	for (;;) {
450		ISCSI_SESSION_LOCK(is);
451		if (is->is_reconnecting == false &&
452		    is->is_terminating == false &&
453		    TAILQ_EMPTY(&is->is_postponed))
454			cv_wait(&is->is_maintenance_cv, &is->is_lock);
455
456		if (is->is_reconnecting) {
457			ISCSI_SESSION_UNLOCK(is);
458			iscsi_maintenance_thread_reconnect(is);
459			continue;
460		}
461
462		if (is->is_terminating) {
463			ISCSI_SESSION_UNLOCK(is);
464			iscsi_maintenance_thread_terminate(is);
465			kthread_exit();
466			return;
467		}
468
469		iscsi_session_send_postponed(is);
470		ISCSI_SESSION_UNLOCK(is);
471	}
472}
473
474static void
475iscsi_session_reconnect(struct iscsi_session *is)
476{
477
478	/*
479	 * XXX: We can't use locking here, because
480	 * 	it's being called from various contexts.
481	 * 	Hope it doesn't break anything.
482	 */
483	if (is->is_reconnecting)
484		return;
485
486	is->is_reconnecting = true;
487	cv_signal(&is->is_maintenance_cv);
488}
489
490static void
491iscsi_session_terminate(struct iscsi_session *is)
492{
493	if (is->is_terminating)
494		return;
495
496	is->is_terminating = true;
497
498#if 0
499	iscsi_session_logout(is);
500#endif
501	cv_signal(&is->is_maintenance_cv);
502}
503
504static void
505iscsi_callout(void *context)
506{
507	struct icl_pdu *request;
508	struct iscsi_bhs_nop_out *bhsno;
509	struct iscsi_session *is;
510	bool reconnect_needed = false;
511
512	is = context;
513
514	if (is->is_terminating)
515		return;
516
517	callout_schedule(&is->is_callout, 1 * hz);
518
519	ISCSI_SESSION_LOCK(is);
520	is->is_timeout++;
521
522	if (is->is_waiting_for_iscsid) {
523		if (is->is_timeout > iscsid_timeout) {
524			ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) "
525			    "for %d seconds; reconnecting",
526			    is->is_timeout);
527			reconnect_needed = true;
528		}
529		goto out;
530	}
531
532	if (is->is_login_phase) {
533		if (is->is_timeout > login_timeout) {
534			ISCSI_SESSION_WARN(is, "login timed out after %d seconds; "
535			    "reconnecting", is->is_timeout);
536			reconnect_needed = true;
537		}
538		goto out;
539	}
540
541	if (is->is_timeout >= ping_timeout) {
542		ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; "
543		    "reconnecting", ping_timeout);
544		reconnect_needed = true;
545		goto out;
546	}
547
548	ISCSI_SESSION_UNLOCK(is);
549
550	/*
551	 * If the ping was reset less than one second ago - which means
552	 * that we've received some PDU during the last second - assume
553	 * the traffic flows correctly and don't bother sending a NOP-Out.
554	 *
555	 * (It's 2 - one for one second, and one for incrementing is_timeout
556	 * earlier in this routine.)
557	 */
558	if (is->is_timeout < 2)
559		return;
560
561	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
562	if (request == NULL) {
563		ISCSI_SESSION_WARN(is, "failed to allocate PDU");
564		return;
565	}
566	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
567	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
568	    ISCSI_BHS_OPCODE_IMMEDIATE;
569	bhsno->bhsno_flags = 0x80;
570	bhsno->bhsno_target_transfer_tag = 0xffffffff;
571	iscsi_pdu_queue(request);
572	return;
573
574out:
575	ISCSI_SESSION_UNLOCK(is);
576
577	if (reconnect_needed)
578		iscsi_session_reconnect(is);
579}
580
581static void
582iscsi_pdu_update_statsn(const struct icl_pdu *response)
583{
584	const struct iscsi_bhs_data_in *bhsdi;
585	struct iscsi_session *is;
586	uint32_t expcmdsn, maxcmdsn;
587
588	is = PDU_SESSION(response);
589
590	ISCSI_SESSION_LOCK_ASSERT(is);
591
592	/*
593	 * We're only using fields common for all the response
594	 * (target -> initiator) PDUs.
595	 */
596	bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs;
597	/*
598	 * Ok, I lied.  In case of Data-In, "The fields StatSN, Status,
599	 * and Residual Count only have meaningful content if the S bit
600	 * is set to 1", so we also need to check the bit specific for
601	 * Data-In PDU.
602	 */
603	if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN ||
604	    (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) {
605		if (ntohl(bhsdi->bhsdi_statsn) < is->is_statsn) {
606			ISCSI_SESSION_WARN(is,
607			    "PDU StatSN %d >= session StatSN %d, opcode 0x%x",
608			    is->is_statsn, ntohl(bhsdi->bhsdi_statsn),
609			    bhsdi->bhsdi_opcode);
610		}
611		is->is_statsn = ntohl(bhsdi->bhsdi_statsn);
612	}
613
614	expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn);
615	maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn);
616
617	/*
618	 * XXX: Compare using Serial Arithmetic Sense.
619	 */
620	if (maxcmdsn + 1 < expcmdsn) {
621		ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %d + 1 < PDU ExpCmdSN %d; ignoring",
622		    maxcmdsn, expcmdsn);
623	} else {
624		if (maxcmdsn > is->is_maxcmdsn) {
625			is->is_maxcmdsn = maxcmdsn;
626
627			/*
628			 * Command window increased; kick the maintanance thread
629			 * to send out postponed commands.
630			 */
631			if (!TAILQ_EMPTY(&is->is_postponed))
632				cv_signal(&is->is_maintenance_cv);
633		} else if (maxcmdsn < is->is_maxcmdsn) {
634			ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %d < session MaxCmdSN %d; ignoring",
635			    maxcmdsn, is->is_maxcmdsn);
636		}
637
638		if (expcmdsn > is->is_expcmdsn) {
639			is->is_expcmdsn = expcmdsn;
640		} else if (expcmdsn < is->is_expcmdsn) {
641			ISCSI_SESSION_DEBUG(is, "PDU ExpCmdSN %d < session ExpCmdSN %d; ignoring",
642			    expcmdsn, is->is_expcmdsn);
643		}
644	}
645
646	/*
647	 * Every incoming PDU - not just NOP-In - resets the ping timer.
648	 * The purpose of the timeout is to reset the connection when it stalls;
649	 * we don't want this to happen when NOP-In or NOP-Out ends up delayed
650	 * in some queue.
651	 */
652	is->is_timeout = 0;
653}
654
655static void
656iscsi_receive_callback(struct icl_pdu *response)
657{
658	struct iscsi_session *is;
659
660	is = PDU_SESSION(response);
661
662	ISCSI_SESSION_LOCK(is);
663
664#ifdef ICL_KERNEL_PROXY
665	if (is->is_login_phase) {
666		if (is->is_login_pdu == NULL)
667			is->is_login_pdu = response;
668		else
669			icl_pdu_free(response);
670		ISCSI_SESSION_UNLOCK(is);
671		cv_signal(&is->is_login_cv);
672		return;
673	}
674#endif
675
676	iscsi_pdu_update_statsn(response);
677
678	/*
679	 * The handling routine is responsible for freeing the PDU
680	 * when it's no longer needed.
681	 */
682	switch (response->ip_bhs->bhs_opcode) {
683	case ISCSI_BHS_OPCODE_NOP_IN:
684		iscsi_pdu_handle_nop_in(response);
685		break;
686	case ISCSI_BHS_OPCODE_SCSI_RESPONSE:
687		iscsi_pdu_handle_scsi_response(response);
688		break;
689	case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
690		iscsi_pdu_handle_data_in(response);
691		break;
692	case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE:
693		iscsi_pdu_handle_logout_response(response);
694		break;
695	case ISCSI_BHS_OPCODE_R2T:
696		iscsi_pdu_handle_r2t(response);
697		break;
698	case ISCSI_BHS_OPCODE_ASYNC_MESSAGE:
699		iscsi_pdu_handle_async_message(response);
700		break;
701	case ISCSI_BHS_OPCODE_REJECT:
702		iscsi_pdu_handle_reject(response);
703		break;
704	default:
705		ISCSI_SESSION_WARN(is, "received PDU with unsupported "
706		    "opcode 0x%x; reconnecting",
707		    response->ip_bhs->bhs_opcode);
708		iscsi_session_reconnect(is);
709		icl_pdu_free(response);
710	}
711
712	ISCSI_SESSION_UNLOCK(is);
713}
714
/*
 * ICL error callback: logs the connection error and requests a
 * reconnect of the owning session.
 */
static void
iscsi_error_callback(struct icl_conn *ic)
{
	struct iscsi_session *session = CONN_SESSION(ic);

	ISCSI_SESSION_WARN(session, "connection error; reconnecting");
	iscsi_session_reconnect(session);
}
725
726static void
727iscsi_pdu_handle_nop_in(struct icl_pdu *response)
728{
729	struct iscsi_session *is;
730	struct iscsi_bhs_nop_out *bhsno;
731	struct iscsi_bhs_nop_in *bhsni;
732	struct icl_pdu *request;
733	void *data = NULL;
734	size_t datasize;
735	int error;
736
737	is = PDU_SESSION(response);
738	bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs;
739
740	if (bhsni->bhsni_target_transfer_tag == 0xffffffff) {
741		/*
742		 * Nothing to do; iscsi_pdu_update_statsn() already
743		 * zeroed the timeout.
744		 */
745		icl_pdu_free(response);
746		return;
747	}
748
749	datasize = icl_pdu_data_segment_length(response);
750	if (datasize > 0) {
751		data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO);
752		if (data == NULL) {
753			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
754			    "reconnecting");
755			icl_pdu_free(response);
756			iscsi_session_reconnect(is);
757			return;
758		}
759		icl_pdu_get_data(response, 0, data, datasize);
760	}
761
762	request = icl_pdu_new_bhs(response->ip_conn, M_NOWAIT);
763	if (request == NULL) {
764		ISCSI_SESSION_WARN(is, "failed to allocate memory; "
765		    "reconnecting");
766		free(data, M_ISCSI);
767		icl_pdu_free(response);
768		iscsi_session_reconnect(is);
769		return;
770	}
771	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
772	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
773	    ISCSI_BHS_OPCODE_IMMEDIATE;
774	bhsno->bhsno_flags = 0x80;
775	bhsno->bhsno_initiator_task_tag = 0xffffffff;
776	bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag;
777	if (datasize > 0) {
778		error = icl_pdu_append_data(request, data, datasize, M_NOWAIT);
779		if (error != 0) {
780			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
781			    "reconnecting");
782			free(data, M_ISCSI);
783			icl_pdu_free(request);
784			icl_pdu_free(response);
785			iscsi_session_reconnect(is);
786			return;
787		}
788		free(data, M_ISCSI);
789	}
790
791	icl_pdu_free(response);
792	iscsi_pdu_queue_locked(request);
793}
794
795static void
796iscsi_pdu_handle_scsi_response(struct icl_pdu *response)
797{
798	struct iscsi_bhs_scsi_response *bhssr;
799	struct iscsi_outstanding *io;
800	struct iscsi_session *is;
801	struct ccb_scsiio *csio;
802	size_t data_segment_len;
803	uint16_t sense_len;
804
805	is = PDU_SESSION(response);
806
807	bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs;
808	io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag);
809	if (io == NULL) {
810		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag);
811		icl_pdu_free(response);
812		iscsi_session_reconnect(is);
813		return;
814	}
815
816	if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) {
817		ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response);
818 		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
819 			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
820			ISCSI_SESSION_DEBUG(is, "freezing devq");
821		}
822 		io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
823	} else if (bhssr->bhssr_status == 0) {
824		io->io_ccb->ccb_h.status = CAM_REQ_CMP;
825	} else {
826 		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
827 			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
828			ISCSI_SESSION_DEBUG(is, "freezing devq");
829		}
830 		io->io_ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
831		io->io_ccb->csio.scsi_status = bhssr->bhssr_status;
832	}
833
834	if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_OVERFLOW) {
835		ISCSI_SESSION_WARN(is, "target indicated residual overflow");
836		icl_pdu_free(response);
837		iscsi_session_reconnect(is);
838		return;
839	}
840
841	csio = &io->io_ccb->csio;
842
843	data_segment_len = icl_pdu_data_segment_length(response);
844	if (data_segment_len > 0) {
845		if (data_segment_len < sizeof(sense_len)) {
846			ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)",
847			    data_segment_len);
848			if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
849				xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
850				ISCSI_SESSION_DEBUG(is, "freezing devq");
851			}
852			io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
853			goto out;
854		}
855		icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len));
856		sense_len = ntohs(sense_len);
857#if 0
858		ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd",
859		    sense_len, data_segment_len);
860#endif
861		if (sizeof(sense_len) + sense_len > data_segment_len) {
862			ISCSI_SESSION_WARN(is, "truncated data segment "
863			    "(%zd bytes, should be %zd)",
864			    data_segment_len, sizeof(sense_len) + sense_len);
865			if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
866				xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
867				ISCSI_SESSION_DEBUG(is, "freezing devq");
868			}
869			io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
870			goto out;
871		} else if (sizeof(sense_len) + sense_len < data_segment_len)
872			ISCSI_SESSION_WARN(is, "oversize data segment "
873			    "(%zd bytes, should be %zd)",
874			    data_segment_len, sizeof(sense_len) + sense_len);
875		if (sense_len > csio->sense_len) {
876			ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d",
877			    sense_len, csio->sense_len);
878			sense_len = csio->sense_len;
879		}
880		icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len);
881		csio->sense_resid = csio->sense_len - sense_len;
882		io->io_ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
883	}
884
885out:
886	if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW)
887		csio->resid = ntohl(bhssr->bhssr_residual_count);
888
889	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
890		KASSERT(io->io_received <= csio->dxfer_len,
891		    ("io->io_received > csio->dxfer_len"));
892		if (io->io_received < csio->dxfer_len) {
893			if (csio->resid != csio->dxfer_len - io->io_received) {
894				ISCSI_SESSION_WARN(is, "underflow mismatch: "
895				    "target indicates %d, we calculated %zd",
896				    csio->resid,
897				    csio->dxfer_len - io->io_received);
898			}
899			csio->resid = csio->dxfer_len - io->io_received;
900		}
901	}
902
903	xpt_done(io->io_ccb);
904	iscsi_outstanding_remove(is, io);
905	icl_pdu_free(response);
906}
907
908static void
909iscsi_pdu_handle_data_in(struct icl_pdu *response)
910{
911	struct iscsi_bhs_data_in *bhsdi;
912	struct iscsi_outstanding *io;
913	struct iscsi_session *is;
914	struct ccb_scsiio *csio;
915	size_t data_segment_len;
916
917	is = PDU_SESSION(response);
918	bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs;
919	io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag);
920	if (io == NULL) {
921		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag);
922		icl_pdu_free(response);
923		iscsi_session_reconnect(is);
924		return;
925	}
926
927	data_segment_len = icl_pdu_data_segment_length(response);
928	if (data_segment_len == 0) {
929		/*
930		 * "The sending of 0 length data segments should be avoided,
931		 * but initiators and targets MUST be able to properly receive
932		 * 0 length data segments."
933		 */
934		icl_pdu_free(response);
935		return;
936	}
937
938	/*
939	 * We need to track this for security reasons - without it, malicious target
940	 * could respond to SCSI READ without sending Data-In PDUs, which would result
941	 * in read operation on the initiator side returning random kernel data.
942	 */
943	if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) {
944		ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd",
945		    io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset));
946		icl_pdu_free(response);
947		iscsi_session_reconnect(is);
948		return;
949	}
950
951	csio = &io->io_ccb->csio;
952
953	if (io->io_received + data_segment_len > csio->dxfer_len) {
954		ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes "
955		    "at offset %zd, buffer is %d)",
956		    data_segment_len, io->io_received, csio->dxfer_len);
957		icl_pdu_free(response);
958		iscsi_session_reconnect(is);
959		return;
960	}
961
962	icl_pdu_get_data(response, 0, csio->data_ptr + io->io_received, data_segment_len);
963	io->io_received += data_segment_len;
964
965	/*
966	 * XXX: Check DataSN.
967	 * XXX: Check F.
968	 */
969	if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) {
970		/*
971		 * Nothing more to do.
972		 */
973		icl_pdu_free(response);
974		return;
975	}
976
977	//ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status);
978	if (bhsdi->bhsdi_status == 0) {
979		io->io_ccb->ccb_h.status = CAM_REQ_CMP;
980	} else {
981		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
982			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
983			ISCSI_SESSION_DEBUG(is, "freezing devq");
984		}
985		io->io_ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
986		csio->scsi_status = bhsdi->bhsdi_status;
987	}
988
989	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
990		KASSERT(io->io_received <= csio->dxfer_len,
991		    ("io->io_received > csio->dxfer_len"));
992		if (io->io_received < csio->dxfer_len) {
993			csio->resid = ntohl(bhsdi->bhsdi_residual_count);
994			if (csio->resid != csio->dxfer_len - io->io_received) {
995				ISCSI_SESSION_WARN(is, "underflow mismatch: "
996				    "target indicates %d, we calculated %zd",
997				    csio->resid,
998				    csio->dxfer_len - io->io_received);
999			}
1000			csio->resid = csio->dxfer_len - io->io_received;
1001		}
1002	}
1003
1004	xpt_done(io->io_ccb);
1005	iscsi_outstanding_remove(is, io);
1006	icl_pdu_free(response);
1007}
1008
/*
 * Handle a Logout Response PDU: log it at debug level and free it.
 */
static void
iscsi_pdu_handle_logout_response(struct icl_pdu *response)
{
	struct iscsi_session *is = PDU_SESSION(response);

	ISCSI_SESSION_DEBUG(is, "logout response");
	icl_pdu_free(response);
}
1016
1017static void
1018iscsi_pdu_handle_r2t(struct icl_pdu *response)
1019{
1020	struct icl_pdu *request;
1021	struct iscsi_session *is;
1022	struct iscsi_bhs_r2t *bhsr2t;
1023	struct iscsi_bhs_data_out *bhsdo;
1024	struct iscsi_outstanding *io;
1025	struct ccb_scsiio *csio;
1026	size_t off, len, total_len;
1027	int error;
1028
1029	is = PDU_SESSION(response);
1030
1031	bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs;
1032	io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag);
1033	if (io == NULL) {
1034		ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting",
1035		    bhsr2t->bhsr2t_initiator_task_tag);
1036		icl_pdu_free(response);
1037		iscsi_session_reconnect(is);
1038		return;
1039	}
1040
1041	csio = &io->io_ccb->csio;
1042
1043	if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) {
1044		ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting");
1045		icl_pdu_free(response);
1046		iscsi_session_reconnect(is);
1047		return;
1048	}
1049
1050	/*
1051	 * XXX: Verify R2TSN.
1052	 */
1053
1054	io->io_datasn = 0;
1055
1056	off = ntohl(bhsr2t->bhsr2t_buffer_offset);
1057	if (off > csio->dxfer_len) {
1058		ISCSI_SESSION_WARN(is, "target requested invalid offset "
1059		    "%zd, buffer is is %d; reconnecting", off, csio->dxfer_len);
1060		icl_pdu_free(response);
1061		iscsi_session_reconnect(is);
1062		return;
1063	}
1064
1065	total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length);
1066	if (total_len == 0 || total_len > csio->dxfer_len) {
1067		ISCSI_SESSION_WARN(is, "target requested invalid length "
1068		    "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len);
1069		icl_pdu_free(response);
1070		iscsi_session_reconnect(is);
1071		return;
1072	}
1073
1074	//ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len);
1075
1076	for (;;) {
1077		len = total_len;
1078
1079		if (len > is->is_max_data_segment_length)
1080			len = is->is_max_data_segment_length;
1081
1082		if (off + len > csio->dxfer_len) {
1083			ISCSI_SESSION_WARN(is, "target requested invalid "
1084			    "length/offset %zd, buffer is %d; reconnecting",
1085			    off + len, csio->dxfer_len);
1086			icl_pdu_free(response);
1087			iscsi_session_reconnect(is);
1088			return;
1089		}
1090
1091		request = icl_pdu_new_bhs(response->ip_conn, M_NOWAIT);
1092		if (request == NULL) {
1093			icl_pdu_free(response);
1094			iscsi_session_reconnect(is);
1095			return;
1096		}
1097
1098		bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs;
1099		bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT;
1100		bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun;
1101		bhsdo->bhsdo_initiator_task_tag =
1102		    bhsr2t->bhsr2t_initiator_task_tag;
1103		bhsdo->bhsdo_target_transfer_tag =
1104		    bhsr2t->bhsr2t_target_transfer_tag;
1105		bhsdo->bhsdo_datasn = htonl(io->io_datasn++);
1106		bhsdo->bhsdo_buffer_offset = htonl(off);
1107		error = icl_pdu_append_data(request, csio->data_ptr + off, len,
1108		    M_NOWAIT);
1109		if (error != 0) {
1110			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
1111			    "reconnecting");
1112			icl_pdu_free(request);
1113			icl_pdu_free(response);
1114			iscsi_session_reconnect(is);
1115			return;
1116		}
1117
1118		off += len;
1119		total_len -= len;
1120
1121		if (total_len == 0) {
1122			bhsdo->bhsdo_flags |= BHSDO_FLAGS_F;
1123			//ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off);
1124		} else {
1125			//ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off);
1126		}
1127
1128		iscsi_pdu_queue_locked(request);
1129
1130		if (total_len == 0)
1131			break;
1132	}
1133
1134	icl_pdu_free(response);
1135}
1136
1137static void
1138iscsi_pdu_handle_async_message(struct icl_pdu *response)
1139{
1140	struct iscsi_bhs_asynchronous_message *bhsam;
1141	struct iscsi_session *is;
1142
1143	is = PDU_SESSION(response);
1144	bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs;
1145	switch (bhsam->bhsam_async_event) {
1146	case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT:
1147		ISCSI_SESSION_WARN(is, "target requests logout; removing session");
1148		iscsi_session_logout(is);
1149		iscsi_session_terminate(is);
1150		break;
1151	case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION:
1152		ISCSI_SESSION_WARN(is, "target indicates it will drop drop the connection");
1153		break;
1154	case BHSAM_EVENT_TARGET_TERMINATES_SESSION:
1155		ISCSI_SESSION_WARN(is, "target indicates it will drop drop the session");
1156		break;
1157	default:
1158		/*
1159		 * XXX: Technically, we're obligated to also handle
1160		 * 	parameter renegotiation.
1161		 */
1162		ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event);
1163		break;
1164	}
1165
1166	icl_pdu_free(response);
1167}
1168
1169static void
1170iscsi_pdu_handle_reject(struct icl_pdu *response)
1171{
1172	struct iscsi_bhs_reject *bhsr;
1173	struct iscsi_session *is;
1174
1175	is = PDU_SESSION(response);
1176	bhsr = (struct iscsi_bhs_reject *)response->ip_bhs;
1177	ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?",
1178	    bhsr->bhsr_reason);
1179
1180	icl_pdu_free(response);
1181}
1182
1183static int
1184iscsi_ioctl_daemon_wait(struct iscsi_softc *sc,
1185    struct iscsi_daemon_request *request)
1186{
1187	struct iscsi_session *is;
1188	int error;
1189
1190	sx_slock(&sc->sc_lock);
1191	for (;;) {
1192		TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1193			if (is->is_waiting_for_iscsid)
1194				break;
1195		}
1196
1197		if (is == NULL) {
1198			/*
1199			 * No session requires attention from iscsid(8); wait.
1200			 */
1201			error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
1202			if (error != 0) {
1203				sx_sunlock(&sc->sc_lock);
1204				return (error);
1205			}
1206			continue;
1207		}
1208
1209		ISCSI_SESSION_LOCK(is);
1210		is->is_waiting_for_iscsid = false;
1211		is->is_login_phase = true;
1212		is->is_reason[0] = '\0';
1213		ISCSI_SESSION_UNLOCK(is);
1214
1215		request->idr_session_id = is->is_id;
1216		memcpy(&request->idr_conf, &is->is_conf,
1217		    sizeof(request->idr_conf));
1218
1219		sx_sunlock(&sc->sc_lock);
1220		return (0);
1221	}
1222}
1223
1224static int
1225iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc,
1226    struct iscsi_daemon_handoff *handoff)
1227{
1228	struct iscsi_session *is;
1229	int error;
1230
1231	sx_slock(&sc->sc_lock);
1232
1233	/*
1234	 * Find the session to hand off socket to.
1235	 */
1236	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1237		if (is->is_id == handoff->idh_session_id)
1238			break;
1239	}
1240	if (is == NULL) {
1241		sx_sunlock(&sc->sc_lock);
1242		return (ESRCH);
1243	}
1244	ISCSI_SESSION_LOCK(is);
1245	if (is->is_conf.isc_discovery || is->is_terminating) {
1246		ISCSI_SESSION_UNLOCK(is);
1247		sx_sunlock(&sc->sc_lock);
1248		return (EINVAL);
1249	}
1250	if (is->is_connected) {
1251		/*
1252		 * This might have happened because another iscsid(8)
1253		 * instance handed off the connection in the meantime.
1254		 * Just return.
1255		 */
1256		ISCSI_SESSION_WARN(is, "handoff on already connected "
1257		    "session");
1258		ISCSI_SESSION_UNLOCK(is);
1259		sx_sunlock(&sc->sc_lock);
1260		return (EBUSY);
1261	}
1262
1263	strlcpy(is->is_target_alias, handoff->idh_target_alias,
1264	    sizeof(is->is_target_alias));
1265	memcpy(is->is_isid, handoff->idh_isid, sizeof(is->is_isid));
1266	is->is_statsn = handoff->idh_statsn;
1267	is->is_initial_r2t = handoff->idh_initial_r2t;
1268	is->is_immediate_data = handoff->idh_immediate_data;
1269	is->is_max_data_segment_length = handoff->idh_max_data_segment_length;
1270	is->is_max_burst_length = handoff->idh_max_burst_length;
1271	is->is_first_burst_length = handoff->idh_first_burst_length;
1272
1273	if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C)
1274		is->is_conn->ic_header_crc32c = true;
1275	else
1276		is->is_conn->ic_header_crc32c = false;
1277	if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C)
1278		is->is_conn->ic_data_crc32c = true;
1279	else
1280		is->is_conn->ic_data_crc32c = false;
1281
1282	is->is_cmdsn = 0;
1283	is->is_expcmdsn = 0;
1284	is->is_maxcmdsn = 0;
1285	is->is_waiting_for_iscsid = false;
1286	is->is_login_phase = false;
1287	is->is_timeout = 0;
1288	is->is_connected = true;
1289	is->is_reason[0] = '\0';
1290
1291	ISCSI_SESSION_UNLOCK(is);
1292
1293#ifndef ICL_KERNEL_PROXY
1294	error = icl_conn_handoff(is->is_conn, handoff->idh_socket);
1295	if (error != 0) {
1296		sx_sunlock(&sc->sc_lock);
1297		iscsi_session_terminate(is);
1298		return (error);
1299	}
1300#endif
1301
1302	sx_sunlock(&sc->sc_lock);
1303
1304	if (is->is_sim != NULL) {
1305		/*
1306		 * When reconnecting, there already is SIM allocated for the session.
1307		 */
1308		KASSERT(is->is_simq_frozen, ("reconnect without frozen simq"));
1309		ISCSI_SESSION_LOCK(is);
1310		ISCSI_SESSION_DEBUG(is, "releasing");
1311		xpt_release_simq(is->is_sim, 1);
1312		is->is_simq_frozen = false;
1313		ISCSI_SESSION_UNLOCK(is);
1314
1315	} else {
1316		ISCSI_SESSION_LOCK(is);
1317		is->is_devq = cam_simq_alloc(maxtags);
1318		if (is->is_devq == NULL) {
1319			ISCSI_SESSION_WARN(is, "failed to allocate simq");
1320			iscsi_session_terminate(is);
1321			return (ENOMEM);
1322		}
1323
1324		is->is_sim = cam_sim_alloc(iscsi_action, iscsi_poll, "iscsi",
1325		    is, is->is_id /* unit */, &is->is_lock,
1326		    maxtags, maxtags, is->is_devq);
1327		if (is->is_sim == NULL) {
1328			ISCSI_SESSION_UNLOCK(is);
1329			ISCSI_SESSION_WARN(is, "failed to allocate SIM");
1330			cam_simq_free(is->is_devq);
1331			iscsi_session_terminate(is);
1332			return (ENOMEM);
1333		}
1334
1335		error = xpt_bus_register(is->is_sim, NULL, 0);
1336		if (error != 0) {
1337			ISCSI_SESSION_UNLOCK(is);
1338			ISCSI_SESSION_WARN(is, "failed to register bus");
1339			iscsi_session_terminate(is);
1340			return (ENOMEM);
1341		}
1342
1343		error = xpt_create_path(&is->is_path, /*periph*/NULL,
1344		    cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD,
1345		    CAM_LUN_WILDCARD);
1346		if (error != CAM_REQ_CMP) {
1347			ISCSI_SESSION_UNLOCK(is);
1348			ISCSI_SESSION_WARN(is, "failed to create path");
1349			iscsi_session_terminate(is);
1350			return (ENOMEM);
1351		}
1352		ISCSI_SESSION_UNLOCK(is);
1353	}
1354
1355	return (0);
1356}
1357
1358static int
1359iscsi_ioctl_daemon_fail(struct iscsi_softc *sc,
1360    struct iscsi_daemon_fail *fail)
1361{
1362	struct iscsi_session *is;
1363
1364	sx_slock(&sc->sc_lock);
1365
1366	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1367		if (is->is_id == fail->idf_session_id)
1368			break;
1369	}
1370	if (is == NULL) {
1371		sx_sunlock(&sc->sc_lock);
1372		return (ESRCH);
1373	}
1374	ISCSI_SESSION_LOCK(is);
1375	ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s",
1376	    fail->idf_reason);
1377	strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason));
1378	//is->is_waiting_for_iscsid = false;
1379	//is->is_login_phase = true;
1380	//iscsi_session_reconnect(is);
1381	ISCSI_SESSION_UNLOCK(is);
1382	sx_sunlock(&sc->sc_lock);
1383
1384	return (0);
1385}
1386
1387#ifdef ICL_KERNEL_PROXY
1388static int
1389iscsi_ioctl_daemon_connect(struct iscsi_softc *sc,
1390    struct iscsi_daemon_connect *idc)
1391{
1392	struct iscsi_session *is;
1393	struct sockaddr *from_sa, *to_sa;
1394	int error;
1395
1396	sx_slock(&sc->sc_lock);
1397	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1398		if (is->is_id == idc->idc_session_id)
1399			break;
1400	}
1401	if (is == NULL) {
1402		sx_sunlock(&sc->sc_lock);
1403		return (ESRCH);
1404	}
1405	sx_sunlock(&sc->sc_lock);
1406
1407	if (idc->idc_from_addrlen > 0) {
1408		error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen);
1409		if (error != 0)
1410			return (error);
1411	} else {
1412		from_sa = NULL;
1413	}
1414	error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen);
1415	if (error != 0) {
1416		free(from_sa, M_SONAME);
1417		return (error);
1418	}
1419
1420	ISCSI_SESSION_LOCK(is);
1421	is->is_waiting_for_iscsid = false;
1422	is->is_login_phase = true;
1423	is->is_timeout = 0;
1424	ISCSI_SESSION_UNLOCK(is);
1425
1426	error = icl_conn_connect(is->is_conn, idc->idc_iser, idc->idc_domain,
1427	    idc->idc_socktype, idc->idc_protocol, from_sa, to_sa);
1428	free(from_sa, M_SONAME);
1429	free(to_sa, M_SONAME);
1430
1431	/*
1432	 * Digests are always disabled during login phase.
1433	 */
1434	is->is_conn->ic_header_crc32c = false;
1435	is->is_conn->ic_data_crc32c = false;
1436
1437	return (error);
1438}
1439
1440static int
1441iscsi_ioctl_daemon_send(struct iscsi_softc *sc,
1442    struct iscsi_daemon_send *ids)
1443{
1444	struct iscsi_session *is;
1445	struct icl_pdu *ip;
1446	size_t datalen;
1447	void *data;
1448	int error;
1449
1450	sx_slock(&sc->sc_lock);
1451	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1452		if (is->is_id == ids->ids_session_id)
1453			break;
1454	}
1455	if (is == NULL) {
1456		sx_sunlock(&sc->sc_lock);
1457		return (ESRCH);
1458	}
1459	sx_sunlock(&sc->sc_lock);
1460
1461	if (is->is_login_phase == false)
1462		return (EBUSY);
1463
1464	if (is->is_terminating || is->is_reconnecting)
1465		return (EIO);
1466
1467	datalen = ids->ids_data_segment_len;
1468	if (datalen > ISCSI_MAX_DATA_SEGMENT_LENGTH)
1469		return (EINVAL);
1470	if (datalen > 0) {
1471		data = malloc(datalen, M_ISCSI, M_WAITOK);
1472		error = copyin(ids->ids_data_segment, data, datalen);
1473		if (error != 0) {
1474			free(data, M_ISCSI);
1475			return (error);
1476		}
1477	}
1478
1479	ip = icl_pdu_new_bhs(is->is_conn, M_WAITOK);
1480	memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs));
1481	if (datalen > 0) {
1482		error = icl_pdu_append_data(ip, data, datalen, M_WAITOK);
1483		KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed"));
1484		free(data, M_ISCSI);
1485	}
1486	icl_pdu_queue(ip);
1487
1488	return (0);
1489}
1490
1491static int
1492iscsi_ioctl_daemon_receive(struct iscsi_softc *sc,
1493    struct iscsi_daemon_receive *idr)
1494{
1495	struct iscsi_session *is;
1496	struct icl_pdu *ip;
1497	void *data;
1498
1499	sx_slock(&sc->sc_lock);
1500	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1501		if (is->is_id == idr->idr_session_id)
1502			break;
1503	}
1504	if (is == NULL) {
1505		sx_sunlock(&sc->sc_lock);
1506		return (ESRCH);
1507	}
1508	sx_sunlock(&sc->sc_lock);
1509
1510	if (is->is_login_phase == false)
1511		return (EBUSY);
1512
1513	ISCSI_SESSION_LOCK(is);
1514	while (is->is_login_pdu == NULL &&
1515	    is->is_terminating == false &&
1516	    is->is_reconnecting == false)
1517		cv_wait(&is->is_login_cv, &is->is_lock);
1518	if (is->is_terminating || is->is_reconnecting) {
1519		ISCSI_SESSION_UNLOCK(is);
1520		return (EIO);
1521	}
1522	ip = is->is_login_pdu;
1523	is->is_login_pdu = NULL;
1524	ISCSI_SESSION_UNLOCK(is);
1525
1526	if (ip->ip_data_len > idr->idr_data_segment_len) {
1527		icl_pdu_free(ip);
1528		return (EMSGSIZE);
1529	}
1530
1531	copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs));
1532	if (ip->ip_data_len > 0) {
1533		data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK);
1534		icl_pdu_get_data(ip, 0, data, ip->ip_data_len);
1535		copyout(data, idr->idr_data_segment, ip->ip_data_len);
1536		free(data, M_ISCSI);
1537	}
1538
1539	icl_pdu_free(ip);
1540
1541	return (0);
1542}
1543
1544static int
1545iscsi_ioctl_daemon_close(struct iscsi_softc *sc,
1546    struct iscsi_daemon_close *idc)
1547{
1548	struct iscsi_session *is;
1549
1550	sx_slock(&sc->sc_lock);
1551	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1552		if (is->is_id == idc->idc_session_id)
1553			break;
1554	}
1555	if (is == NULL) {
1556		sx_sunlock(&sc->sc_lock);
1557		return (ESRCH);
1558	}
1559	sx_sunlock(&sc->sc_lock);
1560
1561	iscsi_session_reconnect(is);
1562
1563	return (0);
1564}
1565#endif /* ICL_KERNEL_PROXY */
1566
1567static void
1568iscsi_sanitize_session_conf(struct iscsi_session_conf *isc)
1569{
1570	/*
1571	 * Just make sure all the fields are null-terminated.
1572	 *
1573	 * XXX: This is not particularly secure.  We should
1574	 * 	create our own conf and then copy in relevant
1575	 * 	fields.
1576	 */
1577	isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0';
1578	isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0';
1579	isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0';
1580	isc->isc_target[ISCSI_NAME_LEN - 1] = '\0';
1581	isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0';
1582	isc->isc_user[ISCSI_NAME_LEN - 1] = '\0';
1583	isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0';
1584	isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0';
1585	isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0';
1586}
1587
1588static int
1589iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa)
1590{
1591	struct iscsi_session *is;
1592	const struct iscsi_session *is2;
1593	int error;
1594
1595	iscsi_sanitize_session_conf(&isa->isa_conf);
1596
1597	is = malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK);
1598	memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf));
1599
1600	if (is->is_conf.isc_initiator[0] == '\0' ||
1601	    is->is_conf.isc_target_addr[0] == '\0') {
1602		free(is, M_ISCSI);
1603		return (EINVAL);
1604	}
1605
1606	if ((is->is_conf.isc_discovery != 0 && is->is_conf.isc_target[0] != 0) ||
1607	    (is->is_conf.isc_discovery == 0 && is->is_conf.isc_target[0] == 0)) {
1608		free(is, M_ISCSI);
1609		return (EINVAL);
1610	}
1611
1612	sx_xlock(&sc->sc_lock);
1613
1614	/*
1615	 * Prevent duplicates.
1616	 */
1617	TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) {
1618		if (!!is->is_conf.isc_discovery !=
1619		    !!is2->is_conf.isc_discovery)
1620			continue;
1621
1622		if (strcmp(is->is_conf.isc_target_addr,
1623		    is2->is_conf.isc_target_addr) != 0)
1624			continue;
1625
1626		if (is->is_conf.isc_discovery == 0 &&
1627		    strcmp(is->is_conf.isc_target,
1628		    is2->is_conf.isc_target) != 0)
1629			continue;
1630
1631		sx_xunlock(&sc->sc_lock);
1632		free(is, M_ISCSI);
1633		return (EBUSY);
1634	}
1635
1636	is->is_conn = icl_conn_new("iscsi", &is->is_lock);
1637	is->is_conn->ic_receive = iscsi_receive_callback;
1638	is->is_conn->ic_error = iscsi_error_callback;
1639	is->is_conn->ic_prv0 = is;
1640	TAILQ_INIT(&is->is_outstanding);
1641	TAILQ_INIT(&is->is_postponed);
1642	mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF);
1643	cv_init(&is->is_maintenance_cv, "iscsi_mt");
1644#ifdef ICL_KERNEL_PROXY
1645	cv_init(&is->is_login_cv, "iscsi_login");
1646#endif
1647
1648	is->is_softc = sc;
1649	sc->sc_last_session_id++;
1650	is->is_id = sc->sc_last_session_id;
1651	callout_init(&is->is_callout, 1);
1652	callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is);
1653	TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next);
1654
1655	error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt");
1656	if (error != 0) {
1657		ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error);
1658		return (error);
1659	}
1660
1661	/*
1662	 * Trigger immediate reconnection.
1663	 */
1664	is->is_waiting_for_iscsid = true;
1665	strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason));
1666	cv_signal(&sc->sc_cv);
1667
1668	sx_xunlock(&sc->sc_lock);
1669
1670	return (0);
1671}
1672
1673static bool
1674iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1,
1675    unsigned int id2, const struct iscsi_session_conf *c2)
1676{
1677	if (id2 == 0 && c2->isc_target[0] == '\0' &&
1678	    c2->isc_target_addr[0] == '\0')
1679		return (true);
1680	if (id2 != 0 && id2 == id1)
1681		return (true);
1682	if (c2->isc_target[0] != '\0' &&
1683	    strcmp(c1->isc_target, c2->isc_target) == 0)
1684		return (true);
1685	if (c2->isc_target_addr[0] != '\0' &&
1686	    strcmp(c1->isc_target_addr, c2->isc_target_addr) == 0)
1687		return (true);
1688	return (false);
1689}
1690
1691static int
1692iscsi_ioctl_session_remove(struct iscsi_softc *sc,
1693    struct iscsi_session_remove *isr)
1694{
1695	struct iscsi_session *is, *tmp;
1696	bool found = false;
1697
1698	iscsi_sanitize_session_conf(&isr->isr_conf);
1699
1700	sx_xlock(&sc->sc_lock);
1701	TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) {
1702		ISCSI_SESSION_LOCK(is);
1703		if (iscsi_session_conf_matches(is->is_id, &is->is_conf,
1704		    isr->isr_session_id, &isr->isr_conf)) {
1705			found = true;
1706			iscsi_session_logout(is);
1707			iscsi_session_terminate(is);
1708		}
1709		ISCSI_SESSION_UNLOCK(is);
1710	}
1711	sx_xunlock(&sc->sc_lock);
1712
1713	if (!found)
1714		return (ESRCH);
1715
1716	return (0);
1717}
1718
1719static int
1720iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl)
1721{
1722	int error;
1723	unsigned int i = 0;
1724	struct iscsi_session *is;
1725	struct iscsi_session_state iss;
1726
1727	sx_slock(&sc->sc_lock);
1728	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1729		if (i >= isl->isl_nentries) {
1730			sx_sunlock(&sc->sc_lock);
1731			return (EMSGSIZE);
1732		}
1733		memset(&iss, 0, sizeof(iss));
1734		memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf));
1735		iss.iss_id = is->is_id;
1736		strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias));
1737		strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason));
1738
1739		if (is->is_conn->ic_header_crc32c)
1740			iss.iss_header_digest = ISCSI_DIGEST_CRC32C;
1741		else
1742			iss.iss_header_digest = ISCSI_DIGEST_NONE;
1743
1744		if (is->is_conn->ic_data_crc32c)
1745			iss.iss_data_digest = ISCSI_DIGEST_CRC32C;
1746		else
1747			iss.iss_data_digest = ISCSI_DIGEST_NONE;
1748
1749		iss.iss_max_data_segment_length = is->is_max_data_segment_length;
1750		iss.iss_immediate_data = is->is_immediate_data;
1751		iss.iss_connected = is->is_connected;
1752
1753		error = copyout(&iss, isl->isl_pstates + i, sizeof(iss));
1754		if (error != 0) {
1755			sx_sunlock(&sc->sc_lock);
1756			return (error);
1757		}
1758		i++;
1759	}
1760	sx_sunlock(&sc->sc_lock);
1761
1762	isl->isl_nentries = i;
1763
1764	return (0);
1765}
1766
1767static int
1768iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode,
1769    struct thread *td)
1770{
1771	struct iscsi_softc *sc;
1772
1773	sc = dev->si_drv1;
1774
1775	switch (cmd) {
1776	case ISCSIDWAIT:
1777		return (iscsi_ioctl_daemon_wait(sc,
1778		    (struct iscsi_daemon_request *)arg));
1779	case ISCSIDHANDOFF:
1780		return (iscsi_ioctl_daemon_handoff(sc,
1781		    (struct iscsi_daemon_handoff *)arg));
1782	case ISCSIDFAIL:
1783		return (iscsi_ioctl_daemon_fail(sc,
1784		    (struct iscsi_daemon_fail *)arg));
1785#ifdef ICL_KERNEL_PROXY
1786	case ISCSIDCONNECT:
1787		return (iscsi_ioctl_daemon_connect(sc,
1788		    (struct iscsi_daemon_connect *)arg));
1789	case ISCSIDSEND:
1790		return (iscsi_ioctl_daemon_send(sc,
1791		    (struct iscsi_daemon_send *)arg));
1792	case ISCSIDRECEIVE:
1793		return (iscsi_ioctl_daemon_receive(sc,
1794		    (struct iscsi_daemon_receive *)arg));
1795	case ISCSIDCLOSE:
1796		return (iscsi_ioctl_daemon_close(sc,
1797		    (struct iscsi_daemon_close *)arg));
1798#endif /* ICL_KERNEL_PROXY */
1799	case ISCSISADD:
1800		return (iscsi_ioctl_session_add(sc,
1801		    (struct iscsi_session_add *)arg));
1802	case ISCSISREMOVE:
1803		return (iscsi_ioctl_session_remove(sc,
1804		    (struct iscsi_session_remove *)arg));
1805	case ISCSISLIST:
1806		return (iscsi_ioctl_session_list(sc,
1807		    (struct iscsi_session_list *)arg));
1808	default:
1809		return (EINVAL);
1810	}
1811}
1812
/*
 * Encode a LUN number into the 8-byte wire representation, returned as
 * a uint64_t holding those bytes in memory order.
 *
 *   lun < 256      peripheral device addressing: byte 1 = lun
 *   lun < 16384    flat space addressing: 0x40 | high 6 bits, low byte
 *   otherwise      extended flat space addressing: 0xd2 followed by the
 *                  low 24 bits of the LUN, most significant byte first
 *
 * All remaining bytes are zero.
 */
static uint64_t
iscsi_encode_lun(uint32_t lun)
{
	uint8_t bytes[sizeof(uint64_t)] = { 0 };
	uint64_t packed;

	if (lun >= 16384) {
		/* Extended flat space addressing. */
		bytes[0] = 0xd2;
		bytes[1] = (uint8_t)(lun >> 16);
		bytes[2] = (uint8_t)(lun >> 8);
		bytes[3] = (uint8_t)lun;
	} else if (lun >= 256) {
		/* Flat space addressing. */
		bytes[0] = (uint8_t)(0x40 | ((lun >> 8) & 0x3f));
		bytes[1] = (uint8_t)(lun & 0xff);
	} else {
		/* Peripheral device addressing. */
		bytes[1] = (uint8_t)lun;
	}

	memcpy(&packed, bytes, sizeof(packed));
	return (packed);
}
1846
1847static struct iscsi_outstanding *
1848iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag)
1849{
1850	struct iscsi_outstanding *io;
1851
1852	ISCSI_SESSION_LOCK_ASSERT(is);
1853
1854	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
1855		if (io->io_initiator_task_tag == initiator_task_tag)
1856			return (io);
1857	}
1858	return (NULL);
1859}
1860
1861static int
1862iscsi_outstanding_add(struct iscsi_session *is,
1863    uint32_t initiator_task_tag, union ccb *ccb)
1864{
1865	struct iscsi_outstanding *io;
1866
1867	ISCSI_SESSION_LOCK_ASSERT(is);
1868
1869	KASSERT(iscsi_outstanding_find(is, initiator_task_tag) == NULL,
1870	    ("initiator_task_tag 0x%x already added", initiator_task_tag));
1871
1872	io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO);
1873	if (io == NULL) {
1874		ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes", sizeof(*io));
1875		return (ENOMEM);
1876	}
1877	io->io_initiator_task_tag = initiator_task_tag;
1878	io->io_ccb = ccb;
1879	TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next);
1880	return (0);
1881}
1882
1883static void
1884iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io)
1885{
1886
1887	ISCSI_SESSION_LOCK_ASSERT(is);
1888
1889	TAILQ_REMOVE(&is->is_outstanding, io, io_next);
1890	uma_zfree(iscsi_outstanding_zone, io);
1891}
1892
1893static void
1894iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb)
1895{
1896	struct icl_pdu *request;
1897	struct iscsi_bhs_scsi_command *bhssc;
1898	struct ccb_scsiio *csio;
1899	size_t len;
1900	int error;
1901
1902	ISCSI_SESSION_LOCK_ASSERT(is);
1903
1904#if 0
1905	KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__));
1906#else
1907	if (is->is_login_phase) {
1908		ISCSI_SESSION_DEBUG(is, "called during login phase");
1909		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
1910			xpt_freeze_devq(ccb->ccb_h.path, 1);
1911			ISCSI_SESSION_DEBUG(is, "freezing devq");
1912		}
1913		ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN;
1914		xpt_done(ccb);
1915		return;
1916	}
1917#endif
1918
1919	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
1920	if (request == NULL) {
1921		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
1922			xpt_freeze_devq(ccb->ccb_h.path, 1);
1923			ISCSI_SESSION_DEBUG(is, "freezing devq");
1924		}
1925		ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
1926		xpt_done(ccb);
1927		return;
1928	}
1929
1930	csio = &ccb->csio;
1931	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;
1932	bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND;
1933	bhssc->bhssc_flags |= BHSSC_FLAGS_F;
1934	switch (csio->ccb_h.flags & CAM_DIR_MASK) {
1935	case CAM_DIR_IN:
1936		bhssc->bhssc_flags |= BHSSC_FLAGS_R;
1937		break;
1938	case CAM_DIR_OUT:
1939		bhssc->bhssc_flags |= BHSSC_FLAGS_W;
1940		break;
1941	}
1942
1943        switch (csio->tag_action) {
1944        case MSG_HEAD_OF_Q_TAG:
1945		bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ;
1946		break;
1947                break;
1948        case MSG_ORDERED_Q_TAG:
1949		bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED;
1950                break;
1951        case MSG_ACA_TASK:
1952		bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA;
1953                break;
1954        case CAM_TAG_ACTION_NONE:
1955        case MSG_SIMPLE_Q_TAG:
1956        default:
1957		bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE;
1958                break;
1959        }
1960
1961	bhssc->bhssc_lun = iscsi_encode_lun(csio->ccb_h.target_lun);
1962	bhssc->bhssc_initiator_task_tag = is->is_initiator_task_tag;
1963	is->is_initiator_task_tag++;
1964	bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len);
1965	KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb),
1966	    ("unsupported CDB size %zd", (size_t)csio->cdb_len));
1967
1968	if (csio->ccb_h.flags & CAM_CDB_POINTER)
1969		memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len);
1970	else
1971		memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len);
1972
1973	error = iscsi_outstanding_add(is, bhssc->bhssc_initiator_task_tag, ccb);
1974	if (error != 0) {
1975		icl_pdu_free(request);
1976		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
1977			xpt_freeze_devq(ccb->ccb_h.path, 1);
1978			ISCSI_SESSION_DEBUG(is, "freezing devq");
1979		}
1980		ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
1981		xpt_done(ccb);
1982		return;
1983	}
1984
1985	if (is->is_immediate_data &&
1986	    (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) {
1987		len = csio->dxfer_len;
1988		//ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len);
1989		if (len > is->is_first_burst_length) {
1990			ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_first_burst_length);
1991			len = is->is_first_burst_length;
1992		}
1993
1994		error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT);
1995		if (error != 0) {
1996			icl_pdu_free(request);
1997			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
1998				xpt_freeze_devq(ccb->ccb_h.path, 1);
1999				ISCSI_SESSION_DEBUG(is, "freezing devq");
2000			}
2001			ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
2002			xpt_done(ccb);
2003			return;
2004		}
2005	}
2006	iscsi_pdu_queue_locked(request);
2007}
2008
2009static void
2010iscsi_action(struct cam_sim *sim, union ccb *ccb)
2011{
2012	struct iscsi_session *is;
2013
2014	is = cam_sim_softc(sim);
2015
2016	ISCSI_SESSION_LOCK_ASSERT(is);
2017
2018	if (is->is_terminating) {
2019		ISCSI_SESSION_DEBUG(is, "called during termination");
2020		ccb->ccb_h.status = CAM_DEV_NOT_THERE;
2021		xpt_done(ccb);
2022		return;
2023	}
2024
2025	switch (ccb->ccb_h.func_code) {
2026	case XPT_PATH_INQ:
2027	{
2028		struct ccb_pathinq *cpi = &ccb->cpi;
2029
2030		cpi->version_num = 1;
2031		cpi->hba_inquiry = PI_TAG_ABLE;
2032		cpi->target_sprt = 0;
2033		//cpi->hba_misc = PIM_NOBUSRESET;
2034		cpi->hba_misc = 0;
2035		cpi->hba_eng_cnt = 0;
2036		cpi->max_target = 0;
2037		cpi->max_lun = 255;
2038		//cpi->initiator_id = 0; /* XXX */
2039		cpi->initiator_id = 64; /* XXX */
2040		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
2041		strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN);
2042		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
2043		cpi->unit_number = cam_sim_unit(sim);
2044		cpi->bus_id = cam_sim_bus(sim);
2045		cpi->base_transfer_speed = 150000; /* XXX */
2046		cpi->transport = XPORT_ISCSI;
2047		cpi->transport_version = 0;
2048		cpi->protocol = PROTO_SCSI;
2049		cpi->protocol_version = SCSI_REV_SPC3;
2050		cpi->maxio = MAXPHYS;
2051		cpi->ccb_h.status = CAM_REQ_CMP;
2052		break;
2053	}
2054	case XPT_CALC_GEOMETRY:
2055		cam_calc_geometry(&ccb->ccg, /*extended*/1);
2056		ccb->ccb_h.status = CAM_REQ_CMP;
2057		break;
2058#if 0
2059	/*
2060	 * XXX: What's the point?
2061	 */
2062	case XPT_RESET_BUS:
2063	case XPT_ABORT:
2064	case XPT_TERM_IO:
2065		ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io");
2066		ccb->ccb_h.status = CAM_REQ_CMP;
2067		break;
2068#endif
2069	case XPT_SCSI_IO:
2070		iscsi_action_scsiio(is, ccb);
2071		return;
2072	default:
2073#if 0
2074		ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code);
2075#endif
2076		ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
2077		break;
2078	}
2079	xpt_done(ccb);
2080}
2081
/*
 * CAM SIM poll routine.  It is not expected to be called at all; an
 * assertion fires if it ever is.
 */
static void
iscsi_poll(struct cam_sim *sim)
{
	KASSERT(0, ("%s: you're not supposed to be here", __func__));
}
2088
2089static void
2090iscsi_shutdown(struct iscsi_softc *sc)
2091{
2092	struct iscsi_session *is;
2093
2094	ISCSI_DEBUG("removing all sessions due to shutdown");
2095
2096	sx_slock(&sc->sc_lock);
2097	TAILQ_FOREACH(is, &sc->sc_sessions, is_next)
2098		iscsi_session_terminate(is);
2099	sx_sunlock(&sc->sc_lock);
2100}
2101
2102static int
2103iscsi_load(void)
2104{
2105	int error;
2106
2107	sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK);
2108	sx_init(&sc->sc_lock, "iscsi");
2109	TAILQ_INIT(&sc->sc_sessions);
2110	cv_init(&sc->sc_cv, "iscsi_cv");
2111
2112	iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding",
2113	    sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL,
2114	    UMA_ALIGN_PTR, 0);
2115
2116	error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw,
2117	    NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi");
2118	if (error != 0) {
2119		ISCSI_WARN("failed to create device node, error %d", error);
2120		return (error);
2121	}
2122	sc->sc_cdev->si_drv1 = sc;
2123
2124	/*
2125	 * Note that this needs to get run before dashutdown().  Otherwise,
2126	 * when rebooting with iSCSI session with outstanding requests,
2127	 * but disconnected, dashutdown() will hang on cam_periph_runccb().
2128	 */
2129	sc->sc_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
2130	    iscsi_shutdown, sc, SHUTDOWN_PRI_FIRST);
2131
2132	return (0);
2133}
2134
2135static int
2136iscsi_unload(void)
2137{
2138	struct iscsi_session *is, *tmp;
2139
2140	if (sc->sc_cdev != NULL) {
2141		ISCSI_DEBUG("removing device node");
2142		destroy_dev(sc->sc_cdev);
2143		ISCSI_DEBUG("device node removed");
2144	}
2145
2146	if (sc->sc_shutdown_eh != NULL)
2147		EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_eh);
2148
2149	sx_slock(&sc->sc_lock);
2150	TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp)
2151		iscsi_session_terminate(is);
2152	while(!TAILQ_EMPTY(&sc->sc_sessions)) {
2153		ISCSI_DEBUG("waiting for sessions to terminate");
2154		cv_wait(&sc->sc_cv, &sc->sc_lock);
2155	}
2156	ISCSI_DEBUG("all sessions terminated");
2157	sx_sunlock(&sc->sc_lock);
2158
2159	uma_zdestroy(iscsi_outstanding_zone);
2160	sx_destroy(&sc->sc_lock);
2161	cv_destroy(&sc->sc_cv);
2162	free(sc, M_ISCSI);
2163	return (0);
2164}
2165
2166static int
2167iscsi_quiesce(void)
2168{
2169	sx_slock(&sc->sc_lock);
2170	if (!TAILQ_EMPTY(&sc->sc_sessions)) {
2171		sx_sunlock(&sc->sc_lock);
2172		return (EBUSY);
2173	}
2174	sx_sunlock(&sc->sc_lock);
2175	return (0);
2176}
2177
2178static int
2179iscsi_modevent(module_t mod, int what, void *arg)
2180{
2181	int error;
2182
2183	switch (what) {
2184	case MOD_LOAD:
2185		error = iscsi_load();
2186		break;
2187	case MOD_UNLOAD:
2188		error = iscsi_unload();
2189		break;
2190	case MOD_QUIESCE:
2191		error = iscsi_quiesce();
2192		break;
2193	default:
2194		error = EINVAL;
2195		break;
2196	}
2197	return (error);
2198}
2199
2200moduledata_t iscsi_data = {
2201	"iscsi",
2202	iscsi_modevent,
2203	0
2204};
2205
2206DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
2207MODULE_DEPEND(iscsi, cam, 1, 1, 1);
2208MODULE_DEPEND(iscsi, icl, 1, 1, 1);
2209