/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * iSCSI Common Layer.  It's used by both the initiator and target to send
 * and receive iSCSI PDUs.
 */

3659266Ssteve#include <sys/cdefs.h>
3755505Sshin__FBSDID("$FreeBSD: stable/10/sys/dev/iscsi/icl.c 273313 2014-10-20 07:35:46Z mav $");
3855505Sshin
3955505Sshin#include <sys/param.h>
4055505Sshin#include <sys/capability.h>
4155505Sshin#include <sys/condvar.h>
4255505Sshin#include <sys/conf.h>
4355505Sshin#include <sys/file.h>
4458906Sshin#include <sys/kernel.h>
4558906Sshin#include <sys/kthread.h>
4655505Sshin#include <sys/lock.h>
4755505Sshin#include <sys/mbuf.h>
4855505Sshin#include <sys/mutex.h>
4955505Sshin#include <sys/module.h>
5055505Sshin#include <sys/protosw.h>
5155505Sshin#include <sys/socket.h>
5255505Sshin#include <sys/socketvar.h>
5355505Sshin#include <sys/sysctl.h>
5455505Sshin#include <sys/systm.h>
55#include <sys/sx.h>
56#include <sys/uio.h>
57#include <vm/uma.h>
58#include <netinet/in.h>
59#include <netinet/tcp.h>
60
61#include <dev/iscsi/icl.h>
62#include <dev/iscsi/iscsi_proto.h>
63
/* Sysctl tree for the iSCSI Common Layer knobs below. */
SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
/* 0 = quiet, 1 = warnings (ICL_WARN), > 1 = also debug traces (ICL_DEBUG). */
static int debug = 1;
TUNABLE_INT("kern.icl.debug", &debug);
SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debug, 0, "Enable debug messages");
/* When non-zero, the send thread merges queued PDUs into a single sosend(). */
static int coalesce = 1;
TUNABLE_INT("kern.icl.coalesce", &coalesce);
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
/*
 * For large data segments, wake the receive path after this many bytes
 * instead of waiting for the whole segment to accumulate in the socket
 * buffer; see icl_pdu_data_segment_receive_len().
 */
static int partial_receive_len = 128 * 1024;
TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
/* Socket buffer sizes; icl_conn_start() raises them to fit a maximum PDU. */
static int sendspace = 1048576;
TUNABLE_INT("kern.icl.sendspace", &sendspace);
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
TUNABLE_INT("kern.icl.recvspace", &recvspace);
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");
86
/* UMA zones backing icl_conn and icl_pdu allocations. */
static uma_zone_t icl_conn_zone;
static uma_zone_t icl_pdu_zone;

/* Count of live connections (acquired/released in icl_conn_new()/_free()). */
static volatile u_int	icl_ncons;

/* Debug trace; only emitted when kern.icl.debug > 1. */
#define	ICL_DEBUG(X, ...)						\
	do {								\
		if (debug > 1)						\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
	} while (0)

/* Warning; emitted whenever kern.icl.debug > 0. */
#define	ICL_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

/* The connection lock itself is supplied by the caller of icl_conn_new(). */
#define ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

/* Tail queue of PDUs; used for the per-connection send queue. */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
112
113static void
114icl_conn_fail(struct icl_conn *ic)
115{
116	if (ic->ic_socket == NULL)
117		return;
118
119	/*
120	 * XXX
121	 */
122	ic->ic_socket->so_error = EDOOFUS;
123	(ic->ic_error)(ic);
124}
125
126static struct mbuf *
127icl_conn_receive(struct icl_conn *ic, size_t len)
128{
129	struct uio uio;
130	struct socket *so;
131	struct mbuf *m;
132	int error, flags;
133
134	so = ic->ic_socket;
135
136	memset(&uio, 0, sizeof(uio));
137	uio.uio_resid = len;
138
139	flags = MSG_DONTWAIT;
140	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
141	if (error != 0) {
142		ICL_DEBUG("soreceive error %d", error);
143		return (NULL);
144	}
145	if (uio.uio_resid != 0) {
146		m_freem(m);
147		ICL_DEBUG("short read");
148		return (NULL);
149	}
150
151	return (m);
152}
153
154static struct icl_pdu *
155icl_pdu_new(struct icl_conn *ic, int flags)
156{
157	struct icl_pdu *ip;
158
159#ifdef DIAGNOSTIC
160	refcount_acquire(&ic->ic_outstanding_pdus);
161#endif
162	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
163	if (ip == NULL) {
164		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
165#ifdef DIAGNOSTIC
166		refcount_release(&ic->ic_outstanding_pdus);
167#endif
168		return (NULL);
169	}
170
171	ip->ip_conn = ic;
172
173	return (ip);
174}
175
176void
177icl_pdu_free(struct icl_pdu *ip)
178{
179	struct icl_conn *ic;
180
181	ic = ip->ip_conn;
182
183	m_freem(ip->ip_bhs_mbuf);
184	m_freem(ip->ip_ahs_mbuf);
185	m_freem(ip->ip_data_mbuf);
186	uma_zfree(icl_pdu_zone, ip);
187#ifdef DIAGNOSTIC
188	refcount_release(&ic->ic_outstanding_pdus);
189#endif
190}
191
192/*
193 * Allocate icl_pdu with empty BHS to fill up by the caller.
194 */
195struct icl_pdu *
196icl_pdu_new_bhs(struct icl_conn *ic, int flags)
197{
198	struct icl_pdu *ip;
199
200	ip = icl_pdu_new(ic, flags);
201	if (ip == NULL)
202		return (NULL);
203
204	ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
205	    flags, MT_DATA, M_PKTHDR);
206	if (ip->ip_bhs_mbuf == NULL) {
207		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
208		icl_pdu_free(ip);
209		return (NULL);
210	}
211	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
212	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
213	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
214
215	return (ip);
216}
217
218static int
219icl_pdu_ahs_length(const struct icl_pdu *request)
220{
221
222	return (request->ip_bhs->bhs_total_ahs_len * 4);
223}
224
225size_t
226icl_pdu_data_segment_length(const struct icl_pdu *request)
227{
228	uint32_t len = 0;
229
230	len += request->ip_bhs->bhs_data_segment_len[0];
231	len <<= 8;
232	len += request->ip_bhs->bhs_data_segment_len[1];
233	len <<= 8;
234	len += request->ip_bhs->bhs_data_segment_len[2];
235
236	return (len);
237}
238
239static void
240icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
241{
242
243	response->ip_bhs->bhs_data_segment_len[2] = len;
244	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
245	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
246}
247
248static size_t
249icl_pdu_padding(const struct icl_pdu *ip)
250{
251
252	if ((ip->ip_data_len % 4) != 0)
253		return (4 - (ip->ip_data_len % 4));
254
255	return (0);
256}
257
258static size_t
259icl_pdu_size(const struct icl_pdu *response)
260{
261	size_t len;
262
263	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
264
265	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
266	    icl_pdu_padding(response);
267	if (response->ip_conn->ic_header_crc32c)
268		len += ISCSI_HEADER_DIGEST_SIZE;
269	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
270		len += ISCSI_DATA_DIGEST_SIZE;
271
272	return (len);
273}
274
275static int
276icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
277{
278	struct mbuf *m;
279
280	m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
281	if (m == NULL) {
282		ICL_DEBUG("failed to receive BHS");
283		return (-1);
284	}
285
286	request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
287	if (request->ip_bhs_mbuf == NULL) {
288		ICL_WARN("m_pullup failed");
289		return (-1);
290	}
291	request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
292
293	/*
294	 * XXX: For architectures with strict alignment requirements
295	 * 	we may need to allocate ip_bhs and copy the data into it.
296	 * 	For some reason, though, not doing this doesn't seem
297	 * 	to cause problems; tested on sparc64.
298	 */
299
300	*availablep -= sizeof(struct iscsi_bhs);
301	return (0);
302}
303
304static int
305icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
306{
307
308	request->ip_ahs_len = icl_pdu_ahs_length(request);
309	if (request->ip_ahs_len == 0)
310		return (0);
311
312	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
313	    request->ip_ahs_len);
314	if (request->ip_ahs_mbuf == NULL) {
315		ICL_DEBUG("failed to receive AHS");
316		return (-1);
317	}
318
319	*availablep -= request->ip_ahs_len;
320	return (0);
321}
322
323static uint32_t
324icl_mbuf_to_crc32c(const struct mbuf *m0)
325{
326	uint32_t digest = 0xffffffff;
327	const struct mbuf *m;
328
329	for (m = m0; m != NULL; m = m->m_next)
330		digest = calculate_crc32c(digest,
331		    mtod(m, const void *), m->m_len);
332
333	digest = digest ^ 0xffffffff;
334
335	return (digest);
336}
337
338static int
339icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
340{
341	struct mbuf *m;
342	uint32_t received_digest, valid_digest;
343
344	if (request->ip_conn->ic_header_crc32c == false)
345		return (0);
346
347	m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
348	if (m == NULL) {
349		ICL_DEBUG("failed to receive header digest");
350		return (-1);
351	}
352
353	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
354	m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
355	m_freem(m);
356
357	*availablep -= ISCSI_HEADER_DIGEST_SIZE;
358
359	/*
360	 * XXX: Handle AHS.
361	 */
362	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
363	if (received_digest != valid_digest) {
364		ICL_WARN("header digest check failed; got 0x%x, "
365		    "should be 0x%x", received_digest, valid_digest);
366		return (-1);
367	}
368
369	return (0);
370}
371
/*
 * Return the number of bytes that should be waiting in the receive socket
 * before icl_pdu_receive_data_segment() gets called.
 */
static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
{
	size_t len;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/*
	 * Account for the parts of data segment already read from
	 * the socket buffer.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 *
	 * NOTE(review): partial_receive_len is a signed int tunable; the
	 * comparison promotes it to size_t, so a negative sysctl value
	 * would effectively disable this limit -- confirm the tunable is
	 * never set negative.
	 */
	if (len > partial_receive_len) {
#if 0
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len));
#endif
		len = partial_receive_len;

		/* No padding here: this is not the end of the segment. */
		return (len);
	}

	/*
	 * Account for padding.  Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}
423
/*
 * Receive as much of the PDU data segment (plus trailing padding) as is
 * currently available in the socket buffer, appending it to
 * request->ip_data_mbuf.  On a partial read, sets *more_neededp and
 * arranges ic->ic_receive_len for the next wakeup.  Returns 0 on
 * success, -1 on receive failure.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/* Padding is computed from the full segment length. */
	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding));
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		/* Append to whatever was received on earlier partial reads. */
		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len excludes padding; *availablep includes it. */
		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	/* Tell the receive thread how many more bytes to wait for. */
	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}
492
493static int
494icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
495{
496	struct mbuf *m;
497	uint32_t received_digest, valid_digest;
498
499	if (request->ip_conn->ic_data_crc32c == false)
500		return (0);
501
502	if (request->ip_data_len == 0)
503		return (0);
504
505	m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
506	if (m == NULL) {
507		ICL_DEBUG("failed to receive data digest");
508		return (-1);
509	}
510
511	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
512	m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
513	m_freem(m);
514
515	*availablep -= ISCSI_DATA_DIGEST_SIZE;
516
517	/*
518	 * Note that ip_data_mbuf also contains padding; since digest
519	 * calculation is supposed to include that, we iterate over
520	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
521	 */
522	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
523	if (received_digest != valid_digest) {
524		ICL_WARN("data digest check failed; got 0x%x, "
525		    "should be 0x%x", received_digest, valid_digest);
526		return (-1);
527	}
528
529	return (0);
530}
531
/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
 *
 * The receive state machine advances BHS -> AHS -> header digest ->
 * data -> data digest; ic->ic_receive_state tracks the current stage,
 * ic->ic_receive_len how many bytes must be buffered before the next
 * call, and ic->ic_receive_pdu the PDU under construction.
 */
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
{
	struct icl_pdu *request;
	struct socket *so;
	size_t len;
	int error;
	bool more_needed;

	so = ic->ic_socket;

	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
		/* Start of a new PDU. */
		KASSERT(ic->ic_receive_pdu == NULL,
		    ("ic->ic_receive_pdu != NULL"));
		request = icl_pdu_new(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		ic->ic_receive_pdu = request;
	} else {
		/* Continue the PDU left over from the previous call. */
		KASSERT(ic->ic_receive_pdu != NULL,
		    ("ic->ic_receive_pdu == NULL"));
		request = ic->ic_receive_pdu;
	}

	if (*availablep < ic->ic_receive_len) {
#if 0
		ICL_DEBUG("not enough data; need %zd, "
		    "have %zd", ic->ic_receive_len, *availablep);
#endif
		return (NULL);
	}

	switch (ic->ic_receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		error = icl_pdu_receive_bhs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive BHS; "
			    "dropping connection");
			break;
		}

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in 8 bit field.
		 */

		len = icl_pdu_data_segment_length(request);
		if (len > ic->ic_max_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated "
			    "MaxDataSegmentLength %zd; "
			    "dropping connection",
			    len, ic->ic_max_data_segment_length);
			error = EINVAL;
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_AHS;
		ic->ic_receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		error = icl_pdu_receive_ahs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive AHS; "
			    "dropping connection");
			break;
		}
		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		/* Header digest is present only when negotiated. */
		if (ic->ic_header_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_DATA;
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, availablep,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data segment;"
			    "dropping connection");
			break;
		}

		/* Partial segment; stay in STATE_DATA for the next call. */
		if (more_needed)
			break;

		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_DATA_DIGEST:
		//ICL_DEBUG("receiving data digest");
		error = icl_pdu_check_data_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("data digest failed; "
			    "dropping connection");
			break;
		}

		/*
		 * We've received complete PDU; reset the receive state machine
		 * and return the PDU.
		 */
		ic->ic_receive_state = ICL_CONN_STATE_BHS;
		ic->ic_receive_len = sizeof(struct iscsi_bhs);
		ic->ic_receive_pdu = NULL;
		return (request);

	default:
		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
	}

	if (error != 0) {
		/*
		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
		 * and will get freed in icl_conn_close().
		 */
		icl_conn_fail(ic);
	}

	return (NULL);
}
683
/*
 * Drain the socket buffer: assemble complete PDUs out of "available"
 * buffered bytes and hand each one to the ic_receive callback.
 * Returns when there is not enough data for the next PDU part, on
 * disconnect, or after dropping the connection on error.
 */
static void
icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
{
	struct icl_pdu *response;
	struct socket *so;

	so = ic->ic_socket;

	/*
	 * This can never happen; we're careful to only mess with ic->ic_socket
	 * pointer when the send/receive threads are not running.
	 */
	KASSERT(so != NULL, ("NULL socket"));

	for (;;) {
		if (ic->ic_disconnecting)
			return;

		if (so->so_error != 0) {
			ICL_DEBUG("connection error %d; "
			    "dropping connection", so->so_error);
			icl_conn_fail(ic);
			return;
		}

		/*
		 * Loop until we have a complete PDU or there is not enough
		 * data in the socket buffer.
		 */
		if (available < ic->ic_receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, "
			    "need %zd", available,
			    ic->ic_receive_len);
#endif
			return;
		}

		/* NULL means "PDU not complete yet"; go around. */
		response = icl_conn_receive_pdu(ic, &available);
		if (response == NULL)
			continue;

		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_pdu_free(response);
			icl_conn_fail(ic);
			return;
		}

		/* Ownership of the PDU passes to the ic_receive callback. */
		(ic->ic_receive)(response);
	}
}
738
/*
 * Per-connection receive thread: sleeps until the socket upcall signals
 * that at least ic_receive_len bytes are buffered, then processes PDUs.
 * Exits when ic_disconnecting is set; clears ic_receive_running on exit.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available;
	struct socket *so;

	ic = arg;
	so = ic->ic_socket;

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);

	for (;;) {
		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unneccessary wakeups until there
		 * is enough data received to read the PDU.
		 */
		SOCKBUF_LOCK(&so->so_rcv);
		available = so->so_rcv.sb_cc;
		if (available < ic->ic_receive_len) {
			/*
			 * Sleep on the sockbuf mutex (the one taken by
			 * SOCKBUF_LOCK above); the upcall does cv_signal().
			 */
			so->so_rcv.sb_lowat = ic->ic_receive_len;
			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
		} else
			/* Enough data already; suppress further upcalls. */
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
		SOCKBUF_UNLOCK(&so->so_rcv);

		icl_conn_receive_pdus(ic, available);
	}

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = false;
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
782
783static int
784icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
785{
786	struct icl_conn *ic;
787
788	if (!soreadable(so))
789		return (SU_OK);
790
791	ic = arg;
792	cv_signal(&ic->ic_receive_cv);
793	return (SU_OK);
794}
795
796static int
797icl_pdu_finalize(struct icl_pdu *request)
798{
799	size_t padding, pdu_len;
800	uint32_t digest, zero = 0;
801	int ok;
802	struct icl_conn *ic;
803
804	ic = request->ip_conn;
805
806	icl_pdu_set_data_segment_length(request, request->ip_data_len);
807
808	pdu_len = icl_pdu_size(request);
809
810	if (ic->ic_header_crc32c) {
811		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
812		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
813		    (void *)&digest);
814		if (ok != 1) {
815			ICL_WARN("failed to append header digest");
816			return (1);
817		}
818	}
819
820	if (request->ip_data_len != 0) {
821		padding = icl_pdu_padding(request);
822		if (padding > 0) {
823			ok = m_append(request->ip_data_mbuf, padding,
824			    (void *)&zero);
825			if (ok != 1) {
826				ICL_WARN("failed to append padding");
827				return (1);
828			}
829		}
830
831		if (ic->ic_data_crc32c) {
832			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
833
834			ok = m_append(request->ip_data_mbuf, sizeof(digest),
835			    (void *)&digest);
836			if (ok != 1) {
837				ICL_WARN("failed to append data digest");
838				return (1);
839			}
840		}
841
842		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
843		request->ip_data_mbuf = NULL;
844	}
845
846	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
847
848	return (0);
849}
850
851static void
852icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
853{
854	struct icl_pdu *request, *request2;
855	struct socket *so;
856	size_t available, size, size2;
857	int coalesced, error;
858
859	ICL_CONN_LOCK_ASSERT_NOT(ic);
860
861	so = ic->ic_socket;
862
863	SOCKBUF_LOCK(&so->so_snd);
864	/*
865	 * Check how much space do we have for transmit.  We can't just
866	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
867	 * as it always frees the mbuf chain passed to it, even in case
868	 * of error.
869	 */
870	available = sbspace(&so->so_snd);
871
872	/*
873	 * Notify the socket upcall that we don't need wakeups
874	 * for the time being.
875	 */
876	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
877	SOCKBUF_UNLOCK(&so->so_snd);
878
879	while (!STAILQ_EMPTY(queue)) {
880		request = STAILQ_FIRST(queue);
881		size = icl_pdu_size(request);
882		if (available < size) {
883
884			/*
885			 * Set the low watermark, to be checked by
886			 * sowriteable() in icl_soupcall_send()
887			 * to avoid unneccessary wakeups until there
888			 * is enough space for the PDU to fit.
889			 */
890			SOCKBUF_LOCK(&so->so_snd);
891			available = sbspace(&so->so_snd);
892			if (available < size) {
893#if 1
894				ICL_DEBUG("no space to send; "
895				    "have %zd, need %zd",
896				    available, size);
897#endif
898				so->so_snd.sb_lowat = size;
899				SOCKBUF_UNLOCK(&so->so_snd);
900				return;
901			}
902			SOCKBUF_UNLOCK(&so->so_snd);
903		}
904		STAILQ_REMOVE_HEAD(queue, ip_next);
905		error = icl_pdu_finalize(request);
906		if (error != 0) {
907			ICL_DEBUG("failed to finalize PDU; "
908			    "dropping connection");
909			icl_conn_fail(ic);
910			icl_pdu_free(request);
911			return;
912		}
913		if (coalesce) {
914			coalesced = 1;
915			for (;;) {
916				request2 = STAILQ_FIRST(queue);
917				if (request2 == NULL)
918					break;
919				size2 = icl_pdu_size(request2);
920				if (available < size + size2)
921					break;
922				STAILQ_REMOVE_HEAD(queue, ip_next);
923				error = icl_pdu_finalize(request2);
924				if (error != 0) {
925					ICL_DEBUG("failed to finalize PDU; "
926					    "dropping connection");
927					icl_conn_fail(ic);
928					icl_pdu_free(request);
929					icl_pdu_free(request2);
930					return;
931				}
932				m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
933				request2->ip_bhs_mbuf = NULL;
934				request->ip_bhs_mbuf->m_pkthdr.len += size2;
935				size += size2;
936				STAILQ_REMOVE_AFTER(queue, request, ip_next);
937				icl_pdu_free(request2);
938				coalesced++;
939			}
940#if 0
941			if (coalesced > 1) {
942				ICL_DEBUG("coalesced %d PDUs into %zd bytes",
943				    coalesced, size);
944			}
945#endif
946		}
947		available -= size;
948		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
949		    NULL, MSG_DONTWAIT, curthread);
950		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
951		if (error != 0) {
952			ICL_DEBUG("failed to send PDU, error %d; "
953			    "dropping connection", error);
954			icl_conn_fail(ic);
955			icl_pdu_free(request);
956			return;
957		}
958		icl_pdu_free(request);
959	}
960}
961
/*
 * Per-connection send thread: swaps the shared ic_to_send queue into a
 * local one (so PDUs can be sent without holding the connection lock),
 * pushes it to the socket, and sleeps until either new PDUs are queued
 * or the socket upcall reports free send space.  Exits when
 * ic_disconnecting is set; clears ic_send_running on exit.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	ic->ic_send_running = true;

	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/* Woken by icl_pdu_queue() or icl_soupcall_send(). */
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	ic->ic_send_running = false;
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
1032
1033static int
1034icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1035{
1036	struct icl_conn *ic;
1037
1038	if (!sowriteable(so))
1039		return (SU_OK);
1040
1041	ic = arg;
1042
1043	ICL_CONN_LOCK(ic);
1044	ic->ic_check_send_space = true;
1045	ICL_CONN_UNLOCK(ic);
1046
1047	cv_signal(&ic->ic_send_cv);
1048
1049	return (SU_OK);
1050}
1051
1052int
1053icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1054    int flags)
1055{
1056	struct mbuf *mb, *newmb;
1057	size_t copylen, off = 0;
1058
1059	KASSERT(len > 0, ("len == 0"));
1060
1061	newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1062	if (newmb == NULL) {
1063		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1064		return (ENOMEM);
1065	}
1066
1067	for (mb = newmb; mb != NULL; mb = mb->m_next) {
1068		copylen = min(M_TRAILINGSPACE(mb), len - off);
1069		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1070		mb->m_len = copylen;
1071		off += copylen;
1072	}
1073	KASSERT(off == len, ("%s: off != len", __func__));
1074
1075	if (request->ip_data_mbuf == NULL) {
1076		request->ip_data_mbuf = newmb;
1077		request->ip_data_len = len;
1078	} else {
1079		m_cat(request->ip_data_mbuf, newmb);
1080		request->ip_data_len += len;
1081	}
1082
1083	return (0);
1084}
1085
/*
 * Copy "len" bytes at offset "off" out of the PDU's data segment
 * into "addr".
 */
void
icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}
1092
1093void
1094icl_pdu_queue(struct icl_pdu *ip)
1095{
1096	struct icl_conn *ic;
1097
1098	ic = ip->ip_conn;
1099
1100	ICL_CONN_LOCK_ASSERT(ic);
1101
1102	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1103		ICL_DEBUG("icl_pdu_queue on closed connection");
1104		icl_pdu_free(ip);
1105		return;
1106	}
1107
1108	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1109		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1110		/*
1111		 * If the queue is not empty, someone else had already
1112		 * signaled the send thread; no need to do that again,
1113		 * just return.
1114		 */
1115		return;
1116	}
1117
1118	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1119	cv_signal(&ic->ic_send_cv);
1120}
1121
1122struct icl_conn *
1123icl_conn_new(const char *name, struct mtx *lock)
1124{
1125	struct icl_conn *ic;
1126
1127	refcount_acquire(&icl_ncons);
1128
1129	ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
1130
1131	STAILQ_INIT(&ic->ic_to_send);
1132	ic->ic_lock = lock;
1133	cv_init(&ic->ic_send_cv, "icl_tx");
1134	cv_init(&ic->ic_receive_cv, "icl_rx");
1135#ifdef DIAGNOSTIC
1136	refcount_init(&ic->ic_outstanding_pdus, 0);
1137#endif
1138	ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1139	ic->ic_name = name;
1140
1141	return (ic);
1142}
1143
1144void
1145icl_conn_free(struct icl_conn *ic)
1146{
1147
1148	cv_destroy(&ic->ic_send_cv);
1149	cv_destroy(&ic->ic_receive_cv);
1150	uma_zfree(icl_conn_zone, ic);
1151	refcount_release(&icl_ncons);
1152}
1153
/*
 * Bring a freshly handed-off connection to life: reset the receive
 * state machine, size the socket buffers, disable Nagle, start the
 * transmit/receive kthreads, and register socket upcalls.  On any
 * failure the connection is closed via icl_conn_close() and an errno
 * value is returned.
 */
static int
icl_conn_start(struct icl_conn *ic)
{
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* The receive state machine starts by expecting a Basic Header Segment. */
	ic->ic_receive_state = ICL_CONN_STATE_BHS;
	ic->ic_receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 *
	 * What we should actually do here is to use autoscaling, but set
	 * some minimal buffer size to "minspace".  I don't know a way to do
	 * that, though.
	 */
	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	/*
	 * Clamp the tunables up to the minimum; note this modifies the
	 * global sysctl values, affecting subsequent connections as well.
	 */
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}

	/*
	 * Start threads.  The thread names are derived from ic_name, so
	 * e.g. a connection named "iscsi" gets "iscsitx" and "iscsirx".
	 */
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}

	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		/*
		 * NOTE(review): the send thread started above is expected to
		 * be torn down by icl_conn_close() setting ic_disconnecting.
		 */
		icl_conn_close(ic);
		return (error);
	}

	/*
	 * Register socket upcall, to get notified about incoming PDUs
	 * and free space to send outgoing ones.
	 */
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	return (0);
}
1254
/*
 * Take over a connected socket from userland, identified by file
 * descriptor "fd", and start the connection on it.  Returns 0 on
 * success or an errno value; the descriptor is left dead (badfileops)
 * in the caller's file table on success.
 */
int
icl_conn_handoff(struct icl_conn *ic, int fd)
{
	struct file *fp;
	struct socket *so;
	cap_rights_t rights;
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	/* Only stream sockets can carry iSCSI PDUs. */
	if (so->so_type != SOCK_STREAM) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);

	/* Refuse to replace a socket that was already handed off. */
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}

	/*
	 * Detach the socket from the file: point f_ops at badfileops and
	 * clear f_data BEFORE fdrop(), so that any further userland use
	 * of the descriptor fails instead of touching our socket.
	 * (fp->f_data here is the same pointer as "so" above.)
	 */
	ic->ic_socket = fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}
1300
/*
 * Tear down a connection: deregister socket upcalls, stop the
 * transmit/receive threads, close the socket, and free any partially
 * received PDU and all PDUs still queued for transmission.  Safe to
 * call on an already-closed connection (returns immediately).
 */
void
icl_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket == NULL) {
		/* Already closed; nothing to do. */
		ICL_CONN_UNLOCK(ic);
		return;
	}

	/*
	 * Deregister socket upcalls.  The connection lock must be dropped
	 * here because the sockbuf locks are taken in the upcalls with the
	 * connection lock potentially held by the upcall caller.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	if (ic->ic_socket->so_snd.sb_upcall != NULL)
		soupcall_clear(ic->ic_socket, SO_SND);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	if (ic->ic_socket->so_rcv.sb_upcall != NULL)
		soupcall_clear(ic->ic_socket, SO_RCV);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
	ICL_CONN_LOCK(ic);

	/* Tell the threads to exit; they check this flag on every wakeup. */
	ic->ic_disconnecting = true;

	/*
	 * Wake up the threads, so they can properly terminate.
	 * Re-signal on every iteration in case a wakeup was lost; pause(9)
	 * bounds each wait to one second.
	 */
	cv_signal(&ic->ic_receive_cv);
	cv_signal(&ic->ic_send_cv);
	while (ic->ic_receive_running || ic->ic_send_running) {
		//ICL_DEBUG("waiting for send/receive threads to terminate");
		ICL_CONN_UNLOCK(ic);
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		pause("icl_close", 1 * hz);
		ICL_CONN_LOCK(ic);
	}
	//ICL_DEBUG("send/receive threads terminated");

	/* soclose(9) may sleep, so call it with the lock dropped. */
	ICL_CONN_UNLOCK(ic);
	soclose(ic->ic_socket);
	ICL_CONN_LOCK(ic);
	ic->ic_socket = NULL;

	if (ic->ic_receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_pdu_free(ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_pdu_free(pdu);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	ICL_CONN_UNLOCK(ic);
}
1374
1375bool
1376icl_conn_connected(struct icl_conn *ic)
1377{
1378	ICL_CONN_LOCK_ASSERT_NOT(ic);
1379
1380	ICL_CONN_LOCK(ic);
1381	if (ic->ic_socket == NULL) {
1382		ICL_CONN_UNLOCK(ic);
1383		return (false);
1384	}
1385	if (ic->ic_socket->so_error != 0) {
1386		ICL_CONN_UNLOCK(ic);
1387		return (false);
1388	}
1389	ICL_CONN_UNLOCK(ic);
1390	return (true);
1391}
1392
1393#ifdef ICL_KERNEL_PROXY
1394int
1395icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
1396{
1397	int error;
1398
1399	ICL_CONN_LOCK_ASSERT_NOT(ic);
1400
1401	if (so->so_type != SOCK_STREAM)
1402		return (EINVAL);
1403
1404	ICL_CONN_LOCK(ic);
1405	if (ic->ic_socket != NULL) {
1406		ICL_CONN_UNLOCK(ic);
1407		return (EBUSY);
1408	}
1409	ic->ic_socket = so;
1410	ICL_CONN_UNLOCK(ic);
1411
1412	error = icl_conn_start(ic);
1413
1414	return (error);
1415}
1416#endif /* ICL_KERNEL_PROXY */
1417
1418static int
1419icl_unload(void)
1420{
1421
1422	if (icl_ncons != 0)
1423		return (EBUSY);
1424
1425	uma_zdestroy(icl_conn_zone);
1426	uma_zdestroy(icl_pdu_zone);
1427
1428	return (0);
1429}
1430
1431static void
1432icl_load(void)
1433{
1434
1435	icl_conn_zone = uma_zcreate("icl_conn",
1436	    sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1437	    UMA_ALIGN_PTR, 0);
1438	icl_pdu_zone = uma_zcreate("icl_pdu",
1439	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1440	    UMA_ALIGN_PTR, 0);
1441
1442	refcount_init(&icl_ncons, 0);
1443}
1444
1445static int
1446icl_modevent(module_t mod, int what, void *arg)
1447{
1448
1449	switch (what) {
1450	case MOD_LOAD:
1451		icl_load();
1452		return (0);
1453	case MOD_UNLOAD:
1454		return (icl_unload());
1455	default:
1456		return (EINVAL);
1457	}
1458}
1459
1460moduledata_t icl_data = {
1461	"icl",
1462	icl_modevent,
1463	0
1464};
1465
1466DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1467MODULE_VERSION(icl, 1);
1468