1/*-
2 * Copyright (c) 2012 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31/*
32 * iSCSI Common Layer.  It's used by both the initiator and target to send
33 * and receive iSCSI PDUs.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: stable/10/sys/dev/iscsi/icl.c 270888 2014-08-31 20:21:08Z trasz $");
38
39#include <sys/param.h>
40#include <sys/capability.h>
41#include <sys/condvar.h>
42#include <sys/conf.h>
43#include <sys/file.h>
44#include <sys/kernel.h>
45#include <sys/kthread.h>
46#include <sys/lock.h>
47#include <sys/mbuf.h>
48#include <sys/mutex.h>
49#include <sys/module.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/sysctl.h>
54#include <sys/systm.h>
55#include <sys/sx.h>
56#include <sys/uio.h>
57#include <vm/uma.h>
58#include <netinet/in.h>
59#include <netinet/tcp.h>
60
61#include "icl.h"
62#include "iscsi_proto.h"
63
64SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
65static int debug = 1;
66TUNABLE_INT("kern.icl.debug", &debug);
67SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
68    &debug, 0, "Enable debug messages");
69static int coalesce = 1;
70TUNABLE_INT("kern.icl.coalesce", &coalesce);
71SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
72    &coalesce, 0, "Try to coalesce PDUs before sending");
73static int partial_receive_len = 128 * 1024;
74TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
75SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
76    &partial_receive_len, 0, "Minimum read size for partially received "
77    "data segment");
78static int sendspace = 1048576;
79TUNABLE_INT("kern.icl.sendspace", &sendspace);
80SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
81    &sendspace, 0, "Default send socket buffer size");
82static int recvspace = 1048576;
83TUNABLE_INT("kern.icl.recvspace", &recvspace);
84SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
85    &recvspace, 0, "Default receive socket buffer size");
86
87static uma_zone_t icl_conn_zone;
88static uma_zone_t icl_pdu_zone;
89
90static volatile u_int	icl_ncons;
91
92#define	ICL_DEBUG(X, ...)						\
93	do {								\
94		if (debug > 1)						\
95			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
96	} while (0)
97
98#define	ICL_WARN(X, ...)						\
99	do {								\
100		if (debug > 0) {					\
101			printf("WARNING: %s: " X "\n",			\
102			    __func__, ## __VA_ARGS__);			\
103		}							\
104	} while (0)
105
106#define ICL_CONN_LOCK(X)		mtx_lock((X)->ic_lock)
107#define ICL_CONN_UNLOCK(X)		mtx_unlock((X)->ic_lock)
108#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert((X)->ic_lock, MA_OWNED)
109#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert((X)->ic_lock, MA_NOTOWNED)
110
111STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
112
113static void
114icl_conn_fail(struct icl_conn *ic)
115{
116	if (ic->ic_socket == NULL)
117		return;
118
119	/*
120	 * XXX
121	 */
122	ic->ic_socket->so_error = EDOOFUS;
123	(ic->ic_error)(ic);
124}
125
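/*
 * Read exactly "len" bytes from the socket without blocking; returns
 * the resulting mbuf chain, or NULL on error or short read.  Callers
 * make sure beforehand (via ic_receive_len) that the requested data
 * is already sitting in the socket buffer.
 */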
126static struct mbuf *
127icl_conn_receive(struct icl_conn *ic, size_t len)
128{
129	struct uio uio;
130	struct socket *so;
131	struct mbuf *m;
132	int error, flags;
133
134	so = ic->ic_socket;
135
136	memset(&uio, 0, sizeof(uio));
137	uio.uio_resid = len;
138
139	flags = MSG_DONTWAIT;
140	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
141	if (error != 0) {
142		ICL_DEBUG("soreceive error %d", error);
143		return (NULL);
144	}
145	if (uio.uio_resid != 0) {
146		m_freem(m);
147		ICL_DEBUG("short read");
148		return (NULL);
149	}
150
151	return (m);
152}
153
154static struct icl_pdu *
155icl_pdu_new(struct icl_conn *ic, int flags)
156{
157	struct icl_pdu *ip;
158
159#ifdef DIAGNOSTIC
160	refcount_acquire(&ic->ic_outstanding_pdus);
161#endif
162	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
163	if (ip == NULL) {
164		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
165#ifdef DIAGNOSTIC
166		refcount_release(&ic->ic_outstanding_pdus);
167#endif
168		return (NULL);
169	}
170
171	ip->ip_conn = ic;
172
173	return (ip);
174}
175
176void
177icl_pdu_free(struct icl_pdu *ip)
178{
179	struct icl_conn *ic;
180
181	ic = ip->ip_conn;
182
183	m_freem(ip->ip_bhs_mbuf);
184	m_freem(ip->ip_ahs_mbuf);
185	m_freem(ip->ip_data_mbuf);
186	uma_zfree(icl_pdu_zone, ip);
187#ifdef DIAGNOSTIC
188	refcount_release(&ic->ic_outstanding_pdus);
189#endif
190}
191
192/*
193 * Allocate an icl_pdu with an empty BHS, to be filled in by the caller.
194 */
195struct icl_pdu *
196icl_pdu_new_bhs(struct icl_conn *ic, int flags)
197{
198	struct icl_pdu *ip;
199
200	ip = icl_pdu_new(ic, flags);
201	if (ip == NULL)
202		return (NULL);
203
204	ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
205	    flags, MT_DATA, M_PKTHDR);
206	if (ip->ip_bhs_mbuf == NULL) {
207		ICL_WARN("failed to allocate %zd bytes", sizeof(struct iscsi_bhs));
208		icl_pdu_free(ip);
209		return (NULL);
210	}
211	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
212	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
213	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
214
215	return (ip);
216}
217
218static int
219icl_pdu_ahs_length(const struct icl_pdu *request)
220{
221
222	return (request->ip_bhs->bhs_total_ahs_len * 4);
223}
224
225size_t
226icl_pdu_data_segment_length(const struct icl_pdu *request)
227{
228	uint32_t len = 0;
229
230	len += request->ip_bhs->bhs_data_segment_len[0];
231	len <<= 8;
232	len += request->ip_bhs->bhs_data_segment_len[1];
233	len <<= 8;
234	len += request->ip_bhs->bhs_data_segment_len[2];
235
236	return (len);
237}
238
239static void
240icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
241{
242
243	response->ip_bhs->bhs_data_segment_len[2] = len;
244	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
245	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
246}
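/*
 * Note that the BHS DataSegmentLength field handled by the two functions
 * above is a 24-bit value in network byte order (big endian), which is
 * why it is assembled and split byte by byte.  For example, a data
 * segment length of 0x012345 is stored as bhs_data_segment_len[0] = 0x01,
 * [1] = 0x23, [2] = 0x45.
 */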
247
248static size_t
249icl_pdu_padding(const struct icl_pdu *ip)
250{
251
252	if ((ip->ip_data_len % 4) != 0)
253		return (4 - (ip->ip_data_len % 4));
254
255	return (0);
256}
257
258static size_t
259icl_pdu_size(const struct icl_pdu *response)
260{
261	size_t len;
262
263	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
264
265	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
266	    icl_pdu_padding(response);
267	if (response->ip_conn->ic_header_crc32c)
268		len += ISCSI_HEADER_DIGEST_SIZE;
269	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
270		len += ISCSI_DATA_DIGEST_SIZE;
271
272	return (len);
273}
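/*
 * On the wire, a PDU consists of the 48-byte BHS, an optional AHS, an
 * optional 4-byte header digest, the data segment padded to a multiple
 * of 4 bytes, and an optional 4-byte data digest.  icl_pdu_size() above
 * computes that total for the send path; AHS is not supported by this
 * layer (see the KASSERT above and icl_conn_receive_pdus()).
 */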
274
275static int
276icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
277{
278	struct mbuf *m;
279
280	m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
281	if (m == NULL) {
282		ICL_DEBUG("failed to receive BHS");
283		return (-1);
284	}
285
286	request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
287	if (request->ip_bhs_mbuf == NULL) {
288		ICL_WARN("m_pullup failed");
289		return (-1);
290	}
291	request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
292
293	/*
294	 * XXX: For architectures with strict alignment requirements
295	 * 	we may need to allocate ip_bhs and copy the data into it.
296	 * 	For some reason, though, not doing this doesn't seem
297	 * 	to cause problems; tested on sparc64.
298	 */
299
300	*availablep -= sizeof(struct iscsi_bhs);
301	return (0);
302}
303
304static int
305icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
306{
307
308	request->ip_ahs_len = icl_pdu_ahs_length(request);
309	if (request->ip_ahs_len == 0)
310		return (0);
311
312	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
313	    request->ip_ahs_len);
314	if (request->ip_ahs_mbuf == NULL) {
315		ICL_DEBUG("failed to receive AHS");
316		return (-1);
317	}
318
319	*availablep -= request->ip_ahs_len;
320	return (0);
321}
322
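/*
 * Compute the CRC32C digest of an mbuf chain, as used for the iSCSI
 * HeaderDigest and DataDigest (ic_header_crc32c/ic_data_crc32c): the
 * running CRC is seeded with 0xffffffff and the final value is
 * inverted; calculate_crc32c() performs the per-buffer update.
 */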
323static uint32_t
324icl_mbuf_to_crc32c(const struct mbuf *m0)
325{
326	uint32_t digest = 0xffffffff;
327	const struct mbuf *m;
328
329	for (m = m0; m != NULL; m = m->m_next)
330		digest = calculate_crc32c(digest,
331		    mtod(m, const void *), m->m_len);
332
333	digest = digest ^ 0xffffffff;
334
335	return (digest);
336}
337
338static int
339icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
340{
341	struct mbuf *m;
342	uint32_t received_digest, valid_digest;
343
344	if (request->ip_conn->ic_header_crc32c == false)
345		return (0);
346
347	m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
348	if (m == NULL) {
349		ICL_DEBUG("failed to receive header digest");
350		return (-1);
351	}
352
353	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
354	m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
355	m_freem(m);
356
357	*availablep -= ISCSI_HEADER_DIGEST_SIZE;
358
359	/*
360	 * XXX: Handle AHS.
361	 */
362	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
363	if (received_digest != valid_digest) {
364		ICL_WARN("header digest check failed; got 0x%x, "
365		    "should be 0x%x", received_digest, valid_digest);
366		return (-1);
367	}
368
369	return (0);
370}
371
372/*
373 * Return the number of bytes that should be waiting in the receive socket
374 * before icl_pdu_receive_data_segment() gets called.
375 */
376static size_t
377icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
378{
379	size_t len;
380
381	len = icl_pdu_data_segment_length(request);
382	if (len == 0)
383		return (0);
384
385	/*
386	 * Account for the parts of the data segment already read from
387	 * the socket buffer.
388	 */
389	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
390	len -= request->ip_data_len;
391
392	/*
393	 * Don't always wait for the full data segment to be delivered
394	 * to the socket; this might badly affect performance due to
395	 * TCP window scaling.
396	 */
397	if (len > partial_receive_len) {
398#if 0
399		ICL_DEBUG("need %zd bytes of data, limiting to %d",
400		    len, partial_receive_len);
401#endif
402		len = partial_receive_len;
403
404		return (len);
405	}
406
407	/*
408	 * Account for padding.  Note that due to the way the code is written,
409	 * icl_pdu_receive_data_segment() must always receive the padding
410	 * along with the last part of the data segment, because otherwise it
411	 * would be impossible to tell whether we've already received the full
412	 * data segment including padding, or just the data without it.
413	 */
414	if ((len % 4) != 0)
415		len += 4 - (len % 4);
416
417#if 0
418	ICL_DEBUG("need %zd bytes of data", len);
419#endif
420
421	return (len);
422}
423
424static int
425icl_pdu_receive_data_segment(struct icl_pdu *request,
426    size_t *availablep, bool *more_neededp)
427{
428	struct icl_conn *ic;
429	size_t len, padding = 0;
430	struct mbuf *m;
431
432	ic = request->ip_conn;
433
434	*more_neededp = false;
435	ic->ic_receive_len = 0;
436
437	len = icl_pdu_data_segment_length(request);
438	if (len == 0)
439		return (0);
440
441	if ((len % 4) != 0)
442		padding = 4 - (len % 4);
443
444	/*
445	 * Account for the already received parts of the data segment.
446	 */
447	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
448	len -= request->ip_data_len;
449
450	if (len + padding > *availablep) {
451		/*
452		 * Not enough data in the socket buffer.  Receive as much
453		 * as we can.  Don't receive padding, since, obviously, it's
454		 * not the end of the data segment yet.
455		 */
456#if 0
457		ICL_DEBUG("limited from %zd to %zd",
458		    len + padding, *availablep - padding);
459#endif
460		len = *availablep - padding;
461		*more_neededp = true;
462		padding = 0;
463	}
464
465	/*
466	 * Must not try to receive padding without at least one byte
467	 * of the actual data segment.
468	 */
469	if (len > 0) {
470		m = icl_conn_receive(request->ip_conn, len + padding);
471		if (m == NULL) {
472			ICL_DEBUG("failed to receive data segment");
473			return (-1);
474		}
475
476		if (request->ip_data_mbuf == NULL)
477			request->ip_data_mbuf = m;
478		else
479			m_cat(request->ip_data_mbuf, m);
480
481		request->ip_data_len += len;
482		*availablep -= len + padding;
483	} else
484		ICL_DEBUG("len 0");
485
486	if (*more_neededp)
487		ic->ic_receive_len =
488		    icl_pdu_data_segment_receive_len(request);
489
490	return (0);
491}
492
493static int
494icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
495{
496	struct mbuf *m;
497	uint32_t received_digest, valid_digest;
498
499	if (request->ip_conn->ic_data_crc32c == false)
500		return (0);
501
502	if (request->ip_data_len == 0)
503		return (0);
504
505	m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
506	if (m == NULL) {
507		ICL_DEBUG("failed to receive data digest");
508		return (-1);
509	}
510
511	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
512	m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
513	m_freem(m);
514
515	*availablep -= ISCSI_DATA_DIGEST_SIZE;
516
517	/*
518	 * Note that ip_data_mbuf also contains padding; since digest
519	 * calculation is supposed to include that, we iterate over
520	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
521	 */
522	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
523	if (received_digest != valid_digest) {
524		ICL_WARN("data digest check failed; got 0x%x, "
525		    "should be 0x%x", received_digest, valid_digest);
526		return (-1);
527	}
528
529	return (0);
530}
531
532/*
533 * Somewhat contrary to the name, this attempts to receive only one
534 * "part" of a PDU at a time; call it repeatedly until it returns non-NULL.
535 */
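/*
 * The receive path is a state machine driven by ic_receive_state:
 * BHS -> AHS -> HEADER_DIGEST -> DATA -> DATA_DIGEST -> back to BHS.
 * Before each step, ic_receive_len holds the number of bytes that must
 * already be in the socket buffer for that step to make progress;
 * icl_receive_thread() also uses it as the receive low watermark.
 */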
536static struct icl_pdu *
537icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
538{
539	struct icl_pdu *request;
540	struct socket *so;
541	size_t len;
542	int error;
543	bool more_needed;
544
545	so = ic->ic_socket;
546
547	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
548		KASSERT(ic->ic_receive_pdu == NULL,
549		    ("ic->ic_receive_pdu != NULL"));
550		request = icl_pdu_new(ic, M_NOWAIT);
551		if (request == NULL) {
552			ICL_DEBUG("failed to allocate PDU; "
553			    "dropping connection");
554			icl_conn_fail(ic);
555			return (NULL);
556		}
557		ic->ic_receive_pdu = request;
558	} else {
559		KASSERT(ic->ic_receive_pdu != NULL,
560		    ("ic->ic_receive_pdu == NULL"));
561		request = ic->ic_receive_pdu;
562	}
563
564	if (*availablep < ic->ic_receive_len) {
565#if 0
566		ICL_DEBUG("not enough data; need %zd, "
567		    "have %zd", ic->ic_receive_len, *availablep);
568#endif
569		return (NULL);
570	}
571
572	switch (ic->ic_receive_state) {
573	case ICL_CONN_STATE_BHS:
574		//ICL_DEBUG("receiving BHS");
575		error = icl_pdu_receive_bhs(request, availablep);
576		if (error != 0) {
577			ICL_DEBUG("failed to receive BHS; "
578			    "dropping connection");
579			break;
580		}
581
582		/*
583		 * We don't enforce any limit for AHS length;
584		 * its length is stored in an 8-bit field.
585		 */
586
587		len = icl_pdu_data_segment_length(request);
588		if (len > ic->ic_max_data_segment_length) {
589			ICL_WARN("received data segment "
590			    "length %zd is larger than negotiated "
591			    "MaxDataSegmentLength %zd; "
592			    "dropping connection",
593			    len, ic->ic_max_data_segment_length);
594			error = EINVAL;
595			break;
596		}
597
598		ic->ic_receive_state = ICL_CONN_STATE_AHS;
599		ic->ic_receive_len = icl_pdu_ahs_length(request);
600		break;
601
602	case ICL_CONN_STATE_AHS:
603		//ICL_DEBUG("receiving AHS");
604		error = icl_pdu_receive_ahs(request, availablep);
605		if (error != 0) {
606			ICL_DEBUG("failed to receive AHS; "
607			    "dropping connection");
608			break;
609		}
610		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
611		if (ic->ic_header_crc32c == false)
612			ic->ic_receive_len = 0;
613		else
614			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
615		break;
616
617	case ICL_CONN_STATE_HEADER_DIGEST:
618		//ICL_DEBUG("receiving header digest");
619		error = icl_pdu_check_header_digest(request, availablep);
620		if (error != 0) {
621			ICL_DEBUG("header digest failed; "
622			    "dropping connection");
623			break;
624		}
625
626		ic->ic_receive_state = ICL_CONN_STATE_DATA;
627		ic->ic_receive_len =
628		    icl_pdu_data_segment_receive_len(request);
629		break;
630
631	case ICL_CONN_STATE_DATA:
632		//ICL_DEBUG("receiving data segment");
633		error = icl_pdu_receive_data_segment(request, availablep,
634		    &more_needed);
635		if (error != 0) {
636			ICL_DEBUG("failed to receive data segment; "
637			    "dropping connection");
638			break;
639		}
640
641		if (more_needed)
642			break;
643
644		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
645		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
646			ic->ic_receive_len = 0;
647		else
648			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
649		break;
650
651	case ICL_CONN_STATE_DATA_DIGEST:
652		//ICL_DEBUG("receiving data digest");
653		error = icl_pdu_check_data_digest(request, availablep);
654		if (error != 0) {
655			ICL_DEBUG("data digest failed; "
656			    "dropping connection");
657			break;
658		}
659
660		/*
661		 * We've received complete PDU; reset the receive state machine
662		 * and return the PDU.
663		 */
664		ic->ic_receive_state = ICL_CONN_STATE_BHS;
665		ic->ic_receive_len = sizeof(struct iscsi_bhs);
666		ic->ic_receive_pdu = NULL;
667		return (request);
668
669	default:
670		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
671	}
672
673	if (error != 0) {
674		/*
675		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
676		 * and will get freed in icl_conn_close().
677		 */
678		icl_conn_fail(ic);
679	}
680
681	return (NULL);
682}
683
684static void
685icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
686{
687	struct icl_pdu *response;
688	struct socket *so;
689
690	so = ic->ic_socket;
691
692	/*
693	 * This can never happen; we're careful to only mess with ic->ic_socket
694	 * pointer when the send/receive threads are not running.
695	 */
696	KASSERT(so != NULL, ("NULL socket"));
697
698	for (;;) {
699		if (ic->ic_disconnecting)
700			return;
701
702		if (so->so_error != 0) {
703			ICL_DEBUG("connection error %d; "
704			    "dropping connection", so->so_error);
705			icl_conn_fail(ic);
706			return;
707		}
708
709		/*
710		 * Loop until we have a complete PDU or there is not enough
711		 * data in the socket buffer.
712		 */
713		if (available < ic->ic_receive_len) {
714#if 0
715			ICL_DEBUG("not enough data; have %zd, "
716			    "need %zd", available,
717			    ic->ic_receive_len);
718#endif
719			return;
720		}
721
722		response = icl_conn_receive_pdu(ic, &available);
723		if (response == NULL)
724			continue;
725
726		if (response->ip_ahs_len > 0) {
727			ICL_WARN("received PDU with unsupported "
728			    "AHS; opcode 0x%x; dropping connection",
729			    response->ip_bhs->bhs_opcode);
730			icl_pdu_free(response);
731			icl_conn_fail(ic);
732			return;
733		}
734
735		(ic->ic_receive)(response);
736	}
737}
738
739static void
740icl_receive_thread(void *arg)
741{
742	struct icl_conn *ic;
743	size_t available;
744	struct socket *so;
745
746	ic = arg;
747	so = ic->ic_socket;
748
749	ICL_CONN_LOCK(ic);
750	ic->ic_receive_running = true;
751	ICL_CONN_UNLOCK(ic);
752
753	for (;;) {
754		if (ic->ic_disconnecting) {
755			//ICL_DEBUG("terminating");
756			break;
757		}
758
759		/*
760		 * Set the low watermark, to be checked by
761		 * soreadable() in icl_soupcall_receive()
762		 * to avoid unnecessary wakeups until there
763		 * is enough data received to read the PDU.
764		 */
765		SOCKBUF_LOCK(&so->so_rcv);
766		available = so->so_rcv.sb_cc;
767		if (available < ic->ic_receive_len) {
768			so->so_rcv.sb_lowat = ic->ic_receive_len;
769			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
770		} else
771			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
772		SOCKBUF_UNLOCK(&so->so_rcv);
773
774		icl_conn_receive_pdus(ic, available);
775	}
776
777	ICL_CONN_LOCK(ic);
778	ic->ic_receive_running = false;
779	ICL_CONN_UNLOCK(ic);
780	kthread_exit();
781}
782
783static int
784icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
785{
786	struct icl_conn *ic;
787
788	if (!soreadable(so))
789		return (SU_OK);
790
791	ic = arg;
792	cv_signal(&ic->ic_receive_cv);
793	return (SU_OK);
794}
795
796static int
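/*
 * Prepare a PDU for transmission: store ip_data_len in the BHS
 * DataSegmentLength field, append the header digest (if negotiated),
 * pad the data segment to a multiple of 4 bytes, append the data
 * digest (if negotiated), and concatenate everything into a single
 * mbuf chain hanging off ip_bhs_mbuf.  Returns 0 on success, 1 on failure.
 */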
797icl_pdu_finalize(struct icl_pdu *request)
798{
799	size_t padding, pdu_len;
800	uint32_t digest, zero = 0;
801	int ok;
802	struct icl_conn *ic;
803
804	ic = request->ip_conn;
805
806	icl_pdu_set_data_segment_length(request, request->ip_data_len);
807
808	pdu_len = icl_pdu_size(request);
809
810	if (ic->ic_header_crc32c) {
811		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
812		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
813		    (void *)&digest);
814		if (ok != 1) {
815			ICL_WARN("failed to append header digest");
816			return (1);
817		}
818	}
819
820	if (request->ip_data_len != 0) {
821		padding = icl_pdu_padding(request);
822		if (padding > 0) {
823			ok = m_append(request->ip_data_mbuf, padding,
824			    (void *)&zero);
825			if (ok != 1) {
826				ICL_WARN("failed to append padding");
827				return (1);
828			}
829		}
830
831		if (ic->ic_data_crc32c) {
832			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
833
834			ok = m_append(request->ip_data_mbuf, sizeof(digest),
835			    (void *)&digest);
836			if (ok != 1) {
837				ICL_WARN("failed to append data digest");
838				return (1);
839			}
840		}
841
842		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
843		request->ip_data_mbuf = NULL;
844	}
845
846	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
847
848	return (0);
849}
850
851static void
852icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
853{
854	struct icl_pdu *request, *request2;
855	struct socket *so;
856	size_t available, size, size2;
857	int coalesced, error;
858
859	ICL_CONN_LOCK_ASSERT_NOT(ic);
860
861	so = ic->ic_socket;
862
863	SOCKBUF_LOCK(&so->so_snd);
864	/*
865	 * Check how much space we have for transmit.  We can't just
866	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
867	 * as it always frees the mbuf chain passed to it, even in case
868	 * of error.
869	 */
870	available = sbspace(&so->so_snd);
871
872	/*
873	 * Notify the socket upcall that we don't need wakeups
874	 * for the time being.
875	 */
876	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
877	SOCKBUF_UNLOCK(&so->so_snd);
878
879	while (!STAILQ_EMPTY(queue)) {
880		if (ic->ic_disconnecting)
881			return;
882		request = STAILQ_FIRST(queue);
883		size = icl_pdu_size(request);
884		if (available < size) {
885
886			/*
887			 * Set the low watermark, to be checked by
888			 * sowriteable() in icl_soupcall_send()
889			 * to avoid unnecessary wakeups until there
890			 * is enough space for the PDU to fit.
891			 */
892			SOCKBUF_LOCK(&so->so_snd);
893			available = sbspace(&so->so_snd);
894			if (available < size) {
895#if 1
896				ICL_DEBUG("no space to send; "
897				    "have %zd, need %zd",
898				    available, size);
899#endif
900				so->so_snd.sb_lowat = size;
901				SOCKBUF_UNLOCK(&so->so_snd);
902				return;
903			}
904			SOCKBUF_UNLOCK(&so->so_snd);
905		}
906		STAILQ_REMOVE_HEAD(queue, ip_next);
907		error = icl_pdu_finalize(request);
908		if (error != 0) {
909			ICL_DEBUG("failed to finalize PDU; "
910			    "dropping connection");
911			icl_conn_fail(ic);
912			icl_pdu_free(request);
913			return;
914		}
915		if (coalesce) {
916			coalesced = 1;
917			for (;;) {
918				request2 = STAILQ_FIRST(queue);
919				if (request2 == NULL)
920					break;
921				size2 = icl_pdu_size(request2);
922				if (available < size + size2)
923					break;
924				STAILQ_REMOVE_HEAD(queue, ip_next);
925				error = icl_pdu_finalize(request2);
926				if (error != 0) {
927					ICL_DEBUG("failed to finalize PDU; "
928					    "dropping connection");
929					icl_conn_fail(ic);
930					icl_pdu_free(request);
931					icl_pdu_free(request2);
932					return;
933				}
934				m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
935				request2->ip_bhs_mbuf = NULL;
936				request->ip_bhs_mbuf->m_pkthdr.len += size2;
937				size += size2;
938				STAILQ_REMOVE_AFTER(queue, request, ip_next);
939				icl_pdu_free(request2);
940				coalesced++;
941			}
942#if 0
943			if (coalesced > 1) {
944				ICL_DEBUG("coalesced %d PDUs into %zd bytes",
945				    coalesced, size);
946			}
947#endif
948		}
949		available -= size;
950		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
951		    NULL, MSG_DONTWAIT, curthread);
952		request->ip_bhs_mbuf = NULL; /* sosend() consumes the mbuf. */
953		if (error != 0) {
954			ICL_DEBUG("failed to send PDU, error %d; "
955			    "dropping connection", error);
956			icl_conn_fail(ic);
957			icl_pdu_free(request);
958			return;
959		}
960		icl_pdu_free(request);
961	}
962}
963
964static void
965icl_send_thread(void *arg)
966{
967	struct icl_conn *ic;
968	struct icl_pdu_stailq queue;
969
970	ic = arg;
971
972	STAILQ_INIT(&queue);
973
974	ICL_CONN_LOCK(ic);
975	ic->ic_send_running = true;
976
977	for (;;) {
978		if (ic->ic_disconnecting) {
979			//ICL_DEBUG("terminating");
980			break;
981		}
982
983		for (;;) {
984			/*
985			 * If the local queue is empty, populate it from
986			 * the main one.  This way the icl_conn_send_pdus()
987			 * can go through all the queued PDUs without holding
988			 * any locks.
989			 */
990			if (STAILQ_EMPTY(&queue))
991				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
992
993			ic->ic_check_send_space = false;
994			ICL_CONN_UNLOCK(ic);
995			icl_conn_send_pdus(ic, &queue);
996			ICL_CONN_LOCK(ic);
997
998			/*
999			 * icl_soupcall_send() was called since the last
1000			 * call to sbspace(); go around.
1001			 */
1002			if (ic->ic_check_send_space)
1003				continue;
1004
1005			/*
1006			 * Local queue is empty, but we still have PDUs
1007			 * in the main one; go around.
1008			 */
1009			if (STAILQ_EMPTY(&queue) &&
1010			    !STAILQ_EMPTY(&ic->ic_to_send))
1011				continue;
1012
1013			/*
1014			 * There might be some stuff in the local queue,
1015			 * which didn't get sent due to not having enough send
1016			 * space.  Wait for socket upcall.
1017			 */
1018			break;
1019		}
1020
1021		cv_wait(&ic->ic_send_cv, ic->ic_lock);
1022	}
1023
1024	/*
1025	 * We're exiting; move PDUs back to the main queue, so they can
1026	 * get freed properly.  At this point ordering doesn't matter.
1027	 */
1028	STAILQ_CONCAT(&ic->ic_to_send, &queue);
1029
1030	ic->ic_send_running = false;
1031	ICL_CONN_UNLOCK(ic);
1032	kthread_exit();
1033}
1034
1035static int
1036icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1037{
1038	struct icl_conn *ic;
1039
1040	if (!sowriteable(so))
1041		return (SU_OK);
1042
1043	ic = arg;
1044
1045	ICL_CONN_LOCK(ic);
1046	ic->ic_check_send_space = true;
1047	ICL_CONN_UNLOCK(ic);
1048
1049	cv_signal(&ic->ic_send_cv);
1050
1051	return (SU_OK);
1052}
1053
1054int
1055icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1056    int flags)
1057{
1058	struct mbuf *mb, *newmb;
1059	size_t copylen, off = 0;
1060
1061	KASSERT(len > 0, ("len == 0"));
1062
1063	newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1064	if (newmb == NULL) {
1065		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1066		return (ENOMEM);
1067	}
1068
1069	for (mb = newmb; mb != NULL; mb = mb->m_next) {
1070		copylen = min(M_TRAILINGSPACE(mb), len - off);
1071		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1072		mb->m_len = copylen;
1073		off += copylen;
1074	}
1075	KASSERT(off == len, ("%s: off != len", __func__));
1076
1077	if (request->ip_data_mbuf == NULL) {
1078		request->ip_data_mbuf = newmb;
1079		request->ip_data_len = len;
1080	} else {
1081		m_cat(request->ip_data_mbuf, newmb);
1082		request->ip_data_len += len;
1083	}
1084
1085	return (0);
1086}
1087
1088void
1089icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
1090{
1091
1092	m_copydata(ip->ip_data_mbuf, off, len, addr);
1093}
1094
1095void
1096icl_pdu_queue(struct icl_pdu *ip)
1097{
1098	struct icl_conn *ic;
1099
1100	ic = ip->ip_conn;
1101
1102	ICL_CONN_LOCK_ASSERT(ic);
1103
1104	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1105		ICL_DEBUG("icl_pdu_queue on closed connection");
1106		icl_pdu_free(ip);
1107		return;
1108	}
1109
1110	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1111		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1112		/*
1113		 * If the queue is not empty, someone else had already
1114		 * signaled the send thread; no need to do that again,
1115		 * just return.
1116		 */
1117		return;
1118	}
1119
1120	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1121	cv_signal(&ic->ic_send_cv);
1122}
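/*
 * Typical usage on the send side, as a rough sketch only (the actual
 * BHS contents and error handling depend on the caller, which also
 * supplied ic_lock via icl_conn_new()):
 *
 *	ip = icl_pdu_new_bhs(ic, M_NOWAIT);
 *	... fill in ip->ip_bhs ...
 *	icl_pdu_append_data(ip, buf, len, M_NOWAIT);
 *	mtx_lock(ic->ic_lock);
 *	icl_pdu_queue(ip);
 *	mtx_unlock(ic->ic_lock);
 *
 * icl_pdu_queue() takes ownership of the PDU; the send thread finalizes,
 * sends and frees it.
 */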
1123
1124struct icl_conn *
1125icl_conn_new(const char *name, struct mtx *lock)
1126{
1127	struct icl_conn *ic;
1128
1129	refcount_acquire(&icl_ncons);
1130
1131	ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
1132
1133	STAILQ_INIT(&ic->ic_to_send);
1134	ic->ic_lock = lock;
1135	cv_init(&ic->ic_send_cv, "icl_tx");
1136	cv_init(&ic->ic_receive_cv, "icl_rx");
1137#ifdef DIAGNOSTIC
1138	refcount_init(&ic->ic_outstanding_pdus, 0);
1139#endif
1140	ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1141	ic->ic_name = name;
1142
1143	return (ic);
1144}
1145
1146void
1147icl_conn_free(struct icl_conn *ic)
1148{
1149
1150	cv_destroy(&ic->ic_send_cv);
1151	cv_destroy(&ic->ic_receive_cv);
1152	uma_zfree(icl_conn_zone, ic);
1153	refcount_release(&icl_ncons);
1154}
1155
1156static int
1157icl_conn_start(struct icl_conn *ic)
1158{
1159	size_t minspace;
1160	struct sockopt opt;
1161	int error, one = 1;
1162
1163	ICL_CONN_LOCK(ic);
1164
1165	/*
1166	 * XXX: Ugly hack.
1167	 */
1168	if (ic->ic_socket == NULL) {
1169		ICL_CONN_UNLOCK(ic);
1170		return (EINVAL);
1171	}
1172
1173	ic->ic_receive_state = ICL_CONN_STATE_BHS;
1174	ic->ic_receive_len = sizeof(struct iscsi_bhs);
1175	ic->ic_disconnecting = false;
1176
1177	ICL_CONN_UNLOCK(ic);
1178
1179	/*
1180	 * For sendspace, this is required because the current code cannot
1181	 * send a PDU in pieces; thus, the minimum buffer size is equal
1182	 * to the maximum PDU size.  "+4" is to account for possible padding.
1183	 *
1184	 * What we should actually do here is use autoscaling, with the
1185	 * minimum buffer size set to "minspace".  I don't know a way to do
1186	 * that, though.
1187	 */
1188	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1189	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1190	if (sendspace < minspace) {
1191		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1192		    minspace);
1193		sendspace = minspace;
1194	}
1195	if (recvspace < minspace) {
1196		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1197		    minspace);
1198		recvspace = minspace;
1199	}
1200
1201	error = soreserve(ic->ic_socket, sendspace, recvspace);
1202	if (error != 0) {
1203		ICL_WARN("soreserve failed with error %d", error);
1204		icl_conn_close(ic);
1205		return (error);
1206	}
1207
1208	/*
1209	 * Disable Nagle.
1210	 */
1211	bzero(&opt, sizeof(opt));
1212	opt.sopt_dir = SOPT_SET;
1213	opt.sopt_level = IPPROTO_TCP;
1214	opt.sopt_name = TCP_NODELAY;
1215	opt.sopt_val = &one;
1216	opt.sopt_valsize = sizeof(one);
1217	error = sosetopt(ic->ic_socket, &opt);
1218	if (error != 0) {
1219		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1220		icl_conn_close(ic);
1221		return (error);
1222	}
1223
1224	/*
1225	 * Start threads.
1226	 */
1227	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1228	    ic->ic_name);
1229	if (error != 0) {
1230		ICL_WARN("kthread_add(9) failed with error %d", error);
1231		icl_conn_close(ic);
1232		return (error);
1233	}
1234
1235	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1236	    ic->ic_name);
1237	if (error != 0) {
1238		ICL_WARN("kthread_add(9) failed with error %d", error);
1239		icl_conn_close(ic);
1240		return (error);
1241	}
1242
1243	/*
1244	 * Register socket upcall, to get notified about incoming PDUs
1245	 * and free space to send outgoing ones.
1246	 */
1247	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1248	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1249	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1250	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1251	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1252	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1253
1254	return (0);
1255}
1256
1257int
1258icl_conn_handoff(struct icl_conn *ic, int fd)
1259{
1260	struct file *fp;
1261	struct socket *so;
1262	cap_rights_t rights;
1263	int error;
1264
1265	ICL_CONN_LOCK_ASSERT_NOT(ic);
1266
1267	/*
1268	 * Steal the socket from userland.
1269	 */
1270	error = fget(curthread, fd,
1271	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1272	if (error != 0)
1273		return (error);
1274	if (fp->f_type != DTYPE_SOCKET) {
1275		fdrop(fp, curthread);
1276		return (EINVAL);
1277	}
1278	so = fp->f_data;
1279	if (so->so_type != SOCK_STREAM) {
1280		fdrop(fp, curthread);
1281		return (EINVAL);
1282	}
1283
1284	ICL_CONN_LOCK(ic);
1285
1286	if (ic->ic_socket != NULL) {
1287		ICL_CONN_UNLOCK(ic);
1288		fdrop(fp, curthread);
1289		return (EBUSY);
1290	}
1291
1292	ic->ic_socket = fp->f_data;
1293	fp->f_ops = &badfileops;
1294	fp->f_data = NULL;
1295	fdrop(fp, curthread);
1296	ICL_CONN_UNLOCK(ic);
1297
1298	error = icl_conn_start(ic);
1299
1300	return (error);
1301}
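/*
 * Typical connection life cycle, as a rough sketch only (names of the
 * callbacks and the lock are up to the caller; error handling omitted):
 *
 *	ic = icl_conn_new("name", &caller_lock);
 *	ic->ic_receive = caller_receive_callback;
 *	ic->ic_error = caller_error_callback;
 *	error = icl_conn_handoff(ic, fd);
 *	...
 *	icl_conn_close(ic);
 *	icl_conn_free(ic);
 */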
1302
1303void
1304icl_conn_shutdown(struct icl_conn *ic)
1305{
1306	ICL_CONN_LOCK_ASSERT_NOT(ic);
1307
1308	ICL_CONN_LOCK(ic);
1309	if (ic->ic_socket == NULL) {
1310		ICL_CONN_UNLOCK(ic);
1311		return;
1312	}
1313	ICL_CONN_UNLOCK(ic);
1314
1315	soshutdown(ic->ic_socket, SHUT_RDWR);
1316}
1317
1318void
1319icl_conn_close(struct icl_conn *ic)
1320{
1321	struct icl_pdu *pdu;
1322
1323	ICL_CONN_LOCK_ASSERT_NOT(ic);
1324
1325	ICL_CONN_LOCK(ic);
1326	if (ic->ic_socket == NULL) {
1327		ICL_CONN_UNLOCK(ic);
1328		return;
1329	}
1330
1331	/*
1332	 * Deregister socket upcalls.
1333	 */
1334	ICL_CONN_UNLOCK(ic);
1335	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1336	if (ic->ic_socket->so_snd.sb_upcall != NULL)
1337		soupcall_clear(ic->ic_socket, SO_SND);
1338	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1339	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1340	if (ic->ic_socket->so_rcv.sb_upcall != NULL)
1341		soupcall_clear(ic->ic_socket, SO_RCV);
1342	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1343	ICL_CONN_LOCK(ic);
1344
1345	ic->ic_disconnecting = true;
1346
1347	/*
1348	 * Wake up the threads, so they can properly terminate.
1349	 */
1350	cv_signal(&ic->ic_receive_cv);
1351	cv_signal(&ic->ic_send_cv);
1352	while (ic->ic_receive_running || ic->ic_send_running) {
1353		//ICL_DEBUG("waiting for send/receive threads to terminate");
1354		ICL_CONN_UNLOCK(ic);
1355		cv_signal(&ic->ic_receive_cv);
1356		cv_signal(&ic->ic_send_cv);
1357		pause("icl_close", 1 * hz);
1358		ICL_CONN_LOCK(ic);
1359	}
1360	//ICL_DEBUG("send/receive threads terminated");
1361
1362	ICL_CONN_UNLOCK(ic);
1363	soclose(ic->ic_socket);
1364	ICL_CONN_LOCK(ic);
1365	ic->ic_socket = NULL;
1366
1367	if (ic->ic_receive_pdu != NULL) {
1368		//ICL_DEBUG("freeing partially received PDU");
1369		icl_pdu_free(ic->ic_receive_pdu);
1370		ic->ic_receive_pdu = NULL;
1371	}
1372
1373	/*
1374	 * Remove any outstanding PDUs from the send queue.
1375	 */
1376	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1377		pdu = STAILQ_FIRST(&ic->ic_to_send);
1378		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1379		icl_pdu_free(pdu);
1380	}
1381
1382	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1383	    ("destroying session with non-empty send queue"));
1384#ifdef DIAGNOSTIC
1385	KASSERT(ic->ic_outstanding_pdus == 0,
1386	    ("destroying session with %d outstanding PDUs",
1387	     ic->ic_outstanding_pdus));
1388#endif
1389	ICL_CONN_UNLOCK(ic);
1390}
1391
1392bool
1393icl_conn_connected(struct icl_conn *ic)
1394{
1395	ICL_CONN_LOCK_ASSERT_NOT(ic);
1396
1397	ICL_CONN_LOCK(ic);
1398	if (ic->ic_socket == NULL) {
1399		ICL_CONN_UNLOCK(ic);
1400		return (false);
1401	}
1402	if (ic->ic_socket->so_error != 0) {
1403		ICL_CONN_UNLOCK(ic);
1404		return (false);
1405	}
1406	ICL_CONN_UNLOCK(ic);
1407	return (true);
1408}
1409
1410#ifdef ICL_KERNEL_PROXY
1411int
1412icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
1413{
1414	int error;
1415
1416	ICL_CONN_LOCK_ASSERT_NOT(ic);
1417
1418	if (so->so_type != SOCK_STREAM)
1419		return (EINVAL);
1420
1421	ICL_CONN_LOCK(ic);
1422	if (ic->ic_socket != NULL) {
1423		ICL_CONN_UNLOCK(ic);
1424		return (EBUSY);
1425	}
1426	ic->ic_socket = so;
1427	ICL_CONN_UNLOCK(ic);
1428
1429	error = icl_conn_start(ic);
1430
1431	return (error);
1432}
1433#endif /* ICL_KERNEL_PROXY */
1434
1435static int
1436icl_unload(void)
1437{
1438
1439	if (icl_ncons != 0)
1440		return (EBUSY);
1441
1442	uma_zdestroy(icl_conn_zone);
1443	uma_zdestroy(icl_pdu_zone);
1444
1445	return (0);
1446}
1447
1448static void
1449icl_load(void)
1450{
1451
1452	icl_conn_zone = uma_zcreate("icl_conn",
1453	    sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1454	    UMA_ALIGN_PTR, 0);
1455	icl_pdu_zone = uma_zcreate("icl_pdu",
1456	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1457	    UMA_ALIGN_PTR, 0);
1458
1459	refcount_init(&icl_ncons, 0);
1460}
1461
1462static int
1463icl_modevent(module_t mod, int what, void *arg)
1464{
1465
1466	switch (what) {
1467	case MOD_LOAD:
1468		icl_load();
1469		return (0);
1470	case MOD_UNLOAD:
1471		return (icl_unload());
1472	default:
1473		return (EINVAL);
1474	}
1475}
1476
1477moduledata_t icl_data = {
1478	"icl",
1479	icl_modevent,
1480	0
1481};
1482
1483DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1484MODULE_VERSION(icl, 1);
1485