icl.c revision 280258
1/*-
2 * Copyright (c) 2012 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31/*
32 * iSCSI Common Layer.  It's used by both the initiator and target to send
33 * and receive iSCSI PDUs.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: stable/10/sys/dev/iscsi/icl.c 280258 2015-03-19 13:37:36Z rwatson $");
38
39#include <sys/param.h>
40#include <sys/capsicum.h>
41#include <sys/condvar.h>
42#include <sys/conf.h>
43#include <sys/file.h>
44#include <sys/kernel.h>
45#include <sys/kthread.h>
46#include <sys/lock.h>
47#include <sys/mbuf.h>
48#include <sys/mutex.h>
49#include <sys/module.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/sysctl.h>
54#include <sys/systm.h>
55#include <sys/sx.h>
56#include <sys/uio.h>
57#include <vm/uma.h>
58#include <netinet/in.h>
59#include <netinet/tcp.h>
60
61#include <dev/iscsi/icl.h>
62#include <dev/iscsi/iscsi_proto.h>
63
/* Root of the kern.icl sysctl subtree; the knobs below hang off it. */
SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
/* Verbosity: ICL_WARN() prints when > 0, ICL_DEBUG() prints when > 1. */
static int debug = 1;
TUNABLE_INT("kern.icl.debug", &debug);
SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debug, 0, "Enable debug messages");
/* When non-zero, icl_conn_send_pdus() merges queued PDUs into one send. */
static int coalesce = 1;
TUNABLE_INT("kern.icl.coalesce", &coalesce);
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
/* Cap used by icl_pdu_data_segment_receive_len() for partial reads. */
static int partial_receive_len = 128 * 1024;
TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
/* Socket buffer sizes passed to soreserve() in icl_conn_start(). */
static int sendspace = 1048576;
TUNABLE_INT("kern.icl.sendspace", &sendspace);
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
TUNABLE_INT("kern.icl.recvspace", &recvspace);
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

/* UMA zones backing struct icl_conn and struct icl_pdu allocations. */
static uma_zone_t icl_conn_zone;
static uma_zone_t icl_pdu_zone;

/* Acquired in icl_conn_new() and released in icl_conn_free(). */
static volatile u_int	icl_ncons;
91
/*
 * Print a debug message, prefixed with the calling function's name.
 * Emitted only when the "debug" knob is greater than 1.
 */
#define	ICL_DEBUG(X, ...)						\
	do {								\
		if (debug > 1)						\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
	} while (0)

/*
 * Print a warning, prefixed with "WARNING: " and the calling function's
 * name.  Emitted only when the "debug" knob is greater than 0.
 */
#define	ICL_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

/*
 * Wrappers around the mutex pointed to by the connection's ic_lock.
 * The argument is parenthesized so that any pointer expression, not just
 * a plain identifier, expands correctly.
 */
#define ICL_CONN_LOCK(X)		mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert((X)->ic_lock, MA_NOTOWNED)
110
/* Head type for a singly-linked tail queue of PDUs (see icl_conn_send_pdus()). */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
112
113static void
114icl_conn_fail(struct icl_conn *ic)
115{
116	if (ic->ic_socket == NULL)
117		return;
118
119	/*
120	 * XXX
121	 */
122	ic->ic_socket->so_error = EDOOFUS;
123	(ic->ic_error)(ic);
124}
125
126static struct mbuf *
127icl_conn_receive(struct icl_conn *ic, size_t len)
128{
129	struct uio uio;
130	struct socket *so;
131	struct mbuf *m;
132	int error, flags;
133
134	so = ic->ic_socket;
135
136	memset(&uio, 0, sizeof(uio));
137	uio.uio_resid = len;
138
139	flags = MSG_DONTWAIT;
140	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
141	if (error != 0) {
142		ICL_DEBUG("soreceive error %d", error);
143		return (NULL);
144	}
145	if (uio.uio_resid != 0) {
146		m_freem(m);
147		ICL_DEBUG("short read");
148		return (NULL);
149	}
150
151	return (m);
152}
153
154static struct icl_pdu *
155icl_pdu_new_empty(struct icl_conn *ic, int flags)
156{
157	struct icl_pdu *ip;
158
159#ifdef DIAGNOSTIC
160	refcount_acquire(&ic->ic_outstanding_pdus);
161#endif
162	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
163	if (ip == NULL) {
164		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
165#ifdef DIAGNOSTIC
166		refcount_release(&ic->ic_outstanding_pdus);
167#endif
168		return (NULL);
169	}
170
171	ip->ip_conn = ic;
172
173	return (ip);
174}
175
176void
177icl_pdu_free(struct icl_pdu *ip)
178{
179	struct icl_conn *ic;
180
181	ic = ip->ip_conn;
182
183	m_freem(ip->ip_bhs_mbuf);
184	m_freem(ip->ip_ahs_mbuf);
185	m_freem(ip->ip_data_mbuf);
186	uma_zfree(icl_pdu_zone, ip);
187#ifdef DIAGNOSTIC
188	refcount_release(&ic->ic_outstanding_pdus);
189#endif
190}
191
192/*
193 * Allocate icl_pdu with empty BHS to fill up by the caller.
194 */
195struct icl_pdu *
196icl_pdu_new(struct icl_conn *ic, int flags)
197{
198	struct icl_pdu *ip;
199
200	ip = icl_pdu_new_empty(ic, flags);
201	if (ip == NULL)
202		return (NULL);
203
204	ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
205	    flags, MT_DATA, M_PKTHDR);
206	if (ip->ip_bhs_mbuf == NULL) {
207		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
208		icl_pdu_free(ip);
209		return (NULL);
210	}
211	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
212	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
213	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
214
215	return (ip);
216}
217
218static int
219icl_pdu_ahs_length(const struct icl_pdu *request)
220{
221
222	return (request->ip_bhs->bhs_total_ahs_len * 4);
223}
224
225size_t
226icl_pdu_data_segment_length(const struct icl_pdu *request)
227{
228	uint32_t len = 0;
229
230	len += request->ip_bhs->bhs_data_segment_len[0];
231	len <<= 8;
232	len += request->ip_bhs->bhs_data_segment_len[1];
233	len <<= 8;
234	len += request->ip_bhs->bhs_data_segment_len[2];
235
236	return (len);
237}
238
239static void
240icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
241{
242
243	response->ip_bhs->bhs_data_segment_len[2] = len;
244	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
245	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
246}
247
248static size_t
249icl_pdu_padding(const struct icl_pdu *ip)
250{
251
252	if ((ip->ip_data_len % 4) != 0)
253		return (4 - (ip->ip_data_len % 4));
254
255	return (0);
256}
257
258static size_t
259icl_pdu_size(const struct icl_pdu *response)
260{
261	size_t len;
262
263	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
264
265	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
266	    icl_pdu_padding(response);
267	if (response->ip_conn->ic_header_crc32c)
268		len += ISCSI_HEADER_DIGEST_SIZE;
269	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
270		len += ISCSI_DATA_DIGEST_SIZE;
271
272	return (len);
273}
274
275static int
276icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
277{
278	struct mbuf *m;
279
280	m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
281	if (m == NULL) {
282		ICL_DEBUG("failed to receive BHS");
283		return (-1);
284	}
285
286	request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
287	if (request->ip_bhs_mbuf == NULL) {
288		ICL_WARN("m_pullup failed");
289		return (-1);
290	}
291	request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
292
293	/*
294	 * XXX: For architectures with strict alignment requirements
295	 * 	we may need to allocate ip_bhs and copy the data into it.
296	 * 	For some reason, though, not doing this doesn't seem
297	 * 	to cause problems; tested on sparc64.
298	 */
299
300	*availablep -= sizeof(struct iscsi_bhs);
301	return (0);
302}
303
304static int
305icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
306{
307
308	request->ip_ahs_len = icl_pdu_ahs_length(request);
309	if (request->ip_ahs_len == 0)
310		return (0);
311
312	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
313	    request->ip_ahs_len);
314	if (request->ip_ahs_mbuf == NULL) {
315		ICL_DEBUG("failed to receive AHS");
316		return (-1);
317	}
318
319	*availablep -= request->ip_ahs_len;
320	return (0);
321}
322
323static uint32_t
324icl_mbuf_to_crc32c(const struct mbuf *m0)
325{
326	uint32_t digest = 0xffffffff;
327	const struct mbuf *m;
328
329	for (m = m0; m != NULL; m = m->m_next)
330		digest = calculate_crc32c(digest,
331		    mtod(m, const void *), m->m_len);
332
333	digest = digest ^ 0xffffffff;
334
335	return (digest);
336}
337
338static int
339icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
340{
341	struct mbuf *m;
342	uint32_t received_digest, valid_digest;
343
344	if (request->ip_conn->ic_header_crc32c == false)
345		return (0);
346
347	m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
348	if (m == NULL) {
349		ICL_DEBUG("failed to receive header digest");
350		return (-1);
351	}
352
353	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
354	m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
355	m_freem(m);
356
357	*availablep -= ISCSI_HEADER_DIGEST_SIZE;
358
359	/*
360	 * XXX: Handle AHS.
361	 */
362	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
363	if (received_digest != valid_digest) {
364		ICL_WARN("header digest check failed; got 0x%x, "
365		    "should be 0x%x", received_digest, valid_digest);
366		return (-1);
367	}
368
369	return (0);
370}
371
372/*
373 * Return the number of bytes that should be waiting in the receive socket
374 * before icl_pdu_receive_data_segment() gets called.
375 */
376static size_t
377icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
378{
379	size_t len;
380
381	len = icl_pdu_data_segment_length(request);
382	if (len == 0)
383		return (0);
384
385	/*
386	 * Account for the parts of data segment already read from
387	 * the socket buffer.
388	 */
389	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
390	len -= request->ip_data_len;
391
392	/*
393	 * Don't always wait for the full data segment to be delivered
394	 * to the socket; this might badly affect performance due to
395	 * TCP window scaling.
396	 */
397	if (len > partial_receive_len) {
398#if 0
399		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
400		    len, partial_receive_len));
401#endif
402		len = partial_receive_len;
403
404		return (len);
405	}
406
407	/*
408	 * Account for padding.  Note that due to the way code is written,
409	 * the icl_pdu_receive_data_segment() must always receive padding
410	 * along with the last part of data segment, because it would be
411	 * impossible to tell whether we've already received the full data
412	 * segment including padding, or without it.
413	 */
414	if ((len % 4) != 0)
415		len += 4 - (len % 4);
416
417#if 0
418	ICL_DEBUG("need %zd bytes of data", len));
419#endif
420
421	return (len);
422}
423
/*
 * Receive the next part of the data segment of "request".
 *
 * *availablep is the number of bytes the caller believes are readable; it
 * is decremented by whatever gets consumed here.  *more_neededp is set to
 * true when only part of the remaining data could be received, in which
 * case ic_receive_len is updated for the next call.  Returns 0 on success
 * and -1 when the socket receive fails.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/* Padding is derived from the full segment length, not the remainder. */
	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 *
		 * NOTE(review): both operands are size_t, so the subtraction
		 * below wraps if *availablep < padding; presumably the
		 * ic_receive_len check in the caller prevents that -- confirm.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding));
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		/* Append to whatever was received by earlier calls. */
		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len counts payload only; padding is not included. */
		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}
492
493static int
494icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
495{
496	struct mbuf *m;
497	uint32_t received_digest, valid_digest;
498
499	if (request->ip_conn->ic_data_crc32c == false)
500		return (0);
501
502	if (request->ip_data_len == 0)
503		return (0);
504
505	m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
506	if (m == NULL) {
507		ICL_DEBUG("failed to receive data digest");
508		return (-1);
509	}
510
511	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
512	m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
513	m_freem(m);
514
515	*availablep -= ISCSI_DATA_DIGEST_SIZE;
516
517	/*
518	 * Note that ip_data_mbuf also contains padding; since digest
519	 * calculation is supposed to include that, we iterate over
520	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
521	 */
522	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
523	if (received_digest != valid_digest) {
524		ICL_WARN("data digest check failed; got 0x%x, "
525		    "should be 0x%x", received_digest, valid_digest);
526		return (-1);
527	}
528
529	return (0);
530}
531
/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
 *
 * The part to receive is selected by ic->ic_receive_state, which advances
 * BHS -> AHS -> HEADER_DIGEST -> DATA -> DATA_DIGEST and then wraps back
 * to BHS.  ic->ic_receive_len is set to the number of bytes the next step
 * needs.  *availablep is decremented by the bytes consumed.  On any error
 * icl_conn_fail() is called and NULL is returned.
 */
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
{
	struct icl_pdu *request;
	struct socket *so;
	size_t len;
	int error;
	bool more_needed;

	so = ic->ic_socket;

	/* A new PDU is started in BHS state; otherwise continue the old one. */
	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
		KASSERT(ic->ic_receive_pdu == NULL,
		    ("ic->ic_receive_pdu != NULL"));
		request = icl_pdu_new_empty(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		ic->ic_receive_pdu = request;
	} else {
		KASSERT(ic->ic_receive_pdu != NULL,
		    ("ic->ic_receive_pdu == NULL"));
		request = ic->ic_receive_pdu;
	}

	/* Not enough buffered data for the current step; try again later. */
	if (*availablep < ic->ic_receive_len) {
#if 0
		ICL_DEBUG("not enough data; need %zd, "
		    "have %zd", ic->ic_receive_len, *availablep);
#endif
		return (NULL);
	}

	switch (ic->ic_receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		error = icl_pdu_receive_bhs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive BHS; "
			    "dropping connection");
			break;
		}

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in 8 bit field.
		 */

		/* Reject data segments larger than the connection allows. */
		len = icl_pdu_data_segment_length(request);
		if (len > ic->ic_max_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated "
			    "MaxDataSegmentLength %zd; "
			    "dropping connection",
			    len, ic->ic_max_data_segment_length);
			error = EINVAL;
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_AHS;
		ic->ic_receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		error = icl_pdu_receive_ahs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive AHS; "
			    "dropping connection");
			break;
		}
		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		if (ic->ic_header_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_DATA;
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, availablep,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data segment;"
			    "dropping connection");
			break;
		}

		/* Stay in DATA state until the whole segment has arrived. */
		if (more_needed)
			break;

		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_DATA_DIGEST:
		//ICL_DEBUG("receiving data digest");
		error = icl_pdu_check_data_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("data digest failed; "
			    "dropping connection");
			break;
		}

		/*
		 * We've received complete PDU; reset the receive state machine
		 * and return the PDU.
		 */
		ic->ic_receive_state = ICL_CONN_STATE_BHS;
		ic->ic_receive_len = sizeof(struct iscsi_bhs);
		ic->ic_receive_pdu = NULL;
		return (request);

	default:
		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
	}

	if (error != 0) {
		/*
		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
		 * and will get freed in icl_conn_close().
		 */
		icl_conn_fail(ic);
	}

	return (NULL);
}
683
684static void
685icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
686{
687	struct icl_pdu *response;
688	struct socket *so;
689
690	so = ic->ic_socket;
691
692	/*
693	 * This can never happen; we're careful to only mess with ic->ic_socket
694	 * pointer when the send/receive threads are not running.
695	 */
696	KASSERT(so != NULL, ("NULL socket"));
697
698	for (;;) {
699		if (ic->ic_disconnecting)
700			return;
701
702		if (so->so_error != 0) {
703			ICL_DEBUG("connection error %d; "
704			    "dropping connection", so->so_error);
705			icl_conn_fail(ic);
706			return;
707		}
708
709		/*
710		 * Loop until we have a complete PDU or there is not enough
711		 * data in the socket buffer.
712		 */
713		if (available < ic->ic_receive_len) {
714#if 0
715			ICL_DEBUG("not enough data; have %zd, "
716			    "need %zd", available,
717			    ic->ic_receive_len);
718#endif
719			return;
720		}
721
722		response = icl_conn_receive_pdu(ic, &available);
723		if (response == NULL)
724			continue;
725
726		if (response->ip_ahs_len > 0) {
727			ICL_WARN("received PDU with unsupported "
728			    "AHS; opcode 0x%x; dropping connection",
729			    response->ip_bhs->bhs_opcode);
730			icl_pdu_free(response);
731			icl_conn_fail(ic);
732			return;
733		}
734
735		(ic->ic_receive)(response);
736	}
737}
738
/*
 * Body of the per-connection receive kthread.  Sets ic_receive_running
 * while active, repeatedly waits for socket data and passes it to
 * icl_conn_receive_pdus(), and exits once ic_disconnecting is set.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available;
	struct socket *so;

	ic = arg;
	so = ic->ic_socket;

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);

	for (;;) {
		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unnecessary wakeups until there
		 * is enough data received to read the PDU.
		 */
		SOCKBUF_LOCK(&so->so_rcv);
		/*
		 * "available" is sampled before the wait and is not
		 * refreshed after wakeup; it gets re-read on the next
		 * iteration of this loop.
		 */
		available = so->so_rcv.sb_cc;
		if (available < ic->ic_receive_len) {
			so->so_rcv.sb_lowat = ic->ic_receive_len;
			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
		} else
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
		SOCKBUF_UNLOCK(&so->so_rcv);

		icl_conn_receive_pdus(ic, available);
	}

	/* Announce termination; note that it is ic_send_cv that gets signaled. */
	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
783
784static int
785icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
786{
787	struct icl_conn *ic;
788
789	if (!soreadable(so))
790		return (SU_OK);
791
792	ic = arg;
793	cv_signal(&ic->ic_receive_cv);
794	return (SU_OK);
795}
796
797static int
798icl_pdu_finalize(struct icl_pdu *request)
799{
800	size_t padding, pdu_len;
801	uint32_t digest, zero = 0;
802	int ok;
803	struct icl_conn *ic;
804
805	ic = request->ip_conn;
806
807	icl_pdu_set_data_segment_length(request, request->ip_data_len);
808
809	pdu_len = icl_pdu_size(request);
810
811	if (ic->ic_header_crc32c) {
812		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
813		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
814		    (void *)&digest);
815		if (ok != 1) {
816			ICL_WARN("failed to append header digest");
817			return (1);
818		}
819	}
820
821	if (request->ip_data_len != 0) {
822		padding = icl_pdu_padding(request);
823		if (padding > 0) {
824			ok = m_append(request->ip_data_mbuf, padding,
825			    (void *)&zero);
826			if (ok != 1) {
827				ICL_WARN("failed to append padding");
828				return (1);
829			}
830		}
831
832		if (ic->ic_data_crc32c) {
833			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
834
835			ok = m_append(request->ip_data_mbuf, sizeof(digest),
836			    (void *)&digest);
837			if (ok != 1) {
838				ICL_WARN("failed to append data digest");
839				return (1);
840			}
841		}
842
843		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
844		request->ip_data_mbuf = NULL;
845	}
846
847	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
848
849	return (0);
850}
851
852static void
853icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
854{
855	struct icl_pdu *request, *request2;
856	struct socket *so;
857	size_t available, size, size2;
858	int coalesced, error;
859
860	ICL_CONN_LOCK_ASSERT_NOT(ic);
861
862	so = ic->ic_socket;
863
864	SOCKBUF_LOCK(&so->so_snd);
865	/*
866	 * Check how much space do we have for transmit.  We can't just
867	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
868	 * as it always frees the mbuf chain passed to it, even in case
869	 * of error.
870	 */
871	available = sbspace(&so->so_snd);
872
873	/*
874	 * Notify the socket upcall that we don't need wakeups
875	 * for the time being.
876	 */
877	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
878	SOCKBUF_UNLOCK(&so->so_snd);
879
880	while (!STAILQ_EMPTY(queue)) {
881		request = STAILQ_FIRST(queue);
882		size = icl_pdu_size(request);
883		if (available < size) {
884
885			/*
886			 * Set the low watermark, to be checked by
887			 * sowriteable() in icl_soupcall_send()
888			 * to avoid unneccessary wakeups until there
889			 * is enough space for the PDU to fit.
890			 */
891			SOCKBUF_LOCK(&so->so_snd);
892			available = sbspace(&so->so_snd);
893			if (available < size) {
894#if 1
895				ICL_DEBUG("no space to send; "
896				    "have %zd, need %zd",
897				    available, size);
898#endif
899				so->so_snd.sb_lowat = size;
900				SOCKBUF_UNLOCK(&so->so_snd);
901				return;
902			}
903			SOCKBUF_UNLOCK(&so->so_snd);
904		}
905		STAILQ_REMOVE_HEAD(queue, ip_next);
906		error = icl_pdu_finalize(request);
907		if (error != 0) {
908			ICL_DEBUG("failed to finalize PDU; "
909			    "dropping connection");
910			icl_conn_fail(ic);
911			icl_pdu_free(request);
912			return;
913		}
914		if (coalesce) {
915			coalesced = 1;
916			for (;;) {
917				request2 = STAILQ_FIRST(queue);
918				if (request2 == NULL)
919					break;
920				size2 = icl_pdu_size(request2);
921				if (available < size + size2)
922					break;
923				STAILQ_REMOVE_HEAD(queue, ip_next);
924				error = icl_pdu_finalize(request2);
925				if (error != 0) {
926					ICL_DEBUG("failed to finalize PDU; "
927					    "dropping connection");
928					icl_conn_fail(ic);
929					icl_pdu_free(request);
930					icl_pdu_free(request2);
931					return;
932				}
933				m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
934				request2->ip_bhs_mbuf = NULL;
935				request->ip_bhs_mbuf->m_pkthdr.len += size2;
936				size += size2;
937				STAILQ_REMOVE_AFTER(queue, request, ip_next);
938				icl_pdu_free(request2);
939				coalesced++;
940			}
941#if 0
942			if (coalesced > 1) {
943				ICL_DEBUG("coalesced %d PDUs into %zd bytes",
944				    coalesced, size);
945			}
946#endif
947		}
948		available -= size;
949		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
950		    NULL, MSG_DONTWAIT, curthread);
951		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
952		if (error != 0) {
953			ICL_DEBUG("failed to send PDU, error %d; "
954			    "dropping connection", error);
955			icl_conn_fail(ic);
956			icl_pdu_free(request);
957			return;
958		}
959		icl_pdu_free(request);
960	}
961}
962
/*
 * Body of the per-connection send kthread.  Sets ic_send_running while
 * active, moves PDUs from ic->ic_to_send to a private queue and hands
 * them to icl_conn_send_pdus() with the connection lock dropped, and
 * sleeps on ic_send_cv when there is nothing more it can do.  Exits once
 * ic_disconnecting is set.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	ic->ic_send_running = true;

	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			/*
			 * Cleared before sending, so that an upcall which
			 * arrives during the send is noticed below.
			 */
			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/* Sleep until icl_pdu_queue() or the send upcall signals us. */
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	/* Announce termination to whoever is waiting on ic_send_cv. */
	ic->ic_send_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
1034
1035static int
1036icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1037{
1038	struct icl_conn *ic;
1039
1040	if (!sowriteable(so))
1041		return (SU_OK);
1042
1043	ic = arg;
1044
1045	ICL_CONN_LOCK(ic);
1046	ic->ic_check_send_space = true;
1047	ICL_CONN_UNLOCK(ic);
1048
1049	cv_signal(&ic->ic_send_cv);
1050
1051	return (SU_OK);
1052}
1053
1054int
1055icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1056    int flags)
1057{
1058	struct mbuf *mb, *newmb;
1059	size_t copylen, off = 0;
1060
1061	KASSERT(len > 0, ("len == 0"));
1062
1063	newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1064	if (newmb == NULL) {
1065		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1066		return (ENOMEM);
1067	}
1068
1069	for (mb = newmb; mb != NULL; mb = mb->m_next) {
1070		copylen = min(M_TRAILINGSPACE(mb), len - off);
1071		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1072		mb->m_len = copylen;
1073		off += copylen;
1074	}
1075	KASSERT(off == len, ("%s: off != len", __func__));
1076
1077	if (request->ip_data_mbuf == NULL) {
1078		request->ip_data_mbuf = newmb;
1079		request->ip_data_len = len;
1080	} else {
1081		m_cat(request->ip_data_mbuf, newmb);
1082		request->ip_data_len += len;
1083	}
1084
1085	return (0);
1086}
1087
1088void
1089icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
1090{
1091
1092	m_copydata(ip->ip_data_mbuf, off, len, addr);
1093}
1094
1095void
1096icl_pdu_queue(struct icl_pdu *ip)
1097{
1098	struct icl_conn *ic;
1099
1100	ic = ip->ip_conn;
1101
1102	ICL_CONN_LOCK_ASSERT(ic);
1103
1104	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1105		ICL_DEBUG("icl_pdu_queue on closed connection");
1106		icl_pdu_free(ip);
1107		return;
1108	}
1109
1110	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1111		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1112		/*
1113		 * If the queue is not empty, someone else had already
1114		 * signaled the send thread; no need to do that again,
1115		 * just return.
1116		 */
1117		return;
1118	}
1119
1120	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1121	cv_signal(&ic->ic_send_cv);
1122}
1123
1124struct icl_conn *
1125icl_conn_new(const char *name, struct mtx *lock)
1126{
1127	struct icl_conn *ic;
1128
1129	refcount_acquire(&icl_ncons);
1130
1131	ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
1132
1133	STAILQ_INIT(&ic->ic_to_send);
1134	ic->ic_lock = lock;
1135	cv_init(&ic->ic_send_cv, "icl_tx");
1136	cv_init(&ic->ic_receive_cv, "icl_rx");
1137#ifdef DIAGNOSTIC
1138	refcount_init(&ic->ic_outstanding_pdus, 0);
1139#endif
1140	ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1141	ic->ic_name = name;
1142
1143	return (ic);
1144}
1145
1146void
1147icl_conn_free(struct icl_conn *ic)
1148{
1149
1150	cv_destroy(&ic->ic_send_cv);
1151	cv_destroy(&ic->ic_receive_cv);
1152	uma_zfree(icl_conn_zone, ic);
1153	refcount_release(&icl_ncons);
1154}
1155
1156static int
1157icl_conn_start(struct icl_conn *ic)
1158{
1159	size_t minspace;
1160	struct sockopt opt;
1161	int error, one = 1;
1162
1163	ICL_CONN_LOCK(ic);
1164
1165	/*
1166	 * XXX: Ugly hack.
1167	 */
1168	if (ic->ic_socket == NULL) {
1169		ICL_CONN_UNLOCK(ic);
1170		return (EINVAL);
1171	}
1172
1173	ic->ic_receive_state = ICL_CONN_STATE_BHS;
1174	ic->ic_receive_len = sizeof(struct iscsi_bhs);
1175	ic->ic_disconnecting = false;
1176
1177	ICL_CONN_UNLOCK(ic);
1178
1179	/*
1180	 * For sendspace, this is required because the current code cannot
1181	 * send a PDU in pieces; thus, the minimum buffer size is equal
1182	 * to the maximum PDU size.  "+4" is to account for possible padding.
1183	 *
1184	 * What we should actually do here is to use autoscaling, but set
1185	 * some minimal buffer size to "minspace".  I don't know a way to do
1186	 * that, though.
1187	 */
1188	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1189	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1190	if (sendspace < minspace) {
1191		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1192		    minspace);
1193		sendspace = minspace;
1194	}
1195	if (recvspace < minspace) {
1196		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1197		    minspace);
1198		recvspace = minspace;
1199	}
1200
1201	error = soreserve(ic->ic_socket, sendspace, recvspace);
1202	if (error != 0) {
1203		ICL_WARN("soreserve failed with error %d", error);
1204		icl_conn_close(ic);
1205		return (error);
1206	}
1207	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1208	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1209
1210	/*
1211	 * Disable Nagle.
1212	 */
1213	bzero(&opt, sizeof(opt));
1214	opt.sopt_dir = SOPT_SET;
1215	opt.sopt_level = IPPROTO_TCP;
1216	opt.sopt_name = TCP_NODELAY;
1217	opt.sopt_val = &one;
1218	opt.sopt_valsize = sizeof(one);
1219	error = sosetopt(ic->ic_socket, &opt);
1220	if (error != 0) {
1221		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1222		icl_conn_close(ic);
1223		return (error);
1224	}
1225
1226	/*
1227	 * Start threads.
1228	 */
1229	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1230	    ic->ic_name);
1231	if (error != 0) {
1232		ICL_WARN("kthread_add(9) failed with error %d", error);
1233		icl_conn_close(ic);
1234		return (error);
1235	}
1236
1237	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1238	    ic->ic_name);
1239	if (error != 0) {
1240		ICL_WARN("kthread_add(9) failed with error %d", error);
1241		icl_conn_close(ic);
1242		return (error);
1243	}
1244
1245	/*
1246	 * Register socket upcall, to get notified about incoming PDUs
1247	 * and free space to send outgoing ones.
1248	 */
1249	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1250	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1251	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1252	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1253	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1254	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1255
1256	return (0);
1257}
1258
1259int
1260icl_conn_handoff(struct icl_conn *ic, int fd)
1261{
1262	struct file *fp;
1263	struct socket *so;
1264	cap_rights_t rights;
1265	int error;
1266
1267	ICL_CONN_LOCK_ASSERT_NOT(ic);
1268
1269	/*
1270	 * Steal the socket from userland.
1271	 */
1272	error = fget(curthread, fd,
1273	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1274	if (error != 0)
1275		return (error);
1276	if (fp->f_type != DTYPE_SOCKET) {
1277		fdrop(fp, curthread);
1278		return (EINVAL);
1279	}
1280	so = fp->f_data;
1281	if (so->so_type != SOCK_STREAM) {
1282		fdrop(fp, curthread);
1283		return (EINVAL);
1284	}
1285
1286	ICL_CONN_LOCK(ic);
1287
1288	if (ic->ic_socket != NULL) {
1289		ICL_CONN_UNLOCK(ic);
1290		fdrop(fp, curthread);
1291		return (EBUSY);
1292	}
1293
1294	ic->ic_socket = fp->f_data;
1295	fp->f_ops = &badfileops;
1296	fp->f_data = NULL;
1297	fdrop(fp, curthread);
1298	ICL_CONN_UNLOCK(ic);
1299
1300	error = icl_conn_start(ic);
1301
1302	return (error);
1303}
1304
1305void
1306icl_conn_close(struct icl_conn *ic)
1307{
1308	struct icl_pdu *pdu;
1309
1310	ICL_CONN_LOCK_ASSERT_NOT(ic);
1311
1312	ICL_CONN_LOCK(ic);
1313	if (ic->ic_socket == NULL) {
1314		ICL_CONN_UNLOCK(ic);
1315		return;
1316	}
1317
1318	/*
1319	 * Deregister socket upcalls.
1320	 */
1321	ICL_CONN_UNLOCK(ic);
1322	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1323	if (ic->ic_socket->so_snd.sb_upcall != NULL)
1324		soupcall_clear(ic->ic_socket, SO_SND);
1325	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1326	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1327	if (ic->ic_socket->so_rcv.sb_upcall != NULL)
1328		soupcall_clear(ic->ic_socket, SO_RCV);
1329	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1330	ICL_CONN_LOCK(ic);
1331
1332	ic->ic_disconnecting = true;
1333
1334	/*
1335	 * Wake up the threads, so they can properly terminate.
1336	 */
1337	while (ic->ic_receive_running || ic->ic_send_running) {
1338		//ICL_DEBUG("waiting for send/receive threads to terminate");
1339		cv_signal(&ic->ic_receive_cv);
1340		cv_signal(&ic->ic_send_cv);
1341		cv_wait(&ic->ic_send_cv, ic->ic_lock);
1342	}
1343	//ICL_DEBUG("send/receive threads terminated");
1344
1345	ICL_CONN_UNLOCK(ic);
1346	soclose(ic->ic_socket);
1347	ICL_CONN_LOCK(ic);
1348	ic->ic_socket = NULL;
1349
1350	if (ic->ic_receive_pdu != NULL) {
1351		//ICL_DEBUG("freeing partially received PDU");
1352		icl_pdu_free(ic->ic_receive_pdu);
1353		ic->ic_receive_pdu = NULL;
1354	}
1355
1356	/*
1357	 * Remove any outstanding PDUs from the send queue.
1358	 */
1359	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1360		pdu = STAILQ_FIRST(&ic->ic_to_send);
1361		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1362		icl_pdu_free(pdu);
1363	}
1364
1365	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1366	    ("destroying session with non-empty send queue"));
1367#ifdef DIAGNOSTIC
1368	KASSERT(ic->ic_outstanding_pdus == 0,
1369	    ("destroying session with %d outstanding PDUs",
1370	     ic->ic_outstanding_pdus));
1371#endif
1372	ICL_CONN_UNLOCK(ic);
1373}
1374
1375bool
1376icl_conn_connected(struct icl_conn *ic)
1377{
1378	ICL_CONN_LOCK_ASSERT_NOT(ic);
1379
1380	ICL_CONN_LOCK(ic);
1381	if (ic->ic_socket == NULL) {
1382		ICL_CONN_UNLOCK(ic);
1383		return (false);
1384	}
1385	if (ic->ic_socket->so_error != 0) {
1386		ICL_CONN_UNLOCK(ic);
1387		return (false);
1388	}
1389	ICL_CONN_UNLOCK(ic);
1390	return (true);
1391}
1392
#ifdef ICL_KERNEL_PROXY
/*
 * Attach an in-kernel socket to the connection and start it.
 *
 * Must be called without the connection lock held.
 *
 * Returns EINVAL if "so" is not a SOCK_STREAM socket, EBUSY if the
 * connection already has a socket, and otherwise the result of
 * icl_conn_start().
 */
int
icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	bool busy;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	/* Claim the socket slot only if it is still free. */
	ICL_CONN_LOCK(ic);
	busy = (ic->ic_socket != NULL);
	if (!busy)
		ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	if (busy)
		return (EBUSY);

	return (icl_conn_start(ic));
}
#endif /* ICL_KERNEL_PROXY */
1417
1418static int
1419icl_unload(void)
1420{
1421
1422	if (icl_ncons != 0)
1423		return (EBUSY);
1424
1425	uma_zdestroy(icl_conn_zone);
1426	uma_zdestroy(icl_pdu_zone);
1427
1428	return (0);
1429}
1430
1431static void
1432icl_load(void)
1433{
1434
1435	icl_conn_zone = uma_zcreate("icl_conn",
1436	    sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1437	    UMA_ALIGN_PTR, 0);
1438	icl_pdu_zone = uma_zcreate("icl_pdu",
1439	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1440	    UMA_ALIGN_PTR, 0);
1441
1442	refcount_init(&icl_ncons, 0);
1443}
1444
1445static int
1446icl_modevent(module_t mod, int what, void *arg)
1447{
1448
1449	switch (what) {
1450	case MOD_LOAD:
1451		icl_load();
1452		return (0);
1453	case MOD_UNLOAD:
1454		return (icl_unload());
1455	default:
1456		return (EINVAL);
1457	}
1458}
1459
1460moduledata_t icl_data = {
1461	"icl",
1462	icl_modevent,
1463	0
1464};
1465
1466DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1467MODULE_VERSION(icl, 1);
1468