icl_proxy.c revision 265513
1/*-
2 * Copyright (c) 2012 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: stable/10/sys/dev/iscsi/icl_proxy.c 265513 2014-05-07 07:37:55Z trasz $
30 */
31/*-
32 * Copyright (c) 1982, 1986, 1989, 1990, 1993
33 *	The Regents of the University of California.  All rights reserved.
34 *
35 * sendfile(2) and related extensions:
36 * Copyright (c) 1998, David Greenman. All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
63 */
64
65/*
66 * iSCSI Common Layer, kernel proxy part.
67 */
68
69#ifdef ICL_KERNEL_PROXY
70
71#include <sys/param.h>
72#include <sys/capability.h>
73#include <sys/condvar.h>
74#include <sys/conf.h>
75#include <sys/kernel.h>
76#include <sys/kthread.h>
77#include <sys/malloc.h>
78#include <sys/proc.h>
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <sys/sx.h>
82#include <sys/systm.h>
83#include <netinet/in.h>
84#include <netinet/tcp.h>
85#include <linux/types.h>
86#include <rdma/rdma_cm.h>
87
88#include "icl.h"
89
/*
 * Verbosity of this file's diagnostics: values above 0 enable ICL_WARN(),
 * values above 1 additionally enable ICL_DEBUG().
 */
static int debug = 1;

/*
 * Print a debug message prefixed with the calling function's name.
 * X is a printf-style format string literal; a newline is appended.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement and can be used safely as the
 * unbraced body of an if/else.
 */
#define	ICL_DEBUG(X, ...)						\
	do {								\
		if (debug > 1)						\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
	} while (0)

/*
 * Print a warning prefixed with "WARNING: " and the calling function's
 * name.  Same calling convention and single-statement guarantee as
 * ICL_DEBUG().
 */
#define	ICL_WARN(X, ...)						\
	do {								\
		if (debug > 0)						\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
	} while (0)
102
103static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy");
104
105#ifdef ICL_RDMA
106static int	icl_conn_connect_rdma(struct icl_conn *ic, int domain, int socktype,
107    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa);
108static int	icl_listen_add_rdma(struct icl_listen *il, int domain, int socktype, int protocol,
109    struct sockaddr *sa);
110#endif /* ICL_RDMA */
111
112static int
113icl_conn_connect_tcp(struct icl_conn *ic, int domain, int socktype,
114    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
115{
116	struct socket *so;
117	int error;
118	int interrupted = 0;
119
120	error = socreate(domain, &so, socktype, protocol,
121	    curthread->td_ucred, curthread);
122	if (error != 0)
123		return (error);
124
125	if (from_sa != NULL) {
126		error = sobind(so, from_sa, curthread);
127		if (error != 0) {
128			soclose(so);
129			return (error);
130		}
131	}
132
133	error = soconnect(so, to_sa, curthread);
134	if (error != 0) {
135		soclose(so);
136		return (error);
137	}
138
139	SOCK_LOCK(so);
140	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
141		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
142		    "icl_connect", 0);
143		if (error) {
144			if (error == EINTR || error == ERESTART)
145				interrupted = 1;
146			break;
147		}
148	}
149	if (error == 0) {
150		error = so->so_error;
151		so->so_error = 0;
152	}
153	SOCK_UNLOCK(so);
154
155	if (error != 0) {
156		soclose(so);
157		return (error);
158	}
159
160	error = icl_conn_handoff_sock(ic, so);
161	if (error != 0)
162		soclose(so);
163
164	return (error);
165}
166
167int
168icl_conn_connect(struct icl_conn *ic, bool rdma, int domain, int socktype,
169    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
170{
171
172	if (rdma) {
173#ifdef ICL_RDMA
174		return (icl_conn_connect_rdma(ic, domain, socktype, protocol, from_sa, to_sa));
175#else
176		ICL_DEBUG("RDMA not supported");
177		return (EOPNOTSUPP);
178#endif
179	}
180
181	return (icl_conn_connect_tcp(ic, domain, socktype, protocol, from_sa, to_sa));
182}
183
184struct icl_listen *
185icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int))
186{
187	struct icl_listen *il;
188
189	il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK);
190	TAILQ_INIT(&il->il_sockets);
191	sx_init(&il->il_lock, "icl_listen");
192	il->il_accept = accept_cb;
193
194	return (il);
195}
196
197void
198icl_listen_free(struct icl_listen *il)
199{
200	struct icl_listen_sock *ils;
201
202	sx_xlock(&il->il_lock);
203	while (!TAILQ_EMPTY(&il->il_sockets)) {
204		ils = TAILQ_FIRST(&il->il_sockets);
205		while (ils->ils_running) {
206			ICL_DEBUG("waiting for accept thread to terminate");
207			sx_xunlock(&il->il_lock);
208			ils->ils_disconnecting = true;
209			wakeup(&ils->ils_socket->so_timeo);
210			pause("icl_unlisten", 1 * hz);
211			sx_xlock(&il->il_lock);
212		}
213
214		TAILQ_REMOVE(&il->il_sockets, ils, ils_next);
215		soclose(ils->ils_socket);
216		free(ils, M_ICL_PROXY);
217	}
218	sx_xunlock(&il->il_lock);
219
220	free(il, M_ICL_PROXY);
221}
222
223/*
224 * XXX: Doing accept in a separate thread in each socket might not be the best way
225 * 	to do stuff, but it's pretty clean and debuggable - and you probably won't
226 * 	have hundreds of listening sockets anyway.
227 */
228static void
229icl_accept_thread(void *arg)
230{
231	struct icl_listen_sock *ils;
232	struct socket *head, *so;
233	struct sockaddr *sa;
234	int error;
235
236	ils = arg;
237	head = ils->ils_socket;
238
239	ils->ils_running = true;
240
241	for (;;) {
242		ACCEPT_LOCK();
243		while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
244			if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
245				head->so_error = ECONNABORTED;
246				break;
247			}
248			error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
249			    "accept", 0);
250			if (error) {
251				ACCEPT_UNLOCK();
252				ICL_WARN("msleep failed with error %d", error);
253				continue;
254			}
255			if (ils->ils_disconnecting) {
256				ACCEPT_UNLOCK();
257				ICL_DEBUG("terminating");
258				ils->ils_running = false;
259				kthread_exit();
260				return;
261			}
262		}
263		if (head->so_error) {
264			error = head->so_error;
265			head->so_error = 0;
266			ACCEPT_UNLOCK();
267			ICL_WARN("socket error %d", error);
268			continue;
269		}
270		so = TAILQ_FIRST(&head->so_comp);
271		KASSERT(so != NULL, ("NULL so"));
272		KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
273		KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
274
275		/*
276		 * Before changing the flags on the socket, we have to bump the
277		 * reference count.  Otherwise, if the protocol calls sofree(),
278		 * the socket will be released due to a zero refcount.
279		 */
280		SOCK_LOCK(so);			/* soref() and so_state update */
281		soref(so);			/* file descriptor reference */
282
283		TAILQ_REMOVE(&head->so_comp, so, so_list);
284		head->so_qlen--;
285		so->so_state |= (head->so_state & SS_NBIO);
286		so->so_qstate &= ~SQ_COMP;
287		so->so_head = NULL;
288
289		SOCK_UNLOCK(so);
290		ACCEPT_UNLOCK();
291
292		sa = NULL;
293		error = soaccept(so, &sa);
294		if (error != 0) {
295			ICL_WARN("soaccept error %d", error);
296			if (sa != NULL)
297				free(sa, M_SONAME);
298			soclose(so);
299			continue;
300		}
301
302		(ils->ils_listen->il_accept)(so, sa, ils->ils_id);
303	}
304}
305
306static int
307icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype,
308    int protocol, struct sockaddr *sa, int portal_id)
309{
310	struct icl_listen_sock *ils;
311	struct socket *so;
312	struct sockopt sopt;
313	int error, one = 1;
314
315	error = socreate(domain, &so, socktype, protocol,
316	    curthread->td_ucred, curthread);
317	if (error != 0) {
318		ICL_WARN("socreate failed with error %d", error);
319		return (error);
320	}
321
322	sopt.sopt_dir = SOPT_SET;
323	sopt.sopt_level = SOL_SOCKET;
324	sopt.sopt_name = SO_REUSEADDR;
325	sopt.sopt_val = &one;
326	sopt.sopt_valsize = sizeof(one);
327	sopt.sopt_td = NULL;
328	error = sosetopt(so, &sopt);
329	if (error != 0) {
330		ICL_WARN("failed to set SO_REUSEADDR with error %d", error);
331		soclose(so);
332		return (error);
333	}
334
335	error = sobind(so, sa, curthread);
336	if (error != 0) {
337		ICL_WARN("sobind failed with error %d", error);
338		soclose(so);
339		return (error);
340	}
341
342	error = solisten(so, -1, curthread);
343	if (error != 0) {
344		ICL_WARN("solisten failed with error %d", error);
345		soclose(so);
346		return (error);
347	}
348
349	ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK);
350	ils->ils_listen = il;
351	ils->ils_socket = so;
352	ils->ils_id = portal_id;
353
354	error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc");
355	if (error != 0) {
356		ICL_WARN("kthread_add failed with error %d", error);
357		soclose(so);
358		free(ils, M_ICL_PROXY);
359
360		return (error);
361	}
362
363	sx_xlock(&il->il_lock);
364	TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next);
365	sx_xunlock(&il->il_lock);
366
367	return (0);
368}
369
370int
371icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype,
372    int protocol, struct sockaddr *sa, int portal_id)
373{
374
375	if (rdma) {
376#ifndef ICL_RDMA
377		ICL_DEBUG("RDMA not supported");
378		return (EOPNOTSUPP);
379#else
380		return (icl_listen_add_rdma(il, domain, socktype, protocol,
381		    sa, portal_id));
382#endif
383	}
384
385
386	return (icl_listen_add_tcp(il, domain, socktype, protocol, sa,
387	    portal_id));
388}
389
/*
 * Stop listening on a single address.
 *
 * XXX: Not implemented; neither argument is examined and EOPNOTSUPP is
 *	always returned.
 */
int
icl_listen_remove(struct icl_listen *il, struct sockaddr *sa)
{

	return (EOPNOTSUPP);
}
400
401#endif /* ICL_KERNEL_PROXY */
402