icl_proxy.c revision 280258
1/*-
2 * Copyright (c) 2012 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*-
31 * Copyright (c) 1982, 1986, 1989, 1990, 1993
32 *	The Regents of the University of California.  All rights reserved.
33 *
34 * sendfile(2) and related extensions:
35 * Copyright (c) 1998, David Greenman. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
62 */
63
64/*
65 * iSCSI Common Layer, kernel proxy part.
66 */
67
68#ifdef ICL_KERNEL_PROXY
69
70#include <sys/cdefs.h>
71__FBSDID("$FreeBSD: stable/10/sys/dev/iscsi/icl_proxy.c 280258 2015-03-19 13:37:36Z rwatson $");
72
73#include <sys/param.h>
74#include <sys/capsicum.h>
75#include <sys/condvar.h>
76#include <sys/conf.h>
77#include <sys/kernel.h>
78#include <sys/kthread.h>
79#include <sys/malloc.h>
80#include <sys/proc.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/sx.h>
84#include <sys/systm.h>
85#include <netinet/in.h>
86#include <netinet/tcp.h>
87#include <linux/types.h>
88#include <rdma/rdma_cm.h>
89
90#include <dev/iscsi/icl.h>
91
/*
 * Verbosity level consulted by the logging macros below: ICL_WARN prints
 * when it is greater than 0, ICL_DEBUG when it is greater than 1.
 */
static int debug = 1;
93
/*
 * Print a debug message, prefixed with the name of the calling function,
 * when the file-local "debug" level is greater than 1.  X is a printf-style
 * format string literal; the remaining arguments are its parameters.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.
 */
#define	ICL_DEBUG(X, ...)					\
	do {							\
		if (debug > 1) {				\
			printf("%s: " X "\n",			\
			    __func__, ## __VA_ARGS__);		\
		}						\
	} while (0)
98
/*
 * Print a warning, prefixed with "WARNING: " and the name of the calling
 * function, when the file-local "debug" level is greater than 0.  X is a
 * printf-style format string literal; the remaining arguments are its
 * parameters.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.
 */
#define	ICL_WARN(X, ...)					\
	do {							\
		if (debug > 0) {				\
			printf("WARNING: %s: " X "\n",		\
			    __func__, ## __VA_ARGS__);		\
		}						\
	} while (0)
104
/* Malloc type used for every allocation made by this file. */
static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy");
106
#ifdef ICL_RDMA
/*
 * RDMA counterparts of the TCP helpers in this file.  They are only
 * declared here; icl_conn_connect() and icl_listen_add() dispatch to them
 * when the caller asks for RDMA.
 */
static int	icl_conn_connect_rdma(struct icl_conn *ic, int domain,
    int socktype, int protocol, struct sockaddr *from_sa,
    struct sockaddr *to_sa);
/* Takes portal_id so that the prototype matches the call in icl_listen_add(). */
static int	icl_listen_add_rdma(struct icl_listen *il, int domain,
    int socktype, int protocol, struct sockaddr *sa, int portal_id);
#endif /* ICL_RDMA */
113
114static int
115icl_conn_connect_tcp(struct icl_conn *ic, int domain, int socktype,
116    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
117{
118	struct socket *so;
119	int error;
120	int interrupted = 0;
121
122	error = socreate(domain, &so, socktype, protocol,
123	    curthread->td_ucred, curthread);
124	if (error != 0)
125		return (error);
126
127	if (from_sa != NULL) {
128		error = sobind(so, from_sa, curthread);
129		if (error != 0) {
130			soclose(so);
131			return (error);
132		}
133	}
134
135	error = soconnect(so, to_sa, curthread);
136	if (error != 0) {
137		soclose(so);
138		return (error);
139	}
140
141	SOCK_LOCK(so);
142	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
143		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
144		    "icl_connect", 0);
145		if (error) {
146			if (error == EINTR || error == ERESTART)
147				interrupted = 1;
148			break;
149		}
150	}
151	if (error == 0) {
152		error = so->so_error;
153		so->so_error = 0;
154	}
155	SOCK_UNLOCK(so);
156
157	if (error != 0) {
158		soclose(so);
159		return (error);
160	}
161
162	error = icl_conn_handoff_sock(ic, so);
163	if (error != 0)
164		soclose(so);
165
166	return (error);
167}
168
169int
170icl_conn_connect(struct icl_conn *ic, bool rdma, int domain, int socktype,
171    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
172{
173
174	if (rdma) {
175#ifdef ICL_RDMA
176		return (icl_conn_connect_rdma(ic, domain, socktype, protocol, from_sa, to_sa));
177#else
178		ICL_DEBUG("RDMA not supported");
179		return (EOPNOTSUPP);
180#endif
181	}
182
183	return (icl_conn_connect_tcp(ic, domain, socktype, protocol, from_sa, to_sa));
184}
185
186struct icl_listen *
187icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int))
188{
189	struct icl_listen *il;
190
191	il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK);
192	TAILQ_INIT(&il->il_sockets);
193	sx_init(&il->il_lock, "icl_listen");
194	il->il_accept = accept_cb;
195
196	return (il);
197}
198
199void
200icl_listen_free(struct icl_listen *il)
201{
202	struct icl_listen_sock *ils;
203
204	sx_xlock(&il->il_lock);
205	while (!TAILQ_EMPTY(&il->il_sockets)) {
206		ils = TAILQ_FIRST(&il->il_sockets);
207		while (ils->ils_running) {
208			ICL_DEBUG("waiting for accept thread to terminate");
209			sx_xunlock(&il->il_lock);
210			ils->ils_disconnecting = true;
211			wakeup(&ils->ils_socket->so_timeo);
212			pause("icl_unlisten", 1 * hz);
213			sx_xlock(&il->il_lock);
214		}
215
216		TAILQ_REMOVE(&il->il_sockets, ils, ils_next);
217		soclose(ils->ils_socket);
218		free(ils, M_ICL_PROXY);
219	}
220	sx_xunlock(&il->il_lock);
221
222	free(il, M_ICL_PROXY);
223}
224
225/*
226 * XXX: Doing accept in a separate thread in each socket might not be the best way
227 * 	to do stuff, but it's pretty clean and debuggable - and you probably won't
228 * 	have hundreds of listening sockets anyway.
229 */
230static void
231icl_accept_thread(void *arg)
232{
233	struct icl_listen_sock *ils;
234	struct socket *head, *so;
235	struct sockaddr *sa;
236	int error;
237
238	ils = arg;
239	head = ils->ils_socket;
240
241	ils->ils_running = true;
242
243	for (;;) {
244		ACCEPT_LOCK();
245		while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
246			if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
247				head->so_error = ECONNABORTED;
248				break;
249			}
250			error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
251			    "accept", 0);
252			if (error) {
253				ACCEPT_UNLOCK();
254				ICL_WARN("msleep failed with error %d", error);
255				continue;
256			}
257			if (ils->ils_disconnecting) {
258				ACCEPT_UNLOCK();
259				ICL_DEBUG("terminating");
260				ils->ils_running = false;
261				kthread_exit();
262				return;
263			}
264		}
265		if (head->so_error) {
266			error = head->so_error;
267			head->so_error = 0;
268			ACCEPT_UNLOCK();
269			ICL_WARN("socket error %d", error);
270			continue;
271		}
272		so = TAILQ_FIRST(&head->so_comp);
273		KASSERT(so != NULL, ("NULL so"));
274		KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
275		KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
276
277		/*
278		 * Before changing the flags on the socket, we have to bump the
279		 * reference count.  Otherwise, if the protocol calls sofree(),
280		 * the socket will be released due to a zero refcount.
281		 */
282		SOCK_LOCK(so);			/* soref() and so_state update */
283		soref(so);			/* file descriptor reference */
284
285		TAILQ_REMOVE(&head->so_comp, so, so_list);
286		head->so_qlen--;
287		so->so_state |= (head->so_state & SS_NBIO);
288		so->so_qstate &= ~SQ_COMP;
289		so->so_head = NULL;
290
291		SOCK_UNLOCK(so);
292		ACCEPT_UNLOCK();
293
294		sa = NULL;
295		error = soaccept(so, &sa);
296		if (error != 0) {
297			ICL_WARN("soaccept error %d", error);
298			if (sa != NULL)
299				free(sa, M_SONAME);
300			soclose(so);
301			continue;
302		}
303
304		(ils->ils_listen->il_accept)(so, sa, ils->ils_id);
305	}
306}
307
308static int
309icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype,
310    int protocol, struct sockaddr *sa, int portal_id)
311{
312	struct icl_listen_sock *ils;
313	struct socket *so;
314	struct sockopt sopt;
315	int error, one = 1;
316
317	error = socreate(domain, &so, socktype, protocol,
318	    curthread->td_ucred, curthread);
319	if (error != 0) {
320		ICL_WARN("socreate failed with error %d", error);
321		return (error);
322	}
323
324	sopt.sopt_dir = SOPT_SET;
325	sopt.sopt_level = SOL_SOCKET;
326	sopt.sopt_name = SO_REUSEADDR;
327	sopt.sopt_val = &one;
328	sopt.sopt_valsize = sizeof(one);
329	sopt.sopt_td = NULL;
330	error = sosetopt(so, &sopt);
331	if (error != 0) {
332		ICL_WARN("failed to set SO_REUSEADDR with error %d", error);
333		soclose(so);
334		return (error);
335	}
336
337	error = sobind(so, sa, curthread);
338	if (error != 0) {
339		ICL_WARN("sobind failed with error %d", error);
340		soclose(so);
341		return (error);
342	}
343
344	error = solisten(so, -1, curthread);
345	if (error != 0) {
346		ICL_WARN("solisten failed with error %d", error);
347		soclose(so);
348		return (error);
349	}
350
351	ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK);
352	ils->ils_listen = il;
353	ils->ils_socket = so;
354	ils->ils_id = portal_id;
355
356	error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc");
357	if (error != 0) {
358		ICL_WARN("kthread_add failed with error %d", error);
359		soclose(so);
360		free(ils, M_ICL_PROXY);
361
362		return (error);
363	}
364
365	sx_xlock(&il->il_lock);
366	TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next);
367	sx_xunlock(&il->il_lock);
368
369	return (0);
370}
371
372int
373icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype,
374    int protocol, struct sockaddr *sa, int portal_id)
375{
376
377	if (rdma) {
378#ifndef ICL_RDMA
379		ICL_DEBUG("RDMA not supported");
380		return (EOPNOTSUPP);
381#else
382		return (icl_listen_add_rdma(il, domain, socktype, protocol,
383		    sa, portal_id));
384#endif
385	}
386
387
388	return (icl_listen_add_tcp(il, domain, socktype, protocol, sa,
389	    portal_id));
390}
391
/*
 * Remove a single listening endpoint from "il".
 *
 * XXX: Not implemented; both arguments are ignored and the call always
 * fails with EOPNOTSUPP.
 */
int
icl_listen_remove(struct icl_listen *il, struct sockaddr *sa)
{

	(void)il;
	(void)sa;

	return (EOPNOTSUPP);
}
402
403#endif /* ICL_KERNEL_PROXY */
404