1/*	$NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $	*/
2
3/*-
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Copyright (c) 2009, Sun Microsystems, Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
11 * - Redistributions of source code must retain the above copyright notice,
12 *   this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright notice,
14 *   this list of conditions and the following disclaimer in the documentation
15 *   and/or other materials provided with the distribution.
16 * - Neither the name of Sun Microsystems, Inc. nor the names of its
17 *   contributors may be used to endorse or promote products derived
18 *   from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34/*
35 * svc_vc.c, Server side for Connection Oriented based RPC.
36 *
37 * Actually implements two flavors of transporter -
38 * a tcp rendezvouser (a listener and connection establisher)
39 * and a record/tcp stream.
40 */
41
42#include "opt_kern_tls.h"
43
44#include <sys/param.h>
45#include <sys/limits.h>
46#include <sys/lock.h>
47#include <sys/kernel.h>
48#include <sys/ktls.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/mutex.h>
52#include <sys/proc.h>
53#include <sys/protosw.h>
54#include <sys/queue.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/sx.h>
58#include <sys/systm.h>
59#include <sys/uio.h>
60
61#include <net/vnet.h>
62
63#include <netinet/tcp.h>
64
65#include <rpc/rpc.h>
66#include <rpc/rpcsec_tls.h>
67
68#include <rpc/krpc.h>
69#include <rpc/rpc_com.h>
70
71#include <security/mac/mac_framework.h>
72
/*
 * sysctl nodes for the kernel RPC statistics kept by this file:
 * kern.rpc is the root, with kern.rpc.tls and kern.rpc.unenc below it.
 */
SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "RPC");
SYSCTL_NODE(_kern_rpc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "TLS");
SYSCTL_NODE(_kern_rpc, OID_AUTO, unenc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "unencrypted");

/*
 * Non-TLS receive counters.  svc_vc_recv() bumps these for each
 * successful soreceive() that returned no control mbuf.
 */
KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgbytes) = 0;
SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW,
    &KRPC_VNET_NAME(svc_vc_rx_msgbytes), 0, "Count of non-TLS rx bytes");

KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgcnt) = 0;
SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW,
    &KRPC_VNET_NAME(svc_vc_rx_msgcnt), 0, "Count of non-TLS rx messages");

/*
 * Non-TLS transmit counters.  svc_vc_reply() bumps these for each reply
 * it builds on a transport without RPCTLS_FLAGS_HANDSHAKE set.
 */
KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgbytes) = 0;
SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW,
    &KRPC_VNET_NAME(svc_vc_tx_msgbytes), 0, "Count of non-TLS tx bytes");

KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgcnt) = 0;
SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW,
    &KRPC_VNET_NAME(svc_vc_tx_msgcnt), 0, "Count of non-TLS tx messages");

/*
 * Incremented by svc_vc_recv() each time soreceive() returns ENXIO on a
 * transport that has completed a TLS handshake.
 */
KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_alerts) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, alerts,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_alerts), 0,
    "Count of TLS alert messages");

/*
 * The two handshake counters are defined without the _STATIC variant and
 * are not modified anywhere in this file; presumably they are updated by
 * the RPC-over-TLS code elsewhere -- TODO confirm.
 */
KRPC_VNET_DEFINE(uint64_t, svc_vc_tls_handshake_failed) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, handshake_failed,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW,
    &KRPC_VNET_NAME(svc_vc_tls_handshake_failed), 0,
    "Count of TLS failed handshakes");

KRPC_VNET_DEFINE(uint64_t, svc_vc_tls_handshake_success) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, handshake_success,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW,
    &KRPC_VNET_NAME(svc_vc_tls_handshake_success), 0,
    "Count of TLS successful handshakes");

/*
 * TLS receive counters.  svc_vc_recv() bumps these when the control
 * message is a TLS_GET_RECORD describing a TLS_RLTYPE_APP record.
 */
KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgbytes) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, rx_msgbytes,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_rx_msgbytes), 0,
    "Count of TLS rx bytes");

KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgcnt) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, rx_msgcnt,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_rx_msgcnt), 0,
    "Count of TLS rx messages");

/*
 * TLS transmit counters.  svc_vc_reply() bumps these for each reply it
 * builds on a transport with RPCTLS_FLAGS_HANDSHAKE set.
 */
KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgbytes) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, tx_msgbytes,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_tx_msgbytes), 0,
    "Count of TLS tx bytes");

KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgcnt) = 0;
SYSCTL_U64(_kern_rpc_tls, OID_AUTO, tx_msgcnt,
    CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_tx_msgcnt), 0,
    "Count of TLS tx messages");
132
/*
 * Forward declarations.  The functions are grouped by the transport
 * flavor they serve: rendezvous (listening socket), connected stream,
 * and backchannel, followed by the helpers and socket upcalls.
 */
static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
    struct sockaddr **, struct mbuf **);
static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
static void svc_vc_rendezvous_destroy(SVCXPRT *);
/* Placeholder used (through a cast) as the rendezvous xp_reply method. */
static bool_t svc_vc_null(void);
static void svc_vc_destroy(SVCXPRT *);
static enum xprt_stat svc_vc_stat(SVCXPRT *);
static bool_t svc_vc_ack(SVCXPRT *, uint32_t *);
static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
    struct sockaddr **, struct mbuf **);
static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
    struct sockaddr *, struct mbuf *, uint32_t *seq);
static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
    void *in);
static void svc_vc_backchannel_destroy(SVCXPRT *);
static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
    struct sockaddr **, struct mbuf **);
static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
    struct sockaddr *, struct mbuf *, uint32_t *);
static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
    void *in);
static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
    struct sockaddr *raddr);
static int svc_vc_accept(struct socket *head, struct socket **sop);
/* Receive-buffer upcall for connected sockets. */
static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
/* Listen upcall for rendezvous sockets. */
static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
161
/*
 * Method table for rendezvous (listening) transports.  No xp_ack method
 * is supplied, and xp_reply is svc_vc_null() cast to the reply signature,
 * which always returns FALSE.
 */
static const struct xp_ops svc_vc_rendezvous_ops = {
	.xp_recv =	svc_vc_rendezvous_recv,
	.xp_stat =	svc_vc_rendezvous_stat,
	.xp_reply =	(bool_t (*)(SVCXPRT *, struct rpc_msg *,
		struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null,
	.xp_destroy =	svc_vc_rendezvous_destroy,
	.xp_control =	svc_vc_rendezvous_control
};
170
/*
 * Method table for connected stream transports created by
 * svc_vc_create_conn().  This is the only table in this file that
 * provides an xp_ack method.
 */
static const struct xp_ops svc_vc_ops = {
	.xp_recv =	svc_vc_recv,
	.xp_stat =	svc_vc_stat,
	.xp_ack =	svc_vc_ack,
	.xp_reply =	svc_vc_reply,
	.xp_destroy =	svc_vc_destroy,
	.xp_control =	svc_vc_control
};
179
/*
 * Method table for backchannel transports created by
 * svc_vc_create_backchannel().  No xp_ack method is supplied.
 */
static const struct xp_ops svc_vc_backchannel_ops = {
	.xp_recv =	svc_vc_backchannel_recv,
	.xp_stat =	svc_vc_backchannel_stat,
	.xp_reply =	svc_vc_backchannel_reply,
	.xp_destroy =	svc_vc_backchannel_destroy,
	.xp_control =	svc_vc_backchannel_control
};
187
188/*
189 * Usage:
190 *	xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
191 *
192 * Creates, registers, and returns a (rpc) tcp based transporter.
193 * Once *xprt is initialized, it is registered as a transporter
194 * see (svc.h, xprt_register).  This routine returns
195 * a NULL if a problem occurred.
196 *
197 * The filedescriptor passed in is expected to refer to a bound, but
198 * not yet connected socket.
199 *
200 * Since streams do buffered io similar to stdio, the caller can specify
201 * how big the send and receive buffers are via the second and third parms;
202 * 0 => use the system default.
203 */
204SVCXPRT *
205svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
206    size_t recvsize)
207{
208	SVCXPRT *xprt;
209	int error;
210
211	SOCK_LOCK(so);
212	if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
213		struct sockaddr_storage ss = { .ss_len = sizeof(ss) };
214
215		SOCK_UNLOCK(so);
216		error = sopeeraddr(so, (struct sockaddr *)&ss);
217		if (error)
218			return (NULL);
219		xprt = svc_vc_create_conn(pool, so, (struct sockaddr *)&ss);
220		return (xprt);
221	}
222	SOCK_UNLOCK(so);
223
224	xprt = svc_xprt_alloc();
225	sx_init(&xprt->xp_lock, "xprt->xp_lock");
226	xprt->xp_pool = pool;
227	xprt->xp_socket = so;
228	xprt->xp_p1 = NULL;
229	xprt->xp_p2 = NULL;
230	xprt->xp_ops = &svc_vc_rendezvous_ops;
231
232	xprt->xp_ltaddr.ss_len = sizeof(xprt->xp_ltaddr);
233	error = sosockaddr(so, (struct sockaddr *)&xprt->xp_ltaddr);
234	if (error) {
235		goto cleanup_svc_vc_create;
236	}
237
238	xprt_register(xprt);
239
240	solisten(so, -1, curthread);
241
242	SOLISTEN_LOCK(so);
243	xprt->xp_upcallset = 1;
244	solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
245	SOLISTEN_UNLOCK(so);
246
247	return (xprt);
248
249cleanup_svc_vc_create:
250	sx_destroy(&xprt->xp_lock);
251	svc_xprt_free(xprt);
252
253	return (NULL);
254}
255
256/*
257 * Create a new transport for a socket optained via soaccept().
258 */
259SVCXPRT *
260svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
261{
262	SVCXPRT *xprt;
263	struct cf_conn *cd;
264	struct sockopt opt;
265	int one = 1;
266	int error;
267
268	bzero(&opt, sizeof(struct sockopt));
269	opt.sopt_dir = SOPT_SET;
270	opt.sopt_level = SOL_SOCKET;
271	opt.sopt_name = SO_KEEPALIVE;
272	opt.sopt_val = &one;
273	opt.sopt_valsize = sizeof(one);
274	error = sosetopt(so, &opt);
275	if (error) {
276		return (NULL);
277	}
278
279	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
280		bzero(&opt, sizeof(struct sockopt));
281		opt.sopt_dir = SOPT_SET;
282		opt.sopt_level = IPPROTO_TCP;
283		opt.sopt_name = TCP_NODELAY;
284		opt.sopt_val = &one;
285		opt.sopt_valsize = sizeof(one);
286		error = sosetopt(so, &opt);
287		if (error) {
288			return (NULL);
289		}
290	}
291
292	cd = mem_alloc(sizeof(*cd));
293	cd->strm_stat = XPRT_IDLE;
294
295	xprt = svc_xprt_alloc();
296	sx_init(&xprt->xp_lock, "xprt->xp_lock");
297	xprt->xp_pool = pool;
298	xprt->xp_socket = so;
299	xprt->xp_p1 = cd;
300	xprt->xp_p2 = NULL;
301	xprt->xp_ops = &svc_vc_ops;
302
303	/*
304	 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
305	 * has a 5 minute timer, server has a 6 minute timer.
306	 */
307	xprt->xp_idletimeout = 6 * 60;
308
309	memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
310
311	xprt->xp_ltaddr.ss_len = sizeof(xprt->xp_ltaddr);
312	error = sosockaddr(so, (struct sockaddr *)&xprt->xp_ltaddr);
313	if (error)
314		goto cleanup_svc_vc_create;
315
316	xprt_register(xprt);
317
318	SOCK_RECVBUF_LOCK(so);
319	xprt->xp_upcallset = 1;
320	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
321	SOCK_RECVBUF_UNLOCK(so);
322
323	/*
324	 * Throw the transport into the active list in case it already
325	 * has some data buffered.
326	 */
327	sx_xlock(&xprt->xp_lock);
328	xprt_active(xprt);
329	sx_xunlock(&xprt->xp_lock);
330
331	return (xprt);
332cleanup_svc_vc_create:
333	sx_destroy(&xprt->xp_lock);
334	svc_xprt_free(xprt);
335	mem_free(cd, sizeof(*cd));
336
337	return (NULL);
338}
339
340/*
341 * Create a new transport for a backchannel on a clnt_vc socket.
342 */
343SVCXPRT *
344svc_vc_create_backchannel(SVCPOOL *pool)
345{
346	SVCXPRT *xprt = NULL;
347	struct cf_conn *cd = NULL;
348
349	cd = mem_alloc(sizeof(*cd));
350	cd->strm_stat = XPRT_IDLE;
351
352	xprt = svc_xprt_alloc();
353	sx_init(&xprt->xp_lock, "xprt->xp_lock");
354	xprt->xp_pool = pool;
355	xprt->xp_socket = NULL;
356	xprt->xp_p1 = cd;
357	xprt->xp_p2 = NULL;
358	xprt->xp_ops = &svc_vc_backchannel_ops;
359	return (xprt);
360}
361
362/*
363 * This does all of the accept except the final call to soaccept. The
364 * caller will call soaccept after dropping its locks (soaccept may
365 * call malloc).
366 */
367int
368svc_vc_accept(struct socket *head, struct socket **sop)
369{
370	struct socket *so;
371	int error = 0;
372	short nbio;
373
374	KASSERT(SOLISTENING(head),
375	    ("%s: socket %p is not listening", __func__, head));
376
377#ifdef MAC
378	error = mac_socket_check_accept(curthread->td_ucred, head);
379	if (error != 0)
380		goto done;
381#endif
382	/*
383	 * XXXGL: we want non-blocking semantics.  The socket could be a
384	 * socket created by kernel as well as socket shared with userland,
385	 * so we can't be sure about presense of SS_NBIO.  We also shall not
386	 * toggle it on the socket, since that may surprise userland.  So we
387	 * set SS_NBIO only temporarily.
388	 */
389	SOLISTEN_LOCK(head);
390	nbio = head->so_state & SS_NBIO;
391	head->so_state |= SS_NBIO;
392	error = solisten_dequeue(head, &so, 0);
393	head->so_state &= (nbio & ~SS_NBIO);
394	if (error)
395		goto done;
396
397	so->so_state |= nbio;
398	*sop = so;
399
400	/* connection has been removed from the listen queue */
401	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
402done:
403	return (error);
404}
405
406/*ARGSUSED*/
407static bool_t
408svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
409    struct sockaddr **addrp, struct mbuf **mp)
410{
411	struct socket *so = NULL;
412	struct sockaddr_storage ss = { .ss_len = sizeof(ss) };
413	int error;
414	SVCXPRT *new_xprt;
415
416	/*
417	 * The socket upcall calls xprt_active() which will eventually
418	 * cause the server to call us here. We attempt to accept a
419	 * connection from the socket and turn it into a new
420	 * transport. If the accept fails, we have drained all pending
421	 * connections so we call xprt_inactive().
422	 */
423	sx_xlock(&xprt->xp_lock);
424
425	error = svc_vc_accept(xprt->xp_socket, &so);
426
427	if (error == EWOULDBLOCK) {
428		/*
429		 * We must re-test for new connections after taking
430		 * the lock to protect us in the case where a new
431		 * connection arrives after our call to accept fails
432		 * with EWOULDBLOCK.
433		 */
434		SOLISTEN_LOCK(xprt->xp_socket);
435		if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
436			xprt_inactive_self(xprt);
437		SOLISTEN_UNLOCK(xprt->xp_socket);
438		sx_xunlock(&xprt->xp_lock);
439		return (FALSE);
440	}
441
442	if (error) {
443		SOLISTEN_LOCK(xprt->xp_socket);
444		if (xprt->xp_upcallset) {
445			xprt->xp_upcallset = 0;
446			soupcall_clear(xprt->xp_socket, SO_RCV);
447		}
448		SOLISTEN_UNLOCK(xprt->xp_socket);
449		xprt_inactive_self(xprt);
450		sx_xunlock(&xprt->xp_lock);
451		return (FALSE);
452	}
453
454	sx_xunlock(&xprt->xp_lock);
455
456	error = soaccept(so, (struct sockaddr *)&ss);
457
458	if (error) {
459		/*
460		 * XXX not sure if I need to call sofree or soclose here.
461		 */
462		return (FALSE);
463	}
464
465	/*
466	 * svc_vc_create_conn will call xprt_register - we don't need
467	 * to do anything with the new connection except derefence it.
468	 */
469	new_xprt = svc_vc_create_conn(xprt->xp_pool, so,
470	    (struct sockaddr *)&ss);
471	if (!new_xprt) {
472		soclose(so);
473	} else {
474		SVC_RELEASE(new_xprt);
475	}
476
477	return (FALSE); /* there is never an rpc msg to be processed */
478}
479
/*
 * xp_stat method for rendezvous transports: always reports XPRT_IDLE.
 * The xprt argument is not examined.
 */
/*ARGSUSED*/
static enum xprt_stat
svc_vc_rendezvous_stat(SVCXPRT *xprt)
{

	return (XPRT_IDLE);
}
487
488static void
489svc_vc_destroy_common(SVCXPRT *xprt)
490{
491	uint32_t reterr;
492
493	if (xprt->xp_socket) {
494		if ((xprt->xp_tls & (RPCTLS_FLAGS_HANDSHAKE |
495		    RPCTLS_FLAGS_HANDSHFAIL)) != 0) {
496			if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
497				/*
498				 * If the upcall fails, the socket has
499				 * probably been closed via the rpctlssd
500				 * daemon having crashed or been
501				 * restarted, so just ignore returned stat.
502				 */
503				rpctls_srv_disconnect(xprt->xp_sslsec,
504				    xprt->xp_sslusec, xprt->xp_sslrefno,
505				    xprt->xp_sslproc, &reterr);
506			}
507			/* Must sorele() to get rid of reference. */
508			CURVNET_SET(xprt->xp_socket->so_vnet);
509			sorele(xprt->xp_socket);
510			CURVNET_RESTORE();
511		} else
512			(void)soclose(xprt->xp_socket);
513	}
514
515	if (xprt->xp_netid)
516		(void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
517	svc_xprt_free(xprt);
518}
519
520static void
521svc_vc_rendezvous_destroy(SVCXPRT *xprt)
522{
523
524	SOLISTEN_LOCK(xprt->xp_socket);
525	if (xprt->xp_upcallset) {
526		xprt->xp_upcallset = 0;
527		solisten_upcall_set(xprt->xp_socket, NULL, NULL);
528	}
529	SOLISTEN_UNLOCK(xprt->xp_socket);
530
531	svc_vc_destroy_common(xprt);
532}
533
534static void
535svc_vc_destroy(SVCXPRT *xprt)
536{
537	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
538	CLIENT *cl = (CLIENT *)xprt->xp_p2;
539
540	SOCK_RECVBUF_LOCK(xprt->xp_socket);
541	if (xprt->xp_upcallset) {
542		xprt->xp_upcallset = 0;
543		if (xprt->xp_socket->so_rcv.sb_upcall != NULL)
544			soupcall_clear(xprt->xp_socket, SO_RCV);
545	}
546	SOCK_RECVBUF_UNLOCK(xprt->xp_socket);
547
548	if (cl != NULL)
549		CLNT_RELEASE(cl);
550
551	svc_vc_destroy_common(xprt);
552
553	if (cd->mreq)
554		m_freem(cd->mreq);
555	if (cd->mpending)
556		m_freem(cd->mpending);
557	mem_free(cd, sizeof(*cd));
558}
559
560static void
561svc_vc_backchannel_destroy(SVCXPRT *xprt)
562{
563	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
564	struct mbuf *m, *m2;
565
566	svc_xprt_free(xprt);
567	m = cd->mreq;
568	while (m != NULL) {
569		m2 = m;
570		m = m->m_nextpkt;
571		m_freem(m2);
572	}
573	mem_free(cd, sizeof(*cd));
574}
575
/*
 * xp_control method for connected transports.  No control requests are
 * implemented; every request is rejected with FALSE.
 */
/*ARGSUSED*/
static bool_t
svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
{
	return (FALSE);
}
582
/*
 * xp_control method for rendezvous transports.  No control requests are
 * implemented; every request is rejected with FALSE.
 */
static bool_t
svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
{

	return (FALSE);
}
589
/*
 * xp_control method for backchannel transports.  No control requests are
 * implemented; every request is rejected with FALSE.
 */
static bool_t
svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
{

	return (FALSE);
}
596
597static enum xprt_stat
598svc_vc_stat(SVCXPRT *xprt)
599{
600	struct cf_conn *cd;
601
602	cd = (struct cf_conn *)(xprt->xp_p1);
603
604	if (cd->strm_stat == XPRT_DIED)
605		return (XPRT_DIED);
606
607	if (cd->mreq != NULL && cd->resid == 0 && cd->eor)
608		return (XPRT_MOREREQS);
609
610	if (soreadable(xprt->xp_socket))
611		return (XPRT_MOREREQS);
612
613	return (XPRT_IDLE);
614}
615
616static bool_t
617svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
618{
619
620	*ack = atomic_load_acq_32(&xprt->xp_snt_cnt);
621	*ack -= sbused(&xprt->xp_socket->so_snd);
622	return (TRUE);
623}
624
625static enum xprt_stat
626svc_vc_backchannel_stat(SVCXPRT *xprt)
627{
628	struct cf_conn *cd;
629
630	cd = (struct cf_conn *)(xprt->xp_p1);
631
632	if (cd->mreq != NULL)
633		return (XPRT_MOREREQS);
634
635	return (XPRT_IDLE);
636}
637
/*
 * If we have an mbuf chain in cd->mpending, try to parse a record from it,
 * leaving the result in cd->mreq. If we don't have a complete record, leave
 * the partial result in cd->mreq and try to read more from the socket.
 *
 * Returns FALSE only when a record marker is expected but fewer than four
 * bytes are buffered; TRUE in every other case (whether or not a record
 * was completed).  As a side effect the socket's receive low-water mark
 * (so_rcv.sb_lowat) is adjusted to reflect how much more data is wanted.
 */
static int
svc_vc_process_pending(SVCXPRT *xprt)
{
	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
	struct socket *so = xprt->xp_socket;
	struct mbuf *m;

	/*
	 * If cd->resid is non-zero, we have part of the
	 * record already, otherwise we are expecting a record
	 * marker.
	 */
	if (!cd->resid && cd->mpending) {
		/*
		 * See if there is enough data buffered to
		 * make up a record marker. Make sure we can
		 * handle the case where the record marker is
		 * split across more than one mbuf.
		 */
		size_t n = 0;
		uint32_t header;

		/* Count buffered bytes, stopping as soon as we have four. */
		m = cd->mpending;
		while (n < sizeof(uint32_t) && m) {
			n += m->m_len;
			m = m->m_next;
		}
		if (n < sizeof(uint32_t)) {
			/* Ask for exactly the missing marker bytes. */
			so->so_rcv.sb_lowat = sizeof(uint32_t) - n;
			return (FALSE);
		}
		m_copydata(cd->mpending, 0, sizeof(header),
		    (char *)&header);
		header = ntohl(header);
		/* Top bit: last-fragment flag; low 31 bits: fragment length. */
		cd->eor = (header & 0x80000000) != 0;
		cd->resid = header & 0x7fffffff;
		/* Strip the marker from the front of the pending data. */
		m_adj(cd->mpending, sizeof(uint32_t));
	}

	/*
	 * Start pulling off mbufs from cd->mpending
	 * until we either have a complete record or
	 * we run out of data. We use m_split to pull
	 * data - it will pull as much as possible and
	 * split the last mbuf if necessary.
	 */
	while (cd->mpending && cd->resid) {
		m = cd->mpending;
		/*
		 * A single mbuf no longer than the remainder is taken
		 * whole; anything else goes through m_split().
		 */
		if (cd->mpending->m_next
		    || cd->mpending->m_len > cd->resid)
			cd->mpending = m_split(cd->mpending,
			    cd->resid, M_WAITOK);
		else
			cd->mpending = NULL;
		/* Append the piece just taken to the record being built. */
		if (cd->mreq)
			m_last(cd->mreq)->m_next = m;
		else
			cd->mreq = m;
		/* Account for every byte moved into the record. */
		while (m) {
			cd->resid -= m->m_len;
			m = m->m_next;
		}
	}

	/*
	 * Block receive upcalls if we have more data pending,
	 * otherwise report our need.
	 */
	if (cd->mpending)
		so->so_rcv.sb_lowat = INT_MAX;
	else
		so->so_rcv.sb_lowat =
		    imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2));
	return (TRUE);
}
718
719static bool_t
720svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
721    struct sockaddr **addrp, struct mbuf **mp)
722{
723	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
724	struct uio uio;
725	struct mbuf *m, *ctrl;
726	struct socket* so = xprt->xp_socket;
727	XDR xdrs;
728	int error, rcvflag;
729	uint32_t reterr, xid_plus_direction[2];
730	struct cmsghdr *cmsg;
731	struct tls_get_record tgr;
732	enum clnt_stat ret;
733
734	/*
735	 * Serialise access to the socket and our own record parsing
736	 * state.
737	 */
738	sx_xlock(&xprt->xp_lock);
739
740	for (;;) {
741		/* If we have no request ready, check pending queue. */
742		while (cd->mpending &&
743		    (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) {
744			if (!svc_vc_process_pending(xprt))
745				break;
746		}
747
748		/* Process and return complete request in cd->mreq. */
749		if (cd->mreq != NULL && cd->resid == 0 && cd->eor) {
750
751			/*
752			 * Now, check for a backchannel reply.
753			 * The XID is in the first uint32_t of the reply
754			 * and the message direction is the second one.
755			 */
756			if ((cd->mreq->m_len >= sizeof(xid_plus_direction) ||
757			    m_length(cd->mreq, NULL) >=
758			    sizeof(xid_plus_direction)) &&
759			    xprt->xp_p2 != NULL) {
760				m_copydata(cd->mreq, 0,
761				    sizeof(xid_plus_direction),
762				    (char *)xid_plus_direction);
763				xid_plus_direction[0] =
764				    ntohl(xid_plus_direction[0]);
765				xid_plus_direction[1] =
766				    ntohl(xid_plus_direction[1]);
767				/* Check message direction. */
768				if (xid_plus_direction[1] == REPLY) {
769					clnt_bck_svccall(xprt->xp_p2,
770					    cd->mreq,
771					    xid_plus_direction[0]);
772					cd->mreq = NULL;
773					continue;
774				}
775			}
776
777			xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
778			cd->mreq = NULL;
779
780			/* Check for next request in a pending queue. */
781			svc_vc_process_pending(xprt);
782			if (cd->mreq == NULL || cd->resid != 0) {
783				SOCK_RECVBUF_LOCK(so);
784				if (!soreadable(so))
785					xprt_inactive_self(xprt);
786				SOCK_RECVBUF_UNLOCK(so);
787			}
788
789			sx_xunlock(&xprt->xp_lock);
790
791			if (! xdr_callmsg(&xdrs, msg)) {
792				XDR_DESTROY(&xdrs);
793				return (FALSE);
794			}
795
796			*addrp = NULL;
797			*mp = xdrmbuf_getall(&xdrs);
798			XDR_DESTROY(&xdrs);
799
800			return (TRUE);
801		}
802
803		/*
804		 * If receiving is disabled so that a TLS handshake can be
805		 * done by the rpctlssd daemon, return FALSE here.
806		 */
807		rcvflag = MSG_DONTWAIT;
808		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0)
809			rcvflag |= MSG_TLSAPPDATA;
810tryagain:
811		if (xprt->xp_dontrcv) {
812			sx_xunlock(&xprt->xp_lock);
813			return (FALSE);
814		}
815
816		/*
817		 * The socket upcall calls xprt_active() which will eventually
818		 * cause the server to call us here. We attempt to
819		 * read as much as possible from the socket and put
820		 * the result in cd->mpending. If the read fails,
821		 * we have drained both cd->mpending and the socket so
822		 * we can call xprt_inactive().
823		 */
824		uio.uio_resid = 1000000000;
825		uio.uio_td = curthread;
826		ctrl = m = NULL;
827		error = soreceive(so, NULL, &uio, &m, &ctrl, &rcvflag);
828
829		if (error == EWOULDBLOCK) {
830			/*
831			 * We must re-test for readability after
832			 * taking the lock to protect us in the case
833			 * where a new packet arrives on the socket
834			 * after our call to soreceive fails with
835			 * EWOULDBLOCK.
836			 */
837			SOCK_RECVBUF_LOCK(so);
838			if (!soreadable(so))
839				xprt_inactive_self(xprt);
840			SOCK_RECVBUF_UNLOCK(so);
841			sx_xunlock(&xprt->xp_lock);
842			return (FALSE);
843		}
844
845		/*
846		 * A return of ENXIO indicates that there is an
847		 * alert record at the head of the
848		 * socket's receive queue, for TLS connections.
849		 * This record needs to be handled in userland
850		 * via an SSL_read() call, so do an upcall to the daemon.
851		 */
852		KRPC_CURVNET_SET(so->so_vnet);
853		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0 &&
854		    error == ENXIO) {
855			KRPC_VNET(svc_vc_tls_alerts)++;
856			KRPC_CURVNET_RESTORE();
857			/* Disable reception. */
858			xprt->xp_dontrcv = TRUE;
859			sx_xunlock(&xprt->xp_lock);
860			ret = rpctls_srv_handlerecord(xprt->xp_sslsec,
861			    xprt->xp_sslusec, xprt->xp_sslrefno,
862			    xprt->xp_sslproc, &reterr);
863			sx_xlock(&xprt->xp_lock);
864			xprt->xp_dontrcv = FALSE;
865			if (ret != RPC_SUCCESS || reterr != RPCTLSERR_OK) {
866				/*
867				 * All we can do is soreceive() it and
868				 * then toss it.
869				 */
870				rcvflag = MSG_DONTWAIT;
871				goto tryagain;
872			}
873			sx_xunlock(&xprt->xp_lock);
874			xprt_active(xprt);   /* Harmless if already active. */
875			return (FALSE);
876		}
877
878		if (error) {
879			KRPC_CURVNET_RESTORE();
880			SOCK_RECVBUF_LOCK(so);
881			if (xprt->xp_upcallset) {
882				xprt->xp_upcallset = 0;
883				soupcall_clear(so, SO_RCV);
884			}
885			SOCK_RECVBUF_UNLOCK(so);
886			xprt_inactive_self(xprt);
887			cd->strm_stat = XPRT_DIED;
888			sx_xunlock(&xprt->xp_lock);
889			return (FALSE);
890		}
891
892		if (!m) {
893			KRPC_CURVNET_RESTORE();
894			/*
895			 * EOF - the other end has closed the socket.
896			 */
897			xprt_inactive_self(xprt);
898			cd->strm_stat = XPRT_DIED;
899			sx_xunlock(&xprt->xp_lock);
900			return (FALSE);
901		}
902
903		/* Process any record header(s). */
904		if (ctrl != NULL) {
905			cmsg = mtod(ctrl, struct cmsghdr *);
906			if (cmsg->cmsg_type == TLS_GET_RECORD &&
907			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
908				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
909				/*
910				 * TLS_RLTYPE_ALERT records should be handled
911				 * since soreceive() would have returned
912				 * ENXIO.  Just throw any other
913				 * non-TLS_RLTYPE_APP records away.
914				 */
915				if (tgr.tls_type != TLS_RLTYPE_APP) {
916					m_freem(m);
917					m_free(ctrl);
918					rcvflag = MSG_DONTWAIT | MSG_TLSAPPDATA;
919					KRPC_CURVNET_RESTORE();
920					goto tryagain;
921				}
922				KRPC_VNET(svc_vc_tls_rx_msgcnt)++;
923				KRPC_VNET(svc_vc_tls_rx_msgbytes) +=
924				    1000000000 - uio.uio_resid;
925			}
926			m_free(ctrl);
927		} else {
928			KRPC_VNET(svc_vc_rx_msgcnt)++;
929			KRPC_VNET(svc_vc_rx_msgbytes) += 1000000000 -
930			    uio.uio_resid;
931		}
932		KRPC_CURVNET_RESTORE();
933
934		if (cd->mpending)
935			m_last(cd->mpending)->m_next = m;
936		else
937			cd->mpending = m;
938	}
939}
940
941static bool_t
942svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
943    struct sockaddr **addrp, struct mbuf **mp)
944{
945	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
946	struct ct_data *ct;
947	struct mbuf *m;
948	XDR xdrs;
949
950	sx_xlock(&xprt->xp_lock);
951	ct = (struct ct_data *)xprt->xp_p2;
952	if (ct == NULL) {
953		sx_xunlock(&xprt->xp_lock);
954		return (FALSE);
955	}
956	mtx_lock(&ct->ct_lock);
957	m = cd->mreq;
958	if (m == NULL) {
959		xprt_inactive_self(xprt);
960		mtx_unlock(&ct->ct_lock);
961		sx_xunlock(&xprt->xp_lock);
962		return (FALSE);
963	}
964	cd->mreq = m->m_nextpkt;
965	mtx_unlock(&ct->ct_lock);
966	sx_xunlock(&xprt->xp_lock);
967
968	xdrmbuf_create(&xdrs, m, XDR_DECODE);
969	if (! xdr_callmsg(&xdrs, msg)) {
970		XDR_DESTROY(&xdrs);
971		return (FALSE);
972	}
973	*addrp = NULL;
974	*mp = xdrmbuf_getall(&xdrs);
975	XDR_DESTROY(&xdrs);
976	return (TRUE);
977}
978
979static bool_t
980svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
981    struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
982{
983	XDR xdrs;
984	struct mbuf *mrep;
985	bool_t stat = TRUE;
986	int error, len, maxextsiz;
987#ifdef KERN_TLS
988	u_int maxlen;
989#endif
990
991	/*
992	 * Leave space for record mark.
993	 */
994	mrep = m_gethdr(M_WAITOK, MT_DATA);
995	mrep->m_data += sizeof(uint32_t);
996
997	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
998
999	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
1000	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
1001		if (!xdr_replymsg(&xdrs, msg))
1002			stat = FALSE;
1003		else
1004			xdrmbuf_append(&xdrs, m);
1005	} else {
1006		stat = xdr_replymsg(&xdrs, msg);
1007	}
1008
1009	if (stat) {
1010		m_fixhdr(mrep);
1011
1012		/*
1013		 * Prepend a record marker containing the reply length.
1014		 */
1015		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
1016		len = mrep->m_pkthdr.len;
1017		*mtod(mrep, uint32_t *) =
1018			htonl(0x80000000 | (len - sizeof(uint32_t)));
1019
1020		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
1021		KRPC_CURVNET_SET(xprt->xp_socket->so_vnet);
1022		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
1023			/*
1024			 * Copy the mbuf chain to a chain of
1025			 * ext_pgs mbuf(s) as required by KERN_TLS.
1026			 */
1027			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
1028#ifdef KERN_TLS
1029			if (rpctls_getinfo(&maxlen, false, false))
1030				maxextsiz = min(maxextsiz, maxlen);
1031#endif
1032			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
1033			KRPC_VNET(svc_vc_tls_tx_msgcnt)++;
1034			KRPC_VNET(svc_vc_tls_tx_msgbytes) += len;
1035		} else {
1036			KRPC_VNET(svc_vc_tx_msgcnt)++;
1037			KRPC_VNET(svc_vc_tx_msgbytes) += len;
1038		}
1039		KRPC_CURVNET_RESTORE();
1040		atomic_add_32(&xprt->xp_snd_cnt, len);
1041		/*
1042		 * sosend consumes mreq.
1043		 */
1044		error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
1045		    0, curthread);
1046		if (!error) {
1047			atomic_add_rel_32(&xprt->xp_snt_cnt, len);
1048			if (seq)
1049				*seq = xprt->xp_snd_cnt;
1050			stat = TRUE;
1051		} else
1052			atomic_subtract_32(&xprt->xp_snd_cnt, len);
1053	} else {
1054		m_freem(mrep);
1055	}
1056
1057	XDR_DESTROY(&xdrs);
1058
1059	return (stat);
1060}
1061
1062static bool_t
1063svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
1064    struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
1065{
1066	struct ct_data *ct;
1067	XDR xdrs;
1068	struct mbuf *mrep;
1069	bool_t stat = TRUE;
1070	int error, maxextsiz;
1071#ifdef KERN_TLS
1072	u_int maxlen;
1073#endif
1074
1075	/*
1076	 * Leave space for record mark.
1077	 */
1078	mrep = m_gethdr(M_WAITOK, MT_DATA);
1079	mrep->m_data += sizeof(uint32_t);
1080
1081	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
1082
1083	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
1084	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
1085		if (!xdr_replymsg(&xdrs, msg))
1086			stat = FALSE;
1087		else
1088			xdrmbuf_append(&xdrs, m);
1089	} else {
1090		stat = xdr_replymsg(&xdrs, msg);
1091	}
1092
1093	if (stat) {
1094		m_fixhdr(mrep);
1095
1096		/*
1097		 * Prepend a record marker containing the reply length.
1098		 */
1099		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
1100		*mtod(mrep, uint32_t *) =
1101			htonl(0x80000000 | (mrep->m_pkthdr.len
1102				- sizeof(uint32_t)));
1103
1104		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
1105		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
1106			/*
1107			 * Copy the mbuf chain to a chain of
1108			 * ext_pgs mbuf(s) as required by KERN_TLS.
1109			 */
1110			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
1111#ifdef KERN_TLS
1112			if (rpctls_getinfo(&maxlen, false, false))
1113				maxextsiz = min(maxextsiz, maxlen);
1114#endif
1115			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
1116		}
1117		sx_xlock(&xprt->xp_lock);
1118		ct = (struct ct_data *)xprt->xp_p2;
1119		if (ct != NULL)
1120			error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
1121			    0, curthread);
1122		else
1123			error = EPIPE;
1124		sx_xunlock(&xprt->xp_lock);
1125		if (!error) {
1126			stat = TRUE;
1127		}
1128	} else {
1129		m_freem(mrep);
1130	}
1131
1132	XDR_DESTROY(&xdrs);
1133
1134	return (stat);
1135}
1136
/*
 * Do-nothing method that always returns FALSE.  It is installed (through
 * a function-pointer cast) as the xp_reply method of rendezvous
 * transports.
 */
static bool_t
svc_vc_null(void)
{

	return (FALSE);
}
1143
1144static int
1145svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
1146{
1147	SVCXPRT *xprt = (SVCXPRT *) arg;
1148
1149	if (soreadable(xprt->xp_socket))
1150		xprt_active(xprt);
1151	return (SU_OK);
1152}
1153
1154static int
1155svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
1156{
1157	SVCXPRT *xprt = (SVCXPRT *) arg;
1158
1159	if (!TAILQ_EMPTY(&head->sol_comp))
1160		xprt_active(xprt);
1161	return (SU_OK);
1162}
1163
#if 0
/*
 * NOTE: this function is compiled out by the surrounding "#if 0" and is
 * never built.
 *
 * Get the effective UID of the sending process. Used by rpcbind, keyserv
 * and rpc.yppasswdd on AF_LOCAL.
 *
 * For an AF_LOCAL peer, stores the effective uid reported by getpeereid()
 * in *uid and returns getpeereid()'s result; returns -1 for any other
 * address family.
 */
int
__rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
	int sock, ret;
	gid_t egid;
	uid_t euid;
	struct sockaddr *sa;

	sock = transp->xp_fd;
	sa = (struct sockaddr *)transp->xp_rtaddr;
	if (sa->sa_family == AF_LOCAL) {
		ret = getpeereid(sock, &euid, &egid);
		if (ret == 0)
			*uid = euid;
		return (ret);
	} else
		return (-1);
}
#endif
1187