1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31#ifndef _SYS_SOCKBUF_H_
32#define _SYS_SOCKBUF_H_
33
34/*
35 * Constants for sb_flags field of struct sockbuf/xsockbuf.
36 */
37#define	SB_TLS_RX	0x01		/* using KTLS on RX */
38#define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
39#define	SB_WAIT		0x04		/* someone is waiting for data/space */
40#define	SB_SEL		0x08		/* someone is selecting */
41#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
42#define	SB_UPCALL	0x20		/* someone wants an upcall */
43#define	SB_NOINTR	0x40		/* operations not interruptible */
44#define	SB_AIO		0x80		/* AIO operations queued */
45#define	SB_KNOTE	0x100		/* kernel note attached */
46#define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
47#define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
48#define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
49#define	SB_STOP		0x1000		/* backpressure indicator */
50#define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
51#define	SB_UNUSED	0x4000		/* previously used for SB_TLS_IFNET */
52#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */
53
54#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
55#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
56#define	SBS_RCVATMARK		0x0040	/* at mark on input */
57
58#if defined(_KERNEL) || defined(_WANT_SOCKET)
59#include <sys/_lock.h>
60#include <sys/_mutex.h>
61#include <sys/_sx.h>
62#include <sys/_task.h>
63
64#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */
65
66struct ktls_session;
67struct mbuf;
68struct sockaddr;
69struct socket;
70struct sockopt;
71struct thread;
72struct selinfo;
73
74/*
75 * Socket buffer
76 *
77 * A buffer starts with the fields that are accessed by I/O multiplexing
78 * APIs like select(2), kevent(2) or AIO and thus are shared between different
79 * buffer implementations.  They are protected by the SOCK_RECVBUF_LOCK()
80 * or SOCK_SENDBUF_LOCK() of the owning socket.
81 *
82 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
83 * methods.
84 *
85 * Protocol specific implementations follow in a union.
86 */
87struct sockbuf {
88	struct	selinfo *sb_sel;	/* process selecting read/write */
89	short	sb_state;		/* socket state on sockbuf */
90	short	sb_flags;		/* flags, see above */
91	u_int	sb_acc;			/* available chars in buffer */
92	u_int	sb_ccc;			/* claimed chars in buffer */
93	u_int	sb_mbcnt;		/* chars of mbufs used */
94	u_int	sb_ctl;			/* non-data chars in buffer */
95	u_int	sb_hiwat;		/* max actual char count */
96	u_int	sb_lowat;		/* low water mark */
97	u_int	sb_mbmax;		/* max chars of mbufs to use */
98	sbintime_t sb_timeo;		/* timeout for read/write */
99	int	(*sb_upcall)(struct socket *, void *, int);
100	void	*sb_upcallarg;
101	TAILQ_HEAD(, kaiocb) sb_aiojobq;	/* pending AIO ops */
102	struct	task sb_aiotask;		/* AIO task */
103	union {
104		/*
105		 * Classic BSD one-size-fits-all socket buffer, capable of
106		 * doing streams and datagrams. The stream part is able
107		 * to perform special features:
108		 * - not ready data (sendfile)
109		 * - TLS
110		 */
111		struct {
112			/* compat: sockbuf lock pointer */
113			struct	mtx *sb_mtx;
114			/* first and last mbufs in the chain */
115			struct	mbuf *sb_mb;
116			struct	mbuf *sb_mbtail;
117			/* first mbuf of last record in socket buffer */
118			struct	mbuf *sb_lastrecord;
119			/* pointer to data to send next (TCP */
120			struct	mbuf *sb_sndptr;
121			/* pointer to first not ready buffer */
122			struct	mbuf *sb_fnrdy;
123			/* byte offset of ptr into chain, used with sb_sndptr */
124			u_int	sb_sndptroff;
125			/* TLS */
126			u_int	sb_tlscc;	/* TLS chain characters */
127			u_int	sb_tlsdcc;	/* characters being decrypted */
128			struct	mbuf *sb_mtls;	/*  TLS mbuf chain */
129			struct	mbuf *sb_mtlstail; /* last mbuf in TLS chain */
130			uint64_t sb_tls_seqno;	/* TLS seqno */
131			struct	ktls_session *sb_tls_info; /* TLS state */
132		};
133		/*
134		 * PF_UNIX/SOCK_DGRAM
135		 *
136		 * Local protocol, thus we should buffer on the receive side
137		 * only.  However, in one to many configuration we don't want
138		 * a single receive buffer to be shared.  So we would link
139		 * send buffers onto receive buffer.  All the fields are locked
140		 * by the receive buffer lock.
141		 */
142		struct {
143			/*
144			 * For receive buffer: own queue of this buffer for
145			 * unconnected sends.  For send buffer: queue lended
146			 * to the peer receive buffer, to isolate ourselves
147			 * from other senders.
148			 */
149			STAILQ_HEAD(, mbuf)	uxdg_mb;
150			/* For receive buffer: datagram seen via MSG_PEEK. */
151			struct mbuf		*uxdg_peeked;
152			/*
153			 * For receive buffer: queue of send buffers of
154			 * connected peers.  For send buffer: linkage on
155			 * connected peer receive buffer queue.
156			 */
157			union {
158				TAILQ_HEAD(, sockbuf)	uxdg_conns;
159				TAILQ_ENTRY(sockbuf)	uxdg_clist;
160			};
161			/* Counters for this buffer uxdg_mb chain + peeked. */
162			u_int uxdg_cc;
163			u_int uxdg_ctl;
164			u_int uxdg_mbcnt;
165		};
166		/*
167		 * Netlink socket.
168		 */
169		struct {
170			TAILQ_HEAD(, nl_buf)	nl_queue;
171		};
172	};
173};
174
175#endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
176#ifdef _KERNEL
177
178/* 'which' values for KPIs that operate on one buffer of a socket. */
179typedef enum { SO_RCV, SO_SND } sb_which;
180
181/*
182 * Per-socket buffer mutex used to protect most fields in the socket buffer.
183 * These make use of the mutex pointer embedded in struct sockbuf, which
184 * currently just references mutexes in the containing socket.  The
185 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
186 * these locking macros.
187 */
188#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
189#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
190#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
191#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
192#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
193#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
194
195/*
196 * Socket buffer private mbuf(9) flags.
197 */
198#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
199#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
200#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
201
202void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
203void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
204void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
205void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
206int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
207	    struct mbuf *m0, struct mbuf *control);
208int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
209	    struct mbuf *m0, struct mbuf *control);
210int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
211	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
212void	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
213	    struct mbuf *control, int flags);
214void	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
215	    struct mbuf *control, int flags);
216void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
217void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
218void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
219struct mbuf *
220	sbcreatecontrol(const void *p, u_int size, int type, int level,
221	    int wait);
222void	sbdestroy(struct socket *, sb_which);
223void	sbdrop(struct sockbuf *sb, int len);
224void	sbdrop_locked(struct sockbuf *sb, int len);
225struct mbuf *
226	sbcut_locked(struct sockbuf *sb, int len);
227void	sbdroprecord(struct sockbuf *sb);
228void	sbdroprecord_locked(struct sockbuf *sb);
229void	sbflush(struct sockbuf *sb);
230void	sbflush_locked(struct sockbuf *sb);
231void	sbrelease(struct socket *, sb_which);
232void	sbrelease_locked(struct socket *, sb_which);
233int	sbsetopt(struct socket *so, struct sockopt *);
234bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
235	    struct thread *td);
236bool	sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
237	    u_long buf_max, struct thread *td);
238void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
239struct mbuf *
240	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
241struct mbuf *
242	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
243int	sbwait(struct socket *, sb_which);
244void	sballoc(struct sockbuf *, struct mbuf *);
245void	sbfree(struct sockbuf *, struct mbuf *);
246void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
247void	sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
248int	sbready(struct sockbuf *, struct mbuf *, int);
249
250/*
251 * Return how much data is available to be taken out of socket
252 * buffer right now.
253 */
254static inline u_int
255sbavail(struct sockbuf *sb)
256{
257
258#if 0
259	SOCKBUF_LOCK_ASSERT(sb);
260#endif
261	return (sb->sb_acc);
262}
263
264/*
265 * Return how much data sits there in the socket buffer
266 * It might be that some data is not yet ready to be read.
267 */
268static inline u_int
269sbused(struct sockbuf *sb)
270{
271
272#if 0
273	SOCKBUF_LOCK_ASSERT(sb);
274#endif
275	return (sb->sb_ccc);
276}
277
278/*
279 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
280 * This is problematical if the fields are unsigned, as the space might
281 * still be negative (ccc > hiwat or mbcnt > mbmax).
282 */
283static inline long
284sbspace(struct sockbuf *sb)
285{
286	int bleft, mleft;		/* size should match sockbuf fields */
287
288#if 0
289	SOCKBUF_LOCK_ASSERT(sb);
290#endif
291
292	if (sb->sb_flags & SB_STOP)
293		return(0);
294
295	bleft = sb->sb_hiwat - sb->sb_ccc;
296	mleft = sb->sb_mbmax - sb->sb_mbcnt;
297
298	return ((bleft < mleft) ? bleft : mleft);
299}
300
301#define SB_EMPTY_FIXUP(sb) do {						\
302	if ((sb)->sb_mb == NULL) {					\
303		(sb)->sb_mbtail = NULL;					\
304		(sb)->sb_lastrecord = NULL;				\
305	}								\
306} while (/*CONSTCOND*/0)
307
308#ifdef SOCKBUF_DEBUG
309void	sblastrecordchk(struct sockbuf *, const char *, int);
310void	sblastmbufchk(struct sockbuf *, const char *, int);
311void	sbcheck(struct sockbuf *, const char *, int);
312#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
313#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
314#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
315#else
316#define	SBLASTRECORDCHK(sb)	do {} while (0)
317#define	SBLASTMBUFCHK(sb)	do {} while (0)
318#define	SBCHECK(sb)		do {} while (0)
319#endif /* SOCKBUF_DEBUG */
320
321#endif /* _KERNEL */
322
323#endif /* _SYS_SOCKBUF_H_ */
324