/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef _SYS_SOCKBUF_H_
#define	_SYS_SOCKBUF_H_

/*
 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 */
#define	SB_TLS_RX	0x01		/* using KTLS on RX */
#define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
#define	SB_WAIT		0x04		/* someone is waiting for data/space */
#define	SB_SEL		0x08		/* someone is selecting */
#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
#define	SB_UPCALL	0x20		/* someone wants an upcall */
#define	SB_NOINTR	0x40		/* operations not interruptible */
#define	SB_AIO		0x80		/* AIO operations queued */
#define	SB_KNOTE	0x100		/* kernel note attached */
#define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
#define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
#define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
#define	SB_STOP		0x1000		/* backpressure indicator */
#define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
#define	SB_UNUSED	0x4000		/* previously used for SB_TLS_IFNET */
#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */

#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */

#if defined(_KERNEL) || defined(_WANT_SOCKET)
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
#include <sys/_task.h>

#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */

struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;

/*
 * Socket buffer
 *
 * A buffer starts with the fields that are accessed by I/O multiplexing
 * APIs like select(2), kevent(2) or AIO and thus are shared between different
 * buffer implementations.  They are protected by the SOCK_RECVBUF_LOCK()
 * or SOCK_SENDBUF_LOCK() of the owning socket.
 *
 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
 * methods.
 *
 * Protocol specific implementations follow in a union.
 */
struct sockbuf {
	struct	selinfo *sb_sel;	/* process selecting read/write */
	short	sb_state;		/* socket state on sockbuf */
	short	sb_flags;		/* flags, see above */
	u_int	sb_acc;			/* available chars in buffer */
	u_int	sb_ccc;			/* claimed chars in buffer */
	u_int	sb_mbcnt;		/* chars of mbufs used */
	u_int	sb_ctl;			/* non-data chars in buffer */
	u_int	sb_hiwat;		/* max actual char count */
	u_int	sb_lowat;		/* low water mark */
	u_int	sb_mbmax;		/* max chars of mbufs to use */
	sbintime_t sb_timeo;		/* timeout for read/write */
	int	(*sb_upcall)(struct socket *, void *, int);
	void	*sb_upcallarg;
	TAILQ_HEAD(, kaiocb) sb_aiojobq;	/* pending AIO ops */
	struct	task sb_aiotask;		/* AIO task */
	union {
		/*
		 * Classic BSD one-size-fits-all socket buffer, capable of
		 * doing streams and datagrams.  The stream part is able
		 * to perform special features:
		 * - not ready data (sendfile)
		 * - TLS
		 */
		struct {
			/* compat: sockbuf lock pointer */
			struct	mtx *sb_mtx;
			/* first and last mbufs in the chain */
			struct	mbuf *sb_mb;
			struct	mbuf *sb_mbtail;
			/* first mbuf of last record in socket buffer */
			struct	mbuf *sb_lastrecord;
			/* pointer to data to send next (TCP) */
			struct	mbuf *sb_sndptr;
			/* pointer to first not ready buffer */
			struct	mbuf *sb_fnrdy;
			/* byte offset of ptr into chain, used with sb_sndptr */
			u_int	sb_sndptroff;
			/* TLS */
			u_int	sb_tlscc;	/* TLS chain characters */
			u_int	sb_tlsdcc;	/* characters being decrypted */
			struct	mbuf *sb_mtls;	/* TLS mbuf chain */
			struct	mbuf *sb_mtlstail; /* last mbuf in TLS chain */
			uint64_t sb_tls_seqno;	/* TLS seqno */
			struct	ktls_session *sb_tls_info; /* TLS state */
		};
		/*
		 * PF_UNIX/SOCK_DGRAM
		 *
		 * Local protocol, thus we should buffer on the receive side
		 * only.  However, in one to many configuration we don't want
		 * a single receive buffer to be shared.  So we would link
		 * send buffers onto receive buffer.  All the fields are locked
		 * by the receive buffer lock.
		 */
		struct {
			/*
			 * For receive buffer: own queue of this buffer for
			 * unconnected sends.  For send buffer: queue lent
			 * to the peer receive buffer, to isolate ourselves
			 * from other senders.
			 */
			STAILQ_HEAD(, mbuf)	uxdg_mb;
			/* For receive buffer: datagram seen via MSG_PEEK. */
			struct mbuf		*uxdg_peeked;
			/*
			 * For receive buffer: queue of send buffers of
			 * connected peers.  For send buffer: linkage on
			 * connected peer receive buffer queue.
			 */
			union {
				TAILQ_HEAD(, sockbuf)	uxdg_conns;
				TAILQ_ENTRY(sockbuf)	uxdg_clist;
			};
			/* Counters for this buffer uxdg_mb chain + peeked. */
			u_int uxdg_cc;
			u_int uxdg_ctl;
			u_int uxdg_mbcnt;
		};
		/*
		 * Netlink socket.
		 */
		struct {
			TAILQ_HEAD(, nl_buf)	nl_queue;
		};
	};
};

#endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
#ifdef _KERNEL

/* 'which' values for KPIs that operate on one buffer of a socket. */
typedef enum { SO_RCV, SO_SND } sb_which;

/*
 * Per-socket buffer mutex used to protect most fields in the socket buffer.
 * These make use of the mutex pointer embedded in struct sockbuf, which
 * currently just references mutexes in the containing socket.  The
 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
 * these locking macros.
 */
#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)

/*
 * Socket buffer private mbuf(9) flags.
 */
#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)

void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
	    struct mbuf *m0, struct mbuf *control);
int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
	    struct mbuf *m0, struct mbuf *control);
int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
void	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
	    struct mbuf *control, int flags);
void	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
	    struct mbuf *control, int flags);
void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
	sbcreatecontrol(const void *p, u_int size, int type, int level,
	    int wait);
void	sbdestroy(struct socket *, sb_which);
void	sbdrop(struct sockbuf *sb, int len);
void	sbdrop_locked(struct sockbuf *sb, int len);
struct mbuf *
	sbcut_locked(struct sockbuf *sb, int len);
void	sbdroprecord(struct sockbuf *sb);
void	sbdroprecord_locked(struct sockbuf *sb);
void	sbflush(struct sockbuf *sb);
void	sbflush_locked(struct sockbuf *sb);
void	sbrelease(struct socket *, sb_which);
void	sbrelease_locked(struct socket *, sb_which);
int	sbsetopt(struct socket *so, struct sockopt *);
bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
	    struct thread *td);
bool	sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
	    u_long buf_max, struct thread *td);
void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
struct mbuf *
	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
struct mbuf *
	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
int	sbwait(struct socket *, sb_which);
void	sballoc(struct sockbuf *, struct mbuf *);
void	sbfree(struct sockbuf *, struct mbuf *);
void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
void	sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
int	sbready(struct sockbuf *, struct mbuf *, int);

/*
 * Return how much data is available to be taken out of socket
 * buffer right now.  This is simply the current value of sb_acc; the
 * lock assertion is compiled out, so no locking is enforced here.
 */
static inline u_int
sbavail(struct sockbuf *sb)
{

#if 0
	SOCKBUF_LOCK_ASSERT(sb);
#endif
	return (sb->sb_acc);
}

/*
 * Return how much data sits there in the socket buffer.
 * It might be that some data is not yet ready to be read.
 * This is simply the current value of sb_ccc; the lock assertion is
 * compiled out, so no locking is enforced here.
 */
static inline u_int
sbused(struct sockbuf *sb)
{

#if 0
	SOCKBUF_LOCK_ASSERT(sb);
#endif
	return (sb->sb_ccc);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 *
 * Returns 0 whenever SB_STOP is set in sb_flags.  Otherwise returns the
 * smaller of the remaining character budget (sb_hiwat - sb_ccc) and the
 * remaining mbuf budget (sb_mbmax - sb_mbcnt).
 *
 * The sockbuf fields are unsigned, yet the space may legitimately be
 * negative (ccc > hiwat or mbcnt > mbmax).  The differences are therefore
 * stored in signed ints of the same width before being compared, so callers
 * must be prepared for a negative result.
 */
static inline long
sbspace(struct sockbuf *sb)
{
	int bleft, mleft;		/* size should match sockbuf fields */

#if 0
	SOCKBUF_LOCK_ASSERT(sb);
#endif

	if (sb->sb_flags & SB_STOP)
		return (0);

	bleft = sb->sb_hiwat - sb->sb_ccc;
	mleft = sb->sb_mbmax - sb->sb_mbcnt;

	return ((bleft < mleft) ? bleft : mleft);
}

/*
 * When the mbuf chain head (sb_mb) is NULL, also clear the tail and
 * last-record pointers so they do not keep referring to a chain that is
 * no longer there.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb == NULL) {					\
		(sb)->sb_mbtail = NULL;					\
		(sb)->sb_lastrecord = NULL;				\
	}								\
} while (/*CONSTCOND*/0)

/*
 * With SOCKBUF_DEBUG the check macros call the out-of-line checkers, passing
 * the caller's file and line; otherwise they expand to empty statements.
 */
#ifdef SOCKBUF_DEBUG
void	sblastrecordchk(struct sockbuf *, const char *, int);
void	sblastmbufchk(struct sockbuf *, const char *, int);
void	sbcheck(struct sockbuf *, const char *, int);
#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
#else
#define	SBLASTRECORDCHK(sb)	do {} while (0)
#define	SBLASTMBUFCHK(sb)	do {} while (0)
#define	SBCHECK(sb)		do {} while (0)
#endif /* SOCKBUF_DEBUG */

#endif /* _KERNEL */

#endif /* _SYS_SOCKBUF_H_ */