/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

32#ifndef _NETINET_TCP_H_
33#define _NETINET_TCP_H_
34
35#include <sys/types.h>
36
37#if __BSD_VISIBLE
38
/* 32-bit type used for the th_seq and th_ack fields of struct tcphdr. */
typedef	u_int32_t tcp_seq;

#define tcp6_seq	tcp_seq	/* for KAME src sync over BSD*'s */
#define tcp6hdr		tcphdr	/* for KAME src sync over BSD*'s */

44/*
45 * TCP header.
46 * Per RFC 793, September, 1981.
47 */
48struct tcphdr {
49	u_short	th_sport;		/* source port */
50	u_short	th_dport;		/* destination port */
51	tcp_seq	th_seq;			/* sequence number */
52	tcp_seq	th_ack;			/* acknowledgement number */
53#if BYTE_ORDER == LITTLE_ENDIAN
54	u_char	th_x2:4,		/* upper 4 (reserved) flags */
55		th_off:4;		/* data offset */
56#endif
57#if BYTE_ORDER == BIG_ENDIAN
58	u_char	th_off:4,		/* data offset */
59		th_x2:4;		/* upper 4 (reserved) flags */
60#endif
61	u_char	th_flags;
62#define	TH_FIN	0x01
63#define	TH_SYN	0x02
64#define	TH_RST	0x04
65#define	TH_PUSH	0x08
66#define	TH_ACK	0x10
67#define	TH_URG	0x20
68#define	TH_ECE	0x40
69#define	TH_CWR	0x80
70#define	TH_AE	0x100			/* maps into th_x2 */
71#define	TH_RES3	0x200
72#define	TH_RES2	0x400
73#define	TH_RES1	0x800
74#define	TH_FLAGS	(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
75#define	PRINT_TH_FLAGS	"\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR\11AE"
76
77	u_short	th_win;			/* window */
78	u_short	th_sum;			/* checksum */
79	u_short	th_urp;			/* urgent pointer */
80};
81
82static __inline uint16_t
83__tcp_get_flags(const struct tcphdr *th)
84{
85	return (((uint16_t)th->th_x2 << 8) | th->th_flags);
86}
87
88static __inline void
89__tcp_set_flags(struct tcphdr *th, uint16_t flags)
90{
91	th->th_x2 = (flags >> 8) & 0x0f;
92	th->th_flags = flags & 0xff;
93}
94
/* Unprefixed names for the flag accessors are only defined under _KERNEL. */
#ifdef _KERNEL
#define tcp_get_flags(th) __tcp_get_flags(th)
#define tcp_set_flags(th, flags) __tcp_set_flags(th, flags)
#endif

/* Round an option length up to the next multiple of 4 bytes. */
#define	PADTCPOLEN(len)		((((len) / 4) + !!((len) % 4)) * 4)

/* TCP option kinds (TCPOPT_*) and their lengths in bytes (TCPOLEN_*). */
#define	TCPOPT_EOL		0
#define	   TCPOLEN_EOL			1
#define	TCPOPT_PAD		0		/* padding after EOL */
#define	   TCPOLEN_PAD			1
#define	TCPOPT_NOP		1
#define	   TCPOLEN_NOP			1
#define	TCPOPT_MAXSEG		2
#define	   TCPOLEN_MAXSEG		4
#define	TCPOPT_WINDOW		3
#define	   TCPOLEN_WINDOW		3
#define	TCPOPT_SACK_PERMITTED	4
#define	   TCPOLEN_SACK_PERMITTED	2
#define	TCPOPT_SACK		5
#define	   TCPOLEN_SACKHDR		2
#define	   TCPOLEN_SACK			8	/* 2*sizeof(tcp_seq) */
#define	TCPOPT_TIMESTAMP	8
#define	   TCPOLEN_TIMESTAMP		10
#define	   TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
#define	TCPOPT_SIGNATURE	19		/* Keyed MD5: RFC 2385 */
#define	   TCPOLEN_SIGNATURE		18
#define	TCPOPT_FAST_OPEN	34
#define	   TCPOLEN_FAST_OPEN_EMPTY	2

#define	MAX_TCPOPTLEN		40	/* Absolute maximum TCP options len */

/* Miscellaneous constants */
#define	MAX_SACK_BLKS	6	/* Max # SACK blocks stored at receiver side */
#define	TCP_MAX_SACK	4	/* MAX # SACKs sent in any segment */

/*
 * The default maximum segment size (MSS) to be used for new TCP connections
 * when path MTU discovery is not enabled.
 *
 * RFC879 derives the default MSS from the largest datagram size hosts are
 * minimally required to handle directly or through IP reassembly minus the
 * size of the IP and TCP header.  With IPv6 the minimum MTU is specified
 * in RFC2460.
 *
 * For IPv4 the MSS is 576 - sizeof(struct tcpiphdr)
 * For IPv6 the MSS is IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct tcphdr)
 *
 * We use explicit numerical definitions here to avoid header pollution.
 */
#define	TCP_MSS		536
#define	TCP6_MSS	1220

/*
 * Limit the lowest MSS we accept for path MTU discovery and the TCP SYN MSS
 * option.  Allowing low values of MSS can consume significant resources and
 * be used to mount a resource exhaustion attack.
 * Connections requesting lower MSS values will be rounded up to this value
 * and the IP_DF flag will be cleared to allow fragmentation along the path.
 *
 * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments.  Setting
 * it to "0" disables the minmss check.
 *
 * The default value is fine for TCP across the Internet's smallest official
 * link MTU (256 bytes for AX.25 packet radio).  However, a connection is very
 * unlikely to come across such low MTU interfaces these days (anno domini 2003).
 */
#define	TCP_MINMSS 216

#define	TCP_MAXWIN	65535	/* largest value for (unscaled) window */
#define	TTCP_CLIENT_SND_WND	4096	/* dflt send window for T/TCP client */

#define TCP_MAX_WINSHIFT	14	/* maximum window shift */

#define TCP_MAXBURST		4	/* maximum segments in a burst */

/* 0xf is the largest value of the 4-bit th_off field; << 2 scales by 4. */
#define TCP_MAXHLEN	(0xf<<2)	/* max length of header in bytes */
#define TCP_MAXOLEN	(TCP_MAXHLEN - sizeof(struct tcphdr))
					/* max space left for options */

#define TCP_FASTOPEN_MIN_COOKIE_LEN	4	/* Per RFC7413 */
#define TCP_FASTOPEN_MAX_COOKIE_LEN	16	/* Per RFC7413 */
#define TCP_FASTOPEN_PSK_LEN		16	/* Same as TCP_FASTOPEN_KEY_LEN */
178#endif /* __BSD_VISIBLE */
179
/*
 * User-settable options (used with setsockopt).  These are discrete
 * values and are not masked together.  Some values appear to be
 * bitmasks for historical reasons.
 */
#define	TCP_NODELAY	1	/* don't delay send to coalesce packets */
186#if __BSD_VISIBLE
#define	TCP_MAXSEG	2	/* set maximum segment size */
#define TCP_NOPUSH	4	/* don't push last block of write */
#define TCP_NOOPT	8	/* don't use TCP options */
#define TCP_MD5SIG	16	/* use MD5 digests (RFC2385) */
#define	TCP_INFO	32	/* retrieve tcp_info structure */
#define	TCP_STATS	33	/* retrieve stats blob structure */
#define	TCP_LOG		34	/* configure event logging for connection */
#define	TCP_LOGBUF	35	/* retrieve event log for connection */
#define	TCP_LOGID	36	/* configure log ID to correlate connections */
#define	TCP_LOGDUMP	37	/* dump connection log events to device */
#define	TCP_LOGDUMPID	38	/* dump events from connections with same ID to
				   device */
#define	TCP_TXTLS_ENABLE 39	/* TLS framing and encryption for transmit */
#define	TCP_TXTLS_MODE	40	/* Transmit TLS mode */
#define	TCP_RXTLS_ENABLE 41	/* TLS framing and encryption for receive */
#define	TCP_RXTLS_MODE	42	/* Receive TLS mode */
#define	TCP_IWND_NB	43	/* Override initial window (units: bytes) */
#define	TCP_IWND_NSEG	44	/* Override initial window (units: MSS segs) */
#ifdef _KERNEL
#define	TCP_USE_DDP	45	/* Use direct data placement for so_rcvbuf */
#endif
#define	TCP_LOGID_CNT	46	/* get number of connections with the same ID */
#define	TCP_LOG_TAG	47	/* configure tag for grouping logs */
#define	TCP_USER_LOG	48	/* userspace log event */
#define	TCP_CONGESTION	64	/* get/set congestion control algorithm */
#define	TCP_CCALGOOPT	65	/* get/set cc algorithm specific options */
#define	TCP_MAXUNACKTIME 68	/* maximum time without making progress (sec) */
#define	TCP_MAXPEAKRATE 69	/* maximum peak rate allowed (kbps) */
#define TCP_IDLE_REDUCE 70	/* Reduce cwnd on idle input */
#define TCP_REMOTE_UDP_ENCAPS_PORT 71	/* Enable TCP over UDP tunneling via the specified port */
#define TCP_DELACK	72	/* socket option for delayed ack */
#define TCP_FIN_IS_RST 73	/* A FIN from the peer is treated as a RST */
#define TCP_LOG_LIMIT  74	/* Limit to number of records in tcp-log */
#define TCP_SHARED_CWND_ALLOWED 75	/* Use of a shared cwnd is allowed */
#define TCP_PROC_ACCOUNTING 76	/* Do accounting on tcp cpu usage and counts */
#define TCP_USE_CMP_ACKS 77	/* The transport can handle the Compressed mbuf acks */
#define	TCP_PERF_INFO	78	/* retrieve accounting counters */
#define	TCP_KEEPINIT	128	/* N, time to establish connection */
#define	TCP_KEEPIDLE	256	/* L,N,X start keepalives after this period */
#define	TCP_KEEPINTVL	512	/* L,N interval between keepalives */
#define	TCP_KEEPCNT	1024	/* L,N number of keepalives before close */
#define	TCP_FASTOPEN	1025	/* enable TFO / was created via TFO */
#define	TCP_PCAP_OUT	2048	/* number of output packets to keep */
#define	TCP_PCAP_IN	4096	/* number of input packets to keep */
#define TCP_FUNCTION_BLK 8192	/* Set the tcp function pointers to the specified stack */
#define TCP_FUNCTION_ALIAS 8193	/* Get the current tcp function pointer name alias */
/*
 * Options for Rack and BBR.
 * Several numeric values below are deliberately shared by two names where
 * an option number was recycled; see the per-line comments.
 */
#define	TCP_REUSPORT_LB_NUMA   1026	/* set listen socket numa domain */
#define TCP_RACK_MBUF_QUEUE   1050 /* Do we allow mbuf queuing if supported */
#define TCP_RACK_PROP	      1051 /* Not used */
#define TCP_RACK_TLP_REDUCE   1052 /* RACK TLP cwnd reduction (bool) */
#define TCP_RACK_PACE_REDUCE  1053 /* RACK Pacing reduction factor (divisor) */
#define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send  */
#define TCP_RACK_PACE_ALWAYS  1055 /* Use the always pace method */
#define TCP_RACK_PROP_RATE    1056 /* Not used */
#define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */
#define TCP_RACK_MIN_TO       1058 /* Minimum time between rack t-o's in ms */
#define TCP_RACK_EARLY_RECOV  1059 /* Not used */
#define TCP_RACK_EARLY_SEG    1060 /* If early recovery max segments */
#define TCP_RACK_REORD_THRESH 1061 /* RACK reorder threshold (shift amount) */
#define TCP_RACK_REORD_FADE   1062 /* Does reordering fade after ms time */
#define TCP_RACK_TLP_THRESH   1063 /* RACK TLP threshold i.e. srtt+(srtt/N) */
#define TCP_RACK_PKT_DELAY    1064 /* RACK added ms i.e. rack-rtt + reord + N */
#define TCP_RACK_TLP_INC_VAR  1065 /* Does TLP include rtt variance in t-o */
#define TCP_BBR_IWINTSO	      1067 /* Initial TSO window for BBRs first sends */
#define TCP_BBR_RECFORCE      1068 /* Enter recovery force out a segment disregard pacer no longer valid */
#define TCP_BBR_STARTUP_PG    1069 /* Startup pacing gain */
#define TCP_BBR_DRAIN_PG      1070 /* Drain pacing gain */
#define TCP_BBR_RWND_IS_APP   1071 /* Rwnd limited is considered app limited */
#define TCP_BBR_PROBE_RTT_INT 1072 /* How long in useconds between probe-rtt */
#define TCP_BBR_ONE_RETRAN    1073 /* Is only one segment allowed out during retran */
#define TCP_BBR_STARTUP_LOSS_EXIT 1074	/* Do we exit a loss during startup if not 20% incr */
#define TCP_BBR_USE_LOWGAIN   1075 /* lower the gain in PROBE_BW enable */
#define TCP_BBR_LOWGAIN_THRESH 1076 /* Unused after 2.3 morphs to TSLIMITS >= 2.3 */
#define TCP_BBR_TSLIMITS 1076	   /* Do we use experimental Timestamp limiting for our algo */
#define TCP_BBR_LOWGAIN_HALF  1077 /* Unused after 2.3 */
#define TCP_BBR_PACE_OH        1077 /* Reused in 4.2 for pacing overhead setting */
#define TCP_BBR_LOWGAIN_FD    1078 /* Unused after 2.3 */
#define TCP_BBR_HOLD_TARGET 1078	/* For 4.3 on */
#define TCP_BBR_USEDEL_RATE   1079 /* Enable use of delivery rate for loss recovery */
#define TCP_BBR_MIN_RTO       1080 /* Min RTO in milliseconds */
#define TCP_BBR_MAX_RTO	      1081 /* Max RTO in milliseconds */
#define TCP_BBR_REC_OVER_HPTS 1082 /* Recovery override hpts settings 0/1/3 */
#define TCP_BBR_UNLIMITED     1083 /* Not used before 2.3 and morphs to algorithm >= 2.3 */
#define TCP_BBR_ALGORITHM     1083 /* What measurement algo does BBR use netflix=0, google=1 */
#define TCP_BBR_DRAIN_INC_EXTRA 1084 /* Does the 3/4 drain target include the extra gain */
#define TCP_BBR_STARTUP_EXIT_EPOCH 1085 /* what epoch gets us out of startup */
#define TCP_BBR_PACE_PER_SEC   1086
#define TCP_BBR_PACE_DEL_TAR   1087
#define TCP_BBR_PACE_SEG_MAX   1088
#define TCP_BBR_PACE_SEG_MIN   1089
#define TCP_BBR_PACE_CROSS     1090
#define TCP_RACK_IDLE_REDUCE_HIGH 1092  /* Reduce the highest cwnd seen to IW on idle */
#define TCP_RACK_MIN_PACE      1093	/* Do we enforce rack min pace time */
#define TCP_RACK_MIN_PACE_SEG  1094	/* If so what is the seg threshold */
#define TCP_RACK_GP_INCREASE   1094	/* After 4.1 its the GP increase in older rack */
#define TCP_RACK_TLP_USE       1095
#define TCP_BBR_ACK_COMP_ALG   1096	/* Not used */
#define TCP_BBR_TMR_PACE_OH    1096	/* Recycled in 4.2 */
#define TCP_BBR_EXTRA_GAIN     1097
#define TCP_RACK_DO_DETECTION  1097	/* Recycle of extra gain for rack, attack detection */
#define TCP_BBR_RACK_RTT_USE   1098	/* what RTT should we use 0, 1, or 2? */
#define TCP_BBR_RETRAN_WTSO    1099
#define TCP_DATA_AFTER_CLOSE   1100
#define TCP_BBR_PROBE_RTT_GAIN 1101
#define TCP_BBR_PROBE_RTT_LEN  1102
#define TCP_BBR_SEND_IWND_IN_TSO 1103	/* Do we burst out whole iwin size chunks at start? */
#define TCP_BBR_USE_RACK_RR	 1104	/* Do we use the rack rapid recovery for pacing rxt's */
#define TCP_BBR_USE_RACK_CHEAT TCP_BBR_USE_RACK_RR /* Compat. */
#define TCP_BBR_HDWR_PACE      1105	/* Enable/disable hardware pacing */
#define TCP_BBR_UTTER_MAX_TSO  1106	/* Do we enforce an utter max TSO size */
#define TCP_BBR_EXTRA_STATE    1107	/* Special exit-persist catch up */
#define TCP_BBR_FLOOR_MIN_TSO  1108     /* The min tso size */
#define TCP_BBR_MIN_TOPACEOUT  1109	/* Do we suspend pacing until */
#define TCP_BBR_TSTMP_RAISES   1110	/* Can a timestamp measurement raise the b/w */
#define TCP_BBR_POLICER_DETECT 1111	/* Turn on/off google mode policer detection */
#define TCP_BBR_RACK_INIT_RATE 1112	/* Set an initial pacing rate for when we have no b/w in kbits per sec */
#define TCP_RACK_RR_CONF	1113 /* Rack rapid recovery configuration control */
#define TCP_RACK_CHEAT_NOT_CONF_RATE TCP_RACK_RR_CONF
#define TCP_RACK_GP_INCREASE_CA   1114	/* GP increase for Congestion Avoidance */
#define TCP_RACK_GP_INCREASE_SS   1115	/* GP increase for Slow Start */
#define TCP_RACK_GP_INCREASE_REC  1116	/* GP increase for Recovery */
#define TCP_RACK_FORCE_MSEG	1117	/* Override to use the user set max-seg value */
#define TCP_RACK_PACE_RATE_CA  1118 /* Pacing rate for Congestion Avoidance */
#define TCP_RACK_PACE_RATE_SS  1119 /* Pacing rate for Slow Start */
#define TCP_RACK_PACE_RATE_REC  1120 /* Pacing rate for Recovery */
#define TCP_NO_PRR		1122 /* If pacing, don't use prr  */
#define TCP_RACK_NONRXT_CFG_RATE 1123 /* In recovery does a non-rxt use the cfg rate */
#define TCP_SHARED_CWND_ENABLE   1124	/* Use a shared cwnd if allowed */
#define TCP_TIMELY_DYN_ADJ       1125 /* Do we attempt dynamic multiplier adjustment with timely. */
#define TCP_RACK_NO_PUSH_AT_MAX 1126 /* For timely do not push if we are over max rtt */
#define TCP_RACK_PACE_TO_FILL 1127 /* If we are not in recovery, always pace to fill the cwnd in 1 RTT */
#define TCP_SHARED_CWND_TIME_LIMIT 1128 /* we should limit to low time values the scwnd life */
#define TCP_RACK_PROFILE 1129	/* Select a profile that sets multiple options */
#define TCP_HDWR_RATE_CAP 1130 /* Allow hardware rates to cap pacing rate */
#define TCP_PACING_RATE_CAP 1131 /* Highest rate allowed in pacing in bytes per second (uint64_t) */
#define TCP_HDWR_UP_ONLY 1132	/* Allow the pacing rate to climb but not descend (with the exception of fill-cw) */
#define TCP_RACK_ABC_VAL 1133	/* Set a local ABC value different than the system default */
#define TCP_REC_ABC_VAL 1134	/* Do we use the ABC value for recovery or the override one from sysctl  */
#define TCP_RACK_MEASURE_CNT 1135 /* How many measurements are required in GP pacing */
#define TCP_DEFER_OPTIONS 1136 /* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */
#define TCP_FAST_RSM_HACK 1137	/* Not used in modern stacks */
#define TCP_RACK_PACING_BETA 1138	/* Changing the beta for pacing */
#define TCP_RACK_PACING_BETA_ECN 1139	/* Changing the beta for ecn with pacing */
#define TCP_RACK_TIMER_SLOP 1140	/* Set or get the timer slop used */
#define TCP_RACK_DSACK_OPT 1141		/* How do we setup rack timer DSACK options bit 1/2 */
#define TCP_RACK_ENABLE_HYSTART 1142	/* Do we allow hystart in the CC modules */
#define TCP_RACK_SET_RXT_OPTIONS 1143	/* Set the bits in the retransmit options */
#define TCP_RACK_HI_BETA 1144 /* Turn on/off high beta */
#define TCP_RACK_SPLIT_LIMIT 1145	/* Set a split limit for split allocations */
#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */
#define TCP_RACK_PACE_MIN_SEG 1147	/* Pacing min seg size rack will use */
#define TCP_RACK_DGP_IN_REC 1148	/* Do we use full DGP in recovery? */
#define TCP_POLICER_DETECT 1149	/* Do we apply thresholds to rack to detect and compensate for policers? */
#define TCP_RXT_CLAMP TCP_POLICER_DETECT
#define TCP_HYBRID_PACING   1150	/* Hybrid pacing enablement */
#define TCP_PACING_DND	    1151	/* When pacing with rr_config=3 can sacks disturb us */
#define TCP_SS_EEXIT        1152	/* Do we do early exit from slowstart if no b/w growth */
#define TCP_DGP_UPPER_BOUNDS 1153	/* SS and CA upper bound in percentage */
#define TCP_NO_TIMELY	    1154	/* Disable/enable Timely */
#define TCP_HONOR_HPTS_MIN  1155	/* Do we honor hpts min to */
#define TCP_REC_IS_DYN      1156	/* Do we allow timely to change recovery multiplier? */
#define TCP_SIDECHAN_DIS    1157	/* Disable/enable the side-channel */
#define TCP_FILLCW_RATE_CAP 1158	/* Set a cap for DGP's fillcw */
#define TCP_POLICER_MSS     1159	/* Policer MSS requirement */
#define TCP_STACK_SPEC_INFO 1160	/* Get stack specific information (if present) */
#define RACK_CSPR_IS_FCC    1161
#define TCP_GP_USE_LTBW     1162	/* how we use lt_bw 0=not, 1=min, 2=max */


/* Start of reserved space for third-party user-settable options. */
#define	TCP_VENDOR	SO_VENDOR

#define	TCP_CA_NAME_MAX	16	/* max congestion control name length */

/* Bit values for the tcpi_options field of struct tcp_info. */
#define	TCPI_OPT_TIMESTAMPS	0x01
#define	TCPI_OPT_SACK		0x02
#define	TCPI_OPT_WSCALE		0x04
#define	TCPI_OPT_ECN		0x08
#define	TCPI_OPT_TOE		0x10
#define	TCPI_OPT_TFO		0x20
#define	TCPI_OPT_ACE		0x40

/* Maximum length of log ID. */
#define TCP_LOG_ID_LEN	64

/* TCP accounting counters */
#define TCP_NUM_PROC_COUNTERS 11
#define TCP_NUM_CNT_COUNTERS 13

/*
 * Accounting counters; TCP_PERF_INFO is described as "retrieve accounting
 * counters".
 * Must match counter array sizes in tcpcb.
 *
 * NOTE(review): tcp_proc_time is sized with TCP_NUM_CNT_COUNTERS, not
 * TCP_NUM_PROC_COUNTERS.  Left unchanged because the layout must match
 * tcpcb - confirm against that definition before altering.
 */
struct tcp_perf_info {
	uint64_t	tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
	uint64_t	tcp_proc_time[TCP_NUM_CNT_COUNTERS];
	uint64_t	timebase;	/* timebase for tcp_proc_time */
	uint8_t		tb_is_stable;	/* timebase is stable/invariant */
};

/*
 * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
 * the caller to query certain information about the state of a TCP
 * connection.  We provide an overlapping set of fields with the Linux
 * implementation, but since this is a fixed size structure, room has been
 * left for growth.  In order to maximize potential future compatibility with
 * the Linux API, the same variable names and order have been adopted, and
 * padding left to make room for omitted fields in case they are added later.
 *
 * Members whose names start with "__tcpi_" carry no description here;
 * presumably they are the omitted-field placeholders mentioned above.
 *
 * XXX: This is currently an unstable ABI/API, in that it is expected to
 * change.
 */
struct tcp_info {
	u_int8_t	tcpi_state;		/* TCP FSM state. */
	u_int8_t	__tcpi_ca_state;
	u_int8_t	__tcpi_retransmits;
	u_int8_t	__tcpi_probes;
	u_int8_t	__tcpi_backoff;
	u_int8_t	tcpi_options;		/* Options enabled on conn. */
	u_int8_t	tcpi_snd_wscale:4,	/* RFC1323 send shift value. */
			tcpi_rcv_wscale:4;	/* RFC1323 recv shift value. */

	u_int32_t	tcpi_rto;		/* Retransmission timeout (usec). */
	u_int32_t	__tcpi_ato;
	u_int32_t	tcpi_snd_mss;		/* Max segment size for send. */
	u_int32_t	tcpi_rcv_mss;		/* Max segment size for receive. */

	u_int32_t	__tcpi_unacked;
	u_int32_t	__tcpi_sacked;
	u_int32_t	__tcpi_lost;
	u_int32_t	__tcpi_retrans;
	u_int32_t	__tcpi_fackets;

	/* Times; measurements in usecs. */
	u_int32_t	__tcpi_last_data_sent;
	u_int32_t	__tcpi_last_ack_sent;	/* Also unimpl. on Linux? */
	u_int32_t	tcpi_last_data_recv;	/* Time since last recv data. */
	u_int32_t	__tcpi_last_ack_recv;

	/* Metrics; variable units. */
	u_int32_t	__tcpi_pmtu;
	u_int32_t	__tcpi_rcv_ssthresh;
	u_int32_t	tcpi_rtt;		/* Smoothed RTT in usecs. */
	u_int32_t	tcpi_rttvar;		/* RTT variance in usecs. */
	u_int32_t	tcpi_snd_ssthresh;	/* Slow start threshold. */
	u_int32_t	tcpi_snd_cwnd;		/* Send congestion window. */
	u_int32_t	__tcpi_advmss;
	u_int32_t	__tcpi_reordering;

	u_int32_t	__tcpi_rcv_rtt;
	u_int32_t	tcpi_rcv_space;		/* Advertised recv window. */

	/* FreeBSD extensions to tcp_info. */
	u_int32_t	tcpi_snd_wnd;		/* Advertised send window. */
	u_int32_t	tcpi_snd_bwnd;		/* No longer used. */
	u_int32_t	tcpi_snd_nxt;		/* Next egress seqno */
	u_int32_t	tcpi_rcv_nxt;		/* Next ingress seqno */
	u_int32_t	tcpi_toe_tid;		/* HWTID for TOE endpoints */
	u_int32_t	tcpi_snd_rexmitpack;	/* Retransmitted packets */
	u_int32_t	tcpi_rcv_ooopack;	/* Out-of-order packets */
	u_int32_t	tcpi_snd_zerowin;	/* Zero-sized windows sent */

	/* Accurate ECN counters. */
	u_int32_t	tcpi_delivered_ce;
	u_int32_t	tcpi_received_ce;	/* # of CE marks received */
	u_int32_t	__tcpi_delivered_e1_bytes;
	u_int32_t	__tcpi_delivered_e0_bytes;
	u_int32_t	__tcpi_delivered_ce_bytes;
	u_int32_t	__tcpi_received_e1_bytes;
	u_int32_t	__tcpi_received_e0_bytes;
	u_int32_t	__tcpi_received_ce_bytes;

	u_int32_t	tcpi_total_tlp;		/* tail loss probes sent */
	u_int64_t	tcpi_total_tlp_bytes;	/* tail loss probe bytes sent */

	u_int32_t	tcpi_snd_una;		/* Unacked seqno sent */
	u_int32_t	tcpi_snd_max;		/* Highest seqno sent */
	u_int32_t	tcpi_rcv_numsacks;	/* Distinct SACK blks present */
	u_int32_t	tcpi_rcv_adv;		/* Peer advertised window */
	u_int32_t	tcpi_dupacks;		/* Consecutive dup ACKs recvd */

	u_int32_t	tcpi_rttmin;		/* Min observed RTT */
	/* Padding to grow without breaking ABI. */
	u_int32_t	__tcpi_pad[14];		/* Padding. */
};

471/*
472 * If this structure is provided when setting the TCP_FASTOPEN socket
473 * option, and the enable member is non-zero, a subsequent connect will use
474 * pre-shared key (PSK) mode using the provided key.
475 */
476struct tcp_fastopen {
477	int enable;
478	uint8_t psk[TCP_FASTOPEN_PSK_LEN];
479};
480
/* Size of the stack/function-set name buffers in the structures below. */
#define TCP_FUNCTION_NAME_LEN_MAX 32

/*
 * Stack specific information; TCP_STACK_SPEC_INFO is described as
 * "Get stack specific information (if present)".
 */
struct stack_specific_info {
	char stack_name[TCP_FUNCTION_NAME_LEN_MAX];
	uint64_t policer_last_bw;	/* Only valid if detection enabled and policer detected */
	uint64_t bytes_transmitted;
	uint64_t bytes_retransmitted;
	uint32_t policer_detection_enabled: 1,
		 policer_detected : 1,  /* transport thinks a policer is on path */
		 highly_buffered : 1,	/* transport considers the path highly buffered */
		 spare : 29;
	uint32_t policer_bucket_size;	/* Only valid if detection enabled and policer detected */
	uint32_t current_round;
	uint32_t _rack_i_pad[18];	/* presumably reserved for growth - confirm */
};

/* A function-set (stack) name together with a count field, pcbcnt. */
struct tcp_function_set {
	char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
	uint32_t pcbcnt;
};

/* TLS modes for TCP_TXTLS_MODE */
#define	TCP_TLS_MODE_NONE	0
#define	TCP_TLS_MODE_SW		1
#define	TCP_TLS_MODE_IFNET	2
#define	TCP_TLS_MODE_TOE	3

/*
 * TCP Control message types
 */
#define	TLS_SET_RECORD_TYPE	1
#define	TLS_GET_RECORD		2

/*
 * TCP log user opaque
 */
struct tcp_snd_req {
	uint64_t timestamp;
	uint64_t start;
	uint64_t end;
	uint32_t flags;
	uint32_t playout_ms;
};

/* Payload of a user log record; currently has a single member. */
union tcp_log_userdata {
	struct tcp_snd_req tcp_req;
};

/* A user log record: type and subtype followed by the payload union. */
struct tcp_log_user {
	uint32_t type;
	uint32_t subtype;
	union tcp_log_userdata data;
};

/* user types, i.e. apps */
#define TCP_LOG_USER_HTTPD	1

/* user subtypes */
#define TCP_LOG_HTTPD_TS	1	/* client timestamp */
#define TCP_LOG_HTTPD_TS_REQ	2	/* client timestamp and request info */

/* HTTPD REQ flags */
#define TCP_LOG_HTTPD_RANGE_START	0x0001
#define TCP_LOG_HTTPD_RANGE_END		0x0002

/* Flags for hybrid pacing */
#define TCP_HYBRID_PACING_CU		0x0001		/* Enable catch-up mode */
#define TCP_HYBRID_PACING_DTL		0x0002		/* Enable Detailed logging */
#define TCP_HYBRID_PACING_CSPR		0x0004		/* A client suggested rate is present  */
#define TCP_HYBRID_PACING_H_MS		0x0008		/* A client hint for maxseg is present  */
#define TCP_HYBRID_PACING_ENABLE	0x0010		/* We are enabling hybrid pacing else disable */
#define TCP_HYBRID_PACING_S_MSS		0x0020		/* Client wants us to set the mss overriding gp est in CU */
#define TCP_HAS_PLAYOUT_MS		0x0040		/* The client included the chunk playout milliseconds: deprecate */
/* Mask covering the non-internal flag bits (the low 12 bits). */
#define TCP_HYBRID_PACING_USER_MASK	0x0FFF		/* Non-internal flags mask */
/* the below are internal only flags */
#define TCP_HYBRID_PACING_SETMSS	0x1000		/* Internal flag that tells us we set the mss on this entry */
#define TCP_HYBRID_PACING_WASSET	0x2000		/* We init to this to know if a hybrid command was issued */
#define TCP_HYBRID_PACING_SENDTIME	0x4000		/* Duplicate tm to last, use sendtime for catch up mode */

560struct tcp_hybrid_req {
561	struct tcp_snd_req req;
562	uint64_t cspr;
563	uint32_t hint_maxseg;
564	uint32_t hybrid_flags;
565};
566
/*
 * TCP specific variables of interest for tp->t_stats stats(9) accounting.
 */
#define	VOI_TCP_TXPB		0 /* Transmit payload bytes */
#define	VOI_TCP_RETXPB		1 /* Retransmit payload bytes */
#define	VOI_TCP_FRWIN		2 /* Foreign receive window */
#define	VOI_TCP_LCWIN		3 /* Local congestion window */
#define	VOI_TCP_RTT		4 /* Round trip time */
#define	VOI_TCP_CSIG		5 /* Congestion signal */
#define	VOI_TCP_GPUT		6 /* Goodput */
#define	VOI_TCP_CALCFRWINDIFF	7 /* Congestion avoidance LCWIN - FRWIN */
#define	VOI_TCP_GPUT_ND		8 /* Goodput normalised delta */
#define	VOI_TCP_ACKLEN		9 /* Average ACKed bytes per ACK */
#define	VOI_TCP_PATHRTT		10 /* The path RTT based on ACK arrival */

/* Special (negative) values for the TCP_REUSPORT_LB_NUMA socket option. */
#define TCP_REUSPORT_LB_NUMA_NODOM	(-2) /* remove numa binding */
#define TCP_REUSPORT_LB_NUMA_CURDOM	(-1) /* bind to current domain */

585#endif /* __BSD_VISIBLE */
586#endif /* !_NETINET_TCP_H_ */
587