1/*-
2 * Copyright (c) 2015-2017 Patrick Kelsey
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
29 * this code, add the following line to your kernel config:
30 *
31 * options TCP_RFC7413
32 *
33 *
34 * The generated TFO cookies are the 64-bit output of
35 * SipHash24(key=<16-byte-key>, msg=<client-ip>).  Multiple concurrent valid
36 * keys are supported so that time-based rolling cookie invalidation
37 * policies can be implemented in the system.  The default number of
38 * concurrent keys is 2.  This can be adjusted in the kernel config as
39 * follows:
40 *
41 * options TCP_RFC7413_MAX_KEYS=<num-keys>
42 *
43 *
44 * In addition to the facilities defined in RFC7413, this implementation
45 * supports a pre-shared key (PSK) mode of operation in which the TFO server
46 * requires the client to be in possession of a shared secret in order for
47 * the client to be able to successfully open TFO connections with the
48 * server.  This is useful, for example, in environments where TFO servers
49 * are exposed to both internal and external clients and only wish to allow
50 * TFO connections from internal clients.
51 *
52 * In the PSK mode of operation, the server generates and sends TFO cookies
53 * to requesting clients as usual.  However, when validating cookies
54 * received in TFO SYNs from clients, the server requires the
55 * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
56 * msg=<cookie-sent-to-client>).
57 *
58 * Multiple concurrent valid pre-shared keys are supported so that
59 * time-based rolling PSK invalidation policies can be implemented in the
60 * system.  The default number of concurrent pre-shared keys is 2.  This can
61 * be adjusted in the kernel config as follows:
62 *
63 * options TCP_RFC7413_MAX_PSKS=<num-psks>
64 *
65 *
66 * The following TFO-specific sysctls are defined:
67 *
68 * net.inet.tcp.fastopen.acceptany (RW, default 0)
69 *     When non-zero, all client-supplied TFO cookies will be considered to
70 *     be valid.
71 *
72 * net.inet.tcp.fastopen.autokey (RW, default 120)
73 *     When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
74 *     key will be automatically generated after this many seconds.
75 *
76 * net.inet.tcp.fastopen.ccache_bucket_limit
77 *                     (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
78 *     The maximum number of entries in a client cookie cache bucket.
79 *
80 * net.inet.tcp.fastopen.ccache_buckets
81 *                          (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
82 *     The number of client cookie cache buckets.
83 *
84 * net.inet.tcp.fastopen.ccache_list (RO)
85 *     Print the client cookie cache.
86 *
87 * net.inet.tcp.fastopen.client_enable (RW, default 1)
88 *     When zero, no new active (i.e., client) TFO connections can be
89 *     created.  On the transition from enabled to disabled, the client
90 *     cookie cache is cleared and disabled.  The transition from enabled to
91 *     disabled does not affect any active TFO connections in progress; it
92 *     only prevents new ones from being made.
93 *
94 * net.inet.tcp.fastopen.keylen (RD)
95 *     The key length in bytes.
96 *
97 * net.inet.tcp.fastopen.maxkeys (RD)
98 *     The maximum number of keys supported.
99 *
100 * net.inet.tcp.fastopen.maxpsks (RD)
101 *     The maximum number of pre-shared keys supported.
102 *
103 * net.inet.tcp.fastopen.numkeys (RD)
104 *     The current number of keys installed.
105 *
106 * net.inet.tcp.fastopen.numpsks (RD)
107 *     The current number of pre-shared keys installed.
108 *
109 * net.inet.tcp.fastopen.path_disable_time
110 *                          (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
111 *     When a failure occurs while trying to create a new active (i.e.,
112 *     client) TFO connection, new active connections on the same path, as
113 *     determined by the tuple {client_ip, server_ip, server_port}, will be
114 *     forced to be non-TFO for this many seconds.  Note that the path
115 *     disable mechanism relies on state stored in client cookie cache
116 *     entries, so it is possible for the disable time for a given path to
117 *     be reduced if the corresponding client cookie cache entry is reused
118 *     due to resource pressure before the disable period has elapsed.
119 *
120 * net.inet.tcp.fastopen.psk_enable (RW, default 0)
121 *     When non-zero, pre-shared key (PSK) mode is enabled for all TFO
122 *     servers.  On the transition from enabled to disabled, all installed
123 *     pre-shared keys are removed.
124 *
125 * net.inet.tcp.fastopen.server_enable (RW, default 0)
126 *     When zero, no new passive (i.e., server) TFO connections can be
127 *     created.  On the transition from enabled to disabled, all installed
128 *     keys and pre-shared keys are removed.  On the transition from
129 *     disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
130 *     there are no keys installed, a new key will be generated immediately.
131 *     The transition from enabled to disabled does not affect any passive
132 *     TFO connections in progress; it only prevents new ones from being
133 *     made.
134 *
135 * net.inet.tcp.fastopen.setkey (WR)
136 *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
137 *     this sysctl.
138 *
139 * net.inet.tcp.fastopen.setpsk (WR)
140 *     Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
141 *     bytes to this sysctl.
142 *
143 * In order for TFO connections to be created via a listen socket, that
144 * socket must have the TCP_FASTOPEN socket option set on it.  This option
145 * can be set on the socket either before or after the listen() is invoked.
146 * Clearing this option on a listen socket after it has been set has no
147 * effect on existing TFO connections or TFO connections in progress; it
148 * only prevents new TFO connections from being made.
149 *
150 * For passively-created sockets, the TCP_FASTOPEN socket option can be
151 * queried to determine whether the connection was established using TFO.
152 * Note that connections that are established via a TFO SYN, but that fall
153 * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
154 * set.
155 *
156 * Per the RFC, this implementation limits the number of TFO connections
157 * that can be in the SYN_RECEIVED state on a per listen-socket basis.
158 * Whenever this limit is exceeded, requests for new TFO connections are
159 * serviced as non-TFO requests.  Without such a limit, given a valid TFO
160 * cookie, an attacker could keep the listen queue in an overflow condition
161 * using a TFO SYN flood.  This implementation sets the limit at half the
162 * configured listen backlog.
163 *
164 */
165
166#include <sys/cdefs.h>
167__FBSDID("$FreeBSD$");
168
169#include "opt_inet.h"
170
171#include <sys/param.h>
172#include <sys/jail.h>
173#include <sys/kernel.h>
174#include <sys/hash.h>
175#include <sys/limits.h>
176#include <sys/lock.h>
177#include <sys/proc.h>
178#include <sys/rmlock.h>
179#include <sys/sbuf.h>
180#include <sys/socket.h>
181#include <sys/socketvar.h>
182#include <sys/sysctl.h>
183#include <sys/systm.h>
184
185#include <crypto/siphash/siphash.h>
186
187#include <net/vnet.h>
188
189#include <netinet/in.h>
190#include <netinet/in_pcb.h>
191#include <netinet/tcp_var.h>
192#include <netinet/tcp_fastopen.h>
193
/* Server keys are SipHash keys, so the key length is the SipHash key length. */
194#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH
195
/* Build-time check: pre-shared keys must be the same size as cookie keys. */
196#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN
197#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
198#endif
199
200/*
 * A PSK-mode setsockopt() uses tcpcb.t_tfo_cookie.client to hold the PSK
 * until the connect occurs, so that buffer must be large enough for a PSK.
 */
204#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN
205#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
206#endif
207
/* Defaults for the client cookie cache geometry (see tcp_fastopen_init()). */
208#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT	16
209#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT		2048 /* must be power of 2 */
210
/* Initial value of the path_disable_time sysctl. */
211#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT		900 /* seconds */
212
/*
 * Number of key slots: TCP_RFC7413_MAX_KEYS when it is defined and at least
 * 1, otherwise 2.  The result is then clamped to at most 10.
 */
213#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
214#define	TCP_FASTOPEN_MAX_KEYS	2
215#else
216#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
217#endif
218
219#if TCP_FASTOPEN_MAX_KEYS > 10
220#undef TCP_FASTOPEN_MAX_KEYS
221#define	TCP_FASTOPEN_MAX_KEYS	10
222#endif
223
/*
 * Number of pre-shared key slots: same rule as above, driven by
 * TCP_RFC7413_MAX_PSKS (default 2, clamped to at most 10).
 */
224#if !defined(TCP_RFC7413_MAX_PSKS) || (TCP_RFC7413_MAX_PSKS < 1)
225#define	TCP_FASTOPEN_MAX_PSKS	2
226#else
227#define	TCP_FASTOPEN_MAX_PSKS	TCP_RFC7413_MAX_PSKS
228#endif
229
230#if TCP_FASTOPEN_MAX_PSKS > 10
231#undef TCP_FASTOPEN_MAX_PSKS
232#define	TCP_FASTOPEN_MAX_PSKS	10
233#endif
234
/*
 * Server key material: one fixed-size ring of cookie keys and one of
 * pre-shared keys.  New entries are written to the slot after 'newest' /
 * 'newest_psk', wrapping to slot 0 (see tcp_fastopen_addkey_locked() and
 * tcp_fastopen_addpsk_locked()); lookups walk backwards from the newest
 * slot.  The number of valid slots is tracked separately in
 * V_tcp_fastopen_numkeys / V_tcp_fastopen_numpsks.
 */
235struct tcp_fastopen_keylist {
236	unsigned int newest;		/* index in key[] of the latest key */
237	unsigned int newest_psk;	/* index in psk[] of the latest PSK */
238	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
239	uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
240};
241
/*
 * Auto-key callout together with the vnet it was created in.  The callout
 * handler receives a pointer to this structure and uses 'v' to set the
 * current vnet before touching any per-vnet state.
 */
242struct tcp_fastopen_callout {
243	struct callout c;	/* the periodic key-generation callout */
244	struct vnet *v;		/* vnet recorded by tcp_fastopen_init() */
245};
246
/*
 * Forward declarations of the file-local client cookie cache helpers, which
 * are used before their definitions appear.
 */
247static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup(
248    struct in_conninfo *, struct tcp_fastopen_ccache_bucket **);
249static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create(
250    struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t,
251    uint8_t *);
252static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
253    unsigned int);
254static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
255    struct tcp_fastopen_ccache_bucket *);
256
/*
 * Root of the net.inet.tcp.fastopen sysctl subtree.  Every OID declared
 * below with a parent of _net_inet_tcp_fastopen hangs off this node.
 */
257SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
258    "TCP Fast Open");
259
/* acceptany: plain per-vnet int, read and written directly (no handler). */
260VNET_DEFINE_STATIC(int, tcp_fastopen_acceptany) = 0;
261#define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
262SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
263    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
264    "Accept any non-empty cookie");
265
/*
 * autokey: key regeneration interval in seconds (initially 120).  Writes go
 * through a handler because changing it may stop or re-arm the callout.
 */
266VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_autokey) = 120;
267#define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
268static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
269SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
270    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
271    NULL, 0, &sysctl_net_inet_tcp_fastopen_autokey, "IU",
272    "Number of seconds between auto-generation of a new key; zero disables");
273
/*
 * ccache_bucket_limit: no variable is declared here; the handler reads and
 * writes V_tcp_fastopen_ccache.bucket_limit and trims buckets when the
 * limit is lowered.
 */
274static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
275SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
276    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT,
277    NULL, 0, &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
278    "Max entries per bucket in client cookie cache");
279
/*
 * ccache_buckets: read-only tunable.  tcp_fastopen_init() uses the default
 * instead when this value is zero or not a power of 2.
 */
280VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_ccache_buckets) =
281    TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
282#define	V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
283SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
284    CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
285    "Client cookie cache number of buckets (power of 2)");
286
/*
 * client_enable: defined non-static (VNET_DEFINE) with an initial value of
 * 1.  The V_tcp_fastopen_client_enable accessor is not defined in this
 * file; presumably it comes from netinet/tcp_fastopen.h -- confirm.
 */
287VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 1;
288static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
289SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
290    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
291    NULL, 0, &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
292    "Enable/disable TCP Fast Open client functionality");
293
/*
 * keylen, maxkeys, maxpsks: read-only compile-time constants.  There is no
 * backing variable (SYSCTL_NULL_INT_PTR); the value is passed as the
 * argument following it.
 */
294SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
295    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
296    "Key length in bytes");
297
298SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
299    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
300    "Maximum number of keys supported");
301
302SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
303    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
304    "Maximum number of pre-shared keys supported");
305
/*
 * numkeys: count of valid slots in V_tcp_fastopen_keys.key[].  Incremented
 * by tcp_fastopen_addkey_locked() up to TCP_FASTOPEN_MAX_KEYS and reset to 0
 * when the server is disabled.
 */
306VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numkeys) = 0;
307#define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
308SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
309    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
310    "Number of keys installed");
311
/*
 * numpsks: count of valid slots in V_tcp_fastopen_keys.psk[].  Incremented
 * by tcp_fastopen_addpsk_locked() and reset to 0 when either PSK mode or the
 * server is disabled.
 */
312VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numpsks) = 0;
313#define	V_tcp_fastopen_numpsks	VNET(tcp_fastopen_numpsks)
314SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
315    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
316    "Number of pre-shared keys installed");
317
/*
 * path_disable_time: seconds; tcp_fastopen_connect() compares it against
 * the time elapsed since a cache entry's disable_time.
 */
318VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_path_disable_time) =
319    TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
320#define	V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
321SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
322    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
323    "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");
324
/*
 * psk_enable: handler-backed because turning PSK mode off also discards the
 * installed pre-shared keys.
 */
325VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_psk_enable) = 0;
326#define	V_tcp_fastopen_psk_enable	VNET(tcp_fastopen_psk_enable)
327static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
328SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
329    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
330    NULL, 0, &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
331    "Enable/disable TCP Fast Open server pre-shared key mode");
332
/*
 * server_enable: defined non-static (VNET_DEFINE), initially 0.  The
 * handler discards keys and stops the auto-key callout on disable, and may
 * generate a first key on enable.
 */
333VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
334static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
335SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
336    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
337    NULL, 0, &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
338    "Enable/disable TCP Fast Open server functionality");
339
/*
 * setkey / setpsk: write-only opaque OIDs.  The handlers reject reads and
 * require exactly TCP_FASTOPEN_KEY_LEN bytes of new data.
 */
340static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
341SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
342    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
343    NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "",
344    "Install a new key");
345
346static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
347SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
348    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
349    NULL, 0, &sysctl_net_inet_tcp_fastopen_setpsk, "",
350    "Install a new pre-shared key");
351
/* ccache_list: read-only string OID, flagged CTLFLAG_SKIP. */
352static int sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS);
353SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_list,
354    CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
355    NULL, 0, sysctl_net_inet_tcp_fastopen_ccache_list, "A",
356    "List of all client cookie cache entries");
357
/*
 * Per-vnet read-mostly lock.  The sysctl handlers take it for writing around
 * every change to the key rings, key counts and enable flags;
 * tcp_fastopen_check_cookie() takes it for reading.
 */
358VNET_DEFINE_STATIC(struct rmlock, tcp_fastopen_keylock);
359#define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)
360
/* 't' is the caller's struct rm_priotracker for the read side. */
361#define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
362#define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
363#define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
364#define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)
365
/* Per-vnet server keys and pre-shared keys. */
366VNET_DEFINE_STATIC(struct tcp_fastopen_keylist, tcp_fastopen_keys);
367#define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)
368
/* Per-vnet auto-key callout; initialized against the key lock in init. */
369VNET_DEFINE_STATIC(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
370#define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)
371
/* UMA zone of unsigned int counters for tcp_fastopen_alloc_counter(). */
372VNET_DEFINE_STATIC(uma_zone_t, counter_zone);
373#define	V_counter_zone			VNET(counter_zone)
374
/* malloc(9) type used for the client cookie cache bucket array. */
375static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");
376
/* Per-vnet client cookie cache. */
377VNET_DEFINE_STATIC(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
378#define V_tcp_fastopen_ccache	VNET(tcp_fastopen_ccache)
379
/* Per-bucket mutex helpers for the client cookie cache. */
380#define	CCB_LOCK(ccb)		mtx_lock(&(ccb)->ccb_mtx)
381#define	CCB_UNLOCK(ccb)		mtx_unlock(&(ccb)->ccb_mtx)
382#define	CCB_LOCK_ASSERT(ccb)	mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
383
384void
385tcp_fastopen_init(void)
386{
387	unsigned int i;
388
389	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
390	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
391	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
392	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
393	    &V_tcp_fastopen_keylock, 0);
394	V_tcp_fastopen_autokey_ctx.v = curvnet;
395	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
396	V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
397
398	/* May already be non-zero if kernel tunable was set */
399	if (V_tcp_fastopen_ccache.bucket_limit == 0)
400		V_tcp_fastopen_ccache.bucket_limit =
401		    TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
402
403	/* May already be non-zero if kernel tunable was set */
404	if ((V_tcp_fastopen_ccache_buckets == 0) ||
405	    !powerof2(V_tcp_fastopen_ccache_buckets))
406		V_tcp_fastopen_ccache.buckets =
407			TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
408	else
409		V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
410
411	V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
412	V_tcp_fastopen_ccache.secret = arc4random();
413
414	V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
415	    sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
416	    M_WAITOK | M_ZERO);
417
418	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
419		TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
420		mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
421			 NULL, MTX_DEF);
422		if (V_tcp_fastopen_client_enable) {
423			/* enable bucket */
424			V_tcp_fastopen_ccache.base[i].ccb_num_entries = 0;
425		} else {
426			/* disable bucket */
427			V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1;
428		}
429		V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
430	}
431
432	/*
433	 * Note that while the total number of entries in the cookie cache
434	 * is limited by the table management logic to
435	 * V_tcp_fastopen_ccache.buckets *
436	 * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
437	 * this zone can exceed that amount by the number of CPUs in the
438	 * system times the maximum number of unallocated items that can be
439	 * present in each UMA per-CPU cache for this zone.
440	 */
441	V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
442	    sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
443	    UMA_ALIGN_CACHE, 0);
444}
445
446void
447tcp_fastopen_destroy(void)
448{
449	struct tcp_fastopen_ccache_bucket *ccb;
450	unsigned int i;
451
452	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
453		ccb = &V_tcp_fastopen_ccache.base[i];
454		tcp_fastopen_ccache_bucket_trim(ccb, 0);
455		mtx_destroy(&ccb->ccb_mtx);
456	}
457
458	KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
459	    ("%s: TFO ccache zone allocation count not 0", __func__));
460	uma_zdestroy(V_tcp_fastopen_ccache.zone);
461	free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
462
463	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
464	rm_destroy(&V_tcp_fastopen_keylock);
465	uma_zdestroy(V_counter_zone);
466}
467
468unsigned int *
469tcp_fastopen_alloc_counter(void)
470{
471	unsigned int *counter;
472	counter = uma_zalloc(V_counter_zone, M_NOWAIT);
473	if (counter)
474		*counter = 1;
475	return (counter);
476}
477
478void
479tcp_fastopen_decrement_counter(unsigned int *counter)
480{
481	if (*counter == 1)
482		uma_zfree(V_counter_zone, counter);
483	else
484		atomic_subtract_int(counter, 1);
485}
486
487static void
488tcp_fastopen_addkey_locked(uint8_t *key)
489{
490
491	V_tcp_fastopen_keys.newest++;
492	if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
493		V_tcp_fastopen_keys.newest = 0;
494	memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
495	    TCP_FASTOPEN_KEY_LEN);
496	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
497		V_tcp_fastopen_numkeys++;
498}
499
500static void
501tcp_fastopen_addpsk_locked(uint8_t *psk)
502{
503
504	V_tcp_fastopen_keys.newest_psk++;
505	if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
506		V_tcp_fastopen_keys.newest_psk = 0;
507	memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
508	    TCP_FASTOPEN_KEY_LEN);
509	if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
510		V_tcp_fastopen_numpsks++;
511}
512
513static void
514tcp_fastopen_autokey_locked(void)
515{
516	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
517
518	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
519	tcp_fastopen_addkey_locked(newkey);
520}
521
522static void
523tcp_fastopen_autokey_callout(void *arg)
524{
525	struct tcp_fastopen_callout *ctx = arg;
526
527	CURVNET_SET(ctx->v);
528	tcp_fastopen_autokey_locked();
529	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
530		      tcp_fastopen_autokey_callout, ctx);
531	CURVNET_RESTORE();
532}
533
534static uint64_t
535tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
536{
537	SIPHASH_CTX ctx;
538	uint64_t siphash;
539
540	SipHash24_Init(&ctx);
541	SipHash_SetKey(&ctx, key);
542	switch (inc->inc_flags & INC_ISIPV6) {
543#ifdef INET
544	case 0:
545		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
546		break;
547#endif
548#ifdef INET6
549	case INC_ISIPV6:
550		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
551		break;
552#endif
553	}
554	SipHash_Final((u_int8_t *)&siphash, &ctx);
555
556	return (siphash);
557}
558
559static uint64_t
560tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
561{
562	SIPHASH_CTX ctx;
563	uint64_t psk_cookie;
564
565	SipHash24_Init(&ctx);
566	SipHash_SetKey(&ctx, psk);
567	SipHash_Update(&ctx, cookie, cookie_len);
568	SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
569
570	return (psk_cookie);
571}
572
573static int
574tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
575{
576	unsigned int i, psk_index;
577	uint64_t psk_cookie;
578
579	if (V_tcp_fastopen_psk_enable) {
580		psk_index = V_tcp_fastopen_keys.newest_psk;
581		for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
582			psk_cookie =
583			    tcp_fastopen_make_psk_cookie(
584				 V_tcp_fastopen_keys.psk[psk_index],
585				 (uint8_t *)cur_cookie,
586				 TCP_FASTOPEN_COOKIE_LEN);
587
588			if (memcmp(wire_cookie, &psk_cookie,
589				   TCP_FASTOPEN_COOKIE_LEN) == 0)
590				return (1);
591
592			if (psk_index == 0)
593				psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
594			else
595				psk_index--;
596		}
597	} else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
598		return (1);
599
600	return (0);
601}
602
603/*
604 * Return values:
605 *	-1	the cookie is invalid and no valid cookie is available
606 *	 0	the cookie is invalid and the latest cookie has been returned
607 *	 1	the cookie is valid and the latest cookie has been returned
608 */
609int
610tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
611    unsigned int len, uint64_t *latest_cookie)
612{
613	struct rm_priotracker tracker;
614	unsigned int i, key_index;
615	int rv;
616	uint64_t cur_cookie;
617
618	if (V_tcp_fastopen_acceptany) {
619		*latest_cookie = 0;
620		return (1);
621	}
622
623	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
624	if (len != TCP_FASTOPEN_COOKIE_LEN) {
625		if (V_tcp_fastopen_numkeys > 0) {
626			*latest_cookie =
627			    tcp_fastopen_make_cookie(
628				V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
629				inc);
630			rv = 0;
631		} else
632			rv = -1;
633		goto out;
634	}
635
636	/*
637	 * Check against each available key, from newest to oldest.
638	 */
639	key_index = V_tcp_fastopen_keys.newest;
640	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
641		cur_cookie =
642		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
643			inc);
644		if (i == 0)
645			*latest_cookie = cur_cookie;
646		rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
647		if (rv)
648			goto out;
649		if (key_index == 0)
650			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
651		else
652			key_index--;
653	}
654	rv = 0;
655
656 out:
657	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
658	return (rv);
659}
660
661static int
662sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
663{
664	int error;
665	unsigned int new;
666
667	new = V_tcp_fastopen_autokey;
668	error = sysctl_handle_int(oidp, &new, 0, req);
669	if (error == 0 && req->newptr) {
670		if (new > (INT_MAX / hz))
671			return (EINVAL);
672
673		TCP_FASTOPEN_KEYS_WLOCK();
674		if (V_tcp_fastopen_server_enable) {
675			if (V_tcp_fastopen_autokey && !new)
676				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
677			else if (new)
678				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
679				    new * hz, tcp_fastopen_autokey_callout,
680				    &V_tcp_fastopen_autokey_ctx);
681		}
682		V_tcp_fastopen_autokey = new;
683		TCP_FASTOPEN_KEYS_WUNLOCK();
684	}
685
686	return (error);
687}
688
689static int
690sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
691{
692	int error;
693	unsigned int new;
694
695	new = V_tcp_fastopen_psk_enable;
696	error = sysctl_handle_int(oidp, &new, 0, req);
697	if (error == 0 && req->newptr) {
698		if (V_tcp_fastopen_psk_enable && !new) {
699			/* enabled -> disabled */
700			TCP_FASTOPEN_KEYS_WLOCK();
701			V_tcp_fastopen_numpsks = 0;
702			V_tcp_fastopen_keys.newest_psk =
703			    TCP_FASTOPEN_MAX_PSKS - 1;
704			V_tcp_fastopen_psk_enable = 0;
705			TCP_FASTOPEN_KEYS_WUNLOCK();
706		} else if (!V_tcp_fastopen_psk_enable && new) {
707			/* disabled -> enabled */
708			TCP_FASTOPEN_KEYS_WLOCK();
709			V_tcp_fastopen_psk_enable = 1;
710			TCP_FASTOPEN_KEYS_WUNLOCK();
711		}
712	}
713	return (error);
714}
715
716static int
717sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
718{
719	int error;
720	unsigned int new;
721
722	new = V_tcp_fastopen_server_enable;
723	error = sysctl_handle_int(oidp, &new, 0, req);
724	if (error == 0 && req->newptr) {
725		if (V_tcp_fastopen_server_enable && !new) {
726			/* enabled -> disabled */
727			TCP_FASTOPEN_KEYS_WLOCK();
728			V_tcp_fastopen_numkeys = 0;
729			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
730			if (V_tcp_fastopen_autokey)
731				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
732			V_tcp_fastopen_numpsks = 0;
733			V_tcp_fastopen_keys.newest_psk =
734			    TCP_FASTOPEN_MAX_PSKS - 1;
735			V_tcp_fastopen_server_enable = 0;
736			TCP_FASTOPEN_KEYS_WUNLOCK();
737		} else if (!V_tcp_fastopen_server_enable && new) {
738			/* disabled -> enabled */
739			TCP_FASTOPEN_KEYS_WLOCK();
740			if (V_tcp_fastopen_autokey &&
741			    (V_tcp_fastopen_numkeys == 0)) {
742				tcp_fastopen_autokey_locked();
743				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
744				    V_tcp_fastopen_autokey * hz,
745				    tcp_fastopen_autokey_callout,
746				    &V_tcp_fastopen_autokey_ctx);
747			}
748			V_tcp_fastopen_server_enable = 1;
749			TCP_FASTOPEN_KEYS_WUNLOCK();
750		}
751	}
752	return (error);
753}
754
755static int
756sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
757{
758	int error;
759	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
760
761	if (req->oldptr != NULL || req->oldlen != 0)
762		return (EINVAL);
763	if (req->newptr == NULL)
764		return (EPERM);
765	if (req->newlen != sizeof(newkey))
766		return (EINVAL);
767	error = SYSCTL_IN(req, newkey, sizeof(newkey));
768	if (error)
769		return (error);
770
771	TCP_FASTOPEN_KEYS_WLOCK();
772	tcp_fastopen_addkey_locked(newkey);
773	TCP_FASTOPEN_KEYS_WUNLOCK();
774
775	return (0);
776}
777
778static int
779sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
780{
781	int error;
782	uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
783
784	if (req->oldptr != NULL || req->oldlen != 0)
785		return (EINVAL);
786	if (req->newptr == NULL)
787		return (EPERM);
788	if (req->newlen != sizeof(newpsk))
789		return (EINVAL);
790	error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
791	if (error)
792		return (error);
793
794	TCP_FASTOPEN_KEYS_WLOCK();
795	tcp_fastopen_addpsk_locked(newpsk);
796	TCP_FASTOPEN_KEYS_WUNLOCK();
797
798	return (0);
799}
800
801static int
802sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
803{
804	struct tcp_fastopen_ccache_bucket *ccb;
805	int error;
806	unsigned int new;
807	unsigned int i;
808
809	new = V_tcp_fastopen_ccache.bucket_limit;
810	error = sysctl_handle_int(oidp, &new, 0, req);
811	if (error == 0 && req->newptr) {
812		if ((new == 0) || (new > INT_MAX))
813			error = EINVAL;
814		else {
815			if (new < V_tcp_fastopen_ccache.bucket_limit) {
816				for (i = 0; i < V_tcp_fastopen_ccache.buckets;
817				     i++) {
818					ccb = &V_tcp_fastopen_ccache.base[i];
819					tcp_fastopen_ccache_bucket_trim(ccb, new);
820				}
821			}
822			V_tcp_fastopen_ccache.bucket_limit = new;
823		}
824	}
825	return (error);
826}
827
828static int
829sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
830{
831	struct tcp_fastopen_ccache_bucket *ccb;
832	int error;
833	unsigned int new, i;
834
835	new = V_tcp_fastopen_client_enable;
836	error = sysctl_handle_int(oidp, &new, 0, req);
837	if (error == 0 && req->newptr) {
838		if (V_tcp_fastopen_client_enable && !new) {
839			/* enabled -> disabled */
840			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
841				ccb = &V_tcp_fastopen_ccache.base[i];
842				KASSERT(ccb->ccb_num_entries > -1,
843				    ("%s: ccb->ccb_num_entries %d is negative",
844					__func__, ccb->ccb_num_entries));
845				tcp_fastopen_ccache_bucket_trim(ccb, 0);
846			}
847			V_tcp_fastopen_client_enable = 0;
848		} else if (!V_tcp_fastopen_client_enable && new) {
849			/* disabled -> enabled */
850			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
851				ccb = &V_tcp_fastopen_ccache.base[i];
852				CCB_LOCK(ccb);
853				KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
854				    ("%s: ccb->ccb_entries not empty", __func__));
855				KASSERT(ccb->ccb_num_entries == -1,
856				    ("%s: ccb->ccb_num_entries %d not -1", __func__,
857					ccb->ccb_num_entries));
858				ccb->ccb_num_entries = 0; /* enable bucket */
859				CCB_UNLOCK(ccb);
860			}
861			V_tcp_fastopen_client_enable = 1;
862		}
863	}
864	return (error);
865}
866
867void
868tcp_fastopen_connect(struct tcpcb *tp)
869{
870	struct inpcb *inp;
871	struct tcp_fastopen_ccache_bucket *ccb;
872	struct tcp_fastopen_ccache_entry *cce;
873	sbintime_t now;
874	uint16_t server_mss;
875	uint64_t psk_cookie;
876
877	psk_cookie = 0;
878	inp = tp->t_inpcb;
879	cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
880	if (cce) {
881		if (cce->disable_time == 0) {
882			if ((cce->cookie_len > 0) &&
883			    (tp->t_tfo_client_cookie_len ==
884			     TCP_FASTOPEN_PSK_LEN)) {
885				psk_cookie =
886				    tcp_fastopen_make_psk_cookie(
887					tp->t_tfo_cookie.client,
888					cce->cookie, cce->cookie_len);
889			} else {
890				tp->t_tfo_client_cookie_len = cce->cookie_len;
891				memcpy(tp->t_tfo_cookie.client, cce->cookie,
892				    cce->cookie_len);
893			}
894			server_mss = cce->server_mss;
895			CCB_UNLOCK(ccb);
896			if (tp->t_tfo_client_cookie_len ==
897			    TCP_FASTOPEN_PSK_LEN && psk_cookie) {
898				tp->t_tfo_client_cookie_len =
899				    TCP_FASTOPEN_COOKIE_LEN;
900				memcpy(tp->t_tfo_cookie.client, &psk_cookie,
901				    TCP_FASTOPEN_COOKIE_LEN);
902			}
903			tcp_mss(tp, server_mss ? server_mss : -1);
904			tp->snd_wnd = tp->t_maxseg;
905		} else {
906			/*
907			 * The path is disabled.  Check the time and
908			 * possibly re-enable.
909			 */
910			now = getsbinuptime();
911			if (now - cce->disable_time >
912			    ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
913				/*
914				 * Re-enable path.  Force a TFO cookie
915				 * request.  Forget the old MSS as it may be
916				 * bogus now, and we will rediscover it in
917				 * the SYN|ACK.
918				 */
919				cce->disable_time = 0;
920				cce->server_mss = 0;
921				cce->cookie_len = 0;
922				/*
923				 * tp->t_tfo... cookie details are already
924				 * zero from the tcpcb init.
925				 */
926			} else {
927				/*
928				 * Path is disabled, so disable TFO on this
929				 * connection.
930				 */
931				tp->t_flags &= ~TF_FASTOPEN;
932			}
933			CCB_UNLOCK(ccb);
934			tcp_mss(tp, -1);
935			/*
936			 * snd_wnd is irrelevant since we are either forcing
937			 * a TFO cookie request or disabling TFO - either
938			 * way, no data with the SYN.
939			 */
940		}
941	} else {
942		/*
943		 * A new entry for this path will be created when a SYN|ACK
944		 * comes back, or the attempt otherwise fails.
945		 */
946		CCB_UNLOCK(ccb);
947		tcp_mss(tp, -1);
948		/*
949		 * snd_wnd is irrelevant since we are forcing a TFO cookie
950		 * request.
951		 */
952	}
953}
954
955void
956tcp_fastopen_disable_path(struct tcpcb *tp)
957{
958	struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
959	struct tcp_fastopen_ccache_bucket *ccb;
960	struct tcp_fastopen_ccache_entry *cce;
961
962	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
963	if (cce) {
964		cce->server_mss = 0;
965		cce->cookie_len = 0;
966		/*
967		 * Preserve the existing disable time if it is already
968		 * disabled.
969		 */
970		if (cce->disable_time == 0)
971			cce->disable_time = getsbinuptime();
972	} else /* use invalid cookie len to create disabled entry */
973		tcp_fastopen_ccache_create(ccb, inc, 0,
974	   	    TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
975
976	CCB_UNLOCK(ccb);
977	tp->t_flags &= ~TF_FASTOPEN;
978}
979
980void
981tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
982    uint8_t cookie_len, uint8_t *cookie)
983{
984	struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
985	struct tcp_fastopen_ccache_bucket *ccb;
986	struct tcp_fastopen_ccache_entry *cce;
987
988	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
989	if (cce) {
990		if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
991		    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
992		    ((cookie_len & 0x1) == 0)) {
993			cce->server_mss = mss;
994			cce->cookie_len = cookie_len;
995			memcpy(cce->cookie, cookie, cookie_len);
996			cce->disable_time = 0;
997		} else {
998			/* invalid cookie length, disable entry */
999			cce->server_mss = 0;
1000			cce->cookie_len = 0;
1001			/*
1002			 * Preserve the existing disable time if it is
1003			 * already disabled.
1004			 */
1005			if (cce->disable_time == 0)
1006				cce->disable_time = getsbinuptime();
1007		}
1008	} else
1009		tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
1010
1011	CCB_UNLOCK(ccb);
1012}
1013
1014static struct tcp_fastopen_ccache_entry *
1015tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
1016    struct tcp_fastopen_ccache_bucket **ccbp)
1017{
1018	struct tcp_fastopen_ccache_bucket *ccb;
1019	struct tcp_fastopen_ccache_entry *cce;
1020	uint32_t last_word;
1021	uint32_t hash;
1022
1023	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
1024	    V_tcp_fastopen_ccache.secret);
1025	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
1026	    hash);
1027	last_word = inc->inc_fport;
1028	hash = jenkins_hash32(&last_word, 1, hash);
1029	ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
1030	*ccbp = ccb;
1031	CCB_LOCK(ccb);
1032
1033	/*
1034	 * Always returns with locked bucket.
1035	 */
1036	TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
1037		if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
1038		    (cce->server_port == inc->inc_ie.ie_fport) &&
1039		    (((cce->af == AF_INET) &&
1040		      (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
1041		      (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
1042		     ((cce->af == AF_INET6) &&
1043		      IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
1044		      IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
1045			break;
1046
1047	return (cce);
1048}
1049
1050static struct tcp_fastopen_ccache_entry *
1051tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
1052    struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
1053{
1054	struct tcp_fastopen_ccache_entry *cce;
1055
1056	/*
1057	 * 1. Create a new entry, or
1058	 * 2. Reclaim an existing entry, or
1059	 * 3. Fail
1060	 */
1061
1062	CCB_LOCK_ASSERT(ccb);
1063
1064	cce = NULL;
1065	if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
1066		cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
1067
1068	if (cce == NULL) {
1069		/*
1070		 * At bucket limit, or out of memory - reclaim last
1071		 * entry in bucket.
1072		 */
1073		cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
1074		if (cce == NULL) {
1075			/* XXX count this event */
1076			return (NULL);
1077		}
1078
1079		TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1080	} else
1081		ccb->ccb_num_entries++;
1082
1083	TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
1084	cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
1085	if (cce->af == AF_INET) {
1086		cce->cce_client_ip.v4 = inc->inc_laddr;
1087		cce->cce_server_ip.v4 = inc->inc_faddr;
1088	} else {
1089		cce->cce_client_ip.v6 = inc->inc6_laddr;
1090		cce->cce_server_ip.v6 = inc->inc6_faddr;
1091	}
1092	cce->server_port = inc->inc_fport;
1093	if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
1094	    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
1095	    ((cookie_len & 0x1) == 0)) {
1096		cce->server_mss = mss;
1097		cce->cookie_len = cookie_len;
1098		memcpy(cce->cookie, cookie, cookie_len);
1099		cce->disable_time = 0;
1100	} else {
1101		/* invalid cookie length, disable cce */
1102		cce->server_mss = 0;
1103		cce->cookie_len = 0;
1104		cce->disable_time = getsbinuptime();
1105	}
1106
1107	return (cce);
1108}
1109
1110static void
1111tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
1112    unsigned int limit)
1113{
1114	struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
1115	unsigned int entries;
1116
1117	CCB_LOCK(ccb);
1118	entries = 0;
1119	TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
1120		entries++;
1121		if (entries > limit)
1122			tcp_fastopen_ccache_entry_drop(cce, ccb);
1123	}
1124	KASSERT(ccb->ccb_num_entries <= (int)limit,
1125	    ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
1126		ccb->ccb_num_entries, limit));
1127	if (limit == 0) {
1128		KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
1129		    ("%s: ccb->ccb_entries not empty", __func__));
1130		ccb->ccb_num_entries = -1; /* disable bucket */
1131	}
1132	CCB_UNLOCK(ccb);
1133}
1134
1135static void
1136tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
1137    struct tcp_fastopen_ccache_bucket *ccb)
1138{
1139
1140	CCB_LOCK_ASSERT(ccb);
1141
1142	TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1143	ccb->ccb_num_entries--;
1144	uma_zfree(V_tcp_fastopen_ccache.zone, cce);
1145}
1146
1147static int
1148sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)
1149{
1150	struct sbuf sb;
1151	struct tcp_fastopen_ccache_bucket *ccb;
1152	struct tcp_fastopen_ccache_entry *cce;
1153	sbintime_t now, duration, limit;
1154	const int linesize = 128;
1155	int i, error, num_entries;
1156	unsigned int j;
1157#ifdef INET6
1158	char clt_buf[INET6_ADDRSTRLEN], srv_buf[INET6_ADDRSTRLEN];
1159#else
1160	char clt_buf[INET_ADDRSTRLEN], srv_buf[INET_ADDRSTRLEN];
1161#endif
1162
1163	if (jailed_without_vnet(curthread->td_ucred) != 0)
1164		return (EPERM);
1165
1166	/* Only allow root to read the client cookie cache */
1167	if (curthread->td_ucred->cr_uid != 0)
1168		return (EPERM);
1169
1170	num_entries = 0;
1171	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1172		ccb = &V_tcp_fastopen_ccache.base[i];
1173		CCB_LOCK(ccb);
1174		if (ccb->ccb_num_entries > 0)
1175			num_entries += ccb->ccb_num_entries;
1176		CCB_UNLOCK(ccb);
1177	}
1178	sbuf_new(&sb, NULL, linesize * (num_entries + 1), SBUF_INCLUDENUL);
1179
1180	sbuf_printf(&sb,
1181	            "\nLocal IP address     Remote IP address     Port   MSS"
1182	            " Disabled Cookie\n");
1183
1184	now = getsbinuptime();
1185	limit = (sbintime_t)V_tcp_fastopen_path_disable_time << 32;
1186	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1187		ccb = &V_tcp_fastopen_ccache.base[i];
1188		CCB_LOCK(ccb);
1189		TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) {
1190			if (cce->disable_time != 0) {
1191				duration = now - cce->disable_time;
1192				if (limit >= duration)
1193					duration = limit - duration;
1194				else
1195					duration = 0;
1196			} else
1197				duration = 0;
1198			sbuf_printf(&sb,
1199			            "%-20s %-20s %5u %5u ",
1200			            inet_ntop(cce->af, &cce->cce_client_ip,
1201			                clt_buf, sizeof(clt_buf)),
1202			            inet_ntop(cce->af, &cce->cce_server_ip,
1203			                srv_buf, sizeof(srv_buf)),
1204			            ntohs(cce->server_port),
1205			            cce->server_mss);
1206			if (duration > 0)
1207				sbuf_printf(&sb, "%7ds ", sbintime_getsec(duration));
1208			else
1209				sbuf_printf(&sb, "%8s ", "No");
1210			for (j = 0; j < cce->cookie_len; j++)
1211				sbuf_printf(&sb, "%02x", cce->cookie[j]);
1212			sbuf_putc(&sb, '\n');
1213		}
1214		CCB_UNLOCK(ccb);
1215	}
1216	error = sbuf_finish(&sb);
1217	if (error == 0)
1218		error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
1219	sbuf_delete(&sb);
1220	return (error);
1221}
1222