1/*	$OpenBSD: pf_syncookies.c,v 1.7 2018/09/10 15:54:28 henning Exp $ */
2
3/* Copyright (c) 2016,2017 Henning Brauer <henning@openbsd.org>
4 * Copyright (c) 2016 Alexandr Nedvedicky <sashan@openbsd.org>
5 *
6 * syncookie parts based on FreeBSD sys/netinet/tcp_syncache.c
7 *
8 * Copyright (c) 2001 McAfee, Inc.
9 * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
10 * All rights reserved.
11 *
12 * This software was developed for the FreeBSD Project by Jonathan Lemon
13 * and McAfee Research, the Security Research Division of McAfee, Inc. under
14 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
15 * DARPA CHATS research program. [2001 McAfee, Inc.]
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 *    notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 *    notice, this list of conditions and the following disclaimer in the
24 *    documentation and/or other materials provided with the distribution.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
/*
 * When we're under a SYN flood, we use syncookies to prevent state table
 * exhaustion.  The trigger for synflood mode is the number of half-open
 * connections in the state table.
 * We leave synflood mode when the number of half-open states - including
 * in-flight syncookies - drops far enough again.
 */
46
47/*
48 * syncookie enabled Initial Sequence Number:
49 *  24 bit MAC
50 *   3 bit WSCALE index
51 *   3 bit MSS index
52 *   1 bit SACK permitted
53 *   1 bit odd/even secret
54 *
55 * References:
56 *  RFC4987 TCP SYN Flooding Attacks and Common Mitigations
57 *  http://cr.yp.to/syncookies.html    (overview)
58 *  http://cr.yp.to/syncookies/archive (details)
59 */
60
61//#include "pflog.h"
62
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/mbuf.h>
66#include <sys/filio.h>
67#include <sys/socket.h>
68#include <sys/socketvar.h>
69#include <sys/kernel.h>
70#include <sys/time.h>
71#include <sys/proc.h>
72#include <sys/rwlock.h>
73#include <sys/syslog.h>
74
75#include <crypto/siphash/siphash.h>
76
77#include <net/if.h>
78#include <net/if_var.h>
79#include <net/if_types.h>
80#include <net/route.h>
81
82#include <netinet/in.h>
83#include <netinet/in_pcb.h>
84#include <netinet/ip.h>
85#include <netinet/tcp.h>
86#include <netinet/tcp_var.h>
87
88#include <net/pfvar.h>
89#include <netpfil/pf/pf_nv.h>
90
/*
 * Print the parenthesised printf argument list `x' when the pf debug level
 * is at least `n'.  Wrapped in do { } while (0) so the macro behaves as a
 * single statement and a following `else' cannot bind to its internal `if'.
 */
#define	DPFPRINTF(n, x)	do {						\
	if (V_pf_status.debug >= (n))					\
		printf x;						\
} while (0)
92
/*
 * The eight non-MAC bits of a syncookie, accessible either as one raw byte
 * or as the individual fields packed into it.
 */
union pf_syncookie {
	uint8_t		cookie;		/* all eight bits at once */
	struct {
		uint8_t	oddeven:1;	/* which of the two keys was used */
		uint8_t	sack_ok:1;	/* SACK permitted */
		uint8_t	wscale_idx:3;	/* index into pf_syncookie_wstab */
		uint8_t	mss_idx:3;	/* index into pf_syncookie_msstab */
	} flags;
};
102
103#define	PF_SYNCOOKIE_SECRET_SIZE	SIPHASH_KEY_LENGTH
104#define	PF_SYNCOOKIE_SECRET_LIFETIME	15 /* seconds */
105
106/* Protected by PF_RULES_xLOCK. */
107struct pf_syncookie_status {
108	struct callout	keytimeout;
109	uint8_t		oddeven;
110	uint8_t		key[2][SIPHASH_KEY_LENGTH];
111	uint32_t	hiwat;	/* absolute; # of states */
112	uint32_t	lowat;
113};
114VNET_DEFINE_STATIC(struct pf_syncookie_status, pf_syncookie_status);
115#define V_pf_syncookie_status	VNET(pf_syncookie_status)
116
117static int	pf_syncookies_setmode(u_int8_t);
118void		pf_syncookie_rotate(void *);
119void		pf_syncookie_newkey(void);
120uint32_t	pf_syncookie_mac(struct pf_pdesc *, union pf_syncookie,
121		    uint32_t);
122uint32_t	pf_syncookie_generate(struct mbuf *m, int off, struct pf_pdesc *,
123		    uint16_t);
124
125void
126pf_syncookies_init(void)
127{
128	callout_init(&V_pf_syncookie_status.keytimeout, 1);
129	PF_RULES_WLOCK();
130
131	V_pf_syncookie_status.hiwat = PF_SYNCOOKIES_HIWATPCT *
132	    V_pf_limits[PF_LIMIT_STATES].limit / 100;
133	V_pf_syncookie_status.lowat = PF_SYNCOOKIES_LOWATPCT *
134	    V_pf_limits[PF_LIMIT_STATES].limit / 100;
135	pf_syncookies_setmode(PF_SYNCOOKIES_ADAPTIVE);
136
137	PF_RULES_WUNLOCK();
138}
139
140void
141pf_syncookies_cleanup(void)
142{
143	callout_stop(&V_pf_syncookie_status.keytimeout);
144}
145
146int
147pf_get_syncookies(struct pfioc_nv *nv)
148{
149	nvlist_t	*nvl = NULL;
150	void		*nvlpacked = NULL;
151	int		 error;
152
153#define ERROUT(x)	ERROUT_FUNCTION(errout, x)
154
155	nvl = nvlist_create(0);
156	if (nvl == NULL)
157		ERROUT(ENOMEM);
158
159	nvlist_add_bool(nvl, "enabled",
160	    V_pf_status.syncookies_mode != PF_SYNCOOKIES_NEVER);
161	nvlist_add_bool(nvl, "adaptive",
162	    V_pf_status.syncookies_mode == PF_SYNCOOKIES_ADAPTIVE);
163	nvlist_add_number(nvl, "highwater", V_pf_syncookie_status.hiwat);
164	nvlist_add_number(nvl, "lowwater", V_pf_syncookie_status.lowat);
165	nvlist_add_number(nvl, "halfopen_states",
166	    atomic_load_32(&V_pf_status.states_halfopen));
167
168	nvlpacked = nvlist_pack(nvl, &nv->len);
169	if (nvlpacked == NULL)
170		ERROUT(ENOMEM);
171
172	if (nv->size == 0) {
173		ERROUT(0);
174	} else if (nv->size < nv->len) {
175		ERROUT(ENOSPC);
176	}
177
178	error = copyout(nvlpacked, nv->data, nv->len);
179
180#undef ERROUT
181errout:
182	nvlist_destroy(nvl);
183	free(nvlpacked, M_NVLIST);
184
185	return (error);
186}
187
188int
189pf_set_syncookies(struct pfioc_nv *nv)
190{
191	nvlist_t	*nvl = NULL;
192	void		*nvlpacked = NULL;
193	int		 error;
194	bool		 enabled, adaptive;
195	uint32_t	 hiwat, lowat;
196	uint8_t		 newmode;
197
198#define ERROUT(x)	ERROUT_FUNCTION(errout, x)
199
200	if (nv->len > pf_ioctl_maxcount)
201		return (ENOMEM);
202
203	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
204	if (nvlpacked == NULL)
205		return (ENOMEM);
206
207	error = copyin(nv->data, nvlpacked, nv->len);
208	if (error)
209		ERROUT(error);
210
211	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
212	if (nvl == NULL)
213		ERROUT(EBADMSG);
214
215	if (! nvlist_exists_bool(nvl, "enabled")
216	    || ! nvlist_exists_bool(nvl, "adaptive"))
217		ERROUT(EBADMSG);
218
219	enabled = nvlist_get_bool(nvl, "enabled");
220	adaptive = nvlist_get_bool(nvl, "adaptive");
221	PFNV_CHK(pf_nvuint32_opt(nvl, "highwater", &hiwat,
222	    V_pf_syncookie_status.hiwat));
223	PFNV_CHK(pf_nvuint32_opt(nvl, "lowwater", &lowat,
224	    V_pf_syncookie_status.lowat));
225
226	if (lowat >= hiwat)
227		ERROUT(EINVAL);
228
229	newmode = PF_SYNCOOKIES_NEVER;
230	if (enabled)
231		newmode = adaptive ? PF_SYNCOOKIES_ADAPTIVE : PF_SYNCOOKIES_ALWAYS;
232
233	PF_RULES_WLOCK();
234	error = pf_syncookies_setmode(newmode);
235
236	V_pf_syncookie_status.lowat = lowat;
237	V_pf_syncookie_status.hiwat = hiwat;
238
239	PF_RULES_WUNLOCK();
240
241#undef ERROUT
242errout:
243	nvlist_destroy(nvl);
244	free(nvlpacked, M_NVLIST);
245
246	return (error);
247}
248
249static int
250pf_syncookies_setmode(u_int8_t mode)
251{
252	if (mode > PF_SYNCOOKIES_MODE_MAX)
253		return (EINVAL);
254
255	if (V_pf_status.syncookies_mode == mode)
256		return (0);
257
258	V_pf_status.syncookies_mode = mode;
259	if (V_pf_status.syncookies_mode == PF_SYNCOOKIES_ALWAYS) {
260		pf_syncookie_newkey();
261		V_pf_status.syncookies_active = true;
262	}
263	return (0);
264}
265
266int
267pf_synflood_check(struct pf_pdesc *pd)
268{
269	MPASS(pd->proto == IPPROTO_TCP);
270	PF_RULES_RASSERT();
271
272	if (pd->pf_mtag && (pd->pf_mtag->flags & PF_MTAG_FLAG_SYNCOOKIE_RECREATED))
273		return (0);
274
275	if (V_pf_status.syncookies_mode != PF_SYNCOOKIES_ADAPTIVE)
276		return (V_pf_status.syncookies_mode);
277
278	if (!V_pf_status.syncookies_active &&
279	    atomic_load_32(&V_pf_status.states_halfopen) >
280	    V_pf_syncookie_status.hiwat) {
281		/* We'd want to 'pf_syncookie_newkey()' here, but that requires
282		 * the rules write lock, which we can't get with the read lock
283		 * held. */
284		callout_reset(&V_pf_syncookie_status.keytimeout, 0,
285		    pf_syncookie_rotate, curvnet);
286		V_pf_status.syncookies_active = true;
287		DPFPRINTF(LOG_WARNING,
288		    ("synflood detected, enabling syncookies\n"));
289		// XXXTODO V_pf_status.lcounters[LCNT_SYNFLOODS]++;
290	}
291
292	return (V_pf_status.syncookies_active);
293}
294
295void
296pf_syncookie_send(struct mbuf *m, int off, struct pf_pdesc *pd)
297{
298	uint16_t	mss;
299	uint32_t	iss;
300
301	mss = max(V_tcp_mssdflt, pf_get_mss(m, off, pd->hdr.tcp.th_off, pd->af));
302	iss = pf_syncookie_generate(m, off, pd, mss);
303	pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport,
304	    iss, ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss,
305	    0, true, 0, 0, pd->act.rtableid);
306	counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_SENT], 1);
307	/* XXX Maybe only in adaptive mode? */
308	atomic_add_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven],
309	    1);
310}
311
312bool
313pf_syncookie_check(struct pf_pdesc *pd)
314{
315	uint32_t		 hash, ack, seq;
316	union pf_syncookie	 cookie;
317
318	MPASS(pd->proto == IPPROTO_TCP);
319	PF_RULES_RASSERT();
320
321	seq = ntohl(pd->hdr.tcp.th_seq) - 1;
322	ack = ntohl(pd->hdr.tcp.th_ack) - 1;
323	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
324
325	/* we don't know oddeven before setting the cookie (union) */
326	if (atomic_load_64(&V_pf_status.syncookies_inflight[cookie.flags.oddeven])
327	    == 0)
328		return (0);
329
330	hash = pf_syncookie_mac(pd, cookie, seq);
331	if ((ack & ~0xff) != (hash & ~0xff))
332		return (false);
333
334	return (true);
335}
336
337uint8_t
338pf_syncookie_validate(struct pf_pdesc *pd)
339{
340	uint32_t		 ack;
341	union pf_syncookie	 cookie;
342
343	if (! pf_syncookie_check(pd))
344		return (0);
345
346	ack = ntohl(pd->hdr.tcp.th_ack) - 1;
347	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
348
349	counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_VALID], 1);
350	atomic_add_64(&V_pf_status.syncookies_inflight[cookie.flags.oddeven], -1);
351
352	return (1);
353}
354
355/*
356 * all following functions private
357 */
358void
359pf_syncookie_rotate(void *arg)
360{
361	CURVNET_SET((struct vnet *)arg);
362
363	/* do we want to disable syncookies? */
364	if (V_pf_status.syncookies_active &&
365	    ((V_pf_status.syncookies_mode == PF_SYNCOOKIES_ADAPTIVE &&
366	    (atomic_load_32(&V_pf_status.states_halfopen) +
367	    atomic_load_64(&V_pf_status.syncookies_inflight[0]) +
368	    atomic_load_64(&V_pf_status.syncookies_inflight[1])) <
369	    V_pf_syncookie_status.lowat) ||
370	    V_pf_status.syncookies_mode == PF_SYNCOOKIES_NEVER)
371			) {
372		V_pf_status.syncookies_active = false;
373		DPFPRINTF(PF_DEBUG_MISC, ("syncookies disabled\n"));
374	}
375
376	/* nothing in flight any more? delete keys and return */
377	if (!V_pf_status.syncookies_active &&
378	    atomic_load_64(&V_pf_status.syncookies_inflight[0]) == 0 &&
379	    atomic_load_64(&V_pf_status.syncookies_inflight[1]) == 0) {
380		memset(V_pf_syncookie_status.key[0], 0,
381		    PF_SYNCOOKIE_SECRET_SIZE);
382		memset(V_pf_syncookie_status.key[1], 0,
383		    PF_SYNCOOKIE_SECRET_SIZE);
384		CURVNET_RESTORE();
385		return;
386	}
387
388	PF_RULES_WLOCK();
389	/* new key, including timeout */
390	pf_syncookie_newkey();
391	PF_RULES_WUNLOCK();
392
393	CURVNET_RESTORE();
394}
395
396void
397pf_syncookie_newkey(void)
398{
399	PF_RULES_WASSERT();
400
401	MPASS(V_pf_syncookie_status.oddeven < 2);
402	V_pf_syncookie_status.oddeven = (V_pf_syncookie_status.oddeven + 1) & 0x1;
403	atomic_store_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven], 0);
404	arc4random_buf(V_pf_syncookie_status.key[V_pf_syncookie_status.oddeven],
405	    PF_SYNCOOKIE_SECRET_SIZE);
406	callout_reset(&V_pf_syncookie_status.keytimeout,
407	    PF_SYNCOOKIE_SECRET_LIFETIME * hz, pf_syncookie_rotate, curvnet);
408}
409
/*
 * MSS values a syncookie can encode, in ascending order; the table index is
 * what is stored in the cookie.  Read-only.
 *
 * Distribution and probability of certain MSS values.  Those in between are
 * rounded down to the next lower one.
 * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
 *   .2%  .3%   5%    7%    7%    20%   15%   45%
 */
static const int pf_syncookie_msstab[] =
    { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
418
/*
 * WSCALE values a syncookie can encode; the table index is what is stored
 * in the cookie.  Read-only.
 *
 * Distribution and probability of certain WSCALE values.
 * The absence of the WSCALE option is encoded with index zero.
 * [WSCALE values histograms, Allman, 2012]
 *                                  X 10 10 35  5  6 14 10%   by host
 *                                  X 11  4  5  5 18 49  3%   by connections
 */
static const int pf_syncookie_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
427
428uint32_t
429pf_syncookie_mac(struct pf_pdesc *pd, union pf_syncookie cookie, uint32_t seq)
430{
431	SIPHASH_CTX	ctx;
432	uint32_t	siphash[2];
433
434	PF_RULES_RASSERT();
435	MPASS(pd->proto == IPPROTO_TCP);
436
437	SipHash24_Init(&ctx);
438	SipHash_SetKey(&ctx, V_pf_syncookie_status.key[cookie.flags.oddeven]);
439
440	switch (pd->af) {
441	case AF_INET:
442		SipHash_Update(&ctx, pd->src, sizeof(pd->src->v4));
443		SipHash_Update(&ctx, pd->dst, sizeof(pd->dst->v4));
444		break;
445	case AF_INET6:
446		SipHash_Update(&ctx, pd->src, sizeof(pd->src->v6));
447		SipHash_Update(&ctx, pd->dst, sizeof(pd->dst->v6));
448		break;
449	default:
450		panic("unknown address family");
451	}
452
453	SipHash_Update(&ctx, pd->sport, sizeof(*pd->sport));
454	SipHash_Update(&ctx, pd->dport, sizeof(*pd->dport));
455	SipHash_Update(&ctx, &seq, sizeof(seq));
456	SipHash_Update(&ctx, &cookie, sizeof(cookie));
457	SipHash_Final((uint8_t *)&siphash, &ctx);
458
459	return (siphash[0] ^ siphash[1]);
460}
461
462uint32_t
463pf_syncookie_generate(struct mbuf *m, int off, struct pf_pdesc *pd,
464    uint16_t mss)
465{
466	uint8_t			 i, wscale;
467	uint32_t		 iss, hash;
468	union pf_syncookie	 cookie;
469
470	PF_RULES_RASSERT();
471
472	cookie.cookie = 0;
473
474	/* map MSS */
475	for (i = nitems(pf_syncookie_msstab) - 1;
476	    pf_syncookie_msstab[i] > mss && i > 0; i--)
477		/* nada */;
478	cookie.flags.mss_idx = i;
479
480	/* map WSCALE */
481	wscale = pf_get_wscale(m, off, pd->hdr.tcp.th_off, pd->af);
482	for (i = nitems(pf_syncookie_wstab) - 1;
483	    pf_syncookie_wstab[i] > wscale && i > 0; i--)
484		/* nada */;
485	cookie.flags.wscale_idx = i;
486	cookie.flags.sack_ok = 0;	/* XXX */
487
488	cookie.flags.oddeven = V_pf_syncookie_status.oddeven;
489	hash = pf_syncookie_mac(pd, cookie, ntohl(pd->hdr.tcp.th_seq));
490
491	/*
492	 * Put the flags into the hash and XOR them to get better ISS number
493	 * variance.  This doesn't enhance the cryptographic strength and is
494	 * done to prevent the 8 cookie bits from showing up directly on the
495	 * wire.
496	 */
497	iss = hash & ~0xff;
498	iss |= cookie.cookie ^ (hash >> 24);
499
500	return (iss);
501}
502
503struct mbuf *
504pf_syncookie_recreate_syn(uint8_t ttl, int off, struct pf_pdesc *pd)
505{
506	uint8_t			 wscale;
507	uint16_t		 mss;
508	uint32_t		 ack, seq;
509	union pf_syncookie	 cookie;
510
511	seq = ntohl(pd->hdr.tcp.th_seq) - 1;
512	ack = ntohl(pd->hdr.tcp.th_ack) - 1;
513	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
514
515	if (cookie.flags.mss_idx >= nitems(pf_syncookie_msstab) ||
516	    cookie.flags.wscale_idx >= nitems(pf_syncookie_wstab))
517		return (NULL);
518
519	mss = pf_syncookie_msstab[cookie.flags.mss_idx];
520	wscale = pf_syncookie_wstab[cookie.flags.wscale_idx];
521
522	return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport,
523	    *pd->dport, seq, 0, TH_SYN, wscale, mss, ttl, false, 0,
524	    PF_MTAG_FLAG_SYNCOOKIE_RECREATED, pd->act.rtableid));
525}
526