1/*-
2 * ng_tcpmss.c
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * This software includes fragments of the following programs:
32 *	tcpmssd		Ruslan Ermilov <ru@FreeBSD.org>
33 */
34
/*
 * This node is a netgraph tool that works around the PMTUD problem. It acts
 * as a filter for IP packets. If configured, it reduces the MSS of TCP SYN
 * packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets a filter for packets arriving on hook 'inHook'. The packet's
 * TCP MSS field is lowered to the 'maxMSS' parameter and the resulting
 * packet is sent to 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may break on SMP.
 */
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/endian.h>
51#include <sys/errno.h>
52#include <sys/kernel.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#include <netinet/tcp.h>
60
61#include <netgraph/ng_message.h>
62#include <netgraph/netgraph.h>
63#include <netgraph/ng_parse.h>
64#include <netgraph/ng_tcpmss.h>
65
/*
 * Malloc type for the per-hook private data: the shared M_NETGRAPH type
 * by default, or a dedicated type when NG_SEPARATE_MALLOC is defined.
 */
#ifndef NG_SEPARATE_MALLOC
#define M_NETGRAPH_TCPMSS M_NETGRAPH
#else
static MALLOC_DEFINE(M_NETGRAPH_TCPMSS, "netgraph_tcpmss", "netgraph tcpmss node");
#endif
71
72/* Per hook info. */
73typedef struct {
74	hook_p				outHook;
75	struct ng_tcpmss_hookstat	stats;
76} *hpriv_p;
77
78/* Netgraph methods. */
79static ng_constructor_t	ng_tcpmss_constructor;
80static ng_rcvmsg_t	ng_tcpmss_rcvmsg;
81static ng_newhook_t	ng_tcpmss_newhook;
82static ng_rcvdata_t	ng_tcpmss_rcvdata;
83static ng_disconnect_t	ng_tcpmss_disconnect;
84
85static int correct_mss(struct tcphdr *, int, uint16_t, int);
86
87/* Parse type for struct ng_tcpmss_hookstat. */
88static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
89	= NG_TCPMSS_HOOKSTAT_INFO;
90static const struct ng_parse_type ng_tcpmss_hookstat_type = {
91	&ng_parse_struct_type,
92	&ng_tcpmss_hookstat_type_fields
93};
94
95/* Parse type for struct ng_tcpmss_config. */
96static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
97	= NG_TCPMSS_CONFIG_INFO;
98static const struct ng_parse_type ng_tcpmss_config_type = {
99	&ng_parse_struct_type,
100	ng_tcpmss_config_type_fields
101};
102
103/* List of commands and how to convert arguments to/from ASCII. */
104static const struct ng_cmdlist ng_tcpmss_cmds[] = {
105	{
106	  NGM_TCPMSS_COOKIE,
107	  NGM_TCPMSS_GET_STATS,
108	  "getstats",
109	  &ng_parse_hookbuf_type,
110	  &ng_tcpmss_hookstat_type
111	},
112	{
113	  NGM_TCPMSS_COOKIE,
114	  NGM_TCPMSS_CLR_STATS,
115	  "clrstats",
116	  &ng_parse_hookbuf_type,
117	  NULL
118	},
119	{
120	  NGM_TCPMSS_COOKIE,
121	  NGM_TCPMSS_GETCLR_STATS,
122	  "getclrstats",
123	  &ng_parse_hookbuf_type,
124	  &ng_tcpmss_hookstat_type
125	},
126	{
127	  NGM_TCPMSS_COOKIE,
128	  NGM_TCPMSS_CONFIG,
129	  "config",
130	  &ng_tcpmss_config_type,
131	  NULL
132	},
133	{ 0 }
134};
135
136/* Netgraph type descriptor. */
137static struct ng_type ng_tcpmss_typestruct = {
138	.version =	NG_ABI_VERSION,
139	.name =		NG_TCPMSS_NODE_TYPE,
140	.constructor =	ng_tcpmss_constructor,
141	.rcvmsg =	ng_tcpmss_rcvmsg,
142	.newhook =	ng_tcpmss_newhook,
143	.rcvdata =	ng_tcpmss_rcvdata,
144	.disconnect =	ng_tcpmss_disconnect,
145	.cmdlist =	ng_tcpmss_cmds,
146};
147
148NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
/*
 * Store error code x in the local variable "error" and jump to the local
 * label "done".  The do/while (0) wrapper makes the expansion a single
 * statement, so "if (c) ERROUT(e); else ..." parses as intended.
 */
#define	ERROUT(x)	do { error = (x); goto done; } while (0)
150
151/*
152 * Node constructor. No special actions required.
153 */
154static int
155ng_tcpmss_constructor(node_p node)
156{
157	return (0);
158}
159
160/*
161 * Add a hook. Any unique name is OK.
162 */
163static int
164ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
165{
166	hpriv_p priv;
167
168	priv = malloc(sizeof(*priv), M_NETGRAPH_TCPMSS, M_NOWAIT | M_ZERO);
169	if (priv == NULL)
170		return (ENOMEM);
171
172	NG_HOOK_SET_PRIVATE(hook, priv);
173
174	return (0);
175}
176
177/*
178 * Receive a control message.
179 */
180static int
181ng_tcpmss_rcvmsg
182(node_p node, item_p item, hook_p lasthook)
183{
184	struct ng_mesg *msg, *resp = NULL;
185	int error = 0;
186
187	NGI_GET_MSG(item, msg);
188
189	switch (msg->header.typecookie) {
190	case NGM_TCPMSS_COOKIE:
191		switch (msg->header.cmd) {
192		case NGM_TCPMSS_GET_STATS:
193		case NGM_TCPMSS_CLR_STATS:
194		case NGM_TCPMSS_GETCLR_STATS:
195		    {
196			hook_p hook;
197			hpriv_p priv;
198
199			/* Check that message is long enough. */
200			if (msg->header.arglen != NG_HOOKSIZ)
201				ERROUT(EINVAL);
202
203			/* Find this hook. */
204			hook = ng_findhook(node, (char *)msg->data);
205			if (hook == NULL)
206				ERROUT(ENOENT);
207
208			priv = NG_HOOK_PRIVATE(hook);
209
210			/* Create response. */
211			if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
212				NG_MKRESPONSE(resp, msg,
213				    sizeof(struct ng_tcpmss_hookstat), M_NOWAIT);
214				if (resp == NULL)
215					ERROUT(ENOMEM);
216				bcopy(&priv->stats, resp->data,
217				    sizeof(struct ng_tcpmss_hookstat));
218			}
219
220			if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
221				bzero(&priv->stats,
222				    sizeof(struct ng_tcpmss_hookstat));
223			break;
224		    }
225		case NGM_TCPMSS_CONFIG:
226		    {
227			struct ng_tcpmss_config *set;
228			hook_p in, out;
229			hpriv_p priv;
230
231			/* Check that message is long enough. */
232			if (msg->header.arglen !=
233			    sizeof(struct ng_tcpmss_config))
234				ERROUT(EINVAL);
235
236			set = (struct ng_tcpmss_config *)msg->data;
237			in = ng_findhook(node, set->inHook);
238			out = ng_findhook(node, set->outHook);
239			if (in == NULL || out == NULL)
240				ERROUT(ENOENT);
241
242			/* Configure MSS hack. */
243			priv = NG_HOOK_PRIVATE(in);
244			priv->outHook = out;
245			priv->stats.maxMSS = set->maxMSS;
246
247			break;
248 		    }
249		default:
250			error = EINVAL;
251			break;
252		}
253		break;
254	default:
255		error = EINVAL;
256		break;
257	}
258
259done:
260	NG_RESPOND_MSG(error, node, item, resp);
261	NG_FREE_MSG(msg);
262
263	return (error);
264}
265
266/*
267 * Receive data on a hook, and hack MSS.
268 *
269 */
270static int
271ng_tcpmss_rcvdata(hook_p hook, item_p item)
272{
273	hpriv_p priv = NG_HOOK_PRIVATE(hook);
274	struct mbuf *m = NULL;
275	struct ip *ip;
276	struct tcphdr *tcp;
277	int iphlen, tcphlen, pktlen;
278	int pullup_len = 0;
279	int error = 0;
280
281	/* Drop packets if filter is not configured on this hook. */
282	if (priv->outHook == NULL)
283		goto done;
284
285	NGI_GET_M(item, m);
286
287	/* Update stats on incoming hook. */
288	pktlen = m->m_pkthdr.len;
289	priv->stats.Octets += pktlen;
290	priv->stats.Packets++;
291
292	/* Check whether we configured to fix MSS. */
293	if (priv->stats.maxMSS == 0)
294		goto send;
295
296#define	M_CHECK(length) do {					\
297	pullup_len += length;					\
298	if ((m)->m_pkthdr.len < pullup_len)			\
299		goto send;					\
300	if ((m)->m_len < pullup_len &&				\
301	   (((m) = m_pullup((m), pullup_len)) == NULL))		\
302		ERROUT(ENOBUFS);				\
303	} while (0)
304
305	/* Check mbuf packet size and arrange for IP header. */
306	M_CHECK(sizeof(struct ip));
307	ip = mtod(m, struct ip *);
308
309	/* Check IP version. */
310	if (ip->ip_v != IPVERSION)
311		ERROUT(EINVAL);
312
313	/* Check IP header length. */
314	iphlen = ip->ip_hl << 2;
315	if (iphlen < sizeof(struct ip) || iphlen > pktlen )
316		ERROUT(EINVAL);
317
318        /* Check if it is TCP. */
319	if (!(ip->ip_p == IPPROTO_TCP))
320		goto send;
321
322	/* Check mbuf packet size and arrange for IP+TCP header */
323	M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
324	ip = mtod(m, struct ip *);
325	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
326
327	/* Check TCP header length. */
328	tcphlen = tcp->th_off << 2;
329	if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
330		ERROUT(EINVAL);
331
332	/* Check SYN packet and has options. */
333	if (!(tcp->th_flags & TH_SYN) || tcphlen == sizeof(struct tcphdr))
334		goto send;
335
336	/* Update SYN stats. */
337	priv->stats.SYNPkts++;
338
339	M_CHECK(tcphlen - sizeof(struct tcphdr));
340	ip = mtod(m, struct ip *);
341	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
342
343#undef	M_CHECK
344
345	/* Fix MSS and update stats. */
346	if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
347	    m->m_pkthdr.csum_flags))
348		priv->stats.FixedPkts++;
349
350send:
351	/* Deliver frame out destination hook. */
352	NG_FWD_NEW_DATA(error, item, priv->outHook, m);
353
354	return (error);
355
356done:
357	NG_FREE_ITEM(item);
358	NG_FREE_M(m);
359
360	return (error);
361}
362
363/*
364 * Hook disconnection.
365 * We must check all hooks, since they may reference this one.
366 */
367static int
368ng_tcpmss_disconnect(hook_p hook)
369{
370	node_p node = NG_HOOK_NODE(hook);
371	hook_p hook2;
372
373	LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
374		hpriv_p priv = NG_HOOK_PRIVATE(hook2);
375
376		if (priv->outHook == hook)
377			priv->outHook = NULL;
378	}
379
380	free(NG_HOOK_PRIVATE(hook), M_NETGRAPH_TCPMSS);
381
382	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
383		ng_rmnode_self(NG_HOOK_NODE(hook));
384
385	return (0);
386}
387
388/*
389 * Code from tcpmssd.
390 */
391
/*-
 * The following macro is used to update an internet (ones-complement)
 * checksum.  "acc" is a signed 32-bit accumulation of all the changes to
 * the checksum (adding in old 16-bit words and subtracting out new words),
 * and "cksum" is the checksum value to be updated.
 *
 * Both arguments must be modifiable lvalues: "acc" is clobbered and "cksum"
 * receives the updated 16-bit checksum.  The expansion has no trailing
 * semicolon, so the macro is used like an ordinary statement and is safe
 * in unbraced if/else bodies.
 */
#define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {			\
	(acc) += (cksum);					\
	if ((acc) < 0) {					\
		(acc) = -(acc);					\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t)~(acc);			\
	} else {						\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t)(acc);			\
	}							\
} while (0)
413
414static int
415correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
416{
417	int olen, optlen;
418	u_char *opt;
419	int accumulate;
420	int res = 0;
421	uint16_t sum;
422
423	for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
424	     olen > 0; olen -= optlen, opt += optlen) {
425		if (*opt == TCPOPT_EOL)
426			break;
427		else if (*opt == TCPOPT_NOP)
428			optlen = 1;
429		else {
430			optlen = *(opt + 1);
431			if (optlen <= 0 || optlen > olen)
432				break;
433			if (*opt == TCPOPT_MAXSEG) {
434				if (optlen != TCPOLEN_MAXSEG)
435					continue;
436				accumulate = be16dec(opt + 2);
437				if (accumulate > maxmss) {
438					if ((flags & CSUM_TCP) == 0) {
439						accumulate -= maxmss;
440						sum = be16dec(&tc->th_sum);
441						TCPMSS_ADJUST_CHECKSUM(accumulate, sum);
442						be16enc(&tc->th_sum, sum);
443					}
444					be16enc(opt + 2, maxmss);
445					res = 1;
446				}
447			}
448		}
449	}
450	return (res);
451}
452