1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3
4#include <stddef.h>
5#include <errno.h>
6#include <stdbool.h>
7#include <sys/types.h>
8#include <sys/socket.h>
9#include <linux/tcp.h>
10#include <linux/socket.h>
11#include <linux/bpf.h>
12#include <linux/types.h>
13#include <bpf/bpf_helpers.h>
14#include <bpf/bpf_endian.h>
15#define BPF_PROG_TEST_TCP_HDR_OPTIONS
16#include "test_tcp_hdr_options.h"
17
18#ifndef sizeof_field
19#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
20#endif
21
/* TCP option kind under test.  Defaults to the experimental kind
 * (TCPOPT_EXP); userspace may overwrite it before attaching.
 */
__u8 test_kind = TCPOPT_EXP;
/* ExID magic written/matched for the experimental option (host order here,
 * converted with __bpf_htons() at use sites).
 */
__u16 test_magic = 0xeB9F;
/* Snapshot of skops->bpf_sock_ops_cb_flags taken in handle_passive_estab()
 * so userspace can verify cb-flag inheritance from the listener.
 */
__u32 inherit_cb_flags = 0;

/* Options this program writes out (filled in by userspace before the test)
 * and options it parsed from the peer (read back by userspace afterwards).
 * passive_* belong to the accept() side, active_* to the connect() side.
 */
struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out	= {};

struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in	= {};

struct bpf_test_option active_syn_out	= {};
struct bpf_test_option active_fin_out	= {};

struct bpf_test_option active_estab_in	= {};
struct bpf_test_option active_fin_in	= {};

/* Per-socket state (active/fastopen/syncookie/resend_syn) keyed by socket */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");
44
45static bool skops_want_cookie(const struct bpf_sock_ops *skops)
46{
47	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
48}
49
50static bool skops_current_mss(const struct bpf_sock_ops *skops)
51{
52	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
53}
54
55static __u8 option_total_len(__u8 flags)
56{
57	__u8 i, len = 1; /* +1 for flags */
58
59	if (!flags)
60		return 0;
61
62	/* RESEND bit does not use a byte */
63	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
64		len += !!TEST_OPTION_FLAGS(flags, i);
65
66	if (test_kind == TCPOPT_EXP)
67		return len + TCP_BPF_EXPOPT_BASE_LEN;
68	else
69		return len + 2; /* +1 kind, +1 kind-len */
70}
71
72static void write_test_option(const struct bpf_test_option *test_opt,
73			      __u8 *data)
74{
75	__u8 offset = 0;
76
77	data[offset++] = test_opt->flags;
78	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
79		data[offset++] = test_opt->max_delack_ms;
80
81	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
82		data[offset++] = test_opt->rand;
83}
84
85static int store_option(struct bpf_sock_ops *skops,
86			const struct bpf_test_option *test_opt)
87{
88	union {
89		struct tcp_exprm_opt exprm;
90		struct tcp_opt regular;
91	} write_opt;
92	int err;
93
94	if (test_kind == TCPOPT_EXP) {
95		write_opt.exprm.kind = TCPOPT_EXP;
96		write_opt.exprm.len = option_total_len(test_opt->flags);
97		write_opt.exprm.magic = __bpf_htons(test_magic);
98		write_opt.exprm.data32 = 0;
99		write_test_option(test_opt, write_opt.exprm.data);
100		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
101					sizeof(write_opt.exprm), 0);
102	} else {
103		write_opt.regular.kind = test_kind;
104		write_opt.regular.len = option_total_len(test_opt->flags);
105		write_opt.regular.data32 = 0;
106		write_test_option(test_opt, write_opt.regular.data);
107		err = bpf_store_hdr_opt(skops, &write_opt.regular,
108					sizeof(write_opt.regular), 0);
109	}
110
111	if (err)
112		RET_CG_ERR(err);
113
114	return CG_OK;
115}
116
117static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
118{
119	opt->flags = *start++;
120
121	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
122		opt->max_delack_ms = *start++;
123
124	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
125		opt->rand = *start++;
126
127	return 0;
128}
129
130static int load_option(struct bpf_sock_ops *skops,
131		       struct bpf_test_option *test_opt, bool from_syn)
132{
133	union {
134		struct tcp_exprm_opt exprm;
135		struct tcp_opt regular;
136	} search_opt;
137	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
138
139	if (test_kind == TCPOPT_EXP) {
140		search_opt.exprm.kind = TCPOPT_EXP;
141		search_opt.exprm.len = 4;
142		search_opt.exprm.magic = __bpf_htons(test_magic);
143		search_opt.exprm.data32 = 0;
144		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
145				       sizeof(search_opt.exprm), load_flags);
146		if (ret < 0)
147			return ret;
148		return parse_test_option(test_opt, search_opt.exprm.data);
149	} else {
150		search_opt.regular.kind = test_kind;
151		search_opt.regular.len = 0;
152		search_opt.regular.data32 = 0;
153		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
154				       sizeof(search_opt.regular), load_flags);
155		if (ret < 0)
156			return ret;
157		return parse_test_option(test_opt, search_opt.regular.data);
158	}
159}
160
161static int synack_opt_len(struct bpf_sock_ops *skops)
162{
163	struct bpf_test_option test_opt = {};
164	__u8 optlen;
165	int err;
166
167	if (!passive_synack_out.flags)
168		return CG_OK;
169
170	err = load_option(skops, &test_opt, true);
171
172	/* bpf_test_option is not found */
173	if (err == -ENOMSG)
174		return CG_OK;
175
176	if (err)
177		RET_CG_ERR(err);
178
179	optlen = option_total_len(passive_synack_out.flags);
180	if (optlen) {
181		err = bpf_reserve_hdr_opt(skops, optlen, 0);
182		if (err)
183			RET_CG_ERR(err);
184	}
185
186	return CG_OK;
187}
188
189static int write_synack_opt(struct bpf_sock_ops *skops)
190{
191	struct bpf_test_option opt;
192
193	if (!passive_synack_out.flags)
194		/* We should not even be called since no header
195		 * space has been reserved.
196		 */
197		RET_CG_ERR(0);
198
199	opt = passive_synack_out;
200	if (skops_want_cookie(skops))
201		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
202
203	return store_option(skops, &opt);
204}
205
206static int syn_opt_len(struct bpf_sock_ops *skops)
207{
208	__u8 optlen;
209	int err;
210
211	if (!active_syn_out.flags)
212		return CG_OK;
213
214	optlen = option_total_len(active_syn_out.flags);
215	if (optlen) {
216		err = bpf_reserve_hdr_opt(skops, optlen, 0);
217		if (err)
218			RET_CG_ERR(err);
219	}
220
221	return CG_OK;
222}
223
224static int write_syn_opt(struct bpf_sock_ops *skops)
225{
226	if (!active_syn_out.flags)
227		RET_CG_ERR(0);
228
229	return store_option(skops, &active_syn_out);
230}
231
232static int fin_opt_len(struct bpf_sock_ops *skops)
233{
234	struct bpf_test_option *opt;
235	struct hdr_stg *hdr_stg;
236	__u8 optlen;
237	int err;
238
239	if (!skops->sk)
240		RET_CG_ERR(0);
241
242	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
243	if (!hdr_stg)
244		RET_CG_ERR(0);
245
246	if (hdr_stg->active)
247		opt = &active_fin_out;
248	else
249		opt = &passive_fin_out;
250
251	optlen = option_total_len(opt->flags);
252	if (optlen) {
253		err = bpf_reserve_hdr_opt(skops, optlen, 0);
254		if (err)
255			RET_CG_ERR(err);
256	}
257
258	return CG_OK;
259}
260
261static int write_fin_opt(struct bpf_sock_ops *skops)
262{
263	struct bpf_test_option *opt;
264	struct hdr_stg *hdr_stg;
265
266	if (!skops->sk)
267		RET_CG_ERR(0);
268
269	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
270	if (!hdr_stg)
271		RET_CG_ERR(0);
272
273	if (hdr_stg->active)
274		opt = &active_fin_out;
275	else
276		opt = &passive_fin_out;
277
278	if (!opt->flags)
279		RET_CG_ERR(0);
280
281	return store_option(skops, opt);
282}
283
284static int resend_in_ack(struct bpf_sock_ops *skops)
285{
286	struct hdr_stg *hdr_stg;
287
288	if (!skops->sk)
289		return -1;
290
291	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
292	if (!hdr_stg)
293		return -1;
294
295	return !!hdr_stg->resend_syn;
296}
297
298static int nodata_opt_len(struct bpf_sock_ops *skops)
299{
300	int resend;
301
302	resend = resend_in_ack(skops);
303	if (resend < 0)
304		RET_CG_ERR(0);
305
306	if (resend)
307		return syn_opt_len(skops);
308
309	return CG_OK;
310}
311
312static int write_nodata_opt(struct bpf_sock_ops *skops)
313{
314	int resend;
315
316	resend = resend_in_ack(skops);
317	if (resend < 0)
318		RET_CG_ERR(0);
319
320	if (resend)
321		return write_syn_opt(skops);
322
323	return CG_OK;
324}
325
/* HDR_OPT_LEN_CB for a data segment.  Same as the nodata version.
 * Mostly to show an example usage on skops->skb_len.
 */
static int data_opt_len(struct bpf_sock_ops *skops)
{
	return nodata_opt_len(skops);
}
333
/* WRITE_HDR_OPT_CB for a data segment; same behavior as the nodata path */
static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}
338
339static int current_mss_opt_len(struct bpf_sock_ops *skops)
340{
341	/* Reserve maximum that may be needed */
342	int err;
343
344	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
345	if (err)
346		RET_CG_ERR(err);
347
348	return CG_OK;
349}
350
351static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
352{
353	__u8 tcp_flags = skops_tcp_flags(skops);
354
355	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
356		return synack_opt_len(skops);
357
358	if (tcp_flags & TCPHDR_SYN)
359		return syn_opt_len(skops);
360
361	if (tcp_flags & TCPHDR_FIN)
362		return fin_opt_len(skops);
363
364	if (skops_current_mss(skops))
365		/* The kernel is calculating the MSS */
366		return current_mss_opt_len(skops);
367
368	if (skops->skb_len)
369		return data_opt_len(skops);
370
371	return nodata_opt_len(skops);
372}
373
374static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
375{
376	__u8 tcp_flags = skops_tcp_flags(skops);
377	struct tcphdr *th;
378
379	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
380		return write_synack_opt(skops);
381
382	if (tcp_flags & TCPHDR_SYN)
383		return write_syn_opt(skops);
384
385	if (tcp_flags & TCPHDR_FIN)
386		return write_fin_opt(skops);
387
388	th = skops->skb_data;
389	if (th + 1 > skops->skb_data_end)
390		RET_CG_ERR(0);
391
392	if (skops->skb_len > tcp_hdrlen(th))
393		return write_data_opt(skops);
394
395	return write_nodata_opt(skops);
396}
397
/* Cap this socket's delayed-ack timeout at @max_delack_ms (converted to us) */
static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 max_delack_us = max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &max_delack_us, sizeof(max_delack_us));
}
405
/* Raise this socket's minimum RTO to match the peer's advertised
 * max delayed-ack time (@peer_max_delack_ms, converted to us).
 */
static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 min_rto_us = peer_max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
			      sizeof(min_rto_us));
}
413
/* BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: the connect() side finished the
 * 3WHS.  Parse the peer's option from the received packet into
 * active_estab_in, create the per-socket hdr_stg, and adjust cb flags
 * and delack/RTO settings accordingly.
 */
static int handle_active_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {
		.active = true,
	};
	int err;

	/* -ENOMSG (option absent) is acceptable; any other error is fatal */
	err = load_option(skops, &active_estab_in, false);
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	/* Peer set OPTION_RESEND: it could not save our SYN (syncookie),
	 * so we must resend our option in the ACK.
	 */
	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
						OPTION_RESEND);
	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
					      &init_stg,
					      BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (init_stg.resend_syn)
		/* Don't clear the write_hdr cb now because
		 * the ACK may get lost and retransmit may
		 * be needed.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn if this
		 * resend_syn option has received by the peer.
		 *
		 * The header option will be resent until a valid
		 * packet is received at handle_parse_hdr()
		 * and all hdr cb flags will be cleared in
		 * handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
	else if (!active_fin_out.flags)
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);

	if (active_syn_out.max_delack_ms) {
		err = set_delack_max(skops, active_syn_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (active_estab_in.max_delack_ms) {
		err = set_rto_min(skops, active_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
464
/* BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: the accept() side finished the
 * 3WHS.  Parse the peer's option (from the saved SYN, or from the ACK
 * when syncookie mode dropped the saved SYN), create the per-socket
 * hdr_stg, and adjust cb flags and delack/RTO settings.
 */
static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {};
	struct tcphdr *th;
	int err;

	/* Record cb flags inherited from the listener for userspace checks */
	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;

	err = load_option(skops, &passive_estab_in, true);
	if (err == -ENOENT) {
		/* saved_syn is not found. It was in syncookie mode.
		 * We have asked the active side to resend the options
		 * in ACK, so try to find the bpf_test_option from ACK now.
		 */
		err = load_option(skops, &passive_estab_in, false);
		init_stg.syncookie = true;
	}

	/* ENOMSG: The bpf_test_option is not found which is fine.
	 * Bail out now for all other errors.
	 */
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	/* Bounds-check the TCP header before reading th->syn */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Fastopen */

		/* Cannot clear cb_flags to stop write_hdr cb.
		 * synack is not sent yet for fast open.
		 * Even it was, the synack may need to be retransmitted.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn
		 * if synack has reached the peer.
		 * All cb_flags will be cleared in handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
		init_stg.fastopen = true;
	} else if (!passive_fin_out.flags) {
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);
	}

	if (!skops->sk ||
	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
				BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (passive_synack_out.max_delack_ms) {
		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (passive_estab_in.max_delack_ms) {
		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
530
/* BPF_SOCK_OPS_PARSE_HDR_OPT_CB: a fully-established packet arrived.
 * Use it as the acknowledgment that previously written options reached
 * the peer (clearing cb flags), and capture the FIN option if present.
 */
static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;
	struct tcphdr *th;

	if (!skops->sk)
		RET_CG_ERR(0);

	/* Bounds-check the TCP header before reading th->fin */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->resend_syn || hdr_stg->fastopen)
		/* The PARSE_ALL_HDR cb flag was turned on
		 * to ensure that the previously written
		 * options have reached the peer.
		 * Those previously written option includes:
		 *     - Active side: resend_syn in ACK during syncookie
		 *      or
		 *     - Passive side: SYNACK during fastopen
		 *
		 * A valid packet has been received here after
		 * the 3WHS, so the PARSE_ALL_HDR cb flag
		 * can be cleared now.
		 */
		clear_parse_all_hdr_cb_flags(skops);

	if (hdr_stg->resend_syn && !active_fin_out.flags)
		/* Active side resent the syn option in ACK
		 * because the server was in syncookie mode.
		 * A valid packet has been received, so
		 * clear header cb flags if there is no
		 * more option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (hdr_stg->fastopen && !passive_fin_out.flags)
		/* Passive side was in fastopen.
		 * A valid packet has been received, so
		 * the SYNACK has reached the peer.
		 * Clear header cb flags if there is no more
		 * option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (th->fin) {
		struct bpf_test_option *fin_opt;
		int err;

		/* Record the FIN option into the side-specific *_fin_in */
		if (hdr_stg->active)
			fin_opt = &active_fin_in;
		else
			fin_opt = &passive_fin_in;

		/* -ENOMSG (no option on the FIN) is acceptable */
		err = load_option(skops, fin_opt, false);
		if (err && err != -ENOMSG)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
596
/* Program entry: dispatch sock_ops callbacks for the header-option test */
SEC("sockops")
int estab(struct bpf_sock_ops *skops)
{
	int true_val = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		/* Save incoming SYNs so load_option() can parse them
		 * later with BPF_LOAD_HDR_OPT_TCP_SYN.
		 */
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &true_val, sizeof(true_val));
		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		/* Active side: enable the header-option callbacks */
		set_hdr_cb_flags(skops, 0);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		return handle_active_estab(skops);
	}

	return CG_OK;
}
625
626char _license[] SEC("license") = "GPL";
627