1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2020 Cloudflare
3/*
4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5 * Covers:
 *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7 *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8 *  3. BPF reuseport helper - bpf_sk_select_reuseport
9 */
10
11#include <linux/compiler.h>
12#include <errno.h>
13#include <error.h>
14#include <limits.h>
15#include <netinet/in.h>
16#include <pthread.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/select.h>
20#include <unistd.h>
21#include <linux/vm_sockets.h>
22
23#include <bpf/bpf.h>
24#include <bpf/libbpf.h>
25
26#include "bpf_util.h"
27#include "test_progs.h"
28#include "test_sockmap_listen.skel.h"
29
30#include "sockmap_helpers.h"
31
32static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
33				int family, int sotype, int mapfd)
34{
35	u32 key = 0;
36	u64 value;
37	int err;
38
39	value = -1;
40	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
41	if (!err || errno != EINVAL)
42		FAIL_ERRNO("map_update: expected EINVAL");
43
44	value = INT_MAX;
45	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
46	if (!err || errno != EBADF)
47		FAIL_ERRNO("map_update: expected EBADF");
48}
49
50static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
51			       int family, int sotype, int mapfd)
52{
53	u32 key = 0;
54	u64 value;
55	int err, s;
56
57	s = xsocket(family, sotype, 0);
58	if (s == -1)
59		return;
60
61	errno = 0;
62	value = s;
63	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
64	if (sotype == SOCK_STREAM) {
65		if (!err || errno != EOPNOTSUPP)
66			FAIL_ERRNO("map_update: expected EOPNOTSUPP");
67	} else if (err)
68		FAIL_ERRNO("map_update: expected success");
69	xclose(s);
70}
71
72static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
73			      int family, int sotype, int mapfd)
74{
75	struct sockaddr_storage addr;
76	socklen_t len = 0;
77	u32 key = 0;
78	u64 value;
79	int err, s;
80
81	init_addr_loopback(family, &addr, &len);
82
83	s = xsocket(family, sotype, 0);
84	if (s == -1)
85		return;
86
87	err = xbind(s, sockaddr(&addr), len);
88	if (err)
89		goto close;
90
91	errno = 0;
92	value = s;
93	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
94	if (!err || errno != EOPNOTSUPP)
95		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
96close:
97	xclose(s);
98}
99
100static void test_insert(struct test_sockmap_listen *skel __always_unused,
101			int family, int sotype, int mapfd)
102{
103	u64 value;
104	u32 key;
105	int s;
106
107	s = socket_loopback(family, sotype);
108	if (s < 0)
109		return;
110
111	key = 0;
112	value = s;
113	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
114	xclose(s);
115}
116
117static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
118				     int family, int sotype, int mapfd)
119{
120	u64 value;
121	u32 key;
122	int s;
123
124	s = socket_loopback(family, sotype);
125	if (s < 0)
126		return;
127
128	key = 0;
129	value = s;
130	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
131	xbpf_map_delete_elem(mapfd, &key);
132	xclose(s);
133}
134
135static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
136				    int family, int sotype, int mapfd)
137{
138	int err, s;
139	u64 value;
140	u32 key;
141
142	s = socket_loopback(family, sotype);
143	if (s < 0)
144		return;
145
146	key = 0;
147	value = s;
148	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
149
150	xclose(s);
151
152	errno = 0;
153	err = bpf_map_delete_elem(mapfd, &key);
154	if (!err || (errno != EINVAL && errno != ENOENT))
155		/* SOCKMAP and SOCKHASH return different error codes */
156		FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
157}
158
159static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
160				     int family, int sotype, int mapfd)
161{
162	u64 cookie, value;
163	socklen_t len;
164	u32 key;
165	int s;
166
167	s = socket_loopback(family, sotype);
168	if (s < 0)
169		return;
170
171	key = 0;
172	value = s;
173	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
174
175	len = sizeof(cookie);
176	xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
177
178	xbpf_map_lookup_elem(mapfd, &key, &value);
179
180	if (value != cookie) {
181		FAIL("map_lookup: have %#llx, want %#llx",
182		     (unsigned long long)value, (unsigned long long)cookie);
183	}
184
185	xclose(s);
186}
187
188static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
189				     int family, int sotype, int mapfd)
190{
191	int err, s;
192	u64 value;
193	u32 key;
194
195	s = socket_loopback(family, sotype);
196	if (s < 0)
197		return;
198
199	key = 0;
200	value = s;
201	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
202	xbpf_map_delete_elem(mapfd, &key);
203
204	errno = 0;
205	err = bpf_map_lookup_elem(mapfd, &key, &value);
206	if (!err || errno != ENOENT)
207		FAIL_ERRNO("map_lookup: expected ENOENT");
208
209	xclose(s);
210}
211
212static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
213				     int family, int sotype, int mapfd)
214{
215	u32 key, value32;
216	int err, s;
217
218	s = socket_loopback(family, sotype);
219	if (s < 0)
220		return;
221
222	mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
223			       sizeof(value32), 1, NULL);
224	if (mapfd < 0) {
225		FAIL_ERRNO("map_create");
226		goto close;
227	}
228
229	key = 0;
230	value32 = s;
231	xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
232
233	errno = 0;
234	err = bpf_map_lookup_elem(mapfd, &key, &value32);
235	if (!err || errno != ENOSPC)
236		FAIL_ERRNO("map_lookup: expected ENOSPC");
237
238	xclose(mapfd);
239close:
240	xclose(s);
241}
242
243static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
244				 int family, int sotype, int mapfd)
245{
246	int s1, s2;
247	u64 value;
248	u32 key;
249
250	s1 = socket_loopback(family, sotype);
251	if (s1 < 0)
252		return;
253
254	s2 = socket_loopback(family, sotype);
255	if (s2 < 0)
256		goto close_s1;
257
258	key = 0;
259	value = s1;
260	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
261
262	value = s2;
263	xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
264	xclose(s2);
265close_s1:
266	xclose(s1);
267}
268
269/* Exercise the code path where we destroy child sockets that never
270 * got accept()'ed, aka orphans, when parent socket gets closed.
271 */
272static void do_destroy_orphan_child(int family, int sotype, int mapfd)
273{
274	struct sockaddr_storage addr;
275	socklen_t len;
276	int err, s, c;
277	u64 value;
278	u32 key;
279
280	s = socket_loopback(family, sotype);
281	if (s < 0)
282		return;
283
284	len = sizeof(addr);
285	err = xgetsockname(s, sockaddr(&addr), &len);
286	if (err)
287		goto close_srv;
288
289	key = 0;
290	value = s;
291	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
292
293	c = xsocket(family, sotype, 0);
294	if (c == -1)
295		goto close_srv;
296
297	xconnect(c, sockaddr(&addr), len);
298	xclose(c);
299close_srv:
300	xclose(s);
301}
302
303static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
304				      int family, int sotype, int mapfd)
305{
306	int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
307	int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
308	const struct test {
309		int progfd;
310		enum bpf_attach_type atype;
311	} tests[] = {
312		{ -1, -1 },
313		{ msg_verdict, BPF_SK_MSG_VERDICT },
314		{ skb_verdict, BPF_SK_SKB_VERDICT },
315	};
316	const struct test *t;
317
318	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
319		if (t->progfd != -1 &&
320		    xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
321			return;
322
323		do_destroy_orphan_child(family, sotype, mapfd);
324
325		if (t->progfd != -1)
326			xbpf_prog_detach2(t->progfd, mapfd, t->atype);
327	}
328}
329
330/* Perform a passive open after removing listening socket from SOCKMAP
331 * to ensure that callbacks get restored properly.
332 */
333static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
334				    int family, int sotype, int mapfd)
335{
336	struct sockaddr_storage addr;
337	socklen_t len;
338	int err, s, c;
339	u64 value;
340	u32 key;
341
342	s = socket_loopback(family, sotype);
343	if (s < 0)
344		return;
345
346	len = sizeof(addr);
347	err = xgetsockname(s, sockaddr(&addr), &len);
348	if (err)
349		goto close_srv;
350
351	key = 0;
352	value = s;
353	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354	xbpf_map_delete_elem(mapfd, &key);
355
356	c = xsocket(family, sotype, 0);
357	if (c < 0)
358		goto close_srv;
359
360	xconnect(c, sockaddr(&addr), len);
361	xclose(c);
362close_srv:
363	xclose(s);
364}
365
366/* Check that child socket that got created while parent was in a
367 * SOCKMAP, but got accept()'ed only after the parent has been removed
368 * from SOCKMAP, gets cloned without parent psock state or callbacks.
369 */
370static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
371				     int family, int sotype, int mapfd)
372{
373	struct sockaddr_storage addr;
374	const u32 zero = 0;
375	int err, s, c, p;
376	socklen_t len;
377	u64 value;
378
379	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
380	if (s == -1)
381		return;
382
383	len = sizeof(addr);
384	err = xgetsockname(s, sockaddr(&addr), &len);
385	if (err)
386		goto close_srv;
387
388	value = s;
389	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
390	if (err)
391		goto close_srv;
392
393	c = xsocket(family, sotype, 0);
394	if (c == -1)
395		goto close_srv;
396
397	/* Create child while parent is in sockmap */
398	err = xconnect(c, sockaddr(&addr), len);
399	if (err)
400		goto close_cli;
401
402	/* Remove parent from sockmap */
403	err = xbpf_map_delete_elem(mapfd, &zero);
404	if (err)
405		goto close_cli;
406
407	p = xaccept_nonblock(s, NULL, NULL);
408	if (p == -1)
409		goto close_cli;
410
411	/* Check that child sk_user_data is not set */
412	value = p;
413	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
414
415	xclose(p);
416close_cli:
417	xclose(c);
418close_srv:
419	xclose(s);
420}
421
422/* Check that child socket that got created and accepted while parent
423 * was in a SOCKMAP is cloned without parent psock state or callbacks.
424 */
425static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
426				      int family, int sotype, int mapfd)
427{
428	struct sockaddr_storage addr;
429	const u32 zero = 0, one = 1;
430	int err, s, c, p;
431	socklen_t len;
432	u64 value;
433
434	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
435	if (s == -1)
436		return;
437
438	len = sizeof(addr);
439	err = xgetsockname(s, sockaddr(&addr), &len);
440	if (err)
441		goto close_srv;
442
443	value = s;
444	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
445	if (err)
446		goto close_srv;
447
448	c = xsocket(family, sotype, 0);
449	if (c == -1)
450		goto close_srv;
451
452	/* Create & accept child while parent is in sockmap */
453	err = xconnect(c, sockaddr(&addr), len);
454	if (err)
455		goto close_cli;
456
457	p = xaccept_nonblock(s, NULL, NULL);
458	if (p == -1)
459		goto close_cli;
460
461	/* Check that child sk_user_data is not set */
462	value = p;
463	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
464
465	xclose(p);
466close_cli:
467	xclose(c);
468close_srv:
469	xclose(s);
470}
471
/* State shared between a test's main thread and its worker thread */
struct connect_accept_ctx {
	int sockfd;		/* socket the worker thread operates on */
	unsigned int done;	/* set to 1 by the worker when it exits */
	unsigned int nr_iter;	/* upper bound on worker loop iterations */
};
477
478static bool is_thread_done(struct connect_accept_ctx *ctx)
479{
480	return READ_ONCE(ctx->done);
481}
482
483static void *connect_accept_thread(void *arg)
484{
485	struct connect_accept_ctx *ctx = arg;
486	struct sockaddr_storage addr;
487	int family, socktype;
488	socklen_t len;
489	int err, i, s;
490
491	s = ctx->sockfd;
492
493	len = sizeof(addr);
494	err = xgetsockname(s, sockaddr(&addr), &len);
495	if (err)
496		goto done;
497
498	len = sizeof(family);
499	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
500	if (err)
501		goto done;
502
503	len = sizeof(socktype);
504	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
505	if (err)
506		goto done;
507
508	for (i = 0; i < ctx->nr_iter; i++) {
509		int c, p;
510
511		c = xsocket(family, socktype, 0);
512		if (c < 0)
513			break;
514
515		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
516		if (err) {
517			xclose(c);
518			break;
519		}
520
521		p = xaccept_nonblock(s, NULL, NULL);
522		if (p < 0) {
523			xclose(c);
524			break;
525		}
526
527		xclose(p);
528		xclose(c);
529	}
530done:
531	WRITE_ONCE(ctx->done, 1);
532	return NULL;
533}
534
535static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
536					int family, int sotype, int mapfd)
537{
538	struct connect_accept_ctx ctx = { 0 };
539	struct sockaddr_storage addr;
540	socklen_t len;
541	u32 zero = 0;
542	pthread_t t;
543	int err, s;
544	u64 value;
545
546	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
547	if (s < 0)
548		return;
549
550	len = sizeof(addr);
551	err = xgetsockname(s, sockaddr(&addr), &len);
552	if (err)
553		goto close;
554
555	ctx.sockfd = s;
556	ctx.nr_iter = 1000;
557
558	err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
559	if (err)
560		goto close;
561
562	value = s;
563	while (!is_thread_done(&ctx)) {
564		err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
565		if (err)
566			break;
567
568		err = xbpf_map_delete_elem(mapfd, &zero);
569		if (err)
570			break;
571	}
572
573	xpthread_join(t, NULL);
574close:
575	xclose(s);
576}
577
578static void *listen_thread(void *arg)
579{
580	struct sockaddr unspec = { AF_UNSPEC };
581	struct connect_accept_ctx *ctx = arg;
582	int err, i, s;
583
584	s = ctx->sockfd;
585
586	for (i = 0; i < ctx->nr_iter; i++) {
587		err = xlisten(s, 1);
588		if (err)
589			break;
590		err = xconnect(s, &unspec, sizeof(unspec));
591		if (err)
592			break;
593	}
594
595	WRITE_ONCE(ctx->done, 1);
596	return NULL;
597}
598
599static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
600				    int family, int socktype, int mapfd)
601{
602	struct connect_accept_ctx ctx = { 0 };
603	const u32 zero = 0;
604	const int one = 1;
605	pthread_t t;
606	int err, s;
607	u64 value;
608
609	s = xsocket(family, socktype, 0);
610	if (s < 0)
611		return;
612
613	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
614	if (err)
615		goto close;
616
617	ctx.sockfd = s;
618	ctx.nr_iter = 10000;
619
620	err = pthread_create(&t, NULL, listen_thread, &ctx);
621	if (err)
622		goto close;
623
624	value = s;
625	while (!is_thread_done(&ctx)) {
626		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
627		/* Expecting EOPNOTSUPP before listen() */
628		if (err && errno != EOPNOTSUPP) {
629			FAIL_ERRNO("map_update");
630			break;
631		}
632
633		err = bpf_map_delete_elem(mapfd, &zero);
634		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
635		if (err && errno != EINVAL && errno != ENOENT) {
636			FAIL_ERRNO("map_delete");
637			break;
638		}
639	}
640
641	xpthread_join(t, NULL);
642close:
643	xclose(s);
644}
645
646static void zero_verdict_count(int mapfd)
647{
648	unsigned int zero = 0;
649	int key;
650
651	key = SK_DROP;
652	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
653	key = SK_PASS;
654	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
655}
656
/* Which redirect flavour a redir_to_*() helper exercises */
enum redir_mode {
	REDIR_INGRESS,	/* the test byte is written on the client socket */
	REDIR_EGRESS,	/* the test byte is written on the peer socket */
};
661
662static const char *redir_mode_str(enum redir_mode mode)
663{
664	switch (mode) {
665	case REDIR_INGRESS:
666		return "ingress";
667	case REDIR_EGRESS:
668		return "egress";
669	default:
670		return "unknown";
671	}
672}
673
674static void redir_to_connected(int family, int sotype, int sock_mapfd,
675			       int verd_mapfd, enum redir_mode mode)
676{
677	const char *log_prefix = redir_mode_str(mode);
678	int s, c0, c1, p0, p1;
679	unsigned int pass;
680	int err, n;
681	u32 key;
682	char b;
683
684	zero_verdict_count(verd_mapfd);
685
686	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
687	if (s < 0)
688		return;
689
690	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
691	if (err)
692		goto close_srv;
693
694	err = add_to_sockmap(sock_mapfd, p0, p1);
695	if (err)
696		goto close;
697
698	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
699	if (n < 0)
700		FAIL_ERRNO("%s: write", log_prefix);
701	if (n == 0)
702		FAIL("%s: incomplete write", log_prefix);
703	if (n < 1)
704		goto close;
705
706	key = SK_PASS;
707	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
708	if (err)
709		goto close;
710	if (pass != 1)
711		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
712	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
713	if (n < 0)
714		FAIL_ERRNO("%s: recv_timeout", log_prefix);
715	if (n == 0)
716		FAIL("%s: incomplete recv", log_prefix);
717
718close:
719	xclose(p1);
720	xclose(c1);
721	xclose(p0);
722	xclose(c0);
723close_srv:
724	xclose(s);
725}
726
727static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
728					struct bpf_map *inner_map, int family,
729					int sotype)
730{
731	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
732	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
733	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
734	int sock_map = bpf_map__fd(inner_map);
735	int err;
736
737	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
738	if (err)
739		return;
740	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
741	if (err)
742		goto detach;
743
744	redir_to_connected(family, sotype, sock_map, verdict_map,
745			   REDIR_INGRESS);
746
747	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
748detach:
749	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
750}
751
752static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
753					struct bpf_map *inner_map, int family,
754					int sotype)
755{
756	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
757	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
758	int sock_map = bpf_map__fd(inner_map);
759	int err;
760
761	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
762	if (err)
763		return;
764
765	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
766
767	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
768}
769
770static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel,
771						  struct bpf_map *inner_map, int family,
772						  int sotype)
773{
774	int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
775	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
776	int sock_map = bpf_map__fd(inner_map);
777	int link_fd;
778
779	link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL);
780	if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
781		return;
782
783	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
784
785	close(link_fd);
786}
787
788static void redir_to_listening(int family, int sotype, int sock_mapfd,
789			       int verd_mapfd, enum redir_mode mode)
790{
791	const char *log_prefix = redir_mode_str(mode);
792	struct sockaddr_storage addr;
793	int s, c, p, err, n;
794	unsigned int drop;
795	socklen_t len;
796	u32 key;
797
798	zero_verdict_count(verd_mapfd);
799
800	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
801	if (s < 0)
802		return;
803
804	len = sizeof(addr);
805	err = xgetsockname(s, sockaddr(&addr), &len);
806	if (err)
807		goto close_srv;
808
809	c = xsocket(family, sotype, 0);
810	if (c < 0)
811		goto close_srv;
812	err = xconnect(c, sockaddr(&addr), len);
813	if (err)
814		goto close_cli;
815
816	p = xaccept_nonblock(s, NULL, NULL);
817	if (p < 0)
818		goto close_cli;
819
820	err = add_to_sockmap(sock_mapfd, s, p);
821	if (err)
822		goto close_peer;
823
824	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
825	if (n < 0 && errno != EACCES)
826		FAIL_ERRNO("%s: write", log_prefix);
827	if (n == 0)
828		FAIL("%s: incomplete write", log_prefix);
829	if (n < 1)
830		goto close_peer;
831
832	key = SK_DROP;
833	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
834	if (err)
835		goto close_peer;
836	if (drop != 1)
837		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
838
839close_peer:
840	xclose(p);
841close_cli:
842	xclose(c);
843close_srv:
844	xclose(s);
845}
846
847static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
848					struct bpf_map *inner_map, int family,
849					int sotype)
850{
851	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
852	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
853	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
854	int sock_map = bpf_map__fd(inner_map);
855	int err;
856
857	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
858	if (err)
859		return;
860	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
861	if (err)
862		goto detach;
863
864	redir_to_listening(family, sotype, sock_map, verdict_map,
865			   REDIR_INGRESS);
866
867	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
868detach:
869	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
870}
871
872static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
873					struct bpf_map *inner_map, int family,
874					int sotype)
875{
876	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
877	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
878	int sock_map = bpf_map__fd(inner_map);
879	int err;
880
881	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
882	if (err)
883		return;
884
885	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
886
887	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
888}
889
890static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel,
891						  struct bpf_map *inner_map, int family,
892						  int sotype)
893{
894	struct bpf_program *verdict = skel->progs.prog_msg_verdict;
895	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
896	int sock_map = bpf_map__fd(inner_map);
897	struct bpf_link *link;
898
899	link = bpf_program__attach_sockmap(verdict, sock_map);
900	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
901		return;
902
903	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
904
905	bpf_link__detach(link);
906}
907
/* Store sizeof("abc") - 1 at key 0 of parser_map (presumably the length the
 * stream parser program reports), send the four bytes of "abc" including
 * the NUL on c1, and expect exactly three bytes to arrive on c0. The
 * parser_map entry is reset to 0 before returning.
 */
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
	int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
	int err, n, key, value;
	char buf[] = "abc";

	key = 0;
	value = sizeof(buf) - 1;
	err = xbpf_map_update_elem(parser_map, &key, &value, 0);
	if (err)
		return;

	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
	if (s < 0)
		goto clean_parser_map;

	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
	if (err)
		goto close_srv;

	err = add_to_sockmap(sock_map, p0, p1);
	if (err)
		goto close;

	n = xsend(c1, buf, sizeof(buf), 0);
	/* Compare as int: against a size_t, a -1 return would be converted
	 * to a huge unsigned value and never look like a short write.
	 */
	if (n < (int)sizeof(buf))
		FAIL("incomplete write");

	n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
	if (n != sizeof(buf) - 1)
		FAIL("expect %zu, received %d", sizeof(buf) - 1, n);

close:
	xclose(c0);
	xclose(p0);
	xclose(c1);
	xclose(p1);
close_srv:
	xclose(s);

clean_parser_map:
	key = 0;
	value = 0;
	xbpf_map_update_elem(parser_map, &key, &value, 0);
}
953
954static void test_skb_redir_partial(struct test_sockmap_listen *skel,
955				   struct bpf_map *inner_map, int family,
956				   int sotype)
957{
958	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
959	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
960	int parser_map = bpf_map__fd(skel->maps.parser_map);
961	int sock_map = bpf_map__fd(inner_map);
962	int err;
963
964	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
965	if (err)
966		return;
967
968	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
969	if (err)
970		goto detach;
971
972	redir_partial(family, sotype, sock_map, parser_map);
973
974	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
975detach:
976	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
977}
978
979static void test_reuseport_select_listening(int family, int sotype,
980					    int sock_map, int verd_map,
981					    int reuseport_prog)
982{
983	struct sockaddr_storage addr;
984	unsigned int pass;
985	int s, c, err;
986	socklen_t len;
987	u64 value;
988	u32 key;
989
990	zero_verdict_count(verd_map);
991
992	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
993				      reuseport_prog);
994	if (s < 0)
995		return;
996
997	len = sizeof(addr);
998	err = xgetsockname(s, sockaddr(&addr), &len);
999	if (err)
1000		goto close_srv;
1001
1002	key = 0;
1003	value = s;
1004	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1005	if (err)
1006		goto close_srv;
1007
1008	c = xsocket(family, sotype, 0);
1009	if (c < 0)
1010		goto close_srv;
1011	err = xconnect(c, sockaddr(&addr), len);
1012	if (err)
1013		goto close_cli;
1014
1015	if (sotype == SOCK_STREAM) {
1016		int p;
1017
1018		p = xaccept_nonblock(s, NULL, NULL);
1019		if (p < 0)
1020			goto close_cli;
1021		xclose(p);
1022	} else {
1023		char b = 'a';
1024		ssize_t n;
1025
1026		n = xsend(c, &b, sizeof(b), 0);
1027		if (n == -1)
1028			goto close_cli;
1029
1030		n = xrecv_nonblock(s, &b, sizeof(b), 0);
1031		if (n == -1)
1032			goto close_cli;
1033	}
1034
1035	key = SK_PASS;
1036	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1037	if (err)
1038		goto close_cli;
1039	if (pass != 1)
1040		FAIL("want pass count 1, have %d", pass);
1041
1042close_cli:
1043	xclose(c);
1044close_srv:
1045	xclose(s);
1046}
1047
/* Point sock_map[0] at a connected socket and check that a new connection
 * attempt (or datagram) to the reuseport server is refused with
 * ECONNREFUSED and that the SK_DROP counter in the verdict map is 1.
 *
 * Every socket opened here is closed on every exit path.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		/* p0 is open from here on, so failures must also close it */
		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_peer0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_peer0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	/* c1, p0 and c0 are all open here; unwind through every label */
	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		/* For datagrams the refusal shows up on the receive */
		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}
1152
/* Check that redirecting across reuseport groups is not allowed. */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
					int verd_map, int reuseport_prog)
{
	struct sockaddr_storage addr;
	socklen_t addrlen = sizeof(addr);
	int srv1, srv2, cli, err;
	unsigned int drop;
	u32 key;

	zero_verdict_count(verd_map);

	/* Create two listeners, each in its own reuseport group */
	srv1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (srv1 < 0)
		return;

	srv2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (srv2 < 0)
		goto out_srv1;

	if (add_to_sockmap(sock_map, srv1, srv2))
		goto out_srv2;

	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
	if (xgetsockname(srv2, sockaddr(&addr), &addrlen))
		goto out_srv2;

	cli = xsocket(family, sotype, 0);
	if (cli < 0)
		goto out_srv2;

	err = connect(cli, sockaddr(&addr), addrlen);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		if (xsend(cli, &b, sizeof(b), 0) == -1)
			goto out_cli;

		/* For datagrams the refusal shows up on the receive */
		n = recv_timeout(cli, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = (n == -1);
	}
	if (!err || errno != ECONNREFUSED) {
		FAIL_ERRNO("connect: expected ECONNREFUSED");
		goto out_cli;
	}

	/* Expect drop, can't redirect outside of reuseport group */
	key = SK_DROP;
	if (xbpf_map_lookup_elem(verd_map, &key, &drop))
		goto out_cli;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

out_cli:
	xclose(cli);
out_srv2:
	xclose(srv2);
out_srv1:
	xclose(srv1);
}
1220
/* Build a test-table initializer: the function, its name as a string, and
 * any further fields supplied by the caller.
 */
#define TEST(fn, ...) { fn, #fn, __VA_ARGS__ }
1225
1226static void test_ops_cleanup(const struct bpf_map *map)
1227{
1228	int err, mapfd;
1229	u32 key;
1230
1231	mapfd = bpf_map__fd(map);
1232
1233	for (key = 0; key < bpf_map__max_entries(map); key++) {
1234		err = bpf_map_delete_elem(mapfd, &key);
1235		if (err && errno != EINVAL && errno != ENOENT)
1236			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1237	}
1238}
1239
/* Human-readable name of an address family, used in subtest names */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";
	if (family == AF_UNIX)
		return "Unix";
	if (family == AF_VSOCK)
		return "VSOCK";

	return "unknown";
}
1255
1256static const char *map_type_str(const struct bpf_map *map)
1257{
1258	int type;
1259
1260	if (!map)
1261		return "invalid";
1262	type = bpf_map__type(map);
1263
1264	switch (type) {
1265	case BPF_MAP_TYPE_SOCKMAP:
1266		return "sockmap";
1267	case BPF_MAP_TYPE_SOCKHASH:
1268		return "sockhash";
1269	default:
1270		return "unknown";
1271	}
1272}
1273
/* Human-readable name of a socket type, used in subtest names */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_DGRAM)
		return "UDP";
	if (sotype == SOCK_STREAM)
		return "TCP";

	return "unknown";
}
1285
1286static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1287		     int family, int sotype)
1288{
1289	const struct op_test {
1290		void (*fn)(struct test_sockmap_listen *skel,
1291			   int family, int sotype, int mapfd);
1292		const char *name;
1293		int sotype;
1294	} tests[] = {
1295		/* insert */
1296		TEST(test_insert_invalid),
1297		TEST(test_insert_opened),
1298		TEST(test_insert_bound, SOCK_STREAM),
1299		TEST(test_insert),
1300		/* delete */
1301		TEST(test_delete_after_insert),
1302		TEST(test_delete_after_close),
1303		/* lookup */
1304		TEST(test_lookup_after_insert),
1305		TEST(test_lookup_after_delete),
1306		TEST(test_lookup_32_bit_value),
1307		/* update */
1308		TEST(test_update_existing),
1309		/* races with insert/delete */
1310		TEST(test_destroy_orphan_child, SOCK_STREAM),
1311		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1312		TEST(test_race_insert_listen, SOCK_STREAM),
1313		/* child clone */
1314		TEST(test_clone_after_delete, SOCK_STREAM),
1315		TEST(test_accept_after_delete, SOCK_STREAM),
1316		TEST(test_accept_before_delete, SOCK_STREAM),
1317	};
1318	const char *family_name, *map_name, *sotype_name;
1319	const struct op_test *t;
1320	char s[MAX_TEST_NAME];
1321	int map_fd;
1322
1323	family_name = family_str(family);
1324	map_name = map_type_str(map);
1325	sotype_name = sotype_str(sotype);
1326	map_fd = bpf_map__fd(map);
1327
1328	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1329		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1330			 sotype_name, t->name);
1331
1332		if (t->sotype != 0 && t->sotype != sotype)
1333			continue;
1334
1335		if (!test__start_subtest(s))
1336			continue;
1337
1338		t->fn(skel, family, sotype, map_fd);
1339		test_ops_cleanup(map);
1340	}
1341}
1342
1343static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1344		       int family, int sotype)
1345{
1346	const struct redir_test {
1347		void (*fn)(struct test_sockmap_listen *skel,
1348			   struct bpf_map *map, int family, int sotype);
1349		const char *name;
1350	} tests[] = {
1351		TEST(test_skb_redir_to_connected),
1352		TEST(test_skb_redir_to_listening),
1353		TEST(test_skb_redir_partial),
1354		TEST(test_msg_redir_to_connected),
1355		TEST(test_msg_redir_to_connected_with_link),
1356		TEST(test_msg_redir_to_listening),
1357		TEST(test_msg_redir_to_listening_with_link),
1358	};
1359	const char *family_name, *map_name;
1360	const struct redir_test *t;
1361	char s[MAX_TEST_NAME];
1362
1363	family_name = family_str(family);
1364	map_name = map_type_str(map);
1365
1366	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1367		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1368			 t->name);
1369
1370		if (!test__start_subtest(s))
1371			continue;
1372
1373		t->fn(skel, map, family, sotype);
1374	}
1375}
1376
1377static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
1378				     int sock_mapfd, int nop_mapfd,
1379				     int verd_mapfd, enum redir_mode mode)
1380{
1381	const char *log_prefix = redir_mode_str(mode);
1382	unsigned int pass;
1383	int err, n;
1384	u32 key;
1385	char b;
1386
1387	zero_verdict_count(verd_mapfd);
1388
1389	err = add_to_sockmap(sock_mapfd, peer0, peer1);
1390	if (err)
1391		return;
1392
1393	if (nop_mapfd >= 0) {
1394		err = add_to_sockmap(nop_mapfd, cli0, cli1);
1395		if (err)
1396			return;
1397	}
1398
1399	n = write(cli1, "a", 1);
1400	if (n < 0)
1401		FAIL_ERRNO("%s: write", log_prefix);
1402	if (n == 0)
1403		FAIL("%s: incomplete write", log_prefix);
1404	if (n < 1)
1405		return;
1406
1407	key = SK_PASS;
1408	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1409	if (err)
1410		return;
1411	if (pass != 1)
1412		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1413
1414	n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
1415	if (n < 0)
1416		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1417	if (n == 0)
1418		FAIL("%s: incomplete recv", log_prefix);
1419}
1420
1421static void unix_redir_to_connected(int sotype, int sock_mapfd,
1422			       int verd_mapfd, enum redir_mode mode)
1423{
1424	int c0, c1, p0, p1;
1425	int sfd[2];
1426
1427	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1428		return;
1429	c0 = sfd[0], p0 = sfd[1];
1430
1431	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1432		goto close0;
1433	c1 = sfd[0], p1 = sfd[1];
1434
1435	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
1436
1437	xclose(c1);
1438	xclose(p1);
1439close0:
1440	xclose(c0);
1441	xclose(p0);
1442}
1443
1444static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1445					struct bpf_map *inner_map, int sotype)
1446{
1447	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1448	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1449	int sock_map = bpf_map__fd(inner_map);
1450	int err;
1451
1452	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1453	if (err)
1454		return;
1455
1456	skel->bss->test_ingress = false;
1457	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1458	skel->bss->test_ingress = true;
1459	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1460
1461	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1462}
1463
1464static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1465			    int sotype)
1466{
1467	const char *family_name, *map_name;
1468	char s[MAX_TEST_NAME];
1469
1470	family_name = family_str(AF_UNIX);
1471	map_name = map_type_str(map);
1472	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1473	if (!test__start_subtest(s))
1474		return;
1475	unix_skb_redir_to_connected(skel, map, sotype);
1476}
1477
1478/* Returns two connected loopback vsock sockets */
1479static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
1480{
1481	struct sockaddr_storage addr;
1482	socklen_t len = sizeof(addr);
1483	int s, p, c;
1484
1485	s = socket_loopback(AF_VSOCK, sotype);
1486	if (s < 0)
1487		return -1;
1488
1489	c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
1490	if (c == -1)
1491		goto close_srv;
1492
1493	if (getsockname(s, sockaddr(&addr), &len) < 0)
1494		goto close_cli;
1495
1496	if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
1497		FAIL_ERRNO("connect");
1498		goto close_cli;
1499	}
1500
1501	len = sizeof(addr);
1502	p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
1503	if (p < 0)
1504		goto close_cli;
1505
1506	if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
1507		FAIL_ERRNO("poll_connect");
1508		goto close_acc;
1509	}
1510
1511	*v0 = p;
1512	*v1 = c;
1513
1514	return 0;
1515
1516close_acc:
1517	close(p);
1518close_cli:
1519	close(c);
1520close_srv:
1521	close(s);
1522
1523	return -1;
1524}
1525
1526static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
1527					 enum redir_mode mode, int sotype)
1528{
1529	const char *log_prefix = redir_mode_str(mode);
1530	char a = 'a', b = 'b';
1531	int u0, u1, v0, v1;
1532	int sfd[2];
1533	unsigned int pass;
1534	int err, n;
1535	u32 key;
1536
1537	zero_verdict_count(verd_mapfd);
1538
1539	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
1540		return;
1541
1542	u0 = sfd[0];
1543	u1 = sfd[1];
1544
1545	err = vsock_socketpair_connectible(sotype, &v0, &v1);
1546	if (err) {
1547		FAIL("vsock_socketpair_connectible() failed");
1548		goto close_uds;
1549	}
1550
1551	err = add_to_sockmap(sock_mapfd, u0, v0);
1552	if (err) {
1553		FAIL("add_to_sockmap failed");
1554		goto close_vsock;
1555	}
1556
1557	n = write(v1, &a, sizeof(a));
1558	if (n < 0)
1559		FAIL_ERRNO("%s: write", log_prefix);
1560	if (n == 0)
1561		FAIL("%s: incomplete write", log_prefix);
1562	if (n < 1)
1563		goto out;
1564
1565	n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
1566	if (n < 0)
1567		FAIL("%s: recv() err, errno=%d", log_prefix, errno);
1568	if (n == 0)
1569		FAIL("%s: incomplete recv", log_prefix);
1570	if (b != a)
1571		FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
1572
1573	key = SK_PASS;
1574	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1575	if (err)
1576		goto out;
1577	if (pass != 1)
1578		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1579out:
1580	key = 0;
1581	bpf_map_delete_elem(sock_mapfd, &key);
1582	key = 1;
1583	bpf_map_delete_elem(sock_mapfd, &key);
1584
1585close_vsock:
1586	close(v0);
1587	close(v1);
1588
1589close_uds:
1590	close(u0);
1591	close(u1);
1592}
1593
1594static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
1595					     struct bpf_map *inner_map,
1596					     int sotype)
1597{
1598	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1599	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1600	int sock_map = bpf_map__fd(inner_map);
1601	int err;
1602
1603	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1604	if (err)
1605		return;
1606
1607	skel->bss->test_ingress = false;
1608	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
1609	skel->bss->test_ingress = true;
1610	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
1611
1612	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1613}
1614
1615static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
1616{
1617	const char *family_name, *map_name;
1618	char s[MAX_TEST_NAME];
1619
1620	family_name = family_str(AF_VSOCK);
1621	map_name = map_type_str(map);
1622	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1623	if (!test__start_subtest(s))
1624		return;
1625
1626	vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
1627	vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
1628}
1629
1630static void test_reuseport(struct test_sockmap_listen *skel,
1631			   struct bpf_map *map, int family, int sotype)
1632{
1633	const struct reuseport_test {
1634		void (*fn)(int family, int sotype, int socket_map,
1635			   int verdict_map, int reuseport_prog);
1636		const char *name;
1637		int sotype;
1638	} tests[] = {
1639		TEST(test_reuseport_select_listening),
1640		TEST(test_reuseport_select_connected),
1641		TEST(test_reuseport_mixed_groups),
1642	};
1643	int socket_map, verdict_map, reuseport_prog;
1644	const char *family_name, *map_name, *sotype_name;
1645	const struct reuseport_test *t;
1646	char s[MAX_TEST_NAME];
1647
1648	family_name = family_str(family);
1649	map_name = map_type_str(map);
1650	sotype_name = sotype_str(sotype);
1651
1652	socket_map = bpf_map__fd(map);
1653	verdict_map = bpf_map__fd(skel->maps.verdict_map);
1654	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1655
1656	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1657		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1658			 sotype_name, t->name);
1659
1660		if (t->sotype != 0 && t->sotype != sotype)
1661			continue;
1662
1663		if (!test__start_subtest(s))
1664			continue;
1665
1666		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1667	}
1668}
1669
/*
 * Create two non-blocking inet sockets connected to each other.
 *
 * The first socket comes from socket_loopback(); the second is connected to
 * the first one's local address, and the first is then connected back to the
 * second one's local address.
 *
 * Returns 0 on success with the first socket in @s and the second in @c.
 * On failure returns the failing call's non-zero result, closes whatever was
 * opened and leaves the outputs unwritten.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	struct sockaddr_storage ss;
	socklen_t ss_len = sizeof(ss);
	int peer, cli;
	int ret;

	peer = socket_loopback(family, type | SOCK_NONBLOCK);
	if (peer < 0)
		return peer;

	ret = xgetsockname(peer, sockaddr(&ss), &ss_len);
	if (ret)
		goto out_close_peer;

	cli = xsocket(family, type | SOCK_NONBLOCK, 0);
	if (cli < 0) {
		ret = cli;
		goto out_close_peer;
	}

	/* cli -> peer, then look up cli's address and connect peer -> cli. */
	ret = xconnect(cli, sockaddr(&ss), ss_len);
	if (!ret)
		ret = xgetsockname(cli, sockaddr(&ss), &ss_len);
	if (!ret)
		ret = xconnect(peer, sockaddr(&ss), ss_len);
	if (ret)
		goto out_close_cli;

	*s = peer;
	*c = cli;
	return 0;

out_close_cli:
	xclose(cli);
out_close_peer:
	xclose(peer);
	return ret;
}
1711
1712static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1713				   enum redir_mode mode)
1714{
1715	int c0, c1, p0, p1;
1716	int err;
1717
1718	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1719	if (err)
1720		return;
1721	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1722	if (err)
1723		goto close_cli0;
1724
1725	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
1726
1727	xclose(c1);
1728	xclose(p1);
1729close_cli0:
1730	xclose(c0);
1731	xclose(p0);
1732}
1733
1734static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1735				       struct bpf_map *inner_map, int family)
1736{
1737	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1738	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1739	int sock_map = bpf_map__fd(inner_map);
1740	int err;
1741
1742	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1743	if (err)
1744		return;
1745
1746	skel->bss->test_ingress = false;
1747	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1748	skel->bss->test_ingress = true;
1749	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1750
1751	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1752}
1753
1754static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1755			   int family)
1756{
1757	const char *family_name, *map_name;
1758	char s[MAX_TEST_NAME];
1759
1760	family_name = family_str(family);
1761	map_name = map_type_str(map);
1762	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1763	if (!test__start_subtest(s))
1764		return;
1765	udp_skb_redir_to_connected(skel, map, family);
1766}
1767
1768static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1769					int verd_mapfd, enum redir_mode mode)
1770{
1771	int c0, c1, p0, p1;
1772	int sfd[2];
1773	int err;
1774
1775	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1776		return;
1777	c0 = sfd[0], p0 = sfd[1];
1778
1779	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1780	if (err)
1781		goto close;
1782
1783	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
1784
1785	xclose(c1);
1786	xclose(p1);
1787close:
1788	xclose(c0);
1789	xclose(p0);
1790}
1791
1792static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1793					    struct bpf_map *inner_map, int family)
1794{
1795	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1796	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1797	int sock_map = bpf_map__fd(inner_map);
1798	int err;
1799
1800	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1801	if (err)
1802		return;
1803
1804	skel->bss->test_ingress = false;
1805	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1806				    REDIR_EGRESS);
1807	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1808				    REDIR_EGRESS);
1809	skel->bss->test_ingress = true;
1810	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1811				    REDIR_INGRESS);
1812	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1813				    REDIR_INGRESS);
1814
1815	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1816}
1817
1818static void unix_inet_redir_to_connected(int family, int type,
1819					int sock_mapfd, int nop_mapfd,
1820					int verd_mapfd,
1821					enum redir_mode mode)
1822{
1823	int c0, c1, p0, p1;
1824	int sfd[2];
1825	int err;
1826
1827	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1828	if (err)
1829		return;
1830
1831	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1832		goto close_cli0;
1833	c1 = sfd[0], p1 = sfd[1];
1834
1835	pairs_redir_to_connected(c0, p0, c1, p1,
1836				 sock_mapfd, nop_mapfd, verd_mapfd, mode);
1837
1838	xclose(c1);
1839	xclose(p1);
1840close_cli0:
1841	xclose(c0);
1842	xclose(p0);
1843
1844}
1845
1846static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1847					    struct bpf_map *inner_map, int family)
1848{
1849	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1850	int nop_map = bpf_map__fd(skel->maps.nop_map);
1851	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1852	int sock_map = bpf_map__fd(inner_map);
1853	int err;
1854
1855	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1856	if (err)
1857		return;
1858
1859	skel->bss->test_ingress = false;
1860	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1861				     sock_map, -1, verdict_map,
1862				     REDIR_EGRESS);
1863	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1864				     sock_map, -1, verdict_map,
1865				     REDIR_EGRESS);
1866
1867	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1868				     sock_map, nop_map, verdict_map,
1869				     REDIR_EGRESS);
1870	unix_inet_redir_to_connected(family, SOCK_STREAM,
1871				     sock_map, nop_map, verdict_map,
1872				     REDIR_EGRESS);
1873	skel->bss->test_ingress = true;
1874	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1875				     sock_map, -1, verdict_map,
1876				     REDIR_INGRESS);
1877	unix_inet_redir_to_connected(family, SOCK_STREAM,
1878				     sock_map, -1, verdict_map,
1879				     REDIR_INGRESS);
1880
1881	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1882				     sock_map, nop_map, verdict_map,
1883				     REDIR_INGRESS);
1884	unix_inet_redir_to_connected(family, SOCK_STREAM,
1885				     sock_map, nop_map, verdict_map,
1886				     REDIR_INGRESS);
1887
1888	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1889}
1890
1891static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1892				int family)
1893{
1894	const char *family_name, *map_name;
1895	char s[MAX_TEST_NAME];
1896
1897	family_name = family_str(family);
1898	map_name = map_type_str(map);
1899	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1900	if (!test__start_subtest(s))
1901		return;
1902	inet_unix_skb_redir_to_connected(skel, map, family);
1903	unix_inet_skb_redir_to_connected(skel, map, family);
1904}
1905
/*
 * Run every inet test group for one map and address family: map operations
 * and reuseport for SOCK_STREAM then SOCK_DGRAM, redirect tests for
 * SOCK_STREAM, and the UDP and UDP/unix redirect tests.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	size_t i;

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_ops(skel, map, family, sotypes[i]);

	test_redir(skel, map, family, SOCK_STREAM);

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_reuseport(skel, map, family, sotypes[i]);

	test_udp_redir(skel, map, family);
	test_udp_unix_redir(skel, map, family);
}
1917
1918void serial_test_sockmap_listen(void)
1919{
1920	struct test_sockmap_listen *skel;
1921
1922	skel = test_sockmap_listen__open_and_load();
1923	if (!skel) {
1924		FAIL("skeleton open/load failed");
1925		return;
1926	}
1927
1928	skel->bss->test_sockmap = true;
1929	run_tests(skel, skel->maps.sock_map, AF_INET);
1930	run_tests(skel, skel->maps.sock_map, AF_INET6);
1931	test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
1932	test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
1933	test_vsock_redir(skel, skel->maps.sock_map);
1934
1935	skel->bss->test_sockmap = false;
1936	run_tests(skel, skel->maps.sock_hash, AF_INET);
1937	run_tests(skel, skel->maps.sock_hash, AF_INET6);
1938	test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
1939	test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
1940	test_vsock_redir(skel, skel->maps.sock_hash);
1941
1942	test_sockmap_listen__destroy(skel);
1943}
1944