1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/counter.h>
33#include <sys/ck.h>
34#include <sys/epoch.h>
35#include <sys/errno.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/module.h>
41#include <sys/rmlock.h>
42#include <sys/rwlock.h>
43#include <sys/socket.h>
44#include <sys/sockopt.h>
45
46#include <net/if.h>
47
48#include <netinet/in.h>
49#include <netinet/ip.h>
50#include <netinet/ip_var.h>
51#include <netinet/ip_fw.h>
52#include <netinet6/ip_fw_nat64.h>
53
54#include <netpfil/ipfw/ip_fw_private.h>
55
56#include "nat64lsn.h"
57
/*
 * External action id of nat64lsn. It is assigned in nat64lsn_init() from
 * the value returned by ipfw_add_eaction() and reset to zero in
 * nat64lsn_uninit().
 */
VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
59
60static struct nat64lsn_cfg *
61nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
62{
63	struct nat64lsn_cfg *cfg;
64
65	cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
66	    IPFW_TLV_NAT64LSN_NAME, name);
67
68	return (cfg);
69}
70
71static void
72nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
73{
74
75	if (uc->jmaxlen == 0)
76		uc->jmaxlen = NAT64LSN_JMAXLEN;
77	if (uc->jmaxlen > 65536)
78		uc->jmaxlen = 65536;
79	if (uc->nh_delete_delay == 0)
80		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
81	if (uc->pg_delete_delay == 0)
82		uc->pg_delete_delay = NAT64LSN_PG_AGE;
83	if (uc->st_syn_ttl == 0)
84		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
85	if (uc->st_close_ttl == 0)
86		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
87	if (uc->st_estab_ttl == 0)
88		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
89	if (uc->st_udp_ttl == 0)
90		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
91	if (uc->st_icmp_ttl == 0)
92		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
93
94	if (uc->states_chunks == 0)
95		uc->states_chunks = 1;
96	else if (uc->states_chunks >= 128)
97		uc->states_chunks = 128;
98	else if (!powerof2(uc->states_chunks))
99		uc->states_chunks = 1 << fls(uc->states_chunks);
100}
101
102/*
103 * Creates new nat64lsn instance.
104 * Data layout (v0)(current):
105 * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
106 *
107 * Returns 0 on success
108 */
109static int
110nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
111    struct sockopt_data *sd)
112{
113	ipfw_obj_lheader *olh;
114	ipfw_nat64lsn_cfg *uc;
115	struct nat64lsn_cfg *cfg;
116	struct namedobj_instance *ni;
117	uint32_t addr4, mask4;
118
119	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
120		return (EINVAL);
121
122	olh = (ipfw_obj_lheader *)sd->kbuf;
123	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
124
125	if (ipfw_check_object_name_generic(uc->name) != 0)
126		return (EINVAL);
127
128	if (uc->set >= IPFW_MAX_SETS)
129		return (EINVAL);
130
131	if (uc->plen4 > 32)
132		return (EINVAL);
133
134	/*
135	 * Unspecified address has special meaning. But it must
136	 * have valid prefix length. This length will be used to
137	 * correctly extract and embedd IPv4 address into IPv6.
138	 */
139	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
140	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
141	    nat64_check_prefixlen(uc->plen6) != 0)
142		return (EINVAL);
143
144	/* XXX: Check prefix4 to be global */
145	addr4 = ntohl(uc->prefix4.s_addr);
146	mask4 = ~((1 << (32 - uc->plen4)) - 1);
147	if ((addr4 & mask4) != addr4)
148		return (EINVAL);
149
150	nat64lsn_default_config(uc);
151
152	ni = CHAIN_TO_SRV(ch);
153	IPFW_UH_RLOCK(ch);
154	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
155		IPFW_UH_RUNLOCK(ch);
156		return (EEXIST);
157	}
158	IPFW_UH_RUNLOCK(ch);
159
160	cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
161	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
162	cfg->no.name = cfg->name;
163	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
164	cfg->no.set = uc->set;
165
166	cfg->base.plat_prefix = uc->prefix6;
167	cfg->base.plat_plen = uc->plen6;
168	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
169	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
170		cfg->base.flags |= NAT64_WKPFX;
171	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
172		cfg->base.flags |= NAT64LSN_ANYPREFIX;
173
174	cfg->states_chunks = uc->states_chunks;
175	cfg->jmaxlen = uc->jmaxlen;
176	cfg->host_delete_delay = uc->nh_delete_delay;
177	cfg->pg_delete_delay = uc->pg_delete_delay;
178	cfg->st_syn_ttl = uc->st_syn_ttl;
179	cfg->st_close_ttl = uc->st_close_ttl;
180	cfg->st_estab_ttl = uc->st_estab_ttl;
181	cfg->st_udp_ttl = uc->st_udp_ttl;
182	cfg->st_icmp_ttl = uc->st_icmp_ttl;
183
184	cfg->nomatch_verdict = IP_FW_DENY;
185
186	IPFW_UH_WLOCK(ch);
187
188	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
189		IPFW_UH_WUNLOCK(ch);
190		nat64lsn_destroy_instance(cfg);
191		return (EEXIST);
192	}
193
194	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
195		IPFW_UH_WUNLOCK(ch);
196		nat64lsn_destroy_instance(cfg);
197		return (ENOSPC);
198	}
199	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
200
201	/* Okay, let's link data */
202	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
203	nat64lsn_start_instance(cfg);
204
205	IPFW_UH_WUNLOCK(ch);
206	return (0);
207}
208
209static void
210nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
211{
212
213	IPFW_UH_WLOCK_ASSERT(ch);
214
215	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
216	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
217}
218
219/*
220 * Destroys nat64 instance.
221 * Data layout (v0)(current):
222 * Request: [ ipfw_obj_header ]
223 *
224 * Returns 0 on success
225 */
226static int
227nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
228    struct sockopt_data *sd)
229{
230	struct nat64lsn_cfg *cfg;
231	ipfw_obj_header *oh;
232
233	if (sd->valsize != sizeof(*oh))
234		return (EINVAL);
235
236	oh = (ipfw_obj_header *)op3;
237
238	IPFW_UH_WLOCK(ch);
239	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
240	if (cfg == NULL) {
241		IPFW_UH_WUNLOCK(ch);
242		return (ENOENT);
243	}
244
245	if (cfg->no.refcnt > 0) {
246		IPFW_UH_WUNLOCK(ch);
247		return (EBUSY);
248	}
249
250	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
251	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
252	nat64lsn_detach_config(ch, cfg);
253	IPFW_UH_WUNLOCK(ch);
254
255	nat64lsn_destroy_instance(cfg);
256	return (0);
257}
258
259#define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
260	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
261static void
262export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
263    struct ipfw_nat64lsn_stats *stats)
264{
265	struct nat64lsn_alias *alias;
266	int i, j;
267
268	__COPY_STAT_FIELD(cfg, stats, opcnt64);
269	__COPY_STAT_FIELD(cfg, stats, opcnt46);
270	__COPY_STAT_FIELD(cfg, stats, ofrags);
271	__COPY_STAT_FIELD(cfg, stats, ifrags);
272	__COPY_STAT_FIELD(cfg, stats, oerrors);
273	__COPY_STAT_FIELD(cfg, stats, noroute4);
274	__COPY_STAT_FIELD(cfg, stats, noroute6);
275	__COPY_STAT_FIELD(cfg, stats, nomatch4);
276	__COPY_STAT_FIELD(cfg, stats, noproto);
277	__COPY_STAT_FIELD(cfg, stats, nomem);
278	__COPY_STAT_FIELD(cfg, stats, dropped);
279
280	__COPY_STAT_FIELD(cfg, stats, jcalls);
281	__COPY_STAT_FIELD(cfg, stats, jrequests);
282	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
283	__COPY_STAT_FIELD(cfg, stats, jportreq);
284	__COPY_STAT_FIELD(cfg, stats, jhostfails);
285	__COPY_STAT_FIELD(cfg, stats, jportfails);
286	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
287	__COPY_STAT_FIELD(cfg, stats, jnomem);
288	__COPY_STAT_FIELD(cfg, stats, jreinjected);
289	__COPY_STAT_FIELD(cfg, stats, screated);
290	__COPY_STAT_FIELD(cfg, stats, sdeleted);
291	__COPY_STAT_FIELD(cfg, stats, spgcreated);
292	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
293
294	stats->hostcount = cfg->hosts_count;
295	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
296		alias = &cfg->aliases[i];
297		for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
298			stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
299		for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
300			stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
301		for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
302			stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
303	}
304}
305#undef	__COPY_STAT_FIELD
306
307static void
308nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
309    ipfw_nat64lsn_cfg *uc)
310{
311
312	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
313	uc->states_chunks = cfg->states_chunks;
314	uc->jmaxlen = cfg->jmaxlen;
315	uc->nh_delete_delay = cfg->host_delete_delay;
316	uc->pg_delete_delay = cfg->pg_delete_delay;
317	uc->st_syn_ttl = cfg->st_syn_ttl;
318	uc->st_close_ttl = cfg->st_close_ttl;
319	uc->st_estab_ttl = cfg->st_estab_ttl;
320	uc->st_udp_ttl = cfg->st_udp_ttl;
321	uc->st_icmp_ttl = cfg->st_icmp_ttl;
322	uc->prefix4.s_addr = htonl(cfg->prefix4);
323	uc->prefix6 = cfg->base.plat_prefix;
324	uc->plen4 = cfg->plen4;
325	uc->plen6 = cfg->base.plat_plen;
326	uc->set = cfg->no.set;
327	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
328}
329
330struct nat64_dump_arg {
331	struct ip_fw_chain *ch;
332	struct sockopt_data *sd;
333};
334
335static int
336export_config_cb(struct namedobj_instance *ni, struct named_object *no,
337    void *arg)
338{
339	struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
340	ipfw_nat64lsn_cfg *uc;
341
342	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
343	    sizeof(*uc));
344	nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
345	return (0);
346}
347
348/*
349 * Lists all nat64 lsn instances currently available in kernel.
350 * Data layout (v0)(current):
351 * Request: [ ipfw_obj_lheader ]
352 * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
353 *
354 * Returns 0 on success
355 */
356static int
357nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
358    struct sockopt_data *sd)
359{
360	ipfw_obj_lheader *olh;
361	struct nat64_dump_arg da;
362
363	/* Check minimum header size */
364	if (sd->valsize < sizeof(ipfw_obj_lheader))
365		return (EINVAL);
366
367	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
368
369	IPFW_UH_RLOCK(ch);
370	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
371	    IPFW_TLV_NAT64LSN_NAME);
372	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
373	olh->size = sizeof(*olh) + olh->count * olh->objsize;
374
375	if (sd->valsize < olh->size) {
376		IPFW_UH_RUNLOCK(ch);
377		return (ENOMEM);
378	}
379	memset(&da, 0, sizeof(da));
380	da.ch = ch;
381	da.sd = sd;
382	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
383	    IPFW_TLV_NAT64LSN_NAME);
384	IPFW_UH_RUNLOCK(ch);
385
386	return (0);
387}
388
389/*
390 * Change existing nat64lsn instance configuration.
391 * Data layout (v0)(current):
392 * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
393 * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
394 *
395 * Returns 0 on success
396 */
397static int
398nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
399    struct sockopt_data *sd)
400{
401	ipfw_obj_header *oh;
402	ipfw_nat64lsn_cfg *uc;
403	struct nat64lsn_cfg *cfg;
404	struct namedobj_instance *ni;
405
406	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
407		return (EINVAL);
408
409	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
410	    sizeof(*oh) + sizeof(*uc));
411	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
412
413	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
414	    oh->ntlv.set >= IPFW_MAX_SETS)
415		return (EINVAL);
416
417	ni = CHAIN_TO_SRV(ch);
418	if (sd->sopt->sopt_dir == SOPT_GET) {
419		IPFW_UH_RLOCK(ch);
420		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
421		if (cfg == NULL) {
422			IPFW_UH_RUNLOCK(ch);
423			return (ENOENT);
424		}
425		nat64lsn_export_config(ch, cfg, uc);
426		IPFW_UH_RUNLOCK(ch);
427		return (0);
428	}
429
430	nat64lsn_default_config(uc);
431
432	IPFW_UH_WLOCK(ch);
433	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
434	if (cfg == NULL) {
435		IPFW_UH_WUNLOCK(ch);
436		return (ENOENT);
437	}
438
439	/*
440	 * For now allow to change only following values:
441	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
442	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
443	 */
444
445	cfg->states_chunks = uc->states_chunks;
446	cfg->jmaxlen = uc->jmaxlen;
447	cfg->host_delete_delay = uc->nh_delete_delay;
448	cfg->pg_delete_delay = uc->pg_delete_delay;
449	cfg->st_syn_ttl = uc->st_syn_ttl;
450	cfg->st_close_ttl = uc->st_close_ttl;
451	cfg->st_estab_ttl = uc->st_estab_ttl;
452	cfg->st_udp_ttl = uc->st_udp_ttl;
453	cfg->st_icmp_ttl = uc->st_icmp_ttl;
454	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
455	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
456
457	IPFW_UH_WUNLOCK(ch);
458
459	return (0);
460}
461
462/*
463 * Get nat64lsn statistics.
464 * Data layout (v0)(current):
465 * Request: [ ipfw_obj_header ]
466 * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
467 *
468 * Returns 0 on success
469 */
470static int
471nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
472    struct sockopt_data *sd)
473{
474	struct ipfw_nat64lsn_stats stats;
475	struct nat64lsn_cfg *cfg;
476	ipfw_obj_header *oh;
477	ipfw_obj_ctlv *ctlv;
478	size_t sz;
479
480	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
481	if (sd->valsize % sizeof(uint64_t))
482		return (EINVAL);
483	if (sd->valsize < sz)
484		return (ENOMEM);
485	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
486	if (oh == NULL)
487		return (EINVAL);
488	memset(&stats, 0, sizeof(stats));
489
490	IPFW_UH_RLOCK(ch);
491	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
492	if (cfg == NULL) {
493		IPFW_UH_RUNLOCK(ch);
494		return (ENOENT);
495	}
496
497	export_stats(ch, cfg, &stats);
498	IPFW_UH_RUNLOCK(ch);
499
500	ctlv = (ipfw_obj_ctlv *)(oh + 1);
501	memset(ctlv, 0, sizeof(*ctlv));
502	ctlv->head.type = IPFW_TLV_COUNTERS;
503	ctlv->head.length = sz - sizeof(ipfw_obj_header);
504	ctlv->count = sizeof(stats) / sizeof(uint64_t);
505	ctlv->objsize = sizeof(uint64_t);
506	ctlv->version = IPFW_NAT64_VERSION;
507	memcpy(ctlv + 1, &stats, sizeof(stats));
508	return (0);
509}
510
511/*
512 * Reset nat64lsn statistics.
513 * Data layout (v0)(current):
514 * Request: [ ipfw_obj_header ]
515 *
516 * Returns 0 on success
517 */
518static int
519nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
520    struct sockopt_data *sd)
521{
522	struct nat64lsn_cfg *cfg;
523	ipfw_obj_header *oh;
524
525	if (sd->valsize != sizeof(*oh))
526		return (EINVAL);
527	oh = (ipfw_obj_header *)sd->kbuf;
528	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
529	    oh->ntlv.set >= IPFW_MAX_SETS)
530		return (EINVAL);
531
532	IPFW_UH_WLOCK(ch);
533	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
534	if (cfg == NULL) {
535		IPFW_UH_WUNLOCK(ch);
536		return (ENOENT);
537	}
538	COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
539	IPFW_UH_WUNLOCK(ch);
540	return (0);
541}
542
543#ifdef __LP64__
544#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
545#else
546#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
547    ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
548#endif
549/*
550 * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
551 *	ipfw_nat64lsn_state x count, ... ] ]
552 */
553static int
554nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
555    struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
556{
557	ipfw_nat64lsn_state_v1 *s;
558	struct nat64lsn_state *state;
559	uint64_t freemask;
560	uint32_t i, count;
561
562	/* validate user input */
563	if (idx->chunk > pg->chunks_count - 1)
564		return (EINVAL);
565
566	FREEMASK_COPY(pg, idx->chunk, freemask);
567	count = 64 - bitcount64(freemask);
568	if (count == 0)
569		return (0);	/* Try next PG/chunk */
570
571	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
572	    (uintmax_t)idx->index, count);
573
574	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
575	    count * sizeof(ipfw_nat64lsn_state_v1));
576	if (s == NULL)
577		return (ENOMEM);
578
579	for (i = 0; i < 64; i++) {
580		if (ISSET64(freemask, i))
581			continue;
582		state = pg->chunks_count == 1 ? &pg->states->state[i] :
583		    &pg->states_chunk[idx->chunk]->state[i];
584
585		s->host6 = state->host->addr;
586		s->daddr.s_addr = htonl(state->ip_dst);
587		s->dport = state->dport;
588		s->sport = state->sport;
589		s->aport = state->aport;
590		s->flags = (uint8_t)(state->flags & 7);
591		s->proto = state->proto;
592		s->idle = GET_AGE(state->timestamp);
593		s++;
594	}
595	*ret_count = count;
596	return (0);
597}
598
599#define	LAST_IDX	0xFF
600static int
601nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
602    union nat64lsn_pgidx *idx)
603{
604
605	/* First iterate over chunks */
606	if (pg != NULL) {
607		if (idx->chunk < pg->chunks_count - 1) {
608			idx->chunk++;
609			return (0);
610		}
611	}
612	idx->chunk = 0;
613	/* Then over PGs */
614	if (idx->port < UINT16_MAX - 64) {
615		idx->port += 64;
616		return (0);
617	}
618	idx->port = NAT64_MIN_PORT;
619	/* Then over supported protocols */
620	switch (idx->proto) {
621	case IPPROTO_ICMP:
622		idx->proto = IPPROTO_TCP;
623		return (0);
624	case IPPROTO_TCP:
625		idx->proto = IPPROTO_UDP;
626		return (0);
627	default:
628		idx->proto = IPPROTO_ICMP;
629	}
630	/* And then over IPv4 alias addresses */
631	if (idx->addr < cfg->pmask4) {
632		idx->addr++;
633		return (1);	/* New states group is needed */
634	}
635	idx->index = LAST_IDX;
636	return (-1);		/* No more states */
637}
638
639static struct nat64lsn_pg*
640nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
641{
642	struct nat64lsn_alias *alias;
643	int pg_idx;
644
645	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
646	MPASS(alias->addr == idx->addr);
647
648	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
649	switch (idx->proto) {
650	case IPPROTO_ICMP:
651		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
652			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
653		break;
654	case IPPROTO_TCP:
655		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
656			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
657		break;
658	case IPPROTO_UDP:
659		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
660			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
661		break;
662	}
663	return (NULL);
664}
665
666/*
667 * Lists nat64lsn states.
668 * Data layout (v0):
669 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
670 * Reply: [ ipfw_obj_header ipfw_obj_data [
671 *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
672 *
673 * Returns 0 on success
674 */
675static int
676nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
677    struct sockopt_data *sd)
678{
679
680	/* TODO: implement states listing for old ipfw(8) binaries  */
681	return (EOPNOTSUPP);
682}
683
684/*
685 * Lists nat64lsn states.
686 * Data layout (v1)(current):
687 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
688 * Reply: [ ipfw_obj_header ipfw_obj_data [
689 *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
690 *
691 * Returns 0 on success
692 */
693static int
694nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
695    struct sockopt_data *sd)
696{
697	ipfw_obj_header *oh;
698	ipfw_obj_data *od;
699	ipfw_nat64lsn_stg_v1 *stg;
700	struct nat64lsn_cfg *cfg;
701	struct nat64lsn_pg *pg;
702	union nat64lsn_pgidx idx;
703	size_t sz;
704	uint32_t count, total;
705	int ret;
706
707	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
708	    sizeof(uint64_t);
709	/* Check minimum header size */
710	if (sd->valsize < sz)
711		return (EINVAL);
712
713	oh = (ipfw_obj_header *)sd->kbuf;
714	od = (ipfw_obj_data *)(oh + 1);
715	if (od->head.type != IPFW_TLV_OBJDATA ||
716	    od->head.length != sz - sizeof(ipfw_obj_header))
717		return (EINVAL);
718
719	idx.index = *(uint64_t *)(od + 1);
720	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
721	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
722		return (EINVAL);
723	if (idx.index == LAST_IDX)
724		return (EINVAL);
725
726	IPFW_UH_RLOCK(ch);
727	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
728	if (cfg == NULL) {
729		IPFW_UH_RUNLOCK(ch);
730		return (ENOENT);
731	}
732	if (idx.index == 0) {	/* Fill in starting point */
733		idx.addr = cfg->prefix4;
734		idx.proto = IPPROTO_ICMP;
735		idx.port = NAT64_MIN_PORT;
736	}
737	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
738	    idx.port < NAT64_MIN_PORT) {
739		IPFW_UH_RUNLOCK(ch);
740		return (EINVAL);
741	}
742	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
743	    sizeof(ipfw_nat64lsn_stg_v1);
744	if (sd->valsize < sz) {
745		IPFW_UH_RUNLOCK(ch);
746		return (ENOMEM);
747	}
748	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
749	od = (ipfw_obj_data *)(oh + 1);
750	od->head.type = IPFW_TLV_OBJDATA;
751	od->head.length = sz - sizeof(ipfw_obj_header);
752	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
753	stg->count = total = 0;
754	stg->next.index = idx.index;
755	/*
756	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
757	 * Thus states, hosts and PGs will not expire while we hold it.
758	 */
759	CALLOUT_LOCK(cfg);
760	ret = 0;
761	do {
762		pg = nat64lsn_get_pg_byidx(cfg, &idx);
763		if (pg != NULL) {
764			count = 0;
765			ret = nat64lsn_export_states_v1(cfg, &idx, pg,
766			    sd, &count);
767			if (ret != 0)
768				break;
769			if (count > 0) {
770				stg->count += count;
771				total += count;
772				/* Update total size of reply */
773				od->head.length +=
774				    count * sizeof(ipfw_nat64lsn_state_v1);
775				sz += count * sizeof(ipfw_nat64lsn_state_v1);
776			}
777			stg->alias4.s_addr = htonl(idx.addr);
778		}
779		/* Determine new index */
780		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
781		case -1:
782			ret = ENOENT; /* End of search */
783			break;
784		case 1: /*
785			 * Next alias address, new group may be needed.
786			 * If states count is zero, use this group.
787			 */
788			if (stg->count == 0)
789				continue;
790			/* Otherwise try to create new group */
791			sz += sizeof(ipfw_nat64lsn_stg_v1);
792			if (sd->valsize < sz) {
793				ret = ENOMEM;
794				break;
795			}
796			/* Save next index in current group */
797			stg->next.index = idx.index;
798			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
799			    sizeof(ipfw_nat64lsn_stg_v1));
800			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
801			stg->count = 0;
802			break;
803		}
804		stg->next.index = idx.index;
805	} while (ret == 0);
806	CALLOUT_UNLOCK(cfg);
807	IPFW_UH_RUNLOCK(ch);
808	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
809}
810
811static struct ipfw_sopt_handler	scodes[] = {
812	{ IP_FW_NAT64LSN_CREATE, 0,	HDIR_BOTH,	nat64lsn_create },
813	{ IP_FW_NAT64LSN_DESTROY,0,	HDIR_SET,	nat64lsn_destroy },
814	{ IP_FW_NAT64LSN_CONFIG, 0,	HDIR_BOTH,	nat64lsn_config },
815	{ IP_FW_NAT64LSN_LIST,	 0,	HDIR_GET,	nat64lsn_list },
816	{ IP_FW_NAT64LSN_STATS,	 0,	HDIR_GET,	nat64lsn_stats },
817	{ IP_FW_NAT64LSN_RESET_STATS,0,	HDIR_SET,	nat64lsn_reset_stats },
818	{ IP_FW_NAT64LSN_LIST_STATES,0,	HDIR_GET,	nat64lsn_states_v0 },
819	{ IP_FW_NAT64LSN_LIST_STATES,1,	HDIR_GET,	nat64lsn_states_v1 },
820};
821
822static int
823nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
824{
825	ipfw_insn *icmd;
826
827	icmd = cmd - 1;
828	if (icmd->opcode != O_EXTERNAL_ACTION ||
829	    icmd->arg1 != V_nat64lsn_eid)
830		return (1);
831
832	*puidx = cmd->arg1;
833	*ptype = 0;
834	return (0);
835}
836
837static void
838nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
839{
840
841	cmd->arg1 = idx;
842}
843
844static int
845nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
846    struct named_object **pno)
847{
848	int err;
849
850	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
851	    IPFW_TLV_NAT64LSN_NAME, pno);
852	return (err);
853}
854
855static struct named_object *
856nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
857{
858	struct namedobj_instance *ni;
859	struct named_object *no;
860
861	IPFW_UH_WLOCK_ASSERT(ch);
862	ni = CHAIN_TO_SRV(ch);
863	no = ipfw_objhash_lookup_kidx(ni, idx);
864	KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
865
866	return (no);
867}
868
869static int
870nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
871    enum ipfw_sets_cmd cmd)
872{
873
874	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
875	    set, new_set, cmd));
876}
877
878static struct opcode_obj_rewrite opcodes[] = {
879	{
880		.opcode = O_EXTERNAL_INSTANCE,
881		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
882		.classifier = nat64lsn_classify,
883		.update = nat64lsn_update_arg1,
884		.find_byname = nat64lsn_findbyname,
885		.find_bykidx = nat64lsn_findbykidx,
886		.manage_sets = nat64lsn_manage_sets,
887	},
888};
889
890static int
891destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
892    void *arg)
893{
894	struct nat64lsn_cfg *cfg;
895	struct ip_fw_chain *ch;
896
897	ch = (struct ip_fw_chain *)arg;
898	cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
899	SRV_OBJECT(ch, no->kidx) = NULL;
900	nat64lsn_detach_config(ch, cfg);
901	nat64lsn_destroy_instance(cfg);
902	return (0);
903}
904
905int
906nat64lsn_init(struct ip_fw_chain *ch, int first)
907{
908
909	if (first != 0)
910		nat64lsn_init_internal();
911	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
912	if (V_nat64lsn_eid == 0)
913		return (ENXIO);
914	IPFW_ADD_SOPT_HANDLER(first, scodes);
915	IPFW_ADD_OBJ_REWRITER(first, opcodes);
916	return (0);
917}
918
919void
920nat64lsn_uninit(struct ip_fw_chain *ch, int last)
921{
922
923	IPFW_DEL_OBJ_REWRITER(last, opcodes);
924	IPFW_DEL_SOPT_HANDLER(last, scodes);
925	ipfw_del_eaction(ch, V_nat64lsn_eid);
926	/*
927	 * Since we already have deregistered external action,
928	 * our named objects become unaccessible via rules, because
929	 * all rules were truncated by ipfw_del_eaction().
930	 * So, we can unlink and destroy our named objects without holding
931	 * IPFW_WLOCK().
932	 */
933	IPFW_UH_WLOCK(ch);
934	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
935	    IPFW_TLV_NAT64LSN_NAME);
936	V_nat64lsn_eid = 0;
937	IPFW_UH_WUNLOCK(ch);
938	if (last != 0)
939		nat64lsn_uninit_internal();
940}
941