1/*-
2 * Copyright (c) 2016 Yandex LLC
3 * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/counter.h>
31#include <sys/eventhandler.h>
32#include <sys/errno.h>
33#include <sys/kernel.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mbuf.h>
37#include <sys/module.h>
38#include <sys/rmlock.h>
39#include <sys/rwlock.h>
40#include <sys/socket.h>
41#include <sys/queue.h>
42#include <sys/syslog.h>
43#include <sys/sysctl.h>
44
45#include <net/if.h>
46#include <net/if_var.h>
47#include <net/if_private.h>
48#include <net/netisr.h>
49#include <net/pfil.h>
50#include <net/vnet.h>
51
52#include <netinet/in.h>
53#include <netinet/ip_var.h>
54#include <netinet/ip_fw.h>
55#include <netinet/ip6.h>
56#include <netinet/icmp6.h>
57#include <netinet6/in6_var.h>
58#include <netinet6/ip6_var.h>
59
60#include <netpfil/ipfw/ip_fw_private.h>
61#include <netpfil/ipfw/nptv6/nptv6.h>
62
63VNET_DEFINE_STATIC(uint16_t, nptv6_eid) = 0;
64#define	V_nptv6_eid	VNET(nptv6_eid)
65#define	IPFW_TLV_NPTV6_NAME	IPFW_TLV_EACTION_NAME(V_nptv6_eid)
66
67static eventhandler_tag nptv6_ifaddr_event;
68
69static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set);
70static void nptv6_free_config(struct nptv6_cfg *cfg);
71static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni,
72    const char *name, uint8_t set);
73static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp,
74    int offset);
75static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp,
76    int offset);
77
78#define	NPTV6_LOOKUP(chain, cmd)	\
79    (struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
80
/*
 * Fallback definitions of the address masking helpers; they are used only
 * when the included headers do not provide them.
 */
#ifndef IN6_MASK_ADDR
/* Clear every bit of address (a) that is not set in mask (m). */
#define IN6_MASK_ADDR(a, m)	do { \
	(a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
	(a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
	(a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
	(a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
} while (0)
#endif
#ifndef IN6_ARE_MASKED_ADDR_EQUAL
/* True when (d) and (a) are equal in every bit that is set in mask (m). */
#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m)	(	\
	(((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
	(((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
	(((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
	(((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
#endif

/*
 * Debug output helpers.  They are compiled out by default; switch the
 * condition below to 1 to get printf() tracing.  NPTV6_IPDEBUG additionally
 * declares the _s and _d scratch buffers that its callers pass to
 * inet_ntop().
 */
#if 0
#define	NPTV6_DEBUG(fmt, ...)	do {			\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
} while (0)
#define	NPTV6_IPDEBUG(fmt, ...)	do {			\
	char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN];	\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
} while (0)
#else
#define	NPTV6_DEBUG(fmt, ...)
#define	NPTV6_IPDEBUG(fmt, ...)
#endif
109
110static int
111nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset)
112{
113	struct ip6_hdr *ip6;
114	struct ip6_hbh *hbh;
115	int proto, hlen;
116
117	hlen = (offset == NULL) ? 0: *offset;
118	if (m->m_len < hlen)
119		return (-1);
120	ip6 = mtodo(m, hlen);
121	hlen += sizeof(*ip6);
122	proto = ip6->ip6_nxt;
123	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
124	    proto == IPPROTO_DSTOPTS) {
125		hbh = mtodo(m, hlen);
126		if (m->m_len < hlen)
127			return (-1);
128		proto = hbh->ip6h_nxt;
129		hlen += (hbh->ip6h_len + 1) << 3;
130	}
131	if (offset != NULL)
132		*offset = hlen;
133	return (proto);
134}
135
136static int
137nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
138{
139	struct icmp6_hdr *icmp6;
140	struct ip6_hdr *ip6;
141	struct mbuf *m;
142
143	m = *mp;
144	if (offset > m->m_len)
145		return (-1);
146	icmp6 = mtodo(m, offset);
147	NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type);
148	switch (icmp6->icmp6_type) {
149	case ICMP6_DST_UNREACH:
150	case ICMP6_PACKET_TOO_BIG:
151	case ICMP6_TIME_EXCEEDED:
152	case ICMP6_PARAM_PROB:
153		break;
154	case ICMP6_ECHO_REQUEST:
155	case ICMP6_ECHO_REPLY:
156		/* nothing to translate */
157		return (0);
158	default:
159		/*
160		 * XXX: We can add some checks to not translate NDP and MLD
161		 * messages. Currently user must explicitly allow these message
162		 * types, otherwise packets will be dropped.
163		 */
164		return (-1);
165	}
166	offset += sizeof(*icmp6);
167	if (offset + sizeof(*ip6) > m->m_pkthdr.len)
168		return (-1);
169	if (offset + sizeof(*ip6) > m->m_len)
170		*mp = m = m_pullup(m, offset + sizeof(*ip6));
171	if (m == NULL)
172		return (-1);
173	ip6 = mtodo(m, offset);
174	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
175	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
176	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
177	    ip6->ip6_nxt);
178	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
179	    &cfg->external, &cfg->mask))
180		return (nptv6_rewrite_external(cfg, mp, offset));
181	else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
182	    &cfg->internal, &cfg->mask))
183		return (nptv6_rewrite_internal(cfg, mp, offset));
184	/*
185	 * Addresses in the inner IPv6 header doesn't matched to
186	 * our prefixes.
187	 */
188	return (-1);
189}
190
191static int
192nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a)
193{
194	int idx;
195
196	if (cfg->flags & NPTV6_48PLEN)
197		return (3);
198
199	/* Search suitable word index for adjustment */
200	for (idx = 4; idx < 8; idx++)
201		if (a->s6_addr16[idx] != 0xffff)
202			break;
203	/*
204	 * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with
205	 * an IID of all-zeros while performing address mapping, that
206	 * datagram MUST be dropped, and an ICMPv6 Parameter Problem error
207	 * SHOULD be generated.
208	 */
209	if (idx == 8 ||
210	    (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0))
211		return (-1);
212	return (idx);
213}
214
215static void
216nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst,
217    struct in6_addr *mask)
218{
219	int i;
220
221	for (i = 0; i < 8 && mask->s6_addr8[i] != 0; i++) {
222		dst->s6_addr8[i] &=  ~mask->s6_addr8[i];
223		dst->s6_addr8[i] |= src->s6_addr8[i] & mask->s6_addr8[i];
224	}
225}
226
227static int
228nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
229{
230	struct in6_addr *addr;
231	struct ip6_hdr *ip6;
232	int idx, proto;
233	uint16_t adj;
234
235	ip6 = mtodo(*mp, offset);
236	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
237	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
238	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
239	    ip6->ip6_nxt);
240	if (offset == 0)
241		addr = &ip6->ip6_src;
242	else {
243		/*
244		 * When we rewriting inner IPv6 header, we need to rewrite
245		 * destination address back to external prefix. The datagram in
246		 * the ICMPv6 payload should looks like it was send from
247		 * external prefix.
248		 */
249		addr = &ip6->ip6_dst;
250	}
251	idx = nptv6_search_index(cfg, addr);
252	if (idx < 0) {
253		/*
254		 * Do not send ICMPv6 error when offset isn't zero.
255		 * This means we are rewriting inner IPv6 header in the
256		 * ICMPv6 error message.
257		 */
258		if (offset == 0) {
259			icmp6_error2(*mp, ICMP6_DST_UNREACH,
260			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
261			*mp = NULL;
262		}
263		return (IP_FW_DENY);
264	}
265	adj = addr->s6_addr16[idx];
266	nptv6_copy_addr(&cfg->external, addr, &cfg->mask);
267	adj = cksum_add(adj, cfg->adjustment);
268	if (adj == 0xffff)
269		adj = 0;
270	addr->s6_addr16[idx] = adj;
271	if (offset == 0) {
272		/*
273		 * We may need to translate addresses in the inner IPv6
274		 * header for ICMPv6 error messages.
275		 */
276		proto = nptv6_getlasthdr(cfg, *mp, &offset);
277		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
278		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
279			return (IP_FW_DENY);
280		NPTV6STAT_INC(cfg, in2ex);
281	}
282	return (0);
283}
284
285static int
286nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
287{
288	struct in6_addr *addr;
289	struct ip6_hdr *ip6;
290	int idx, proto;
291	uint16_t adj;
292
293	ip6 = mtodo(*mp, offset);
294	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
295	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
296	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
297	    ip6->ip6_nxt);
298	if (offset == 0)
299		addr = &ip6->ip6_dst;
300	else {
301		/*
302		 * When we rewriting inner IPv6 header, we need to rewrite
303		 * source address back to internal prefix. The datagram in
304		 * the ICMPv6 payload should looks like it was send from
305		 * internal prefix.
306		 */
307		addr = &ip6->ip6_src;
308	}
309	idx = nptv6_search_index(cfg, addr);
310	if (idx < 0) {
311		/*
312		 * Do not send ICMPv6 error when offset isn't zero.
313		 * This means we are rewriting inner IPv6 header in the
314		 * ICMPv6 error message.
315		 */
316		if (offset == 0) {
317			icmp6_error2(*mp, ICMP6_DST_UNREACH,
318			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
319			*mp = NULL;
320		}
321		return (IP_FW_DENY);
322	}
323	adj = addr->s6_addr16[idx];
324	nptv6_copy_addr(&cfg->internal, addr, &cfg->mask);
325	adj = cksum_add(adj, ~cfg->adjustment);
326	if (adj == 0xffff)
327		adj = 0;
328	addr->s6_addr16[idx] = adj;
329	if (offset == 0) {
330		/*
331		 * We may need to translate addresses in the inner IPv6
332		 * header for ICMPv6 error messages.
333		 */
334		proto = nptv6_getlasthdr(cfg, *mp, &offset);
335		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
336		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
337			return (IP_FW_DENY);
338		NPTV6STAT_INC(cfg, ex2in);
339	}
340	return (0);
341}
342
343/*
344 * ipfw external action handler.
345 */
346static int
347ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,
348    ipfw_insn *cmd, int *done)
349{
350	struct ip6_hdr *ip6;
351	struct nptv6_cfg *cfg;
352	ipfw_insn *icmd;
353	int ret;
354
355	*done = 0; /* try next rule if not matched */
356	ret = IP_FW_DENY;
357	icmd = cmd + 1;
358	if (cmd->opcode != O_EXTERNAL_ACTION ||
359	    cmd->arg1 != V_nptv6_eid ||
360	    icmd->opcode != O_EXTERNAL_INSTANCE ||
361	    (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL ||
362	    (cfg->flags & NPTV6_READY) == 0)
363		return (ret);
364	/*
365	 * We need act as router, so when forwarding is disabled -
366	 * do nothing.
367	 */
368	if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6)
369		return (ret);
370	/*
371	 * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
372	 * protocol's headers. Also we skip some checks, that ip6_input(),
373	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
374	 */
375	ip6 = mtod(args->m, struct ip6_hdr *);
376	NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d",
377	    cmd->arg1, icmd->arg1,
378	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
379	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
380	    ip6->ip6_nxt);
381	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
382	    &cfg->internal, &cfg->mask)) {
383		/*
384		 * XXX: Do not translate packets when both src and dst
385		 * are from internal prefix.
386		 */
387		if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
388		    &cfg->internal, &cfg->mask))
389			return (ret);
390		ret = nptv6_rewrite_internal(cfg, &args->m, 0);
391	} else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
392	    &cfg->external, &cfg->mask))
393		ret = nptv6_rewrite_external(cfg, &args->m, 0);
394	else
395		return (ret);
396	/*
397	 * If address wasn't rewrited - free mbuf and terminate the search.
398	 */
399	if (ret != 0) {
400		if (args->m != NULL) {
401			m_freem(args->m);
402			args->m = NULL; /* mark mbuf as consumed */
403		}
404		NPTV6STAT_INC(cfg, dropped);
405		*done = 1;
406	} else {
407		/* Terminate the search if one_pass is set */
408		*done = V_fw_one_pass;
409		/* Update args->f_id when one_pass is off */
410		if (*done == 0) {
411			ip6 = mtod(args->m, struct ip6_hdr *);
412			args->f_id.src_ip6 = ip6->ip6_src;
413			args->f_id.dst_ip6 = ip6->ip6_dst;
414		}
415	}
416	return (ret);
417}
418
419static struct nptv6_cfg *
420nptv6_alloc_config(const char *name, uint8_t set)
421{
422	struct nptv6_cfg *cfg;
423
424	cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO);
425	COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK);
426	cfg->no.name = cfg->name;
427	cfg->no.etlv = IPFW_TLV_NPTV6_NAME;
428	cfg->no.set = set;
429	strlcpy(cfg->name, name, sizeof(cfg->name));
430	return (cfg);
431}
432
433static void
434nptv6_free_config(struct nptv6_cfg *cfg)
435{
436
437	COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS);
438	free(cfg, M_IPFW);
439}
440
441static void
442nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
443    ipfw_nptv6_cfg *uc)
444{
445
446	uc->internal = cfg->internal;
447	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
448		memcpy(uc->if_name, cfg->if_name, IF_NAMESIZE);
449	else
450		uc->external = cfg->external;
451	uc->plen = cfg->plen;
452	uc->flags = cfg->flags & NPTV6_FLAGSMASK;
453	uc->set = cfg->no.set;
454	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
455}
456
/* Context handed to export_config_cb() while instances are listed. */
struct nptv6_dump_arg {
	struct ip_fw_chain *ch;		/* chain that owns the instances */
	struct sockopt_data *sd;	/* reply buffer being filled */
};
461
462static int
463export_config_cb(struct namedobj_instance *ni, struct named_object *no,
464    void *arg)
465{
466	struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg;
467	ipfw_nptv6_cfg *uc;
468
469	uc = (ipfw_nptv6_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc));
470	nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc);
471	return (0);
472}
473
474static struct nptv6_cfg *
475nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set)
476{
477	struct nptv6_cfg *cfg;
478
479	cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set,
480	    IPFW_TLV_NPTV6_NAME, name);
481
482	return (cfg);
483}
484
485static void
486nptv6_calculate_adjustment(struct nptv6_cfg *cfg)
487{
488	uint16_t i, e;
489	uint16_t *p;
490
491	/* Calculate checksum of internal prefix */
492	for (i = 0, p = (uint16_t *)&cfg->internal;
493	    p < (uint16_t *)(&cfg->internal + 1); p++)
494		i = cksum_add(i, *p);
495
496	/* Calculate checksum of external prefix */
497	for (e = 0, p = (uint16_t *)&cfg->external;
498	    p < (uint16_t *)(&cfg->external + 1); p++)
499		e = cksum_add(e, *p);
500
501	/* Adjustment value for Int->Ext direction */
502	cfg->adjustment = cksum_add(~e, i);
503}
504
505static int
506nptv6_check_prefix(const struct in6_addr *addr)
507{
508
509	if (IN6_IS_ADDR_MULTICAST(addr) ||
510	    IN6_IS_ADDR_LINKLOCAL(addr) ||
511	    IN6_IS_ADDR_LOOPBACK(addr) ||
512	    IN6_IS_ADDR_UNSPECIFIED(addr))
513		return (EINVAL);
514	return (0);
515}
516
517static void
518nptv6_set_external(struct nptv6_cfg *cfg, struct in6_addr *addr)
519{
520
521	cfg->external = *addr;
522	IN6_MASK_ADDR(&cfg->external, &cfg->mask);
523	nptv6_calculate_adjustment(cfg);
524	cfg->flags |= NPTV6_READY;
525}
526
527/*
528 * Try to determine what prefix to use as external for
529 * configured interface name.
530 */
531static void
532nptv6_find_prefix(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
533    struct ifnet *ifp)
534{
535	struct epoch_tracker et;
536	struct ifaddr *ifa;
537	struct in6_ifaddr *ia;
538
539	MPASS(cfg->flags & NPTV6_DYNAMIC_PREFIX);
540	IPFW_UH_WLOCK_ASSERT(ch);
541
542	if (ifp == NULL) {
543		ifp = ifunit_ref(cfg->if_name);
544		if (ifp == NULL)
545			return;
546	}
547	NET_EPOCH_ENTER(et);
548	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
549		if (ifa->ifa_addr->sa_family != AF_INET6)
550			continue;
551		ia = (struct in6_ifaddr *)ifa;
552		if (nptv6_check_prefix(&ia->ia_addr.sin6_addr) ||
553		    IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
554		    &cfg->internal, &cfg->mask))
555			continue;
556		/* Suitable address is found. */
557		nptv6_set_external(cfg, &ia->ia_addr.sin6_addr);
558		break;
559	}
560	NET_EPOCH_EXIT(et);
561	if_rele(ifp);
562}
563
/* Description of an interface address event, passed to ifaddr_cb(). */
struct ifaddr_event_args {
	struct ifnet *ifp;		/* interface the event belongs to */
	const struct in6_addr *addr;	/* IPv6 address that was changed */
	int event;			/* IFADDR_EVENT_* value */
};
569
570static int
571ifaddr_cb(struct namedobj_instance *ni, struct named_object *no,
572    void *arg)
573{
574	struct ifaddr_event_args *args;
575	struct ip_fw_chain *ch;
576	struct nptv6_cfg *cfg;
577
578	ch = &V_layer3_chain;
579	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
580	if ((cfg->flags & NPTV6_DYNAMIC_PREFIX) == 0)
581		return (0);
582
583	args = arg;
584	/* If interface name doesn't match, ignore */
585	if (strncmp(args->ifp->if_xname, cfg->if_name, IF_NAMESIZE))
586		return (0);
587	if (args->ifp->if_flags & IFF_DYING) { /* XXX: is it possible? */
588		cfg->flags &= ~NPTV6_READY;
589		return (0);
590	}
591	if (args->event == IFADDR_EVENT_DEL) {
592		/* If instance is not ready, ignore */
593		if ((cfg->flags & NPTV6_READY) == 0)
594			return (0);
595		/* If address does not match the external prefix, ignore */
596		if (IN6_ARE_MASKED_ADDR_EQUAL(&cfg->external, args->addr,
597		    &cfg->mask) != 0)
598			return (0);
599		/* Otherwise clear READY flag */
600		cfg->flags &= ~NPTV6_READY;
601	} else {/* IFADDR_EVENT_ADD */
602		/* If instance is already ready, ignore */
603		if (cfg->flags & NPTV6_READY)
604			return (0);
605		/* If address is not suitable for prefix, ignore */
606		if (nptv6_check_prefix(args->addr) ||
607		    IN6_ARE_MASKED_ADDR_EQUAL(args->addr, &cfg->internal,
608		    &cfg->mask))
609			return (0);
610		/* FALLTHROUGH */
611	}
612	MPASS(!(cfg->flags & NPTV6_READY));
613	/* Try to determine the prefix */
614	if_ref(args->ifp);
615	nptv6_find_prefix(ch, cfg, args->ifp);
616	return (0);
617}
618
619static void
620nptv6_ifaddrevent_handler(void *arg __unused, struct ifnet *ifp,
621    struct ifaddr *ifa, int event)
622{
623	struct ifaddr_event_args args;
624	struct ip_fw_chain *ch;
625
626	if (ifa->ifa_addr->sa_family != AF_INET6)
627		return;
628
629	args.ifp = ifp;
630	args.addr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
631	args.event = event;
632
633	ch = &V_layer3_chain;
634	IPFW_UH_WLOCK(ch);
635	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), ifaddr_cb, &args,
636	    IPFW_TLV_NPTV6_NAME);
637	IPFW_UH_WUNLOCK(ch);
638}
639
640/*
641 * Creates new NPTv6 instance.
642 * Data layout (v0)(current):
643 * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ]
644 *
645 * Returns 0 on success
646 */
647static int
648nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
649    struct sockopt_data *sd)
650{
651	struct in6_addr mask;
652	ipfw_obj_lheader *olh;
653	ipfw_nptv6_cfg *uc;
654	struct namedobj_instance *ni;
655	struct nptv6_cfg *cfg;
656
657	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
658		return (EINVAL);
659
660	olh = (ipfw_obj_lheader *)sd->kbuf;
661	uc = (ipfw_nptv6_cfg *)(olh + 1);
662	if (ipfw_check_object_name_generic(uc->name) != 0)
663		return (EINVAL);
664	if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS)
665		return (EINVAL);
666	if (nptv6_check_prefix(&uc->internal))
667		return (EINVAL);
668	in6_prefixlen2mask(&mask, uc->plen);
669	if ((uc->flags & NPTV6_DYNAMIC_PREFIX) == 0 && (
670	    nptv6_check_prefix(&uc->external) ||
671	    IN6_ARE_MASKED_ADDR_EQUAL(&uc->external, &uc->internal, &mask)))
672		return (EINVAL);
673
674	ni = CHAIN_TO_SRV(ch);
675	IPFW_UH_RLOCK(ch);
676	if (nptv6_find(ni, uc->name, uc->set) != NULL) {
677		IPFW_UH_RUNLOCK(ch);
678		return (EEXIST);
679	}
680	IPFW_UH_RUNLOCK(ch);
681
682	cfg = nptv6_alloc_config(uc->name, uc->set);
683	cfg->plen = uc->plen;
684	cfg->flags = uc->flags & NPTV6_FLAGSMASK;
685	if (cfg->plen <= 48)
686		cfg->flags |= NPTV6_48PLEN;
687	cfg->mask = mask;
688	cfg->internal = uc->internal;
689	IN6_MASK_ADDR(&cfg->internal, &mask);
690	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
691		memcpy(cfg->if_name, uc->if_name, IF_NAMESIZE);
692	else
693		nptv6_set_external(cfg, &uc->external);
694
695	if ((uc->flags & NPTV6_DYNAMIC_PREFIX) != 0 &&
696	    nptv6_ifaddr_event == NULL)
697		nptv6_ifaddr_event = EVENTHANDLER_REGISTER(
698		    ifaddr_event_ext, nptv6_ifaddrevent_handler, NULL,
699		    EVENTHANDLER_PRI_ANY);
700
701	IPFW_UH_WLOCK(ch);
702	if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) {
703		IPFW_UH_WUNLOCK(ch);
704		nptv6_free_config(cfg);
705		return (ENOSPC);
706	}
707	ipfw_objhash_add(ni, &cfg->no);
708	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
709	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
710		nptv6_find_prefix(ch, cfg, NULL);
711	IPFW_UH_WUNLOCK(ch);
712
713	return (0);
714}
715
716/*
717 * Destroys NPTv6 instance.
718 * Data layout (v0)(current):
719 * Request: [ ipfw_obj_header ]
720 *
721 * Returns 0 on success
722 */
723static int
724nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
725    struct sockopt_data *sd)
726{
727	ipfw_obj_header *oh;
728	struct nptv6_cfg *cfg;
729
730	if (sd->valsize != sizeof(*oh))
731		return (EINVAL);
732
733	oh = (ipfw_obj_header *)sd->kbuf;
734	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
735		return (EINVAL);
736
737	IPFW_UH_WLOCK(ch);
738	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
739	if (cfg == NULL) {
740		IPFW_UH_WUNLOCK(ch);
741		return (ESRCH);
742	}
743	if (cfg->no.refcnt > 0) {
744		IPFW_UH_WUNLOCK(ch);
745		return (EBUSY);
746	}
747
748	ipfw_reset_eaction_instance(ch, V_nptv6_eid, cfg->no.kidx);
749	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
750	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
751	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
752	IPFW_UH_WUNLOCK(ch);
753
754	nptv6_free_config(cfg);
755	return (0);
756}
757
758/*
759 * Get or change nptv6 instance config.
760 * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ]
761 */
762static int
763nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op,
764    struct sockopt_data *sd)
765{
766
767	return (EOPNOTSUPP);
768}
769
770/*
771 * Lists all NPTv6 instances currently available in kernel.
772 * Data layout (v0)(current):
773 * Request: [ ipfw_obj_lheader ]
774 * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ]
775 *
776 * Returns 0 on success
777 */
778static int
779nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
780    struct sockopt_data *sd)
781{
782	ipfw_obj_lheader *olh;
783	struct nptv6_dump_arg da;
784
785	/* Check minimum header size */
786	if (sd->valsize < sizeof(ipfw_obj_lheader))
787		return (EINVAL);
788
789	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
790
791	IPFW_UH_RLOCK(ch);
792	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
793	    IPFW_TLV_NPTV6_NAME);
794	olh->objsize = sizeof(ipfw_nptv6_cfg);
795	olh->size = sizeof(*olh) + olh->count * olh->objsize;
796
797	if (sd->valsize < olh->size) {
798		IPFW_UH_RUNLOCK(ch);
799		return (ENOMEM);
800	}
801	memset(&da, 0, sizeof(da));
802	da.ch = ch;
803	da.sd = sd;
804	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
805	    &da, IPFW_TLV_NPTV6_NAME);
806	IPFW_UH_RUNLOCK(ch);
807
808	return (0);
809}
810
811#define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
812	(_stats)->_field = NPTV6STAT_FETCH(_cfg, _field)
813static void
814export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
815    struct ipfw_nptv6_stats *stats)
816{
817
818	__COPY_STAT_FIELD(cfg, stats, in2ex);
819	__COPY_STAT_FIELD(cfg, stats, ex2in);
820	__COPY_STAT_FIELD(cfg, stats, dropped);
821}
822
823/*
824 * Get NPTv6 statistics.
825 * Data layout (v0)(current):
826 * Request: [ ipfw_obj_header ]
827 * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
828 *
829 * Returns 0 on success
830 */
831static int
832nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
833    struct sockopt_data *sd)
834{
835	struct ipfw_nptv6_stats stats;
836	struct nptv6_cfg *cfg;
837	ipfw_obj_header *oh;
838	ipfw_obj_ctlv *ctlv;
839	size_t sz;
840
841	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
842	if (sd->valsize % sizeof(uint64_t))
843		return (EINVAL);
844	if (sd->valsize < sz)
845		return (ENOMEM);
846	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
847	if (oh == NULL)
848		return (EINVAL);
849	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
850	    oh->ntlv.set >= IPFW_MAX_SETS)
851		return (EINVAL);
852	memset(&stats, 0, sizeof(stats));
853
854	IPFW_UH_RLOCK(ch);
855	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
856	if (cfg == NULL) {
857		IPFW_UH_RUNLOCK(ch);
858		return (ESRCH);
859	}
860	export_stats(ch, cfg, &stats);
861	IPFW_UH_RUNLOCK(ch);
862
863	ctlv = (ipfw_obj_ctlv *)(oh + 1);
864	memset(ctlv, 0, sizeof(*ctlv));
865	ctlv->head.type = IPFW_TLV_COUNTERS;
866	ctlv->head.length = sz - sizeof(ipfw_obj_header);
867	ctlv->count = sizeof(stats) / sizeof(uint64_t);
868	ctlv->objsize = sizeof(uint64_t);
869	ctlv->version = 1;
870	memcpy(ctlv + 1, &stats, sizeof(stats));
871	return (0);
872}
873
874/*
875 * Reset NPTv6 statistics.
876 * Data layout (v0)(current):
877 * Request: [ ipfw_obj_header ]
878 *
879 * Returns 0 on success
880 */
881static int
882nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
883    struct sockopt_data *sd)
884{
885	struct nptv6_cfg *cfg;
886	ipfw_obj_header *oh;
887
888	if (sd->valsize != sizeof(*oh))
889		return (EINVAL);
890	oh = (ipfw_obj_header *)sd->kbuf;
891	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
892	    oh->ntlv.set >= IPFW_MAX_SETS)
893		return (EINVAL);
894
895	IPFW_UH_WLOCK(ch);
896	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
897	if (cfg == NULL) {
898		IPFW_UH_WUNLOCK(ch);
899		return (ESRCH);
900	}
901	COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS);
902	IPFW_UH_WUNLOCK(ch);
903	return (0);
904}
905
906static struct ipfw_sopt_handler	scodes[] = {
907	{ IP_FW_NPTV6_CREATE, 0,	HDIR_SET,	nptv6_create },
908	{ IP_FW_NPTV6_DESTROY,0,	HDIR_SET,	nptv6_destroy },
909	{ IP_FW_NPTV6_CONFIG, 0,	HDIR_BOTH,	nptv6_config },
910	{ IP_FW_NPTV6_LIST,   0,	HDIR_GET,	nptv6_list },
911	{ IP_FW_NPTV6_STATS,  0,	HDIR_GET,	nptv6_stats },
912	{ IP_FW_NPTV6_RESET_STATS,0,	HDIR_SET,	nptv6_reset_stats },
913};
914
915static int
916nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
917{
918	ipfw_insn *icmd;
919
920	icmd = cmd - 1;
921	NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d",
922	    cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1);
923	if (icmd->opcode != O_EXTERNAL_ACTION ||
924	    icmd->arg1 != V_nptv6_eid)
925		return (1);
926
927	*puidx = cmd->arg1;
928	*ptype = 0;
929	return (0);
930}
931
932static void
933nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx)
934{
935
936	cmd->arg1 = idx;
937	NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
938}
939
940static int
941nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
942    struct named_object **pno)
943{
944	int err;
945
946	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
947	    IPFW_TLV_NPTV6_NAME, pno);
948	NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err);
949	return (err);
950}
951
952static struct named_object *
953nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
954{
955	struct namedobj_instance *ni;
956	struct named_object *no;
957
958	IPFW_UH_WLOCK_ASSERT(ch);
959	ni = CHAIN_TO_SRV(ch);
960	no = ipfw_objhash_lookup_kidx(ni, idx);
961	KASSERT(no != NULL, ("NPT with index %d not found", idx));
962
963	NPTV6_DEBUG("kidx %u -> %s", idx, no->name);
964	return (no);
965}
966
967static int
968nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
969    enum ipfw_sets_cmd cmd)
970{
971
972	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME,
973	    set, new_set, cmd));
974}
975
976static struct opcode_obj_rewrite opcodes[] = {
977	{
978		.opcode	= O_EXTERNAL_INSTANCE,
979		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
980		.classifier = nptv6_classify,
981		.update = nptv6_update_arg1,
982		.find_byname = nptv6_findbyname,
983		.find_bykidx = nptv6_findbykidx,
984		.manage_sets = nptv6_manage_sets,
985	},
986};
987
988static int
989destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
990    void *arg)
991{
992	struct nptv6_cfg *cfg;
993	struct ip_fw_chain *ch;
994
995	ch = (struct ip_fw_chain *)arg;
996	IPFW_UH_WLOCK_ASSERT(ch);
997
998	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
999	SRV_OBJECT(ch, no->kidx) = NULL;
1000	ipfw_objhash_del(ni, &cfg->no);
1001	ipfw_objhash_free_idx(ni, cfg->no.kidx);
1002	nptv6_free_config(cfg);
1003	return (0);
1004}
1005
1006int
1007nptv6_init(struct ip_fw_chain *ch, int first)
1008{
1009
1010	V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6");
1011	if (V_nptv6_eid == 0)
1012		return (ENXIO);
1013	IPFW_ADD_SOPT_HANDLER(first, scodes);
1014	IPFW_ADD_OBJ_REWRITER(first, opcodes);
1015	return (0);
1016}
1017
1018void
1019nptv6_uninit(struct ip_fw_chain *ch, int last)
1020{
1021
1022	if (last && nptv6_ifaddr_event != NULL)
1023		EVENTHANDLER_DEREGISTER(ifaddr_event_ext, nptv6_ifaddr_event);
1024	IPFW_DEL_OBJ_REWRITER(last, opcodes);
1025	IPFW_DEL_SOPT_HANDLER(last, scodes);
1026	ipfw_del_eaction(ch, V_nptv6_eid);
1027	/*
1028	 * Since we already have deregistered external action,
1029	 * our named objects become unaccessible via rules, because
1030	 * all rules were truncated by ipfw_del_eaction().
1031	 * So, we can unlink and destroy our named objects without holding
1032	 * IPFW_WLOCK().
1033	 */
1034	IPFW_UH_WLOCK(ch);
1035	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
1036	    IPFW_TLV_NPTV6_NAME);
1037	V_nptv6_eid = 0;
1038	IPFW_UH_WUNLOCK(ch);
1039}
1040