1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002,2003 Henning Brauer
6 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 *	$OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $
38 */
39
40#include <sys/cdefs.h>
41#include "opt_inet.h"
42#include "opt_inet6.h"
43#include "opt_bpf.h"
44#include "opt_pf.h"
45
46#include <sys/param.h>
47#include <sys/_bitset.h>
48#include <sys/bitset.h>
49#include <sys/bus.h>
50#include <sys/conf.h>
51#include <sys/endian.h>
52#include <sys/fcntl.h>
53#include <sys/filio.h>
54#include <sys/hash.h>
55#include <sys/interrupt.h>
56#include <sys/jail.h>
57#include <sys/kernel.h>
58#include <sys/kthread.h>
59#include <sys/lock.h>
60#include <sys/mbuf.h>
61#include <sys/module.h>
62#include <sys/nv.h>
63#include <sys/proc.h>
64#include <sys/sdt.h>
65#include <sys/smp.h>
66#include <sys/socket.h>
67#include <sys/sysctl.h>
68#include <sys/md5.h>
69#include <sys/ucred.h>
70
71#include <net/if.h>
72#include <net/if_var.h>
73#include <net/if_private.h>
74#include <net/vnet.h>
75#include <net/route.h>
76#include <net/pfil.h>
77#include <net/pfvar.h>
78#include <net/if_pfsync.h>
79#include <net/if_pflog.h>
80
81#include <netinet/in.h>
82#include <netinet/ip.h>
83#include <netinet/ip_var.h>
84#include <netinet6/ip6_var.h>
85#include <netinet/ip_icmp.h>
86#include <netpfil/pf/pf_nl.h>
87#include <netpfil/pf/pf_nv.h>
88
89#ifdef INET6
90#include <netinet/ip6.h>
91#endif /* INET6 */
92
93#ifdef ALTQ
94#include <net/altq/altq.h>
95#endif
96
97SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int");
98SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int");
99SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int");
100SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int");
101
102static struct pf_kpool	*pf_get_kpool(const char *, u_int32_t, u_int8_t,
103			    u_int32_t, u_int8_t, u_int8_t, u_int8_t);
104
105static void		 pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *);
106static void		 pf_empty_kpool(struct pf_kpalist *);
107static int		 pfioctl(struct cdev *, u_long, caddr_t, int,
108			    struct thread *);
109static int		 pf_begin_eth(uint32_t *, const char *);
110static void		 pf_rollback_eth_cb(struct epoch_context *);
111static int		 pf_rollback_eth(uint32_t, const char *);
112static int		 pf_commit_eth(uint32_t, const char *);
113static void		 pf_free_eth_rule(struct pf_keth_rule *);
114#ifdef ALTQ
115static int		 pf_begin_altq(u_int32_t *);
116static int		 pf_rollback_altq(u_int32_t);
117static int		 pf_commit_altq(u_int32_t);
118static int		 pf_enable_altq(struct pf_altq *);
119static int		 pf_disable_altq(struct pf_altq *);
120static uint16_t		 pf_qname2qid(const char *);
121static void		 pf_qid_unref(uint16_t);
122#endif /* ALTQ */
123static int		 pf_begin_rules(u_int32_t *, int, const char *);
124static int		 pf_rollback_rules(u_int32_t, int, char *);
125static int		 pf_setup_pfsync_matching(struct pf_kruleset *);
126static void		 pf_hash_rule_rolling(MD5_CTX *, struct pf_krule *);
127static void		 pf_hash_rule(struct pf_krule *);
128static void		 pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
129static int		 pf_commit_rules(u_int32_t, int, char *);
130static int		 pf_addr_setup(struct pf_kruleset *,
131			    struct pf_addr_wrap *, sa_family_t);
132static void		 pf_addr_copyout(struct pf_addr_wrap *);
133static void		 pf_src_node_copy(const struct pf_ksrc_node *,
134			    struct pf_src_node *);
135#ifdef ALTQ
136static int		 pf_export_kaltq(struct pf_altq *,
137			    struct pfioc_altq_v1 *, size_t);
138static int		 pf_import_kaltq(struct pfioc_altq_v1 *,
139			    struct pf_altq *, size_t);
140#endif /* ALTQ */
141
142VNET_DEFINE(struct pf_krule,	pf_default_rule);
143
144static __inline int             pf_krule_compare(struct pf_krule *,
145				    struct pf_krule *);
146
147RB_GENERATE(pf_krule_global, pf_krule, entry_global, pf_krule_compare);
148
149#ifdef ALTQ
150VNET_DEFINE_STATIC(int,		pf_altq_running);
151#define	V_pf_altq_running	VNET(pf_altq_running)
152#endif
153
154#define	TAGID_MAX	 50000
155struct pf_tagname {
156	TAILQ_ENTRY(pf_tagname)	namehash_entries;
157	TAILQ_ENTRY(pf_tagname)	taghash_entries;
158	char			name[PF_TAG_NAME_SIZE];
159	uint16_t		tag;
160	int			ref;
161};
162
163struct pf_tagset {
164	TAILQ_HEAD(, pf_tagname)	*namehash;
165	TAILQ_HEAD(, pf_tagname)	*taghash;
166	unsigned int			 mask;
167	uint32_t			 seed;
168	BITSET_DEFINE(, TAGID_MAX)	 avail;
169};
170
171VNET_DEFINE(struct pf_tagset, pf_tags);
172#define	V_pf_tags	VNET(pf_tags)
173static unsigned int	pf_rule_tag_hashsize;
174#define	PF_RULE_TAG_HASH_SIZE_DEFAULT	128
175SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
176    &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
177    "Size of pf(4) rule tag hashtable");
178
179#ifdef ALTQ
180VNET_DEFINE(struct pf_tagset, pf_qids);
181#define	V_pf_qids	VNET(pf_qids)
182static unsigned int	pf_queue_tag_hashsize;
183#define	PF_QUEUE_TAG_HASH_SIZE_DEFAULT	128
184SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
185    &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
186    "Size of pf(4) queue tag hashtable");
187#endif
188VNET_DEFINE(uma_zone_t,	 pf_tag_z);
189#define	V_pf_tag_z		 VNET(pf_tag_z)
190static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
191static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
192
193#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
194#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
195#endif
196
197VNET_DEFINE_STATIC(bool, pf_filter_local) = false;
198#define V_pf_filter_local	VNET(pf_filter_local)
199SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW,
200    &VNET_NAME(pf_filter_local), false,
201    "Enable filtering for packets delivered to local network stack");
202
203#ifdef PF_DEFAULT_TO_DROP
204VNET_DEFINE_STATIC(bool, default_to_drop) = true;
205#else
206VNET_DEFINE_STATIC(bool, default_to_drop);
207#endif
208#define	V_default_to_drop VNET(default_to_drop)
209SYSCTL_BOOL(_net_pf, OID_AUTO, default_to_drop, CTLFLAG_RDTUN | CTLFLAG_VNET,
210    &VNET_NAME(default_to_drop), false,
211    "Make the default rule drop all packets.");
212
213static void		 pf_init_tagset(struct pf_tagset *, unsigned int *,
214			    unsigned int);
215static void		 pf_cleanup_tagset(struct pf_tagset *);
216static uint16_t		 tagname2hashindex(const struct pf_tagset *, const char *);
217static uint16_t		 tag2hashindex(const struct pf_tagset *, uint16_t);
218static u_int16_t	 tagname2tag(struct pf_tagset *, const char *);
219static u_int16_t	 pf_tagname2tag(const char *);
220static void		 tag_unref(struct pf_tagset *, u_int16_t);
221
222#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
223
224struct cdev *pf_dev;
225
226/*
227 * XXX - These are new and need to be checked when moveing to a new version
228 */
229static void		 pf_clear_all_states(void);
230static int		 pf_killstates_row(struct pf_kstate_kill *,
231			    struct pf_idhash *);
232static int		 pf_killstates_nv(struct pfioc_nv *);
233static int		 pf_clearstates_nv(struct pfioc_nv *);
234static int		 pf_getstate(struct pfioc_nv *);
235static int		 pf_getstatus(struct pfioc_nv *);
236static int		 pf_clear_tables(void);
237static void		 pf_clear_srcnodes(struct pf_ksrc_node *);
238static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
239static int		 pf_keepcounters(struct pfioc_nv *);
240static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
241
242/*
243 * Wrapper functions for pfil(9) hooks
244 */
245static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp,
246    int flags, void *ruleset __unused, struct inpcb *inp);
247static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
248    int flags, void *ruleset __unused, struct inpcb *inp);
249#ifdef INET
250static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
251    int flags, void *ruleset __unused, struct inpcb *inp);
252static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
253    int flags, void *ruleset __unused, struct inpcb *inp);
254#endif
255#ifdef INET6
256static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
257    int flags, void *ruleset __unused, struct inpcb *inp);
258static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
259    int flags, void *ruleset __unused, struct inpcb *inp);
260#endif
261
262static void		hook_pf_eth(void);
263static void		hook_pf(void);
264static void		dehook_pf_eth(void);
265static void		dehook_pf(void);
266static int		shutdown_pf(void);
267static int		pf_load(void);
268static void		pf_unload(void);
269
270static struct cdevsw pf_cdevsw = {
271	.d_ioctl =	pfioctl,
272	.d_name =	PF_NAME,
273	.d_version =	D_VERSION,
274};
275
276VNET_DEFINE_STATIC(bool, pf_pfil_hooked);
277#define V_pf_pfil_hooked	VNET(pf_pfil_hooked)
278VNET_DEFINE_STATIC(bool, pf_pfil_eth_hooked);
279#define V_pf_pfil_eth_hooked	VNET(pf_pfil_eth_hooked)
280
281/*
282 * We need a flag that is neither hooked nor running to know when
283 * the VNET is "valid".  We primarily need this to control (global)
284 * external event, e.g., eventhandlers.
285 */
286VNET_DEFINE(int, pf_vnet_active);
287#define V_pf_vnet_active	VNET(pf_vnet_active)
288
289int pf_end_threads;
290struct proc *pf_purge_proc;
291
292VNET_DEFINE(struct rmlock, pf_rules_lock);
293VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock);
294#define	V_pf_ioctl_lock		VNET(pf_ioctl_lock)
295struct sx			pf_end_lock;
296
297/* pfsync */
298VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr);
299VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr);
300VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr);
301VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr);
302VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr);
303VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr);
304VNET_DEFINE(pflow_export_state_t *, pflow_export_state_ptr);
305pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr;
306
307/* pflog */
308pflog_packet_t			*pflog_packet_ptr = NULL;
309
310/*
311 * Copy a user-provided string, returning an error if truncation would occur.
312 * Avoid scanning past "sz" bytes in the source string since there's no
313 * guarantee that it's nul-terminated.
314 */
315static int
316pf_user_strcpy(char *dst, const char *src, size_t sz)
317{
318	if (strnlen(src, sz) == sz)
319		return (EINVAL);
320	(void)strlcpy(dst, src, sz);
321	return (0);
322}
323
324static void
325pfattach_vnet(void)
326{
327	u_int32_t *my_timeout = V_pf_default_rule.timeout;
328
329	bzero(&V_pf_status, sizeof(V_pf_status));
330
331	pf_initialize();
332	pfr_initialize();
333	pfi_initialize_vnet();
334	pf_normalize_init();
335	pf_syncookies_init();
336
337	V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
338	V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
339
340	RB_INIT(&V_pf_anchors);
341	pf_init_kruleset(&pf_main_ruleset);
342
343	pf_init_keth(V_pf_keth);
344
345	/* default rule should never be garbage collected */
346	V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
347	V_pf_default_rule.action = V_default_to_drop ? PF_DROP : PF_PASS;
348	V_pf_default_rule.nr = -1;
349	V_pf_default_rule.rtableid = -1;
350
351	pf_counter_u64_init(&V_pf_default_rule.evaluations, M_WAITOK);
352	for (int i = 0; i < 2; i++) {
353		pf_counter_u64_init(&V_pf_default_rule.packets[i], M_WAITOK);
354		pf_counter_u64_init(&V_pf_default_rule.bytes[i], M_WAITOK);
355	}
356	V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
357	V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
358	V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);
359
360	V_pf_default_rule.timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
361	    M_WAITOK | M_ZERO);
362
363#ifdef PF_WANT_32_TO_64_COUNTER
364	V_pf_kifmarker = malloc(sizeof(*V_pf_kifmarker), PFI_MTYPE, M_WAITOK | M_ZERO);
365	V_pf_rulemarker = malloc(sizeof(*V_pf_rulemarker), M_PFRULE, M_WAITOK | M_ZERO);
366	PF_RULES_WLOCK();
367	LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
368	LIST_INSERT_HEAD(&V_pf_allrulelist, &V_pf_default_rule, allrulelist);
369	V_pf_allrulecount++;
370	LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
371	PF_RULES_WUNLOCK();
372#endif
373
374	/* initialize default timeouts */
375	my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
376	my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
377	my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
378	my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
379	my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
380	my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
381	my_timeout[PFTM_SCTP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
382	my_timeout[PFTM_SCTP_OPENING] = PFTM_TCP_OPENING_VAL;
383	my_timeout[PFTM_SCTP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
384	my_timeout[PFTM_SCTP_CLOSING] = PFTM_TCP_CLOSING_VAL;
385	my_timeout[PFTM_SCTP_CLOSED] = PFTM_TCP_CLOSED_VAL;
386	my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
387	my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
388	my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
389	my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
390	my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
391	my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
392	my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
393	my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
394	my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
395	my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
396	my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
397	my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
398	my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
399	my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
400
401	V_pf_status.debug = PF_DEBUG_URGENT;
402	/*
403	 * XXX This is different than in OpenBSD where reassembly is enabled by
404	 * defult. In FreeBSD we expect people to still use scrub rules and
405	 * switch to the new syntax later. Only when they switch they must
406	 * explicitly enable reassemle. We could change the default once the
407	 * scrub rule functionality is hopefully removed some day in future.
408	 */
409	V_pf_status.reass = 0;
410
411	V_pf_pfil_hooked = false;
412	V_pf_pfil_eth_hooked = false;
413
414	/* XXX do our best to avoid a conflict */
415	V_pf_status.hostid = arc4random();
416
417	for (int i = 0; i < PFRES_MAX; i++)
418		V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK);
419	for (int i = 0; i < KLCNT_MAX; i++)
420		V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK);
421	for (int i = 0; i < FCNT_MAX; i++)
422		pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK);
423	for (int i = 0; i < SCNT_MAX; i++)
424		V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);
425
426	if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET,
427	    INTR_MPSAFE, &V_pf_swi_cookie) != 0)
428		/* XXXGL: leaked all above. */
429		return;
430}
431
432static struct pf_kpool *
433pf_get_kpool(const char *anchor, u_int32_t ticket, u_int8_t rule_action,
434    u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
435    u_int8_t check_ticket)
436{
437	struct pf_kruleset	*ruleset;
438	struct pf_krule		*rule;
439	int			 rs_num;
440
441	ruleset = pf_find_kruleset(anchor);
442	if (ruleset == NULL)
443		return (NULL);
444	rs_num = pf_get_ruleset_number(rule_action);
445	if (rs_num >= PF_RULESET_MAX)
446		return (NULL);
447	if (active) {
448		if (check_ticket && ticket !=
449		    ruleset->rules[rs_num].active.ticket)
450			return (NULL);
451		if (r_last)
452			rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
453			    pf_krulequeue);
454		else
455			rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
456	} else {
457		if (check_ticket && ticket !=
458		    ruleset->rules[rs_num].inactive.ticket)
459			return (NULL);
460		if (r_last)
461			rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
462			    pf_krulequeue);
463		else
464			rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
465	}
466	if (!r_last) {
467		while ((rule != NULL) && (rule->nr != rule_number))
468			rule = TAILQ_NEXT(rule, entries);
469	}
470	if (rule == NULL)
471		return (NULL);
472
473	return (&rule->rpool);
474}
475
476static void
477pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb)
478{
479	struct pf_kpooladdr	*mv_pool_pa;
480
481	while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) {
482		TAILQ_REMOVE(poola, mv_pool_pa, entries);
483		TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries);
484	}
485}
486
487static void
488pf_empty_kpool(struct pf_kpalist *poola)
489{
490	struct pf_kpooladdr *pa;
491
492	while ((pa = TAILQ_FIRST(poola)) != NULL) {
493		switch (pa->addr.type) {
494		case PF_ADDR_DYNIFTL:
495			pfi_dynaddr_remove(pa->addr.p.dyn);
496			break;
497		case PF_ADDR_TABLE:
498			/* XXX: this could be unfinished pooladdr on pabuf */
499			if (pa->addr.p.tbl != NULL)
500				pfr_detach_table(pa->addr.p.tbl);
501			break;
502		}
503		if (pa->kif)
504			pfi_kkif_unref(pa->kif);
505		TAILQ_REMOVE(poola, pa, entries);
506		free(pa, M_PFRULE);
507	}
508}
509
510static void
511pf_unlink_rule_locked(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
512{
513
514	PF_RULES_WASSERT();
515	PF_UNLNKDRULES_ASSERT();
516
517	TAILQ_REMOVE(rulequeue, rule, entries);
518
519	rule->rule_ref |= PFRULE_REFS;
520	TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries);
521}
522
523static void
524pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
525{
526
527	PF_RULES_WASSERT();
528
529	PF_UNLNKDRULES_LOCK();
530	pf_unlink_rule_locked(rulequeue, rule);
531	PF_UNLNKDRULES_UNLOCK();
532}
533
534static void
535pf_free_eth_rule(struct pf_keth_rule *rule)
536{
537	PF_RULES_WASSERT();
538
539	if (rule == NULL)
540		return;
541
542	if (rule->tag)
543		tag_unref(&V_pf_tags, rule->tag);
544	if (rule->match_tag)
545		tag_unref(&V_pf_tags, rule->match_tag);
546#ifdef ALTQ
547	pf_qid_unref(rule->qid);
548#endif
549
550	if (rule->bridge_to)
551		pfi_kkif_unref(rule->bridge_to);
552	if (rule->kif)
553		pfi_kkif_unref(rule->kif);
554
555	if (rule->ipsrc.addr.type == PF_ADDR_TABLE)
556		pfr_detach_table(rule->ipsrc.addr.p.tbl);
557	if (rule->ipdst.addr.type == PF_ADDR_TABLE)
558		pfr_detach_table(rule->ipdst.addr.p.tbl);
559
560	counter_u64_free(rule->evaluations);
561	for (int i = 0; i < 2; i++) {
562		counter_u64_free(rule->packets[i]);
563		counter_u64_free(rule->bytes[i]);
564	}
565	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);
566	pf_keth_anchor_remove(rule);
567
568	free(rule, M_PFRULE);
569}
570
571void
572pf_free_rule(struct pf_krule *rule)
573{
574
575	PF_RULES_WASSERT();
576	PF_CONFIG_ASSERT();
577
578	if (rule->tag)
579		tag_unref(&V_pf_tags, rule->tag);
580	if (rule->match_tag)
581		tag_unref(&V_pf_tags, rule->match_tag);
582#ifdef ALTQ
583	if (rule->pqid != rule->qid)
584		pf_qid_unref(rule->pqid);
585	pf_qid_unref(rule->qid);
586#endif
587	switch (rule->src.addr.type) {
588	case PF_ADDR_DYNIFTL:
589		pfi_dynaddr_remove(rule->src.addr.p.dyn);
590		break;
591	case PF_ADDR_TABLE:
592		pfr_detach_table(rule->src.addr.p.tbl);
593		break;
594	}
595	switch (rule->dst.addr.type) {
596	case PF_ADDR_DYNIFTL:
597		pfi_dynaddr_remove(rule->dst.addr.p.dyn);
598		break;
599	case PF_ADDR_TABLE:
600		pfr_detach_table(rule->dst.addr.p.tbl);
601		break;
602	}
603	if (rule->overload_tbl)
604		pfr_detach_table(rule->overload_tbl);
605	if (rule->kif)
606		pfi_kkif_unref(rule->kif);
607	pf_kanchor_remove(rule);
608	pf_empty_kpool(&rule->rpool.list);
609
610	pf_krule_free(rule);
611}
612
613static void
614pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size,
615    unsigned int default_size)
616{
617	unsigned int i;
618	unsigned int hashsize;
619
620	if (*tunable_size == 0 || !powerof2(*tunable_size))
621		*tunable_size = default_size;
622
623	hashsize = *tunable_size;
624	ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH,
625	    M_WAITOK);
626	ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH,
627	    M_WAITOK);
628	ts->mask = hashsize - 1;
629	ts->seed = arc4random();
630	for (i = 0; i < hashsize; i++) {
631		TAILQ_INIT(&ts->namehash[i]);
632		TAILQ_INIT(&ts->taghash[i]);
633	}
634	BIT_FILL(TAGID_MAX, &ts->avail);
635}
636
637static void
638pf_cleanup_tagset(struct pf_tagset *ts)
639{
640	unsigned int i;
641	unsigned int hashsize;
642	struct pf_tagname *t, *tmp;
643
644	/*
645	 * Only need to clean up one of the hashes as each tag is hashed
646	 * into each table.
647	 */
648	hashsize = ts->mask + 1;
649	for (i = 0; i < hashsize; i++)
650		TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp)
651			uma_zfree(V_pf_tag_z, t);
652
653	free(ts->namehash, M_PFHASH);
654	free(ts->taghash, M_PFHASH);
655}
656
657static uint16_t
658tagname2hashindex(const struct pf_tagset *ts, const char *tagname)
659{
660	size_t len;
661
662	len = strnlen(tagname, PF_TAG_NAME_SIZE - 1);
663	return (murmur3_32_hash(tagname, len, ts->seed) & ts->mask);
664}
665
666static uint16_t
667tag2hashindex(const struct pf_tagset *ts, uint16_t tag)
668{
669
670	return (tag & ts->mask);
671}
672
673static u_int16_t
674tagname2tag(struct pf_tagset *ts, const char *tagname)
675{
676	struct pf_tagname	*tag;
677	u_int32_t		 index;
678	u_int16_t		 new_tagid;
679
680	PF_RULES_WASSERT();
681
682	index = tagname2hashindex(ts, tagname);
683	TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
684		if (strcmp(tagname, tag->name) == 0) {
685			tag->ref++;
686			return (tag->tag);
687		}
688
689	/*
690	 * new entry
691	 *
692	 * to avoid fragmentation, we do a linear search from the beginning
693	 * and take the first free slot we find.
694	 */
695	new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
696	/*
697	 * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
698	 * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
699	 * set.  It may also return a bit number greater than TAGID_MAX due
700	 * to rounding of the number of bits in the vector up to a multiple
701	 * of the vector word size at declaration/allocation time.
702	 */
703	if ((new_tagid == 0) || (new_tagid > TAGID_MAX))
704		return (0);
705
706	/* Mark the tag as in use.  Bits are 0-based for BIT_CLR() */
707	BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);
708
709	/* allocate and fill new struct pf_tagname */
710	tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
711	if (tag == NULL)
712		return (0);
713	strlcpy(tag->name, tagname, sizeof(tag->name));
714	tag->tag = new_tagid;
715	tag->ref = 1;
716
717	/* Insert into namehash */
718	TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);
719
720	/* Insert into taghash */
721	index = tag2hashindex(ts, new_tagid);
722	TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);
723
724	return (tag->tag);
725}
726
727static void
728tag_unref(struct pf_tagset *ts, u_int16_t tag)
729{
730	struct pf_tagname	*t;
731	uint16_t		 index;
732
733	PF_RULES_WASSERT();
734
735	index = tag2hashindex(ts, tag);
736	TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
737		if (tag == t->tag) {
738			if (--t->ref == 0) {
739				TAILQ_REMOVE(&ts->taghash[index], t,
740				    taghash_entries);
741				index = tagname2hashindex(ts, t->name);
742				TAILQ_REMOVE(&ts->namehash[index], t,
743				    namehash_entries);
744				/* Bits are 0-based for BIT_SET() */
745				BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
746				uma_zfree(V_pf_tag_z, t);
747			}
748			break;
749		}
750}
751
752static uint16_t
753pf_tagname2tag(const char *tagname)
754{
755	return (tagname2tag(&V_pf_tags, tagname));
756}
757
758static int
759pf_begin_eth(uint32_t *ticket, const char *anchor)
760{
761	struct pf_keth_rule *rule, *tmp;
762	struct pf_keth_ruleset *rs;
763
764	PF_RULES_WASSERT();
765
766	rs = pf_find_or_create_keth_ruleset(anchor);
767	if (rs == NULL)
768		return (EINVAL);
769
770	/* Purge old inactive rules. */
771	TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries,
772	    tmp) {
773		TAILQ_REMOVE(rs->inactive.rules, rule,
774		    entries);
775		pf_free_eth_rule(rule);
776	}
777
778	*ticket = ++rs->inactive.ticket;
779	rs->inactive.open = 1;
780
781	return (0);
782}
783
784static void
785pf_rollback_eth_cb(struct epoch_context *ctx)
786{
787	struct pf_keth_ruleset *rs;
788
789	rs = __containerof(ctx, struct pf_keth_ruleset, epoch_ctx);
790
791	CURVNET_SET(rs->vnet);
792
793	PF_RULES_WLOCK();
794	pf_rollback_eth(rs->inactive.ticket,
795	    rs->anchor ? rs->anchor->path : "");
796	PF_RULES_WUNLOCK();
797
798	CURVNET_RESTORE();
799}
800
801static int
802pf_rollback_eth(uint32_t ticket, const char *anchor)
803{
804	struct pf_keth_rule *rule, *tmp;
805	struct pf_keth_ruleset *rs;
806
807	PF_RULES_WASSERT();
808
809	rs = pf_find_keth_ruleset(anchor);
810	if (rs == NULL)
811		return (EINVAL);
812
813	if (!rs->inactive.open ||
814	    ticket != rs->inactive.ticket)
815		return (0);
816
817	/* Purge old inactive rules. */
818	TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries,
819	    tmp) {
820		TAILQ_REMOVE(rs->inactive.rules, rule, entries);
821		pf_free_eth_rule(rule);
822	}
823
824	rs->inactive.open = 0;
825
826	pf_remove_if_empty_keth_ruleset(rs);
827
828	return (0);
829}
830
831#define	PF_SET_SKIP_STEPS(i)					\
832	do {							\
833		while (head[i] != cur) {			\
834			head[i]->skip[i].ptr = cur;		\
835			head[i] = TAILQ_NEXT(head[i], entries);	\
836		}						\
837	} while (0)
838
839static void
840pf_eth_calc_skip_steps(struct pf_keth_ruleq *rules)
841{
842	struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT];
843	int i;
844
845	cur = TAILQ_FIRST(rules);
846	prev = cur;
847	for (i = 0; i < PFE_SKIP_COUNT; ++i)
848		head[i] = cur;
849	while (cur != NULL) {
850		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
851			PF_SET_SKIP_STEPS(PFE_SKIP_IFP);
852		if (cur->direction != prev->direction)
853			PF_SET_SKIP_STEPS(PFE_SKIP_DIR);
854		if (cur->proto != prev->proto)
855			PF_SET_SKIP_STEPS(PFE_SKIP_PROTO);
856		if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0)
857			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR);
858		if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0)
859			PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR);
860		if (cur->ipsrc.neg != prev->ipsrc.neg ||
861		    pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr))
862			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR);
863		if (cur->ipdst.neg != prev->ipdst.neg ||
864		    pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr))
865			PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR);
866
867		prev = cur;
868		cur = TAILQ_NEXT(cur, entries);
869	}
870	for (i = 0; i < PFE_SKIP_COUNT; ++i)
871		PF_SET_SKIP_STEPS(i);
872}
873
874static int
875pf_commit_eth(uint32_t ticket, const char *anchor)
876{
877	struct pf_keth_ruleq *rules;
878	struct pf_keth_ruleset *rs;
879
880	rs = pf_find_keth_ruleset(anchor);
881	if (rs == NULL) {
882		return (EINVAL);
883	}
884
885	if (!rs->inactive.open ||
886	    ticket != rs->inactive.ticket)
887		return (EBUSY);
888
889	PF_RULES_WASSERT();
890
891	pf_eth_calc_skip_steps(rs->inactive.rules);
892
893	rules = rs->active.rules;
894	ck_pr_store_ptr(&rs->active.rules, rs->inactive.rules);
895	rs->inactive.rules = rules;
896	rs->inactive.ticket = rs->active.ticket;
897
898	/* Clean up inactive rules (i.e. previously active rules), only when
899	 * we're sure they're no longer used. */
900	NET_EPOCH_CALL(pf_rollback_eth_cb, &rs->epoch_ctx);
901
902	return (0);
903}
904
905#ifdef ALTQ
906static uint16_t
907pf_qname2qid(const char *qname)
908{
909	return (tagname2tag(&V_pf_qids, qname));
910}
911
912static void
913pf_qid_unref(uint16_t qid)
914{
915	tag_unref(&V_pf_qids, qid);
916}
917
918static int
919pf_begin_altq(u_int32_t *ticket)
920{
921	struct pf_altq	*altq, *tmp;
922	int		 error = 0;
923
924	PF_RULES_WASSERT();
925
926	/* Purge the old altq lists */
927	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
928		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
929			/* detach and destroy the discipline */
930			error = altq_remove(altq);
931		}
932		free(altq, M_PFALTQ);
933	}
934	TAILQ_INIT(V_pf_altq_ifs_inactive);
935	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
936		pf_qid_unref(altq->qid);
937		free(altq, M_PFALTQ);
938	}
939	TAILQ_INIT(V_pf_altqs_inactive);
940	if (error)
941		return (error);
942	*ticket = ++V_ticket_altqs_inactive;
943	V_altqs_inactive_open = 1;
944	return (0);
945}
946
947static int
948pf_rollback_altq(u_int32_t ticket)
949{
950	struct pf_altq	*altq, *tmp;
951	int		 error = 0;
952
953	PF_RULES_WASSERT();
954
955	if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
956		return (0);
957	/* Purge the old altq lists */
958	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
959		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
960			/* detach and destroy the discipline */
961			error = altq_remove(altq);
962		}
963		free(altq, M_PFALTQ);
964	}
965	TAILQ_INIT(V_pf_altq_ifs_inactive);
966	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
967		pf_qid_unref(altq->qid);
968		free(altq, M_PFALTQ);
969	}
970	TAILQ_INIT(V_pf_altqs_inactive);
971	V_altqs_inactive_open = 0;
972	return (error);
973}
974
975static int
976pf_commit_altq(u_int32_t ticket)
977{
978	struct pf_altqqueue	*old_altqs, *old_altq_ifs;
979	struct pf_altq		*altq, *tmp;
980	int			 err, error = 0;
981
982	PF_RULES_WASSERT();
983
984	if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
985		return (EBUSY);
986
987	/* swap altqs, keep the old. */
988	old_altqs = V_pf_altqs_active;
989	old_altq_ifs = V_pf_altq_ifs_active;
990	V_pf_altqs_active = V_pf_altqs_inactive;
991	V_pf_altq_ifs_active = V_pf_altq_ifs_inactive;
992	V_pf_altqs_inactive = old_altqs;
993	V_pf_altq_ifs_inactive = old_altq_ifs;
994	V_ticket_altqs_active = V_ticket_altqs_inactive;
995
996	/* Attach new disciplines */
997	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
998		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
999			/* attach the discipline */
1000			error = altq_pfattach(altq);
1001			if (error == 0 && V_pf_altq_running)
1002				error = pf_enable_altq(altq);
1003			if (error != 0)
1004				return (error);
1005		}
1006	}
1007
1008	/* Purge the old altq lists */
1009	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
1010		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
1011			/* detach and destroy the discipline */
1012			if (V_pf_altq_running)
1013				error = pf_disable_altq(altq);
1014			err = altq_pfdetach(altq);
1015			if (err != 0 && error == 0)
1016				error = err;
1017			err = altq_remove(altq);
1018			if (err != 0 && error == 0)
1019				error = err;
1020		}
1021		free(altq, M_PFALTQ);
1022	}
1023	TAILQ_INIT(V_pf_altq_ifs_inactive);
1024	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
1025		pf_qid_unref(altq->qid);
1026		free(altq, M_PFALTQ);
1027	}
1028	TAILQ_INIT(V_pf_altqs_inactive);
1029
1030	V_altqs_inactive_open = 0;
1031	return (error);
1032}
1033
1034static int
1035pf_enable_altq(struct pf_altq *altq)
1036{
1037	struct ifnet		*ifp;
1038	struct tb_profile	 tb;
1039	int			 error = 0;
1040
1041	if ((ifp = ifunit(altq->ifname)) == NULL)
1042		return (EINVAL);
1043
1044	if (ifp->if_snd.altq_type != ALTQT_NONE)
1045		error = altq_enable(&ifp->if_snd);
1046
1047	/* set tokenbucket regulator */
1048	if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
1049		tb.rate = altq->ifbandwidth;
1050		tb.depth = altq->tbrsize;
1051		error = tbr_set(&ifp->if_snd, &tb);
1052	}
1053
1054	return (error);
1055}
1056
1057static int
1058pf_disable_altq(struct pf_altq *altq)
1059{
1060	struct ifnet		*ifp;
1061	struct tb_profile	 tb;
1062	int			 error;
1063
1064	if ((ifp = ifunit(altq->ifname)) == NULL)
1065		return (EINVAL);
1066
1067	/*
1068	 * when the discipline is no longer referenced, it was overridden
1069	 * by a new one.  if so, just return.
1070	 */
1071	if (altq->altq_disc != ifp->if_snd.altq_disc)
1072		return (0);
1073
1074	error = altq_disable(&ifp->if_snd);
1075
1076	if (error == 0) {
1077		/* clear tokenbucket regulator */
1078		tb.rate = 0;
1079		error = tbr_set(&ifp->if_snd, &tb);
1080	}
1081
1082	return (error);
1083}
1084
1085static int
1086pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket,
1087    struct pf_altq *altq)
1088{
1089	struct ifnet	*ifp1;
1090	int		 error = 0;
1091
1092	/* Deactivate the interface in question */
1093	altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
1094	if ((ifp1 = ifunit(altq->ifname)) == NULL ||
1095	    (remove && ifp1 == ifp)) {
1096		altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
1097	} else {
1098		error = altq_add(ifp1, altq);
1099
1100		if (ticket != V_ticket_altqs_inactive)
1101			error = EBUSY;
1102
1103		if (error)
1104			free(altq, M_PFALTQ);
1105	}
1106
1107	return (error);
1108}
1109
1110void
1111pf_altq_ifnet_event(struct ifnet *ifp, int remove)
1112{
1113	struct pf_altq	*a1, *a2, *a3;
1114	u_int32_t	 ticket;
1115	int		 error = 0;
1116
1117	/*
1118	 * No need to re-evaluate the configuration for events on interfaces
1119	 * that do not support ALTQ, as it's not possible for such
1120	 * interfaces to be part of the configuration.
1121	 */
1122	if (!ALTQ_IS_READY(&ifp->if_snd))
1123		return;
1124
1125	/* Interrupt userland queue modifications */
1126	if (V_altqs_inactive_open)
1127		pf_rollback_altq(V_ticket_altqs_inactive);
1128
1129	/* Start new altq ruleset */
1130	if (pf_begin_altq(&ticket))
1131		return;
1132
1133	/* Copy the current active set */
1134	TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) {
1135		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
1136		if (a2 == NULL) {
1137			error = ENOMEM;
1138			break;
1139		}
1140		bcopy(a1, a2, sizeof(struct pf_altq));
1141
1142		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
1143		if (error)
1144			break;
1145
1146		TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries);
1147	}
1148	if (error)
1149		goto out;
1150	TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
1151		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
1152		if (a2 == NULL) {
1153			error = ENOMEM;
1154			break;
1155		}
1156		bcopy(a1, a2, sizeof(struct pf_altq));
1157
1158		if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
1159			error = EBUSY;
1160			free(a2, M_PFALTQ);
1161			break;
1162		}
1163		a2->altq_disc = NULL;
1164		TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) {
1165			if (strncmp(a3->ifname, a2->ifname,
1166				IFNAMSIZ) == 0) {
1167				a2->altq_disc = a3->altq_disc;
1168				break;
1169			}
1170		}
1171		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
1172		if (error)
1173			break;
1174
1175		TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
1176	}
1177
1178out:
1179	if (error != 0)
1180		pf_rollback_altq(ticket);
1181	else
1182		pf_commit_altq(ticket);
1183}
1184#endif /* ALTQ */
1185
1186static struct pf_krule_global *
1187pf_rule_tree_alloc(int flags)
1188{
1189	struct pf_krule_global *tree;
1190
1191	tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags);
1192	if (tree == NULL)
1193		return (NULL);
1194	RB_INIT(tree);
1195	return (tree);
1196}
1197
1198static void
1199pf_rule_tree_free(struct pf_krule_global *tree)
1200{
1201
1202	free(tree, M_TEMP);
1203}
1204
1205static int
1206pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
1207{
1208	struct pf_krule_global *tree;
1209	struct pf_kruleset	*rs;
1210	struct pf_krule		*rule;
1211
1212	PF_RULES_WASSERT();
1213
1214	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1215		return (EINVAL);
1216	tree = pf_rule_tree_alloc(M_NOWAIT);
1217	if (tree == NULL)
1218		return (ENOMEM);
1219	rs = pf_find_or_create_kruleset(anchor);
1220	if (rs == NULL) {
1221		free(tree, M_TEMP);
1222		return (EINVAL);
1223	}
1224	pf_rule_tree_free(rs->rules[rs_num].inactive.tree);
1225	rs->rules[rs_num].inactive.tree = tree;
1226
1227	while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
1228		pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
1229		rs->rules[rs_num].inactive.rcount--;
1230	}
1231	*ticket = ++rs->rules[rs_num].inactive.ticket;
1232	rs->rules[rs_num].inactive.open = 1;
1233	return (0);
1234}
1235
1236static int
1237pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
1238{
1239	struct pf_kruleset	*rs;
1240	struct pf_krule		*rule;
1241
1242	PF_RULES_WASSERT();
1243
1244	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1245		return (EINVAL);
1246	rs = pf_find_kruleset(anchor);
1247	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
1248	    rs->rules[rs_num].inactive.ticket != ticket)
1249		return (0);
1250	while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
1251		pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
1252		rs->rules[rs_num].inactive.rcount--;
1253	}
1254	rs->rules[rs_num].inactive.open = 0;
1255	return (0);
1256}
1257
1258#define PF_MD5_UPD(st, elm)						\
1259		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))
1260
1261#define PF_MD5_UPD_STR(st, elm)						\
1262		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))
1263
1264#define PF_MD5_UPD_HTONL(st, elm, stor) do {				\
1265		(stor) = htonl((st)->elm);				\
1266		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
1267} while (0)
1268
1269#define PF_MD5_UPD_HTONS(st, elm, stor) do {				\
1270		(stor) = htons((st)->elm);				\
1271		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
1272} while (0)
1273
1274static void
1275pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
1276{
1277	PF_MD5_UPD(pfr, addr.type);
1278	switch (pfr->addr.type) {
1279		case PF_ADDR_DYNIFTL:
1280			PF_MD5_UPD(pfr, addr.v.ifname);
1281			PF_MD5_UPD(pfr, addr.iflags);
1282			break;
1283		case PF_ADDR_TABLE:
1284			PF_MD5_UPD(pfr, addr.v.tblname);
1285			break;
1286		case PF_ADDR_ADDRMASK:
1287			/* XXX ignore af? */
1288			PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
1289			PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
1290			break;
1291	}
1292
1293	PF_MD5_UPD(pfr, port[0]);
1294	PF_MD5_UPD(pfr, port[1]);
1295	PF_MD5_UPD(pfr, neg);
1296	PF_MD5_UPD(pfr, port_op);
1297}
1298
1299static void
1300pf_hash_rule_rolling(MD5_CTX *ctx, struct pf_krule *rule)
1301{
1302	u_int16_t x;
1303	u_int32_t y;
1304
1305	pf_hash_rule_addr(ctx, &rule->src);
1306	pf_hash_rule_addr(ctx, &rule->dst);
1307	for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++)
1308		PF_MD5_UPD_STR(rule, label[i]);
1309	PF_MD5_UPD_STR(rule, ifname);
1310	PF_MD5_UPD_STR(rule, match_tagname);
1311	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
1312	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
1313	PF_MD5_UPD_HTONL(rule, prob, y);
1314	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
1315	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
1316	PF_MD5_UPD(rule, uid.op);
1317	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
1318	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
1319	PF_MD5_UPD(rule, gid.op);
1320	PF_MD5_UPD_HTONL(rule, rule_flag, y);
1321	PF_MD5_UPD(rule, action);
1322	PF_MD5_UPD(rule, direction);
1323	PF_MD5_UPD(rule, af);
1324	PF_MD5_UPD(rule, quick);
1325	PF_MD5_UPD(rule, ifnot);
1326	PF_MD5_UPD(rule, match_tag_not);
1327	PF_MD5_UPD(rule, natpass);
1328	PF_MD5_UPD(rule, keep_state);
1329	PF_MD5_UPD(rule, proto);
1330	PF_MD5_UPD(rule, type);
1331	PF_MD5_UPD(rule, code);
1332	PF_MD5_UPD(rule, flags);
1333	PF_MD5_UPD(rule, flagset);
1334	PF_MD5_UPD(rule, allow_opts);
1335	PF_MD5_UPD(rule, rt);
1336	PF_MD5_UPD(rule, tos);
1337	PF_MD5_UPD(rule, scrub_flags);
1338	PF_MD5_UPD(rule, min_ttl);
1339	PF_MD5_UPD(rule, set_tos);
1340	if (rule->anchor != NULL)
1341		PF_MD5_UPD_STR(rule, anchor->path);
1342}
1343
1344static void
1345pf_hash_rule(struct pf_krule *rule)
1346{
1347	MD5_CTX		ctx;
1348
1349	MD5Init(&ctx);
1350	pf_hash_rule_rolling(&ctx, rule);
1351	MD5Final(rule->md5sum, &ctx);
1352}
1353
1354static int
1355pf_krule_compare(struct pf_krule *a, struct pf_krule *b)
1356{
1357
1358	return (memcmp(a->md5sum, b->md5sum, PF_MD5_DIGEST_LENGTH));
1359}
1360
1361static int
1362pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
1363{
1364	struct pf_kruleset	*rs;
1365	struct pf_krule		*rule, **old_array, *old_rule;
1366	struct pf_krulequeue	*old_rules;
1367	struct pf_krule_global  *old_tree;
1368	int			 error;
1369	u_int32_t		 old_rcount;
1370
1371	PF_RULES_WASSERT();
1372
1373	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1374		return (EINVAL);
1375	rs = pf_find_kruleset(anchor);
1376	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
1377	    ticket != rs->rules[rs_num].inactive.ticket)
1378		return (EBUSY);
1379
1380	/* Calculate checksum for the main ruleset */
1381	if (rs == &pf_main_ruleset) {
1382		error = pf_setup_pfsync_matching(rs);
1383		if (error != 0)
1384			return (error);
1385	}
1386
1387	/* Swap rules, keep the old. */
1388	old_rules = rs->rules[rs_num].active.ptr;
1389	old_rcount = rs->rules[rs_num].active.rcount;
1390	old_array = rs->rules[rs_num].active.ptr_array;
1391	old_tree = rs->rules[rs_num].active.tree;
1392
1393	rs->rules[rs_num].active.ptr =
1394	    rs->rules[rs_num].inactive.ptr;
1395	rs->rules[rs_num].active.ptr_array =
1396	    rs->rules[rs_num].inactive.ptr_array;
1397	rs->rules[rs_num].active.tree =
1398	    rs->rules[rs_num].inactive.tree;
1399	rs->rules[rs_num].active.rcount =
1400	    rs->rules[rs_num].inactive.rcount;
1401
1402	/* Attempt to preserve counter information. */
1403	if (V_pf_status.keep_counters && old_tree != NULL) {
1404		TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr,
1405		    entries) {
1406			old_rule = RB_FIND(pf_krule_global, old_tree, rule);
1407			if (old_rule == NULL) {
1408				continue;
1409			}
1410			pf_counter_u64_critical_enter();
1411			pf_counter_u64_add_protected(&rule->evaluations,
1412			    pf_counter_u64_fetch(&old_rule->evaluations));
1413			pf_counter_u64_add_protected(&rule->packets[0],
1414			    pf_counter_u64_fetch(&old_rule->packets[0]));
1415			pf_counter_u64_add_protected(&rule->packets[1],
1416			    pf_counter_u64_fetch(&old_rule->packets[1]));
1417			pf_counter_u64_add_protected(&rule->bytes[0],
1418			    pf_counter_u64_fetch(&old_rule->bytes[0]));
1419			pf_counter_u64_add_protected(&rule->bytes[1],
1420			    pf_counter_u64_fetch(&old_rule->bytes[1]));
1421			pf_counter_u64_critical_exit();
1422		}
1423	}
1424
1425	rs->rules[rs_num].inactive.ptr = old_rules;
1426	rs->rules[rs_num].inactive.ptr_array = old_array;
1427	rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */
1428	rs->rules[rs_num].inactive.rcount = old_rcount;
1429
1430	rs->rules[rs_num].active.ticket =
1431	    rs->rules[rs_num].inactive.ticket;
1432	pf_calc_skip_steps(rs->rules[rs_num].active.ptr);
1433
1434	/* Purge the old rule list. */
1435	PF_UNLNKDRULES_LOCK();
1436	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
1437		pf_unlink_rule_locked(old_rules, rule);
1438	PF_UNLNKDRULES_UNLOCK();
1439	if (rs->rules[rs_num].inactive.ptr_array)
1440		free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
1441	rs->rules[rs_num].inactive.ptr_array = NULL;
1442	rs->rules[rs_num].inactive.rcount = 0;
1443	rs->rules[rs_num].inactive.open = 0;
1444	pf_remove_if_empty_kruleset(rs);
1445	free(old_tree, M_TEMP);
1446
1447	return (0);
1448}
1449
1450static int
1451pf_setup_pfsync_matching(struct pf_kruleset *rs)
1452{
1453	MD5_CTX			 ctx;
1454	struct pf_krule		*rule;
1455	int			 rs_cnt;
1456	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];
1457
1458	MD5Init(&ctx);
1459	for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
1460		/* XXX PF_RULESET_SCRUB as well? */
1461		if (rs_cnt == PF_RULESET_SCRUB)
1462			continue;
1463
1464		if (rs->rules[rs_cnt].inactive.ptr_array)
1465			free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
1466		rs->rules[rs_cnt].inactive.ptr_array = NULL;
1467
1468		if (rs->rules[rs_cnt].inactive.rcount) {
1469			rs->rules[rs_cnt].inactive.ptr_array =
1470			    mallocarray(rs->rules[rs_cnt].inactive.rcount,
1471			    sizeof(struct pf_rule **),
1472			    M_TEMP, M_NOWAIT);
1473
1474			if (!rs->rules[rs_cnt].inactive.ptr_array)
1475				return (ENOMEM);
1476		}
1477
1478		TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
1479		    entries) {
1480			pf_hash_rule_rolling(&ctx, rule);
1481			(rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
1482		}
1483	}
1484
1485	MD5Final(digest, &ctx);
1486	memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
1487	return (0);
1488}
1489
1490static int
1491pf_eth_addr_setup(struct pf_keth_ruleset *ruleset, struct pf_addr_wrap *addr)
1492{
1493	int error = 0;
1494
1495	switch (addr->type) {
1496	case PF_ADDR_TABLE:
1497		addr->p.tbl = pfr_eth_attach_table(ruleset, addr->v.tblname);
1498		if (addr->p.tbl == NULL)
1499			error = ENOMEM;
1500		break;
1501	default:
1502		error = EINVAL;
1503	}
1504
1505	return (error);
1506}
1507
1508static int
1509pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr,
1510    sa_family_t af)
1511{
1512	int error = 0;
1513
1514	switch (addr->type) {
1515	case PF_ADDR_TABLE:
1516		addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname);
1517		if (addr->p.tbl == NULL)
1518			error = ENOMEM;
1519		break;
1520	case PF_ADDR_DYNIFTL:
1521		error = pfi_dynaddr_setup(addr, af);
1522		break;
1523	}
1524
1525	return (error);
1526}
1527
1528static void
1529pf_addr_copyout(struct pf_addr_wrap *addr)
1530{
1531
1532	switch (addr->type) {
1533	case PF_ADDR_DYNIFTL:
1534		pfi_dynaddr_copyout(addr);
1535		break;
1536	case PF_ADDR_TABLE:
1537		pf_tbladdr_copyout(addr);
1538		break;
1539	}
1540}
1541
1542static void
1543pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out)
1544{
1545	int	secs = time_uptime, diff;
1546
1547	bzero(out, sizeof(struct pf_src_node));
1548
1549	bcopy(&in->addr, &out->addr, sizeof(struct pf_addr));
1550	bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr));
1551
1552	if (in->rule.ptr != NULL)
1553		out->rule.nr = in->rule.ptr->nr;
1554
1555	for (int i = 0; i < 2; i++) {
1556		out->bytes[i] = counter_u64_fetch(in->bytes[i]);
1557		out->packets[i] = counter_u64_fetch(in->packets[i]);
1558	}
1559
1560	out->states = in->states;
1561	out->conn = in->conn;
1562	out->af = in->af;
1563	out->ruletype = in->ruletype;
1564
1565	out->creation = secs - in->creation;
1566	if (out->expire > secs)
1567		out->expire -= secs;
1568	else
1569		out->expire = 0;
1570
1571	/* Adjust the connection rate estimate. */
1572	diff = secs - in->conn_rate.last;
1573	if (diff >= in->conn_rate.seconds)
1574		out->conn_rate.count = 0;
1575	else
1576		out->conn_rate.count -=
1577		    in->conn_rate.count * diff /
1578		    in->conn_rate.seconds;
1579}
1580
1581#ifdef ALTQ
1582/*
1583 * Handle export of struct pf_kaltq to user binaries that may be using any
1584 * version of struct pf_altq.
1585 */
1586static int
1587pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size)
1588{
1589	u_int32_t version;
1590
1591	if (ioc_size == sizeof(struct pfioc_altq_v0))
1592		version = 0;
1593	else
1594		version = pa->version;
1595
1596	if (version > PFIOC_ALTQ_VERSION)
1597		return (EINVAL);
1598
1599#define ASSIGN(x) exported_q->x = q->x
1600#define COPY(x) \
1601	bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x)))
1602#define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX)
1603#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX)
1604
1605	switch (version) {
1606	case 0: {
1607		struct pf_altq_v0 *exported_q =
1608		    &((struct pfioc_altq_v0 *)pa)->altq;
1609
1610		COPY(ifname);
1611
1612		ASSIGN(scheduler);
1613		ASSIGN(tbrsize);
1614		exported_q->tbrsize = SATU16(q->tbrsize);
1615		exported_q->ifbandwidth = SATU32(q->ifbandwidth);
1616
1617		COPY(qname);
1618		COPY(parent);
1619		ASSIGN(parent_qid);
1620		exported_q->bandwidth = SATU32(q->bandwidth);
1621		ASSIGN(priority);
1622		ASSIGN(local_flags);
1623
1624		ASSIGN(qlimit);
1625		ASSIGN(flags);
1626
1627		if (q->scheduler == ALTQT_HFSC) {
1628#define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x
1629#define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \
1630			    SATU32(q->pq_u.hfsc_opts.x)
1631
1632			ASSIGN_OPT_SATU32(rtsc_m1);
1633			ASSIGN_OPT(rtsc_d);
1634			ASSIGN_OPT_SATU32(rtsc_m2);
1635
1636			ASSIGN_OPT_SATU32(lssc_m1);
1637			ASSIGN_OPT(lssc_d);
1638			ASSIGN_OPT_SATU32(lssc_m2);
1639
1640			ASSIGN_OPT_SATU32(ulsc_m1);
1641			ASSIGN_OPT(ulsc_d);
1642			ASSIGN_OPT_SATU32(ulsc_m2);
1643
1644			ASSIGN_OPT(flags);
1645
1646#undef ASSIGN_OPT
1647#undef ASSIGN_OPT_SATU32
1648		} else
1649			COPY(pq_u);
1650
1651		ASSIGN(qid);
1652		break;
1653	}
1654	case 1:	{
1655		struct pf_altq_v1 *exported_q =
1656		    &((struct pfioc_altq_v1 *)pa)->altq;
1657
1658		COPY(ifname);
1659
1660		ASSIGN(scheduler);
1661		ASSIGN(tbrsize);
1662		ASSIGN(ifbandwidth);
1663
1664		COPY(qname);
1665		COPY(parent);
1666		ASSIGN(parent_qid);
1667		ASSIGN(bandwidth);
1668		ASSIGN(priority);
1669		ASSIGN(local_flags);
1670
1671		ASSIGN(qlimit);
1672		ASSIGN(flags);
1673		COPY(pq_u);
1674
1675		ASSIGN(qid);
1676		break;
1677	}
1678	default:
1679		panic("%s: unhandled struct pfioc_altq version", __func__);
1680		break;
1681	}
1682
1683#undef ASSIGN
1684#undef COPY
1685#undef SATU16
1686#undef SATU32
1687
1688	return (0);
1689}
1690
1691/*
1692 * Handle import to struct pf_kaltq of struct pf_altq from user binaries
1693 * that may be using any version of it.
1694 */
1695static int
1696pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
1697{
1698	u_int32_t version;
1699
1700	if (ioc_size == sizeof(struct pfioc_altq_v0))
1701		version = 0;
1702	else
1703		version = pa->version;
1704
1705	if (version > PFIOC_ALTQ_VERSION)
1706		return (EINVAL);
1707
1708#define ASSIGN(x) q->x = imported_q->x
1709#define COPY(x) \
1710	bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x)))
1711
1712	switch (version) {
1713	case 0: {
1714		struct pf_altq_v0 *imported_q =
1715		    &((struct pfioc_altq_v0 *)pa)->altq;
1716
1717		COPY(ifname);
1718
1719		ASSIGN(scheduler);
1720		ASSIGN(tbrsize); /* 16-bit -> 32-bit */
1721		ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */
1722
1723		COPY(qname);
1724		COPY(parent);
1725		ASSIGN(parent_qid);
1726		ASSIGN(bandwidth); /* 32-bit -> 64-bit */
1727		ASSIGN(priority);
1728		ASSIGN(local_flags);
1729
1730		ASSIGN(qlimit);
1731		ASSIGN(flags);
1732
1733		if (imported_q->scheduler == ALTQT_HFSC) {
1734#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x
1735
1736			/*
1737			 * The m1 and m2 parameters are being copied from
1738			 * 32-bit to 64-bit.
1739			 */
1740			ASSIGN_OPT(rtsc_m1);
1741			ASSIGN_OPT(rtsc_d);
1742			ASSIGN_OPT(rtsc_m2);
1743
1744			ASSIGN_OPT(lssc_m1);
1745			ASSIGN_OPT(lssc_d);
1746			ASSIGN_OPT(lssc_m2);
1747
1748			ASSIGN_OPT(ulsc_m1);
1749			ASSIGN_OPT(ulsc_d);
1750			ASSIGN_OPT(ulsc_m2);
1751
1752			ASSIGN_OPT(flags);
1753
1754#undef ASSIGN_OPT
1755		} else
1756			COPY(pq_u);
1757
1758		ASSIGN(qid);
1759		break;
1760	}
1761	case 1: {
1762		struct pf_altq_v1 *imported_q =
1763		    &((struct pfioc_altq_v1 *)pa)->altq;
1764
1765		COPY(ifname);
1766
1767		ASSIGN(scheduler);
1768		ASSIGN(tbrsize);
1769		ASSIGN(ifbandwidth);
1770
1771		COPY(qname);
1772		COPY(parent);
1773		ASSIGN(parent_qid);
1774		ASSIGN(bandwidth);
1775		ASSIGN(priority);
1776		ASSIGN(local_flags);
1777
1778		ASSIGN(qlimit);
1779		ASSIGN(flags);
1780		COPY(pq_u);
1781
1782		ASSIGN(qid);
1783		break;
1784	}
1785	default:
1786		panic("%s: unhandled struct pfioc_altq version", __func__);
1787		break;
1788	}
1789
1790#undef ASSIGN
1791#undef COPY
1792
1793	return (0);
1794}
1795
1796static struct pf_altq *
1797pf_altq_get_nth_active(u_int32_t n)
1798{
1799	struct pf_altq		*altq;
1800	u_int32_t		 nr;
1801
1802	nr = 0;
1803	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
1804		if (nr == n)
1805			return (altq);
1806		nr++;
1807	}
1808
1809	TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
1810		if (nr == n)
1811			return (altq);
1812		nr++;
1813	}
1814
1815	return (NULL);
1816}
1817#endif /* ALTQ */
1818
1819struct pf_krule *
1820pf_krule_alloc(void)
1821{
1822	struct pf_krule *rule;
1823
1824	rule = malloc(sizeof(struct pf_krule), M_PFRULE, M_WAITOK | M_ZERO);
1825	mtx_init(&rule->rpool.mtx, "pf_krule_pool", NULL, MTX_DEF);
1826	rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
1827	    M_WAITOK | M_ZERO);
1828	return (rule);
1829}
1830
1831void
1832pf_krule_free(struct pf_krule *rule)
1833{
1834#ifdef PF_WANT_32_TO_64_COUNTER
1835	bool wowned;
1836#endif
1837
1838	if (rule == NULL)
1839		return;
1840
1841#ifdef PF_WANT_32_TO_64_COUNTER
1842	if (rule->allrulelinked) {
1843		wowned = PF_RULES_WOWNED();
1844		if (!wowned)
1845			PF_RULES_WLOCK();
1846		LIST_REMOVE(rule, allrulelist);
1847		V_pf_allrulecount--;
1848		if (!wowned)
1849			PF_RULES_WUNLOCK();
1850	}
1851#endif
1852
1853	pf_counter_u64_deinit(&rule->evaluations);
1854	for (int i = 0; i < 2; i++) {
1855		pf_counter_u64_deinit(&rule->packets[i]);
1856		pf_counter_u64_deinit(&rule->bytes[i]);
1857	}
1858	counter_u64_free(rule->states_cur);
1859	counter_u64_free(rule->states_tot);
1860	counter_u64_free(rule->src_nodes);
1861	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);
1862
1863	mtx_destroy(&rule->rpool.mtx);
1864	free(rule, M_PFRULE);
1865}
1866
1867void
1868pf_krule_clear_counters(struct pf_krule *rule)
1869{
1870	pf_counter_u64_zero(&rule->evaluations);
1871	for (int i = 0; i < 2; i++) {
1872		pf_counter_u64_zero(&rule->packets[i]);
1873		pf_counter_u64_zero(&rule->bytes[i]);
1874	}
1875	counter_u64_zero(rule->states_tot);
1876}
1877
1878static void
1879pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool,
1880    struct pf_pooladdr *pool)
1881{
1882
1883	bzero(pool, sizeof(*pool));
1884	bcopy(&kpool->addr, &pool->addr, sizeof(pool->addr));
1885	strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname));
1886}
1887
1888static int
1889pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool,
1890    struct pf_kpooladdr *kpool)
1891{
1892	int ret;
1893
1894	bzero(kpool, sizeof(*kpool));
1895	bcopy(&pool->addr, &kpool->addr, sizeof(kpool->addr));
1896	ret = pf_user_strcpy(kpool->ifname, pool->ifname,
1897	    sizeof(kpool->ifname));
1898	return (ret);
1899}
1900
1901static void
1902pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool)
1903{
1904	_Static_assert(sizeof(pool->key) == sizeof(kpool->key), "");
1905	_Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), "");
1906
1907	bcopy(&pool->key, &kpool->key, sizeof(kpool->key));
1908	bcopy(&pool->counter, &kpool->counter, sizeof(kpool->counter));
1909
1910	kpool->tblidx = pool->tblidx;
1911	kpool->proxy_port[0] = pool->proxy_port[0];
1912	kpool->proxy_port[1] = pool->proxy_port[1];
1913	kpool->opts = pool->opts;
1914}
1915
1916static int
1917pf_rule_to_krule(const struct pf_rule *rule, struct pf_krule *krule)
1918{
1919	int ret;
1920
1921#ifndef INET
1922	if (rule->af == AF_INET) {
1923		return (EAFNOSUPPORT);
1924	}
1925#endif /* INET */
1926#ifndef INET6
1927	if (rule->af == AF_INET6) {
1928		return (EAFNOSUPPORT);
1929	}
1930#endif /* INET6 */
1931
1932	ret = pf_check_rule_addr(&rule->src);
1933	if (ret != 0)
1934		return (ret);
1935	ret = pf_check_rule_addr(&rule->dst);
1936	if (ret != 0)
1937		return (ret);
1938
1939	bcopy(&rule->src, &krule->src, sizeof(rule->src));
1940	bcopy(&rule->dst, &krule->dst, sizeof(rule->dst));
1941
1942	ret = pf_user_strcpy(krule->label[0], rule->label, sizeof(rule->label));
1943	if (ret != 0)
1944		return (ret);
1945	ret = pf_user_strcpy(krule->ifname, rule->ifname, sizeof(rule->ifname));
1946	if (ret != 0)
1947		return (ret);
1948	ret = pf_user_strcpy(krule->qname, rule->qname, sizeof(rule->qname));
1949	if (ret != 0)
1950		return (ret);
1951	ret = pf_user_strcpy(krule->pqname, rule->pqname, sizeof(rule->pqname));
1952	if (ret != 0)
1953		return (ret);
1954	ret = pf_user_strcpy(krule->tagname, rule->tagname,
1955	    sizeof(rule->tagname));
1956	if (ret != 0)
1957		return (ret);
1958	ret = pf_user_strcpy(krule->match_tagname, rule->match_tagname,
1959	    sizeof(rule->match_tagname));
1960	if (ret != 0)
1961		return (ret);
1962	ret = pf_user_strcpy(krule->overload_tblname, rule->overload_tblname,
1963	    sizeof(rule->overload_tblname));
1964	if (ret != 0)
1965		return (ret);
1966
1967	pf_pool_to_kpool(&rule->rpool, &krule->rpool);
1968
1969	/* Don't allow userspace to set evaluations, packets or bytes. */
1970	/* kif, anchor, overload_tbl are not copied over. */
1971
1972	krule->os_fingerprint = rule->os_fingerprint;
1973
1974	krule->rtableid = rule->rtableid;
1975	/* pf_rule->timeout is smaller than pf_krule->timeout */
1976	bcopy(rule->timeout, krule->timeout, sizeof(rule->timeout));
1977	krule->max_states = rule->max_states;
1978	krule->max_src_nodes = rule->max_src_nodes;
1979	krule->max_src_states = rule->max_src_states;
1980	krule->max_src_conn = rule->max_src_conn;
1981	krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit;
1982	krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds;
1983	krule->qid = rule->qid;
1984	krule->pqid = rule->pqid;
1985	krule->nr = rule->nr;
1986	krule->prob = rule->prob;
1987	krule->cuid = rule->cuid;
1988	krule->cpid = rule->cpid;
1989
1990	krule->return_icmp = rule->return_icmp;
1991	krule->return_icmp6 = rule->return_icmp6;
1992	krule->max_mss = rule->max_mss;
1993	krule->tag = rule->tag;
1994	krule->match_tag = rule->match_tag;
1995	krule->scrub_flags = rule->scrub_flags;
1996
1997	bcopy(&rule->uid, &krule->uid, sizeof(krule->uid));
1998	bcopy(&rule->gid, &krule->gid, sizeof(krule->gid));
1999
2000	krule->rule_flag = rule->rule_flag;
2001	krule->action = rule->action;
2002	krule->direction = rule->direction;
2003	krule->log = rule->log;
2004	krule->logif = rule->logif;
2005	krule->quick = rule->quick;
2006	krule->ifnot = rule->ifnot;
2007	krule->match_tag_not = rule->match_tag_not;
2008	krule->natpass = rule->natpass;
2009
2010	krule->keep_state = rule->keep_state;
2011	krule->af = rule->af;
2012	krule->proto = rule->proto;
2013	krule->type = rule->type;
2014	krule->code = rule->code;
2015	krule->flags = rule->flags;
2016	krule->flagset = rule->flagset;
2017	krule->min_ttl = rule->min_ttl;
2018	krule->allow_opts = rule->allow_opts;
2019	krule->rt = rule->rt;
2020	krule->return_ttl = rule->return_ttl;
2021	krule->tos = rule->tos;
2022	krule->set_tos = rule->set_tos;
2023
2024	krule->flush = rule->flush;
2025	krule->prio = rule->prio;
2026	krule->set_prio[0] = rule->set_prio[0];
2027	krule->set_prio[1] = rule->set_prio[1];
2028
2029	bcopy(&rule->divert, &krule->divert, sizeof(krule->divert));
2030
2031	return (0);
2032}
2033
2034int
2035pf_ioctl_getrules(struct pfioc_rule *pr)
2036{
2037	struct pf_kruleset	*ruleset;
2038	struct pf_krule		*tail;
2039	int			 rs_num;
2040
2041	PF_RULES_WLOCK();
2042	ruleset = pf_find_kruleset(pr->anchor);
2043	if (ruleset == NULL) {
2044		PF_RULES_WUNLOCK();
2045		return (EINVAL);
2046	}
2047	rs_num = pf_get_ruleset_number(pr->rule.action);
2048	if (rs_num >= PF_RULESET_MAX) {
2049		PF_RULES_WUNLOCK();
2050		return (EINVAL);
2051	}
2052	tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
2053	    pf_krulequeue);
2054	if (tail)
2055		pr->nr = tail->nr + 1;
2056	else
2057		pr->nr = 0;
2058	pr->ticket = ruleset->rules[rs_num].active.ticket;
2059	PF_RULES_WUNLOCK();
2060
2061	return (0);
2062}
2063
2064int
2065pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
2066    uint32_t pool_ticket, const char *anchor, const char *anchor_call,
2067    uid_t uid, pid_t pid)
2068{
2069	struct pf_kruleset	*ruleset;
2070	struct pf_krule		*tail;
2071	struct pf_kpooladdr	*pa;
2072	struct pfi_kkif		*kif = NULL;
2073	int			 rs_num;
2074	int			 error = 0;
2075
2076	if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) {
2077		error = EINVAL;
2078		goto errout_unlocked;
2079	}
2080
2081#define	ERROUT(x)	ERROUT_FUNCTION(errout, x)
2082
2083	if (rule->ifname[0])
2084		kif = pf_kkif_create(M_WAITOK);
2085	pf_counter_u64_init(&rule->evaluations, M_WAITOK);
2086	for (int i = 0; i < 2; i++) {
2087		pf_counter_u64_init(&rule->packets[i], M_WAITOK);
2088		pf_counter_u64_init(&rule->bytes[i], M_WAITOK);
2089	}
2090	rule->states_cur = counter_u64_alloc(M_WAITOK);
2091	rule->states_tot = counter_u64_alloc(M_WAITOK);
2092	rule->src_nodes = counter_u64_alloc(M_WAITOK);
2093	rule->cuid = uid;
2094	rule->cpid = pid;
2095	TAILQ_INIT(&rule->rpool.list);
2096
2097	PF_CONFIG_LOCK();
2098	PF_RULES_WLOCK();
2099#ifdef PF_WANT_32_TO_64_COUNTER
2100	LIST_INSERT_HEAD(&V_pf_allrulelist, rule, allrulelist);
2101	MPASS(!rule->allrulelinked);
2102	rule->allrulelinked = true;
2103	V_pf_allrulecount++;
2104#endif
2105	ruleset = pf_find_kruleset(anchor);
2106	if (ruleset == NULL)
2107		ERROUT(EINVAL);
2108	rs_num = pf_get_ruleset_number(rule->action);
2109	if (rs_num >= PF_RULESET_MAX)
2110		ERROUT(EINVAL);
2111	if (ticket != ruleset->rules[rs_num].inactive.ticket) {
2112		DPFPRINTF(PF_DEBUG_MISC,
2113		    ("ticket: %d != [%d]%d\n", ticket, rs_num,
2114		    ruleset->rules[rs_num].inactive.ticket));
2115		ERROUT(EBUSY);
2116	}
2117	if (pool_ticket != V_ticket_pabuf) {
2118		DPFPRINTF(PF_DEBUG_MISC,
2119		    ("pool_ticket: %d != %d\n", pool_ticket,
2120		    V_ticket_pabuf));
2121		ERROUT(EBUSY);
2122	}
2123	/*
2124	 * XXXMJG hack: there is no mechanism to ensure they started the
2125	 * transaction. Ticket checked above may happen to match by accident,
2126	 * even if nobody called DIOCXBEGIN, let alone this process.
2127	 * Partially work around it by checking if the RB tree got allocated,
2128	 * see pf_begin_rules.
2129	 */
2130	if (ruleset->rules[rs_num].inactive.tree == NULL) {
2131		ERROUT(EINVAL);
2132	}
2133
2134	tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
2135	    pf_krulequeue);
2136	if (tail)
2137		rule->nr = tail->nr + 1;
2138	else
2139		rule->nr = 0;
2140	if (rule->ifname[0]) {
2141		rule->kif = pfi_kkif_attach(kif, rule->ifname);
2142		kif = NULL;
2143		pfi_kkif_ref(rule->kif);
2144	} else
2145		rule->kif = NULL;
2146
2147	if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs)
2148		error = EBUSY;
2149
2150#ifdef ALTQ
2151	/* set queue IDs */
2152	if (rule->qname[0] != 0) {
2153		if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
2154			error = EBUSY;
2155		else if (rule->pqname[0] != 0) {
2156			if ((rule->pqid =
2157			    pf_qname2qid(rule->pqname)) == 0)
2158				error = EBUSY;
2159		} else
2160			rule->pqid = rule->qid;
2161	}
2162#endif
2163	if (rule->tagname[0])
2164		if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
2165			error = EBUSY;
2166	if (rule->match_tagname[0])
2167		if ((rule->match_tag =
2168		    pf_tagname2tag(rule->match_tagname)) == 0)
2169			error = EBUSY;
2170	if (rule->rt && !rule->direction)
2171		error = EINVAL;
2172	if (!rule->log)
2173		rule->logif = 0;
2174	if (rule->logif >= PFLOGIFS_MAX)
2175		error = EINVAL;
2176	if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
2177		error = ENOMEM;
2178	if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
2179		error = ENOMEM;
2180	if (pf_kanchor_setup(rule, ruleset, anchor_call))
2181		error = EINVAL;
2182	if (rule->scrub_flags & PFSTATE_SETPRIO &&
2183	    (rule->set_prio[0] > PF_PRIO_MAX ||
2184	    rule->set_prio[1] > PF_PRIO_MAX))
2185		error = EINVAL;
2186	TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
2187		if (pa->addr.type == PF_ADDR_TABLE) {
2188			pa->addr.p.tbl = pfr_attach_table(ruleset,
2189			    pa->addr.v.tblname);
2190			if (pa->addr.p.tbl == NULL)
2191				error = ENOMEM;
2192		}
2193
2194	rule->overload_tbl = NULL;
2195	if (rule->overload_tblname[0]) {
2196		if ((rule->overload_tbl = pfr_attach_table(ruleset,
2197		    rule->overload_tblname)) == NULL)
2198			error = EINVAL;
2199		else
2200			rule->overload_tbl->pfrkt_flags |=
2201			    PFR_TFLAG_ACTIVE;
2202	}
2203
2204	pf_mv_kpool(&V_pf_pabuf, &rule->rpool.list);
2205	if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
2206	    (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
2207	    (rule->rt > PF_NOPFROUTE)) &&
2208	    (TAILQ_FIRST(&rule->rpool.list) == NULL))
2209		error = EINVAL;
2210
2211	if (error) {
2212		pf_free_rule(rule);
2213		rule = NULL;
2214		ERROUT(error);
2215	}
2216
2217	rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
2218	TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
2219	    rule, entries);
2220	ruleset->rules[rs_num].inactive.rcount++;
2221
2222	PF_RULES_WUNLOCK();
2223	pf_hash_rule(rule);
2224	if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) {
2225		PF_RULES_WLOCK();
2226		TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries);
2227		ruleset->rules[rs_num].inactive.rcount--;
2228		pf_free_rule(rule);
2229		rule = NULL;
2230		ERROUT(EEXIST);
2231	}
2232	PF_CONFIG_UNLOCK();
2233
2234	return (0);
2235
2236#undef ERROUT
2237errout:
2238	PF_RULES_WUNLOCK();
2239	PF_CONFIG_UNLOCK();
2240errout_unlocked:
2241	pf_kkif_free(kif);
2242	pf_krule_free(rule);
2243	return (error);
2244}
2245
2246static bool
2247pf_label_match(const struct pf_krule *rule, const char *label)
2248{
2249	int i = 0;
2250
2251	while (*rule->label[i]) {
2252		if (strcmp(rule->label[i], label) == 0)
2253			return (true);
2254		i++;
2255	}
2256
2257	return (false);
2258}
2259
2260static unsigned int
2261pf_kill_matching_state(struct pf_state_key_cmp *key, int dir)
2262{
2263	struct pf_kstate *s;
2264	int more = 0;
2265
2266	s = pf_find_state_all(key, dir, &more);
2267	if (s == NULL)
2268		return (0);
2269
2270	if (more) {
2271		PF_STATE_UNLOCK(s);
2272		return (0);
2273	}
2274
2275	pf_unlink_state(s);
2276	return (1);
2277}
2278
2279static int
2280pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih)
2281{
2282	struct pf_kstate	*s;
2283	struct pf_state_key	*sk;
2284	struct pf_addr		*srcaddr, *dstaddr;
2285	struct pf_state_key_cmp	 match_key;
2286	int			 idx, killed = 0;
2287	unsigned int		 dir;
2288	u_int16_t		 srcport, dstport;
2289	struct pfi_kkif		*kif;
2290
2291relock_DIOCKILLSTATES:
2292	PF_HASHROW_LOCK(ih);
2293	LIST_FOREACH(s, &ih->states, entry) {
2294		/* For floating states look at the original kif. */
2295		kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;
2296
2297		sk = s->key[psk->psk_nat ? PF_SK_STACK : PF_SK_WIRE];
2298		if (s->direction == PF_OUT) {
2299			srcaddr = &sk->addr[1];
2300			dstaddr = &sk->addr[0];
2301			srcport = sk->port[1];
2302			dstport = sk->port[0];
2303		} else {
2304			srcaddr = &sk->addr[0];
2305			dstaddr = &sk->addr[1];
2306			srcport = sk->port[0];
2307			dstport = sk->port[1];
2308		}
2309
2310		if (psk->psk_af && sk->af != psk->psk_af)
2311			continue;
2312
2313		if (psk->psk_proto && psk->psk_proto != sk->proto)
2314			continue;
2315
2316		if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr,
2317		    &psk->psk_src.addr.v.a.mask, srcaddr, sk->af))
2318			continue;
2319
2320		if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr,
2321		    &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af))
2322			continue;
2323
2324		if (!  PF_MATCHA(psk->psk_rt_addr.neg,
2325		    &psk->psk_rt_addr.addr.v.a.addr,
2326		    &psk->psk_rt_addr.addr.v.a.mask,
2327		    &s->rt_addr, sk->af))
2328			continue;
2329
2330		if (psk->psk_src.port_op != 0 &&
2331		    ! pf_match_port(psk->psk_src.port_op,
2332		    psk->psk_src.port[0], psk->psk_src.port[1], srcport))
2333			continue;
2334
2335		if (psk->psk_dst.port_op != 0 &&
2336		    ! pf_match_port(psk->psk_dst.port_op,
2337		    psk->psk_dst.port[0], psk->psk_dst.port[1], dstport))
2338			continue;
2339
2340		if (psk->psk_label[0] &&
2341		    ! pf_label_match(s->rule.ptr, psk->psk_label))
2342			continue;
2343
2344		if (psk->psk_ifname[0] && strcmp(psk->psk_ifname,
2345		    kif->pfik_name))
2346			continue;
2347
2348		if (psk->psk_kill_match) {
2349			/* Create the key to find matching states, with lock
2350			 * held. */
2351
2352			bzero(&match_key, sizeof(match_key));
2353
2354			if (s->direction == PF_OUT) {
2355				dir = PF_IN;
2356				idx = psk->psk_nat ? PF_SK_WIRE : PF_SK_STACK;
2357			} else {
2358				dir = PF_OUT;
2359				idx = psk->psk_nat ? PF_SK_STACK : PF_SK_WIRE;
2360			}
2361
2362			match_key.af = s->key[idx]->af;
2363			match_key.proto = s->key[idx]->proto;
2364			PF_ACPY(&match_key.addr[0],
2365			    &s->key[idx]->addr[1], match_key.af);
2366			match_key.port[0] = s->key[idx]->port[1];
2367			PF_ACPY(&match_key.addr[1],
2368			    &s->key[idx]->addr[0], match_key.af);
2369			match_key.port[1] = s->key[idx]->port[0];
2370		}
2371
2372		pf_unlink_state(s);
2373		killed++;
2374
2375		if (psk->psk_kill_match)
2376			killed += pf_kill_matching_state(&match_key, dir);
2377
2378		goto relock_DIOCKILLSTATES;
2379	}
2380	PF_HASHROW_UNLOCK(ih);
2381
2382	return (killed);
2383}
2384
2385int
2386pf_start(void)
2387{
2388	int error = 0;
2389
2390	sx_xlock(&V_pf_ioctl_lock);
2391	if (V_pf_status.running)
2392		error = EEXIST;
2393	else {
2394		hook_pf();
2395		if (! TAILQ_EMPTY(V_pf_keth->active.rules))
2396			hook_pf_eth();
2397		V_pf_status.running = 1;
2398		V_pf_status.since = time_second;
2399		new_unrhdr64(&V_pf_stateid, time_second);
2400
2401		DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
2402	}
2403	sx_xunlock(&V_pf_ioctl_lock);
2404
2405	return (error);
2406}
2407
2408int
2409pf_stop(void)
2410{
2411	int error = 0;
2412
2413	sx_xlock(&V_pf_ioctl_lock);
2414	if (!V_pf_status.running)
2415		error = ENOENT;
2416	else {
2417		V_pf_status.running = 0;
2418		dehook_pf();
2419		dehook_pf_eth();
2420		V_pf_status.since = time_second;
2421		DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
2422	}
2423	sx_xunlock(&V_pf_ioctl_lock);
2424
2425	return (error);
2426}
2427
2428void
2429pf_ioctl_clear_status(void)
2430{
2431	PF_RULES_WLOCK();
2432	for (int i = 0; i < PFRES_MAX; i++)
2433		counter_u64_zero(V_pf_status.counters[i]);
2434	for (int i = 0; i < FCNT_MAX; i++)
2435		pf_counter_u64_zero(&V_pf_status.fcounters[i]);
2436	for (int i = 0; i < SCNT_MAX; i++)
2437		counter_u64_zero(V_pf_status.scounters[i]);
2438	for (int i = 0; i < KLCNT_MAX; i++)
2439		counter_u64_zero(V_pf_status.lcounters[i]);
2440	V_pf_status.since = time_second;
2441	if (*V_pf_status.ifname)
2442		pfi_update_status(V_pf_status.ifname, NULL);
2443	PF_RULES_WUNLOCK();
2444}
2445
2446static int
2447pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
2448{
2449	int			 error = 0;
2450	PF_RULES_RLOCK_TRACKER;
2451
2452#define	ERROUT_IOCTL(target, x)					\
2453    do {								\
2454	    error = (x);						\
2455	    SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__);	\
2456	    goto target;						\
2457    } while (0)
2458
2459
2460	/* XXX keep in sync with switch() below */
2461	if (securelevel_gt(td->td_ucred, 2))
2462		switch (cmd) {
2463		case DIOCGETRULES:
2464		case DIOCGETRULENV:
2465		case DIOCGETADDRS:
2466		case DIOCGETADDR:
2467		case DIOCGETSTATE:
2468		case DIOCGETSTATENV:
2469		case DIOCSETSTATUSIF:
2470		case DIOCGETSTATUSNV:
2471		case DIOCCLRSTATUS:
2472		case DIOCNATLOOK:
2473		case DIOCSETDEBUG:
2474#ifdef COMPAT_FREEBSD14
2475		case DIOCGETSTATES:
2476		case DIOCGETSTATESV2:
2477#endif
2478		case DIOCGETTIMEOUT:
2479		case DIOCCLRRULECTRS:
2480		case DIOCGETLIMIT:
2481		case DIOCGETALTQSV0:
2482		case DIOCGETALTQSV1:
2483		case DIOCGETALTQV0:
2484		case DIOCGETALTQV1:
2485		case DIOCGETQSTATSV0:
2486		case DIOCGETQSTATSV1:
2487		case DIOCGETRULESETS:
2488		case DIOCGETRULESET:
2489		case DIOCRGETTABLES:
2490		case DIOCRGETTSTATS:
2491		case DIOCRCLRTSTATS:
2492		case DIOCRCLRADDRS:
2493		case DIOCRADDADDRS:
2494		case DIOCRDELADDRS:
2495		case DIOCRSETADDRS:
2496		case DIOCRGETADDRS:
2497		case DIOCRGETASTATS:
2498		case DIOCRCLRASTATS:
2499		case DIOCRTSTADDRS:
2500		case DIOCOSFPGET:
2501		case DIOCGETSRCNODES:
2502		case DIOCCLRSRCNODES:
2503		case DIOCGETSYNCOOKIES:
2504		case DIOCIGETIFACES:
2505		case DIOCGIFSPEEDV0:
2506		case DIOCGIFSPEEDV1:
2507		case DIOCSETIFFLAG:
2508		case DIOCCLRIFFLAG:
2509		case DIOCGETETHRULES:
2510		case DIOCGETETHRULE:
2511		case DIOCGETETHRULESETS:
2512		case DIOCGETETHRULESET:
2513			break;
2514		case DIOCRCLRTABLES:
2515		case DIOCRADDTABLES:
2516		case DIOCRDELTABLES:
2517		case DIOCRSETTFLAGS:
2518			if (((struct pfioc_table *)addr)->pfrio_flags &
2519			    PFR_FLAG_DUMMY)
2520				break; /* dummy operation ok */
2521			return (EPERM);
2522		default:
2523			return (EPERM);
2524		}
2525
2526	if (!(flags & FWRITE))
2527		switch (cmd) {
2528		case DIOCGETRULES:
2529		case DIOCGETADDRS:
2530		case DIOCGETADDR:
2531		case DIOCGETSTATE:
2532		case DIOCGETSTATENV:
2533		case DIOCGETSTATUSNV:
2534#ifdef COMPAT_FREEBSD14
2535		case DIOCGETSTATES:
2536		case DIOCGETSTATESV2:
2537#endif
2538		case DIOCGETTIMEOUT:
2539		case DIOCGETLIMIT:
2540		case DIOCGETALTQSV0:
2541		case DIOCGETALTQSV1:
2542		case DIOCGETALTQV0:
2543		case DIOCGETALTQV1:
2544		case DIOCGETQSTATSV0:
2545		case DIOCGETQSTATSV1:
2546		case DIOCGETRULESETS:
2547		case DIOCGETRULESET:
2548		case DIOCNATLOOK:
2549		case DIOCRGETTABLES:
2550		case DIOCRGETTSTATS:
2551		case DIOCRGETADDRS:
2552		case DIOCRGETASTATS:
2553		case DIOCRTSTADDRS:
2554		case DIOCOSFPGET:
2555		case DIOCGETSRCNODES:
2556		case DIOCGETSYNCOOKIES:
2557		case DIOCIGETIFACES:
2558		case DIOCGIFSPEEDV1:
2559		case DIOCGIFSPEEDV0:
2560		case DIOCGETRULENV:
2561		case DIOCGETETHRULES:
2562		case DIOCGETETHRULE:
2563		case DIOCGETETHRULESETS:
2564		case DIOCGETETHRULESET:
2565			break;
2566		case DIOCRCLRTABLES:
2567		case DIOCRADDTABLES:
2568		case DIOCRDELTABLES:
2569		case DIOCRCLRTSTATS:
2570		case DIOCRCLRADDRS:
2571		case DIOCRADDADDRS:
2572		case DIOCRDELADDRS:
2573		case DIOCRSETADDRS:
2574		case DIOCRSETTFLAGS:
2575			if (((struct pfioc_table *)addr)->pfrio_flags &
2576			    PFR_FLAG_DUMMY) {
2577				flags |= FWRITE; /* need write lock for dummy */
2578				break; /* dummy operation ok */
2579			}
2580			return (EACCES);
2581		default:
2582			return (EACCES);
2583		}
2584
2585	CURVNET_SET(TD_TO_VNET(td));
2586
2587	switch (cmd) {
2588#ifdef COMPAT_FREEBSD14
2589	case DIOCSTART:
2590		error = pf_start();
2591		break;
2592
2593	case DIOCSTOP:
2594		error = pf_stop();
2595		break;
2596#endif
2597
2598	case DIOCGETETHRULES: {
2599		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
2600		nvlist_t		*nvl;
2601		void			*packed;
2602		struct pf_keth_rule	*tail;
2603		struct pf_keth_ruleset	*rs;
2604		u_int32_t		 ticket, nr;
2605		const char		*anchor = "";
2606
2607		nvl = NULL;
2608		packed = NULL;
2609
2610#define	ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULES_error, x)
2611
2612		if (nv->len > pf_ioctl_maxcount)
2613			ERROUT(ENOMEM);
2614
2615		/* Copy the request in */
2616		packed = malloc(nv->len, M_NVLIST, M_WAITOK);
2617		if (packed == NULL)
2618			ERROUT(ENOMEM);
2619
2620		error = copyin(nv->data, packed, nv->len);
2621		if (error)
2622			ERROUT(error);
2623
2624		nvl = nvlist_unpack(packed, nv->len, 0);
2625		if (nvl == NULL)
2626			ERROUT(EBADMSG);
2627
2628		if (! nvlist_exists_string(nvl, "anchor"))
2629			ERROUT(EBADMSG);
2630
2631		anchor = nvlist_get_string(nvl, "anchor");
2632
2633		rs = pf_find_keth_ruleset(anchor);
2634
2635		nvlist_destroy(nvl);
2636		nvl = NULL;
2637		free(packed, M_NVLIST);
2638		packed = NULL;
2639
2640		if (rs == NULL)
2641			ERROUT(ENOENT);
2642
2643		/* Reply */
2644		nvl = nvlist_create(0);
2645		if (nvl == NULL)
2646			ERROUT(ENOMEM);
2647
2648		PF_RULES_RLOCK();
2649
2650		ticket = rs->active.ticket;
2651		tail = TAILQ_LAST(rs->active.rules, pf_keth_ruleq);
2652		if (tail)
2653			nr = tail->nr + 1;
2654		else
2655			nr = 0;
2656
2657		PF_RULES_RUNLOCK();
2658
2659		nvlist_add_number(nvl, "ticket", ticket);
2660		nvlist_add_number(nvl, "nr", nr);
2661
2662		packed = nvlist_pack(nvl, &nv->len);
2663		if (packed == NULL)
2664			ERROUT(ENOMEM);
2665
2666		if (nv->size == 0)
2667			ERROUT(0);
2668		else if (nv->size < nv->len)
2669			ERROUT(ENOSPC);
2670
2671		error = copyout(packed, nv->data, nv->len);
2672
2673#undef ERROUT
2674DIOCGETETHRULES_error:
2675		free(packed, M_NVLIST);
2676		nvlist_destroy(nvl);
2677		break;
2678	}
2679
2680	case DIOCGETETHRULE: {
2681		struct epoch_tracker	 et;
2682		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
2683		nvlist_t		*nvl = NULL;
2684		void			*nvlpacked = NULL;
2685		struct pf_keth_rule	*rule = NULL;
2686		struct pf_keth_ruleset	*rs;
2687		u_int32_t		 ticket, nr;
2688		bool			 clear = false;
2689		const char		*anchor;
2690
2691#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULE_error, x)
2692
2693		if (nv->len > pf_ioctl_maxcount)
2694			ERROUT(ENOMEM);
2695
2696		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2697		if (nvlpacked == NULL)
2698			ERROUT(ENOMEM);
2699
2700		error = copyin(nv->data, nvlpacked, nv->len);
2701		if (error)
2702			ERROUT(error);
2703
2704		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2705		if (nvl == NULL)
2706			ERROUT(EBADMSG);
2707		if (! nvlist_exists_number(nvl, "ticket"))
2708			ERROUT(EBADMSG);
2709		ticket = nvlist_get_number(nvl, "ticket");
2710		if (! nvlist_exists_string(nvl, "anchor"))
2711			ERROUT(EBADMSG);
2712		anchor = nvlist_get_string(nvl, "anchor");
2713
2714		if (nvlist_exists_bool(nvl, "clear"))
2715			clear = nvlist_get_bool(nvl, "clear");
2716
2717		if (clear && !(flags & FWRITE))
2718			ERROUT(EACCES);
2719
2720		if (! nvlist_exists_number(nvl, "nr"))
2721			ERROUT(EBADMSG);
2722		nr = nvlist_get_number(nvl, "nr");
2723
2724		PF_RULES_RLOCK();
2725		rs = pf_find_keth_ruleset(anchor);
2726		if (rs == NULL) {
2727			PF_RULES_RUNLOCK();
2728			ERROUT(ENOENT);
2729		}
2730		if (ticket != rs->active.ticket) {
2731			PF_RULES_RUNLOCK();
2732			ERROUT(EBUSY);
2733		}
2734
2735		nvlist_destroy(nvl);
2736		nvl = NULL;
2737		free(nvlpacked, M_NVLIST);
2738		nvlpacked = NULL;
2739
2740		rule = TAILQ_FIRST(rs->active.rules);
2741		while ((rule != NULL) && (rule->nr != nr))
2742			rule = TAILQ_NEXT(rule, entries);
2743		if (rule == NULL) {
2744			PF_RULES_RUNLOCK();
2745			ERROUT(ENOENT);
2746		}
2747		/* Make sure rule can't go away. */
2748		NET_EPOCH_ENTER(et);
2749		PF_RULES_RUNLOCK();
2750		nvl = pf_keth_rule_to_nveth_rule(rule);
2751		if (pf_keth_anchor_nvcopyout(rs, rule, nvl))
2752			ERROUT(EBUSY);
2753		NET_EPOCH_EXIT(et);
2754		if (nvl == NULL)
2755			ERROUT(ENOMEM);
2756
2757		nvlpacked = nvlist_pack(nvl, &nv->len);
2758		if (nvlpacked == NULL)
2759			ERROUT(ENOMEM);
2760
2761		if (nv->size == 0)
2762			ERROUT(0);
2763		else if (nv->size < nv->len)
2764			ERROUT(ENOSPC);
2765
2766		error = copyout(nvlpacked, nv->data, nv->len);
2767		if (error == 0 && clear) {
2768			counter_u64_zero(rule->evaluations);
2769			for (int i = 0; i < 2; i++) {
2770				counter_u64_zero(rule->packets[i]);
2771				counter_u64_zero(rule->bytes[i]);
2772			}
2773		}
2774
2775#undef ERROUT
2776DIOCGETETHRULE_error:
2777		free(nvlpacked, M_NVLIST);
2778		nvlist_destroy(nvl);
2779		break;
2780	}
2781
2782	case DIOCADDETHRULE: {
2783		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
2784		nvlist_t		*nvl = NULL;
2785		void			*nvlpacked = NULL;
2786		struct pf_keth_rule	*rule = NULL, *tail = NULL;
2787		struct pf_keth_ruleset	*ruleset = NULL;
2788		struct pfi_kkif		*kif = NULL, *bridge_to_kif = NULL;
2789		const char		*anchor = "", *anchor_call = "";
2790
2791#define ERROUT(x)	ERROUT_IOCTL(DIOCADDETHRULE_error, x)
2792
2793		if (nv->len > pf_ioctl_maxcount)
2794			ERROUT(ENOMEM);
2795
2796		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2797		if (nvlpacked == NULL)
2798			ERROUT(ENOMEM);
2799
2800		error = copyin(nv->data, nvlpacked, nv->len);
2801		if (error)
2802			ERROUT(error);
2803
2804		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2805		if (nvl == NULL)
2806			ERROUT(EBADMSG);
2807
2808		if (! nvlist_exists_number(nvl, "ticket"))
2809			ERROUT(EBADMSG);
2810
2811		if (nvlist_exists_string(nvl, "anchor"))
2812			anchor = nvlist_get_string(nvl, "anchor");
2813		if (nvlist_exists_string(nvl, "anchor_call"))
2814			anchor_call = nvlist_get_string(nvl, "anchor_call");
2815
2816		ruleset = pf_find_keth_ruleset(anchor);
2817		if (ruleset == NULL)
2818			ERROUT(EINVAL);
2819
2820		if (nvlist_get_number(nvl, "ticket") !=
2821		    ruleset->inactive.ticket) {
2822			DPFPRINTF(PF_DEBUG_MISC,
2823			    ("ticket: %d != %d\n",
2824			    (u_int32_t)nvlist_get_number(nvl, "ticket"),
2825			    ruleset->inactive.ticket));
2826			ERROUT(EBUSY);
2827		}
2828
2829		rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
2830		if (rule == NULL)
2831			ERROUT(ENOMEM);
2832		rule->timestamp = NULL;
2833
2834		error = pf_nveth_rule_to_keth_rule(nvl, rule);
2835		if (error != 0)
2836			ERROUT(error);
2837
2838		if (rule->ifname[0])
2839			kif = pf_kkif_create(M_WAITOK);
2840		if (rule->bridge_to_name[0])
2841			bridge_to_kif = pf_kkif_create(M_WAITOK);
2842		rule->evaluations = counter_u64_alloc(M_WAITOK);
2843		for (int i = 0; i < 2; i++) {
2844			rule->packets[i] = counter_u64_alloc(M_WAITOK);
2845			rule->bytes[i] = counter_u64_alloc(M_WAITOK);
2846		}
2847		rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
2848		    M_WAITOK | M_ZERO);
2849
2850		PF_RULES_WLOCK();
2851
2852		if (rule->ifname[0]) {
2853			rule->kif = pfi_kkif_attach(kif, rule->ifname);
2854			pfi_kkif_ref(rule->kif);
2855		} else
2856			rule->kif = NULL;
2857		if (rule->bridge_to_name[0]) {
2858			rule->bridge_to = pfi_kkif_attach(bridge_to_kif,
2859			    rule->bridge_to_name);
2860			pfi_kkif_ref(rule->bridge_to);
2861		} else
2862			rule->bridge_to = NULL;
2863
2864#ifdef ALTQ
2865		/* set queue IDs */
2866		if (rule->qname[0] != 0) {
2867			if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
2868				error = EBUSY;
2869			else
2870				rule->qid = rule->qid;
2871		}
2872#endif
2873		if (rule->tagname[0])
2874			if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
2875				error = EBUSY;
2876		if (rule->match_tagname[0])
2877			if ((rule->match_tag = pf_tagname2tag(
2878			    rule->match_tagname)) == 0)
2879				error = EBUSY;
2880
2881		if (error == 0 && rule->ipdst.addr.type == PF_ADDR_TABLE)
2882			error = pf_eth_addr_setup(ruleset, &rule->ipdst.addr);
2883		if (error == 0 && rule->ipsrc.addr.type == PF_ADDR_TABLE)
2884			error = pf_eth_addr_setup(ruleset, &rule->ipsrc.addr);
2885
2886		if (error) {
2887			pf_free_eth_rule(rule);
2888			PF_RULES_WUNLOCK();
2889			ERROUT(error);
2890		}
2891
2892		if (pf_keth_anchor_setup(rule, ruleset, anchor_call)) {
2893			pf_free_eth_rule(rule);
2894			PF_RULES_WUNLOCK();
2895			ERROUT(EINVAL);
2896		}
2897
2898		tail = TAILQ_LAST(ruleset->inactive.rules, pf_keth_ruleq);
2899		if (tail)
2900			rule->nr = tail->nr + 1;
2901		else
2902			rule->nr = 0;
2903
2904		TAILQ_INSERT_TAIL(ruleset->inactive.rules, rule, entries);
2905
2906		PF_RULES_WUNLOCK();
2907
2908#undef ERROUT
2909DIOCADDETHRULE_error:
2910		nvlist_destroy(nvl);
2911		free(nvlpacked, M_NVLIST);
2912		break;
2913	}
2914
2915	case DIOCGETETHRULESETS: {
2916		struct epoch_tracker	 et;
2917		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
2918		nvlist_t		*nvl = NULL;
2919		void			*nvlpacked = NULL;
2920		struct pf_keth_ruleset	*ruleset;
2921		struct pf_keth_anchor	*anchor;
2922		int			 nr = 0;
2923
2924#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULESETS_error, x)
2925
2926		if (nv->len > pf_ioctl_maxcount)
2927			ERROUT(ENOMEM);
2928
2929		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2930		if (nvlpacked == NULL)
2931			ERROUT(ENOMEM);
2932
2933		error = copyin(nv->data, nvlpacked, nv->len);
2934		if (error)
2935			ERROUT(error);
2936
2937		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2938		if (nvl == NULL)
2939			ERROUT(EBADMSG);
2940		if (! nvlist_exists_string(nvl, "path"))
2941			ERROUT(EBADMSG);
2942
2943		NET_EPOCH_ENTER(et);
2944
2945		if ((ruleset = pf_find_keth_ruleset(
2946		    nvlist_get_string(nvl, "path"))) == NULL) {
2947			NET_EPOCH_EXIT(et);
2948			ERROUT(ENOENT);
2949		}
2950
2951		if (ruleset->anchor == NULL) {
2952			RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors)
2953				if (anchor->parent == NULL)
2954					nr++;
2955		} else {
2956			RB_FOREACH(anchor, pf_keth_anchor_node,
2957			    &ruleset->anchor->children)
2958				nr++;
2959		}
2960
2961		NET_EPOCH_EXIT(et);
2962
2963		nvlist_destroy(nvl);
2964		nvl = NULL;
2965		free(nvlpacked, M_NVLIST);
2966		nvlpacked = NULL;
2967
2968		nvl = nvlist_create(0);
2969		if (nvl == NULL)
2970			ERROUT(ENOMEM);
2971
2972		nvlist_add_number(nvl, "nr", nr);
2973
2974		nvlpacked = nvlist_pack(nvl, &nv->len);
2975		if (nvlpacked == NULL)
2976			ERROUT(ENOMEM);
2977
2978		if (nv->size == 0)
2979			ERROUT(0);
2980		else if (nv->size < nv->len)
2981			ERROUT(ENOSPC);
2982
2983		error = copyout(nvlpacked, nv->data, nv->len);
2984
2985#undef ERROUT
2986DIOCGETETHRULESETS_error:
2987		free(nvlpacked, M_NVLIST);
2988		nvlist_destroy(nvl);
2989		break;
2990	}
2991
2992	case DIOCGETETHRULESET: {
2993		struct epoch_tracker	 et;
2994		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
2995		nvlist_t		*nvl = NULL;
2996		void			*nvlpacked = NULL;
2997		struct pf_keth_ruleset	*ruleset;
2998		struct pf_keth_anchor	*anchor;
2999		int			 nr = 0, req_nr = 0;
3000		bool			 found = false;
3001
3002#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULESET_error, x)
3003
3004		if (nv->len > pf_ioctl_maxcount)
3005			ERROUT(ENOMEM);
3006
3007		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
3008		if (nvlpacked == NULL)
3009			ERROUT(ENOMEM);
3010
3011		error = copyin(nv->data, nvlpacked, nv->len);
3012		if (error)
3013			ERROUT(error);
3014
3015		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
3016		if (nvl == NULL)
3017			ERROUT(EBADMSG);
3018		if (! nvlist_exists_string(nvl, "path"))
3019			ERROUT(EBADMSG);
3020		if (! nvlist_exists_number(nvl, "nr"))
3021			ERROUT(EBADMSG);
3022
3023		req_nr = nvlist_get_number(nvl, "nr");
3024
3025		NET_EPOCH_ENTER(et);
3026
3027		if ((ruleset = pf_find_keth_ruleset(
3028		    nvlist_get_string(nvl, "path"))) == NULL) {
3029			NET_EPOCH_EXIT(et);
3030			ERROUT(ENOENT);
3031		}
3032
3033		nvlist_destroy(nvl);
3034		nvl = NULL;
3035		free(nvlpacked, M_NVLIST);
3036		nvlpacked = NULL;
3037
3038		nvl = nvlist_create(0);
3039		if (nvl == NULL) {
3040			NET_EPOCH_EXIT(et);
3041			ERROUT(ENOMEM);
3042		}
3043
3044		if (ruleset->anchor == NULL) {
3045			RB_FOREACH(anchor, pf_keth_anchor_global,
3046			    &V_pf_keth_anchors) {
3047				if (anchor->parent == NULL && nr++ == req_nr) {
3048					found = true;
3049					break;
3050				}
3051			}
3052		} else {
3053			RB_FOREACH(anchor, pf_keth_anchor_node,
3054			     &ruleset->anchor->children) {
3055				if (nr++ == req_nr) {
3056					found = true;
3057					break;
3058				}
3059			}
3060		}
3061
3062		NET_EPOCH_EXIT(et);
3063		if (found) {
3064			nvlist_add_number(nvl, "nr", nr);
3065			nvlist_add_string(nvl, "name", anchor->name);
3066			if (ruleset->anchor)
3067				nvlist_add_string(nvl, "path",
3068				    ruleset->anchor->path);
3069			else
3070				nvlist_add_string(nvl, "path", "");
3071		} else {
3072			ERROUT(EBUSY);
3073		}
3074
3075		nvlpacked = nvlist_pack(nvl, &nv->len);
3076		if (nvlpacked == NULL)
3077			ERROUT(ENOMEM);
3078
3079		if (nv->size == 0)
3080			ERROUT(0);
3081		else if (nv->size < nv->len)
3082			ERROUT(ENOSPC);
3083
3084		error = copyout(nvlpacked, nv->data, nv->len);
3085
3086#undef ERROUT
3087DIOCGETETHRULESET_error:
3088		free(nvlpacked, M_NVLIST);
3089		nvlist_destroy(nvl);
3090		break;
3091	}
3092
3093	case DIOCADDRULENV: {
3094		struct pfioc_nv	*nv = (struct pfioc_nv *)addr;
3095		nvlist_t	*nvl = NULL;
3096		void		*nvlpacked = NULL;
3097		struct pf_krule	*rule = NULL;
3098		const char	*anchor = "", *anchor_call = "";
3099		uint32_t	 ticket = 0, pool_ticket = 0;
3100
3101#define	ERROUT(x)	ERROUT_IOCTL(DIOCADDRULENV_error, x)
3102
3103		if (nv->len > pf_ioctl_maxcount)
3104			ERROUT(ENOMEM);
3105
3106		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
3107		error = copyin(nv->data, nvlpacked, nv->len);
3108		if (error)
3109			ERROUT(error);
3110
3111		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
3112		if (nvl == NULL)
3113			ERROUT(EBADMSG);
3114
3115		if (! nvlist_exists_number(nvl, "ticket"))
3116			ERROUT(EINVAL);
3117		ticket = nvlist_get_number(nvl, "ticket");
3118
3119		if (! nvlist_exists_number(nvl, "pool_ticket"))
3120			ERROUT(EINVAL);
3121		pool_ticket = nvlist_get_number(nvl, "pool_ticket");
3122
3123		if (! nvlist_exists_nvlist(nvl, "rule"))
3124			ERROUT(EINVAL);
3125
3126		rule = pf_krule_alloc();
3127		error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"),
3128		    rule);
3129		if (error)
3130			ERROUT(error);
3131
3132		if (nvlist_exists_string(nvl, "anchor"))
3133			anchor = nvlist_get_string(nvl, "anchor");
3134		if (nvlist_exists_string(nvl, "anchor_call"))
3135			anchor_call = nvlist_get_string(nvl, "anchor_call");
3136
3137		if ((error = nvlist_error(nvl)))
3138			ERROUT(error);
3139
3140		/* Frees rule on error */
3141		error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor,
3142		    anchor_call, td->td_ucred->cr_ruid,
3143		    td->td_proc ? td->td_proc->p_pid : 0);
3144
3145		nvlist_destroy(nvl);
3146		free(nvlpacked, M_NVLIST);
3147		break;
3148#undef ERROUT
3149DIOCADDRULENV_error:
3150		pf_krule_free(rule);
3151		nvlist_destroy(nvl);
3152		free(nvlpacked, M_NVLIST);
3153
3154		break;
3155	}
3156	case DIOCADDRULE: {
3157		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
3158		struct pf_krule		*rule;
3159
3160		rule = pf_krule_alloc();
3161		error = pf_rule_to_krule(&pr->rule, rule);
3162		if (error != 0) {
3163			pf_krule_free(rule);
3164			break;
3165		}
3166
3167		pr->anchor[sizeof(pr->anchor) - 1] = 0;
3168
3169		/* Frees rule on error */
3170		error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket,
3171		    pr->anchor, pr->anchor_call, td->td_ucred->cr_ruid,
3172		    td->td_proc ? td->td_proc->p_pid : 0);
3173		break;
3174	}
3175
3176	case DIOCGETRULES: {
3177		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
3178
3179		pr->anchor[sizeof(pr->anchor) - 1] = 0;
3180
3181		error = pf_ioctl_getrules(pr);
3182
3183		break;
3184	}
3185
3186	case DIOCGETRULENV: {
3187		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
3188		nvlist_t		*nvrule = NULL;
3189		nvlist_t		*nvl = NULL;
3190		struct pf_kruleset	*ruleset;
3191		struct pf_krule		*rule;
3192		void			*nvlpacked = NULL;
3193		int			 rs_num, nr;
3194		bool			 clear_counter = false;
3195
3196#define	ERROUT(x)	ERROUT_IOCTL(DIOCGETRULENV_error, x)
3197
3198		if (nv->len > pf_ioctl_maxcount)
3199			ERROUT(ENOMEM);
3200
3201		/* Copy the request in */
3202		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
3203		if (nvlpacked == NULL)
3204			ERROUT(ENOMEM);
3205
3206		error = copyin(nv->data, nvlpacked, nv->len);
3207		if (error)
3208			ERROUT(error);
3209
3210		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
3211		if (nvl == NULL)
3212			ERROUT(EBADMSG);
3213
3214		if (! nvlist_exists_string(nvl, "anchor"))
3215			ERROUT(EBADMSG);
3216		if (! nvlist_exists_number(nvl, "ruleset"))
3217			ERROUT(EBADMSG);
3218		if (! nvlist_exists_number(nvl, "ticket"))
3219			ERROUT(EBADMSG);
3220		if (! nvlist_exists_number(nvl, "nr"))
3221			ERROUT(EBADMSG);
3222
3223		if (nvlist_exists_bool(nvl, "clear_counter"))
3224			clear_counter = nvlist_get_bool(nvl, "clear_counter");
3225
3226		if (clear_counter && !(flags & FWRITE))
3227			ERROUT(EACCES);
3228
3229		nr = nvlist_get_number(nvl, "nr");
3230
3231		PF_RULES_WLOCK();
3232		ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor"));
3233		if (ruleset == NULL) {
3234			PF_RULES_WUNLOCK();
3235			ERROUT(ENOENT);
3236		}
3237
3238		rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset"));
3239		if (rs_num >= PF_RULESET_MAX) {
3240			PF_RULES_WUNLOCK();
3241			ERROUT(EINVAL);
3242		}
3243
3244		if (nvlist_get_number(nvl, "ticket") !=
3245		    ruleset->rules[rs_num].active.ticket) {
3246			PF_RULES_WUNLOCK();
3247			ERROUT(EBUSY);
3248		}
3249
3250		if ((error = nvlist_error(nvl))) {
3251			PF_RULES_WUNLOCK();
3252			ERROUT(error);
3253		}
3254
3255		rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
3256		while ((rule != NULL) && (rule->nr != nr))
3257			rule = TAILQ_NEXT(rule, entries);
3258		if (rule == NULL) {
3259			PF_RULES_WUNLOCK();
3260			ERROUT(EBUSY);
3261		}
3262
3263		nvrule = pf_krule_to_nvrule(rule);
3264
3265		nvlist_destroy(nvl);
3266		nvl = nvlist_create(0);
3267		if (nvl == NULL) {
3268			PF_RULES_WUNLOCK();
3269			ERROUT(ENOMEM);
3270		}
3271		nvlist_add_number(nvl, "nr", nr);
3272		nvlist_add_nvlist(nvl, "rule", nvrule);
3273		nvlist_destroy(nvrule);
3274		nvrule = NULL;
3275		if (pf_kanchor_nvcopyout(ruleset, rule, nvl)) {
3276			PF_RULES_WUNLOCK();
3277			ERROUT(EBUSY);
3278		}
3279
3280		free(nvlpacked, M_NVLIST);
3281		nvlpacked = nvlist_pack(nvl, &nv->len);
3282		if (nvlpacked == NULL) {
3283			PF_RULES_WUNLOCK();
3284			ERROUT(ENOMEM);
3285		}
3286
3287		if (nv->size == 0) {
3288			PF_RULES_WUNLOCK();
3289			ERROUT(0);
3290		}
3291		else if (nv->size < nv->len) {
3292			PF_RULES_WUNLOCK();
3293			ERROUT(ENOSPC);
3294		}
3295
3296		if (clear_counter)
3297			pf_krule_clear_counters(rule);
3298
3299		PF_RULES_WUNLOCK();
3300
3301		error = copyout(nvlpacked, nv->data, nv->len);
3302
3303#undef ERROUT
3304DIOCGETRULENV_error:
3305		free(nvlpacked, M_NVLIST);
3306		nvlist_destroy(nvrule);
3307		nvlist_destroy(nvl);
3308
3309		break;
3310	}
3311
3312	case DIOCCHANGERULE: {
3313		struct pfioc_rule	*pcr = (struct pfioc_rule *)addr;
3314		struct pf_kruleset	*ruleset;
3315		struct pf_krule		*oldrule = NULL, *newrule = NULL;
3316		struct pfi_kkif		*kif = NULL;
3317		struct pf_kpooladdr	*pa;
3318		u_int32_t		 nr = 0;
3319		int			 rs_num;
3320
3321		pcr->anchor[sizeof(pcr->anchor) - 1] = 0;
3322
3323		if (pcr->action < PF_CHANGE_ADD_HEAD ||
3324		    pcr->action > PF_CHANGE_GET_TICKET) {
3325			error = EINVAL;
3326			break;
3327		}
3328		if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
3329			error = EINVAL;
3330			break;
3331		}
3332
3333		if (pcr->action != PF_CHANGE_REMOVE) {
3334			newrule = pf_krule_alloc();
3335			error = pf_rule_to_krule(&pcr->rule, newrule);
3336			if (error != 0) {
3337				pf_krule_free(newrule);
3338				break;
3339			}
3340
3341			if (newrule->ifname[0])
3342				kif = pf_kkif_create(M_WAITOK);
3343			pf_counter_u64_init(&newrule->evaluations, M_WAITOK);
3344			for (int i = 0; i < 2; i++) {
3345				pf_counter_u64_init(&newrule->packets[i], M_WAITOK);
3346				pf_counter_u64_init(&newrule->bytes[i], M_WAITOK);
3347			}
3348			newrule->states_cur = counter_u64_alloc(M_WAITOK);
3349			newrule->states_tot = counter_u64_alloc(M_WAITOK);
3350			newrule->src_nodes = counter_u64_alloc(M_WAITOK);
3351			newrule->cuid = td->td_ucred->cr_ruid;
3352			newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
3353			TAILQ_INIT(&newrule->rpool.list);
3354		}
3355#define	ERROUT(x)	ERROUT_IOCTL(DIOCCHANGERULE_error, x)
3356
3357		PF_CONFIG_LOCK();
3358		PF_RULES_WLOCK();
3359#ifdef PF_WANT_32_TO_64_COUNTER
3360		if (newrule != NULL) {
3361			LIST_INSERT_HEAD(&V_pf_allrulelist, newrule, allrulelist);
3362			newrule->allrulelinked = true;
3363			V_pf_allrulecount++;
3364		}
3365#endif
3366
3367		if (!(pcr->action == PF_CHANGE_REMOVE ||
3368		    pcr->action == PF_CHANGE_GET_TICKET) &&
3369		    pcr->pool_ticket != V_ticket_pabuf)
3370			ERROUT(EBUSY);
3371
3372		ruleset = pf_find_kruleset(pcr->anchor);
3373		if (ruleset == NULL)
3374			ERROUT(EINVAL);
3375
3376		rs_num = pf_get_ruleset_number(pcr->rule.action);
3377		if (rs_num >= PF_RULESET_MAX)
3378			ERROUT(EINVAL);
3379
3380		/*
3381		 * XXXMJG: there is no guarantee that the ruleset was
3382		 * created by the usual route of calling DIOCXBEGIN.
3383		 * As a result it is possible the rule tree will not
3384		 * be allocated yet. Hack around it by doing it here.
3385		 * Note it is fine to let the tree persist in case of
3386		 * error as it will be freed down the road on future
3387		 * updates (if need be).
3388		 */
3389		if (ruleset->rules[rs_num].active.tree == NULL) {
3390			ruleset->rules[rs_num].active.tree = pf_rule_tree_alloc(M_NOWAIT);
3391			if (ruleset->rules[rs_num].active.tree == NULL) {
3392				ERROUT(ENOMEM);
3393			}
3394		}
3395
3396		if (pcr->action == PF_CHANGE_GET_TICKET) {
3397			pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
3398			ERROUT(0);
3399		} else if (pcr->ticket !=
3400			    ruleset->rules[rs_num].active.ticket)
3401				ERROUT(EINVAL);
3402
3403		if (pcr->action != PF_CHANGE_REMOVE) {
3404			if (newrule->ifname[0]) {
3405				newrule->kif = pfi_kkif_attach(kif,
3406				    newrule->ifname);
3407				kif = NULL;
3408				pfi_kkif_ref(newrule->kif);
3409			} else
3410				newrule->kif = NULL;
3411
3412			if (newrule->rtableid > 0 &&
3413			    newrule->rtableid >= rt_numfibs)
3414				error = EBUSY;
3415
3416#ifdef ALTQ
3417			/* set queue IDs */
3418			if (newrule->qname[0] != 0) {
3419				if ((newrule->qid =
3420				    pf_qname2qid(newrule->qname)) == 0)
3421					error = EBUSY;
3422				else if (newrule->pqname[0] != 0) {
3423					if ((newrule->pqid =
3424					    pf_qname2qid(newrule->pqname)) == 0)
3425						error = EBUSY;
3426				} else
3427					newrule->pqid = newrule->qid;
3428			}
3429#endif /* ALTQ */
3430			if (newrule->tagname[0])
3431				if ((newrule->tag =
3432				    pf_tagname2tag(newrule->tagname)) == 0)
3433					error = EBUSY;
3434			if (newrule->match_tagname[0])
3435				if ((newrule->match_tag = pf_tagname2tag(
3436				    newrule->match_tagname)) == 0)
3437					error = EBUSY;
3438			if (newrule->rt && !newrule->direction)
3439				error = EINVAL;
3440			if (!newrule->log)
3441				newrule->logif = 0;
3442			if (newrule->logif >= PFLOGIFS_MAX)
3443				error = EINVAL;
3444			if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
3445				error = ENOMEM;
3446			if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
3447				error = ENOMEM;
3448			if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call))
3449				error = EINVAL;
3450			TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
3451				if (pa->addr.type == PF_ADDR_TABLE) {
3452					pa->addr.p.tbl =
3453					    pfr_attach_table(ruleset,
3454					    pa->addr.v.tblname);
3455					if (pa->addr.p.tbl == NULL)
3456						error = ENOMEM;
3457				}
3458
3459			newrule->overload_tbl = NULL;
3460			if (newrule->overload_tblname[0]) {
3461				if ((newrule->overload_tbl = pfr_attach_table(
3462				    ruleset, newrule->overload_tblname)) ==
3463				    NULL)
3464					error = EINVAL;
3465				else
3466					newrule->overload_tbl->pfrkt_flags |=
3467					    PFR_TFLAG_ACTIVE;
3468			}
3469
3470			pf_mv_kpool(&V_pf_pabuf, &newrule->rpool.list);
3471			if (((((newrule->action == PF_NAT) ||
3472			    (newrule->action == PF_RDR) ||
3473			    (newrule->action == PF_BINAT) ||
3474			    (newrule->rt > PF_NOPFROUTE)) &&
3475			    !newrule->anchor)) &&
3476			    (TAILQ_FIRST(&newrule->rpool.list) == NULL))
3477				error = EINVAL;
3478
3479			if (error) {
3480				pf_free_rule(newrule);
3481				PF_RULES_WUNLOCK();
3482				PF_CONFIG_UNLOCK();
3483				break;
3484			}
3485
3486			newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
3487		}
3488		pf_empty_kpool(&V_pf_pabuf);
3489
3490		if (pcr->action == PF_CHANGE_ADD_HEAD)
3491			oldrule = TAILQ_FIRST(
3492			    ruleset->rules[rs_num].active.ptr);
3493		else if (pcr->action == PF_CHANGE_ADD_TAIL)
3494			oldrule = TAILQ_LAST(
3495			    ruleset->rules[rs_num].active.ptr, pf_krulequeue);
3496		else {
3497			oldrule = TAILQ_FIRST(
3498			    ruleset->rules[rs_num].active.ptr);
3499			while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
3500				oldrule = TAILQ_NEXT(oldrule, entries);
3501			if (oldrule == NULL) {
3502				if (newrule != NULL)
3503					pf_free_rule(newrule);
3504				PF_RULES_WUNLOCK();
3505				PF_CONFIG_UNLOCK();
3506				error = EINVAL;
3507				break;
3508			}
3509		}
3510
3511		if (pcr->action == PF_CHANGE_REMOVE) {
3512			pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
3513			    oldrule);
3514			RB_REMOVE(pf_krule_global,
3515			    ruleset->rules[rs_num].active.tree, oldrule);
3516			ruleset->rules[rs_num].active.rcount--;
3517		} else {
3518			pf_hash_rule(newrule);
3519			if (RB_INSERT(pf_krule_global,
3520			    ruleset->rules[rs_num].active.tree, newrule) != NULL) {
3521				pf_free_rule(newrule);
3522				PF_RULES_WUNLOCK();
3523				PF_CONFIG_UNLOCK();
3524				error = EEXIST;
3525				break;
3526			}
3527
3528			if (oldrule == NULL)
3529				TAILQ_INSERT_TAIL(
3530				    ruleset->rules[rs_num].active.ptr,
3531				    newrule, entries);
3532			else if (pcr->action == PF_CHANGE_ADD_HEAD ||
3533			    pcr->action == PF_CHANGE_ADD_BEFORE)
3534				TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
3535			else
3536				TAILQ_INSERT_AFTER(
3537				    ruleset->rules[rs_num].active.ptr,
3538				    oldrule, newrule, entries);
3539			ruleset->rules[rs_num].active.rcount++;
3540		}
3541
3542		nr = 0;
3543		TAILQ_FOREACH(oldrule,
3544		    ruleset->rules[rs_num].active.ptr, entries)
3545			oldrule->nr = nr++;
3546
3547		ruleset->rules[rs_num].active.ticket++;
3548
3549		pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
3550		pf_remove_if_empty_kruleset(ruleset);
3551
3552		PF_RULES_WUNLOCK();
3553		PF_CONFIG_UNLOCK();
3554		break;
3555
3556#undef ERROUT
3557DIOCCHANGERULE_error:
3558		PF_RULES_WUNLOCK();
3559		PF_CONFIG_UNLOCK();
3560		pf_krule_free(newrule);
3561		pf_kkif_free(kif);
3562		break;
3563	}
3564
3565	case DIOCCLRSTATESNV: {
3566		error = pf_clearstates_nv((struct pfioc_nv *)addr);
3567		break;
3568	}
3569
3570	case DIOCKILLSTATESNV: {
3571		error = pf_killstates_nv((struct pfioc_nv *)addr);
3572		break;
3573	}
3574
3575	case DIOCADDSTATE: {
3576		struct pfioc_state		*ps = (struct pfioc_state *)addr;
3577		struct pfsync_state_1301	*sp = &ps->state;
3578
3579		if (sp->timeout >= PFTM_MAX) {
3580			error = EINVAL;
3581			break;
3582		}
3583		if (V_pfsync_state_import_ptr != NULL) {
3584			PF_RULES_RLOCK();
3585			error = V_pfsync_state_import_ptr(
3586			    (union pfsync_state_union *)sp, PFSYNC_SI_IOCTL,
3587			    PFSYNC_MSG_VERSION_1301);
3588			PF_RULES_RUNLOCK();
3589		} else
3590			error = EOPNOTSUPP;
3591		break;
3592	}
3593
3594	case DIOCGETSTATE: {
3595		struct pfioc_state	*ps = (struct pfioc_state *)addr;
3596		struct pf_kstate	*s;
3597
3598		s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
3599		if (s == NULL) {
3600			error = ENOENT;
3601			break;
3602		}
3603
3604		pfsync_state_export((union pfsync_state_union*)&ps->state,
3605		    s, PFSYNC_MSG_VERSION_1301);
3606		PF_STATE_UNLOCK(s);
3607		break;
3608	}
3609
3610	case DIOCGETSTATENV: {
3611		error = pf_getstate((struct pfioc_nv *)addr);
3612		break;
3613	}
3614
3615#ifdef COMPAT_FREEBSD14
3616	case DIOCGETSTATES: {
3617		struct pfioc_states	*ps = (struct pfioc_states *)addr;
3618		struct pf_kstate	*s;
3619		struct pfsync_state_1301	*pstore, *p;
3620		int			 i, nr;
3621		size_t			 slice_count = 16, count;
3622		void			*out;
3623
3624		if (ps->ps_len <= 0) {
3625			nr = uma_zone_get_cur(V_pf_state_z);
3626			ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
3627			break;
3628		}
3629
3630		out = ps->ps_states;
3631		pstore = mallocarray(slice_count,
3632		    sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO);
3633		nr = 0;
3634
3635		for (i = 0; i <= pf_hashmask; i++) {
3636			struct pf_idhash *ih = &V_pf_idhash[i];
3637
3638DIOCGETSTATES_retry:
3639			p = pstore;
3640
3641			if (LIST_EMPTY(&ih->states))
3642				continue;
3643
3644			PF_HASHROW_LOCK(ih);
3645			count = 0;
3646			LIST_FOREACH(s, &ih->states, entry) {
3647				if (s->timeout == PFTM_UNLINKED)
3648					continue;
3649				count++;
3650			}
3651
3652			if (count > slice_count) {
3653				PF_HASHROW_UNLOCK(ih);
3654				free(pstore, M_TEMP);
3655				slice_count = count * 2;
3656				pstore = mallocarray(slice_count,
3657				    sizeof(struct pfsync_state_1301), M_TEMP,
3658				    M_WAITOK | M_ZERO);
3659				goto DIOCGETSTATES_retry;
3660			}
3661
3662			if ((nr+count) * sizeof(*p) > ps->ps_len) {
3663				PF_HASHROW_UNLOCK(ih);
3664				goto DIOCGETSTATES_full;
3665			}
3666
3667			LIST_FOREACH(s, &ih->states, entry) {
3668				if (s->timeout == PFTM_UNLINKED)
3669					continue;
3670
3671				pfsync_state_export((union pfsync_state_union*)p,
3672				    s, PFSYNC_MSG_VERSION_1301);
3673				p++;
3674				nr++;
3675			}
3676			PF_HASHROW_UNLOCK(ih);
3677			error = copyout(pstore, out,
3678			    sizeof(struct pfsync_state_1301) * count);
3679			if (error)
3680				break;
3681			out = ps->ps_states + nr;
3682		}
3683DIOCGETSTATES_full:
3684		ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
3685		free(pstore, M_TEMP);
3686
3687		break;
3688	}
3689
3690	case DIOCGETSTATESV2: {
3691		struct pfioc_states_v2	*ps = (struct pfioc_states_v2 *)addr;
3692		struct pf_kstate	*s;
3693		struct pf_state_export	*pstore, *p;
3694		int i, nr;
3695		size_t slice_count = 16, count;
3696		void *out;
3697
3698		if (ps->ps_req_version > PF_STATE_VERSION) {
3699			error = ENOTSUP;
3700			break;
3701		}
3702
3703		if (ps->ps_len <= 0) {
3704			nr = uma_zone_get_cur(V_pf_state_z);
3705			ps->ps_len = sizeof(struct pf_state_export) * nr;
3706			break;
3707		}
3708
3709		out = ps->ps_states;
3710		pstore = mallocarray(slice_count,
3711		    sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO);
3712		nr = 0;
3713
3714		for (i = 0; i <= pf_hashmask; i++) {
3715			struct pf_idhash *ih = &V_pf_idhash[i];
3716
3717DIOCGETSTATESV2_retry:
3718			p = pstore;
3719
3720			if (LIST_EMPTY(&ih->states))
3721				continue;
3722
3723			PF_HASHROW_LOCK(ih);
3724			count = 0;
3725			LIST_FOREACH(s, &ih->states, entry) {
3726				if (s->timeout == PFTM_UNLINKED)
3727					continue;
3728				count++;
3729			}
3730
3731			if (count > slice_count) {
3732				PF_HASHROW_UNLOCK(ih);
3733				free(pstore, M_TEMP);
3734				slice_count = count * 2;
3735				pstore = mallocarray(slice_count,
3736				    sizeof(struct pf_state_export), M_TEMP,
3737				    M_WAITOK | M_ZERO);
3738				goto DIOCGETSTATESV2_retry;
3739			}
3740
3741			if ((nr+count) * sizeof(*p) > ps->ps_len) {
3742				PF_HASHROW_UNLOCK(ih);
3743				goto DIOCGETSTATESV2_full;
3744			}
3745
3746			LIST_FOREACH(s, &ih->states, entry) {
3747				if (s->timeout == PFTM_UNLINKED)
3748					continue;
3749
3750				pf_state_export(p, s);
3751				p++;
3752				nr++;
3753			}
3754			PF_HASHROW_UNLOCK(ih);
3755			error = copyout(pstore, out,
3756			    sizeof(struct pf_state_export) * count);
3757			if (error)
3758				break;
3759			out = ps->ps_states + nr;
3760		}
3761DIOCGETSTATESV2_full:
3762		ps->ps_len = nr * sizeof(struct pf_state_export);
3763		free(pstore, M_TEMP);
3764
3765		break;
3766	}
3767#endif
3768	case DIOCGETSTATUSNV: {
3769		error = pf_getstatus((struct pfioc_nv *)addr);
3770		break;
3771	}
3772
3773	case DIOCSETSTATUSIF: {
3774		struct pfioc_if	*pi = (struct pfioc_if *)addr;
3775
3776		if (pi->ifname[0] == 0) {
3777			bzero(V_pf_status.ifname, IFNAMSIZ);
3778			break;
3779		}
3780		PF_RULES_WLOCK();
3781		error = pf_user_strcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
3782		PF_RULES_WUNLOCK();
3783		break;
3784	}
3785
3786	case DIOCCLRSTATUS: {
3787		pf_ioctl_clear_status();
3788		break;
3789	}
3790
3791	case DIOCNATLOOK: {
3792		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
3793		struct pf_state_key	*sk;
3794		struct pf_kstate	*state;
3795		struct pf_state_key_cmp	 key;
3796		int			 m = 0, direction = pnl->direction;
3797		int			 sidx, didx;
3798
3799		/* NATLOOK src and dst are reversed, so reverse sidx/didx */
3800		sidx = (direction == PF_IN) ? 1 : 0;
3801		didx = (direction == PF_IN) ? 0 : 1;
3802
3803		if (!pnl->proto ||
3804		    PF_AZERO(&pnl->saddr, pnl->af) ||
3805		    PF_AZERO(&pnl->daddr, pnl->af) ||
3806		    ((pnl->proto == IPPROTO_TCP ||
3807		    pnl->proto == IPPROTO_UDP) &&
3808		    (!pnl->dport || !pnl->sport)))
3809			error = EINVAL;
3810		else {
3811			bzero(&key, sizeof(key));
3812			key.af = pnl->af;
3813			key.proto = pnl->proto;
3814			PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
3815			key.port[sidx] = pnl->sport;
3816			PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
3817			key.port[didx] = pnl->dport;
3818
3819			state = pf_find_state_all(&key, direction, &m);
3820			if (state == NULL) {
3821				error = ENOENT;
3822			} else {
3823				if (m > 1) {
3824					PF_STATE_UNLOCK(state);
3825					error = E2BIG;	/* more than one state */
3826				} else {
3827					sk = state->key[sidx];
3828					PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
3829					pnl->rsport = sk->port[sidx];
3830					PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
3831					pnl->rdport = sk->port[didx];
3832					PF_STATE_UNLOCK(state);
3833				}
3834			}
3835		}
3836		break;
3837	}
3838
3839	case DIOCSETTIMEOUT: {
3840		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
3841		int		 old;
3842
3843		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
3844		    pt->seconds < 0) {
3845			error = EINVAL;
3846			break;
3847		}
3848		PF_RULES_WLOCK();
3849		old = V_pf_default_rule.timeout[pt->timeout];
3850		if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
3851			pt->seconds = 1;
3852		V_pf_default_rule.timeout[pt->timeout] = pt->seconds;
3853		if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
3854			wakeup(pf_purge_thread);
3855		pt->seconds = old;
3856		PF_RULES_WUNLOCK();
3857		break;
3858	}
3859
3860	case DIOCGETTIMEOUT: {
3861		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
3862
3863		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
3864			error = EINVAL;
3865			break;
3866		}
3867		PF_RULES_RLOCK();
3868		pt->seconds = V_pf_default_rule.timeout[pt->timeout];
3869		PF_RULES_RUNLOCK();
3870		break;
3871	}
3872
3873	case DIOCGETLIMIT: {
3874		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
3875
3876		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
3877			error = EINVAL;
3878			break;
3879		}
3880		PF_RULES_RLOCK();
3881		pl->limit = V_pf_limits[pl->index].limit;
3882		PF_RULES_RUNLOCK();
3883		break;
3884	}
3885
3886	case DIOCSETLIMIT: {
3887		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
3888		int			 old_limit;
3889
3890		PF_RULES_WLOCK();
3891		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
3892		    V_pf_limits[pl->index].zone == NULL) {
3893			PF_RULES_WUNLOCK();
3894			error = EINVAL;
3895			break;
3896		}
3897		uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit);
3898		old_limit = V_pf_limits[pl->index].limit;
3899		V_pf_limits[pl->index].limit = pl->limit;
3900		pl->limit = old_limit;
3901		PF_RULES_WUNLOCK();
3902		break;
3903	}
3904
3905	case DIOCSETDEBUG: {
3906		u_int32_t	*level = (u_int32_t *)addr;
3907
3908		PF_RULES_WLOCK();
3909		V_pf_status.debug = *level;
3910		PF_RULES_WUNLOCK();
3911		break;
3912	}
3913
3914	case DIOCCLRRULECTRS: {
3915		/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
3916		struct pf_kruleset	*ruleset = &pf_main_ruleset;
3917		struct pf_krule		*rule;
3918
3919		PF_RULES_WLOCK();
3920		TAILQ_FOREACH(rule,
3921		    ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
3922			pf_counter_u64_zero(&rule->evaluations);
3923			for (int i = 0; i < 2; i++) {
3924				pf_counter_u64_zero(&rule->packets[i]);
3925				pf_counter_u64_zero(&rule->bytes[i]);
3926			}
3927		}
3928		PF_RULES_WUNLOCK();
3929		break;
3930	}
3931
3932	case DIOCGIFSPEEDV0:
3933	case DIOCGIFSPEEDV1: {
3934		struct pf_ifspeed_v1	*psp = (struct pf_ifspeed_v1 *)addr;
3935		struct pf_ifspeed_v1	ps;
3936		struct ifnet		*ifp;
3937
3938		if (psp->ifname[0] == '\0') {
3939			error = EINVAL;
3940			break;
3941		}
3942
3943		error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ);
3944		if (error != 0)
3945			break;
3946		ifp = ifunit(ps.ifname);
3947		if (ifp != NULL) {
3948			psp->baudrate32 =
3949			    (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX);
3950			if (cmd == DIOCGIFSPEEDV1)
3951				psp->baudrate = ifp->if_baudrate;
3952		} else {
3953			error = EINVAL;
3954		}
3955		break;
3956	}
3957
3958#ifdef ALTQ
3959	case DIOCSTARTALTQ: {
3960		struct pf_altq		*altq;
3961
3962		PF_RULES_WLOCK();
3963		/* enable all altq interfaces on active list */
3964		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
3965			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
3966				error = pf_enable_altq(altq);
3967				if (error != 0)
3968					break;
3969			}
3970		}
3971		if (error == 0)
3972			V_pf_altq_running = 1;
3973		PF_RULES_WUNLOCK();
3974		DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
3975		break;
3976	}
3977
3978	case DIOCSTOPALTQ: {
3979		struct pf_altq		*altq;
3980
3981		PF_RULES_WLOCK();
3982		/* disable all altq interfaces on active list */
3983		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
3984			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
3985				error = pf_disable_altq(altq);
3986				if (error != 0)
3987					break;
3988			}
3989		}
3990		if (error == 0)
3991			V_pf_altq_running = 0;
3992		PF_RULES_WUNLOCK();
3993		DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
3994		break;
3995	}
3996
3997	case DIOCADDALTQV0:
3998	case DIOCADDALTQV1: {
3999		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
4000		struct pf_altq		*altq, *a;
4001		struct ifnet		*ifp;
4002
4003		altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO);
4004		error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd));
4005		if (error)
4006			break;
4007		altq->local_flags = 0;
4008
4009		PF_RULES_WLOCK();
4010		if (pa->ticket != V_ticket_altqs_inactive) {
4011			PF_RULES_WUNLOCK();
4012			free(altq, M_PFALTQ);
4013			error = EBUSY;
4014			break;
4015		}
4016
4017		/*
4018		 * if this is for a queue, find the discipline and
4019		 * copy the necessary fields
4020		 */
4021		if (altq->qname[0] != 0) {
4022			if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
4023				PF_RULES_WUNLOCK();
4024				error = EBUSY;
4025				free(altq, M_PFALTQ);
4026				break;
4027			}
4028			altq->altq_disc = NULL;
4029			TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) {
4030				if (strncmp(a->ifname, altq->ifname,
4031				    IFNAMSIZ) == 0) {
4032					altq->altq_disc = a->altq_disc;
4033					break;
4034				}
4035			}
4036		}
4037
4038		if ((ifp = ifunit(altq->ifname)) == NULL)
4039			altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
4040		else
4041			error = altq_add(ifp, altq);
4042
4043		if (error) {
4044			PF_RULES_WUNLOCK();
4045			free(altq, M_PFALTQ);
4046			break;
4047		}
4048
4049		if (altq->qname[0] != 0)
4050			TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
4051		else
4052			TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries);
4053		/* version error check done on import above */
4054		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
4055		PF_RULES_WUNLOCK();
4056		break;
4057	}
4058
4059	case DIOCGETALTQSV0:
4060	case DIOCGETALTQSV1: {
4061		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
4062		struct pf_altq		*altq;
4063
4064		PF_RULES_RLOCK();
4065		pa->nr = 0;
4066		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries)
4067			pa->nr++;
4068		TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
4069			pa->nr++;
4070		pa->ticket = V_ticket_altqs_active;
4071		PF_RULES_RUNLOCK();
4072		break;
4073	}
4074
4075	case DIOCGETALTQV0:
4076	case DIOCGETALTQV1: {
4077		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
4078		struct pf_altq		*altq;
4079
4080		PF_RULES_RLOCK();
4081		if (pa->ticket != V_ticket_altqs_active) {
4082			PF_RULES_RUNLOCK();
4083			error = EBUSY;
4084			break;
4085		}
4086		altq = pf_altq_get_nth_active(pa->nr);
4087		if (altq == NULL) {
4088			PF_RULES_RUNLOCK();
4089			error = EBUSY;
4090			break;
4091		}
4092		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
4093		PF_RULES_RUNLOCK();
4094		break;
4095	}
4096
4097	case DIOCCHANGEALTQV0:
4098	case DIOCCHANGEALTQV1:
4099		/* CHANGEALTQ not supported yet! */
4100		error = ENODEV;
4101		break;
4102
4103	case DIOCGETQSTATSV0:
4104	case DIOCGETQSTATSV1: {
4105		struct pfioc_qstats_v1	*pq = (struct pfioc_qstats_v1 *)addr;
4106		struct pf_altq		*altq;
4107		int			 nbytes;
4108		u_int32_t		 version;
4109
4110		PF_RULES_RLOCK();
4111		if (pq->ticket != V_ticket_altqs_active) {
4112			PF_RULES_RUNLOCK();
4113			error = EBUSY;
4114			break;
4115		}
4116		nbytes = pq->nbytes;
4117		altq = pf_altq_get_nth_active(pq->nr);
4118		if (altq == NULL) {
4119			PF_RULES_RUNLOCK();
4120			error = EBUSY;
4121			break;
4122		}
4123
4124		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
4125			PF_RULES_RUNLOCK();
4126			error = ENXIO;
4127			break;
4128		}
4129		PF_RULES_RUNLOCK();
4130		if (cmd == DIOCGETQSTATSV0)
4131			version = 0;  /* DIOCGETQSTATSV0 means stats struct v0 */
4132		else
4133			version = pq->version;
4134		error = altq_getqstats(altq, pq->buf, &nbytes, version);
4135		if (error == 0) {
4136			pq->scheduler = altq->scheduler;
4137			pq->nbytes = nbytes;
4138		}
4139		break;
4140	}
4141#endif /* ALTQ */
4142
4143	case DIOCBEGINADDRS: {
4144		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
4145
4146		PF_RULES_WLOCK();
4147		pf_empty_kpool(&V_pf_pabuf);
4148		pp->ticket = ++V_ticket_pabuf;
4149		PF_RULES_WUNLOCK();
4150		break;
4151	}
4152
4153	case DIOCADDADDR: {
4154		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
4155		struct pf_kpooladdr	*pa;
4156		struct pfi_kkif		*kif = NULL;
4157
4158#ifndef INET
4159		if (pp->af == AF_INET) {
4160			error = EAFNOSUPPORT;
4161			break;
4162		}
4163#endif /* INET */
4164#ifndef INET6
4165		if (pp->af == AF_INET6) {
4166			error = EAFNOSUPPORT;
4167			break;
4168		}
4169#endif /* INET6 */
4170		if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
4171		    pp->addr.addr.type != PF_ADDR_DYNIFTL &&
4172		    pp->addr.addr.type != PF_ADDR_TABLE) {
4173			error = EINVAL;
4174			break;
4175		}
4176		if (pp->addr.addr.p.dyn != NULL) {
4177			error = EINVAL;
4178			break;
4179		}
4180		pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK);
4181		error = pf_pooladdr_to_kpooladdr(&pp->addr, pa);
4182		if (error != 0)
4183			break;
4184		if (pa->ifname[0])
4185			kif = pf_kkif_create(M_WAITOK);
4186		PF_RULES_WLOCK();
4187		if (pp->ticket != V_ticket_pabuf) {
4188			PF_RULES_WUNLOCK();
4189			if (pa->ifname[0])
4190				pf_kkif_free(kif);
4191			free(pa, M_PFRULE);
4192			error = EBUSY;
4193			break;
4194		}
4195		if (pa->ifname[0]) {
4196			pa->kif = pfi_kkif_attach(kif, pa->ifname);
4197			kif = NULL;
4198			pfi_kkif_ref(pa->kif);
4199		} else
4200			pa->kif = NULL;
4201		if (pa->addr.type == PF_ADDR_DYNIFTL && ((error =
4202		    pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) {
4203			if (pa->ifname[0])
4204				pfi_kkif_unref(pa->kif);
4205			PF_RULES_WUNLOCK();
4206			free(pa, M_PFRULE);
4207			break;
4208		}
4209		TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries);
4210		PF_RULES_WUNLOCK();
4211		break;
4212	}
4213
4214	case DIOCGETADDRS: {
4215		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
4216		struct pf_kpool		*pool;
4217		struct pf_kpooladdr	*pa;
4218
4219		pp->anchor[sizeof(pp->anchor) - 1] = 0;
4220		pp->nr = 0;
4221
4222		PF_RULES_RLOCK();
4223		pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
4224		    pp->r_num, 0, 1, 0);
4225		if (pool == NULL) {
4226			PF_RULES_RUNLOCK();
4227			error = EBUSY;
4228			break;
4229		}
4230		TAILQ_FOREACH(pa, &pool->list, entries)
4231			pp->nr++;
4232		PF_RULES_RUNLOCK();
4233		break;
4234	}
4235
4236	case DIOCGETADDR: {
4237		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
4238		struct pf_kpool		*pool;
4239		struct pf_kpooladdr	*pa;
4240		u_int32_t		 nr = 0;
4241
4242		pp->anchor[sizeof(pp->anchor) - 1] = 0;
4243
4244		PF_RULES_RLOCK();
4245		pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
4246		    pp->r_num, 0, 1, 1);
4247		if (pool == NULL) {
4248			PF_RULES_RUNLOCK();
4249			error = EBUSY;
4250			break;
4251		}
4252		pa = TAILQ_FIRST(&pool->list);
4253		while ((pa != NULL) && (nr < pp->nr)) {
4254			pa = TAILQ_NEXT(pa, entries);
4255			nr++;
4256		}
4257		if (pa == NULL) {
4258			PF_RULES_RUNLOCK();
4259			error = EBUSY;
4260			break;
4261		}
4262		pf_kpooladdr_to_pooladdr(pa, &pp->addr);
4263		pf_addr_copyout(&pp->addr.addr);
4264		PF_RULES_RUNLOCK();
4265		break;
4266	}
4267
4268	case DIOCCHANGEADDR: {
4269		struct pfioc_pooladdr	*pca = (struct pfioc_pooladdr *)addr;
4270		struct pf_kpool		*pool;
4271		struct pf_kpooladdr	*oldpa = NULL, *newpa = NULL;
4272		struct pf_kruleset	*ruleset;
4273		struct pfi_kkif		*kif = NULL;
4274
4275		pca->anchor[sizeof(pca->anchor) - 1] = 0;
4276
4277		if (pca->action < PF_CHANGE_ADD_HEAD ||
4278		    pca->action > PF_CHANGE_REMOVE) {
4279			error = EINVAL;
4280			break;
4281		}
4282		if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
4283		    pca->addr.addr.type != PF_ADDR_DYNIFTL &&
4284		    pca->addr.addr.type != PF_ADDR_TABLE) {
4285			error = EINVAL;
4286			break;
4287		}
4288		if (pca->addr.addr.p.dyn != NULL) {
4289			error = EINVAL;
4290			break;
4291		}
4292
4293		if (pca->action != PF_CHANGE_REMOVE) {
4294#ifndef INET
4295			if (pca->af == AF_INET) {
4296				error = EAFNOSUPPORT;
4297				break;
4298			}
4299#endif /* INET */
4300#ifndef INET6
4301			if (pca->af == AF_INET6) {
4302				error = EAFNOSUPPORT;
4303				break;
4304			}
4305#endif /* INET6 */
4306			newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK);
4307			bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
4308			if (newpa->ifname[0])
4309				kif = pf_kkif_create(M_WAITOK);
4310			newpa->kif = NULL;
4311		}
4312#define	ERROUT(x)	ERROUT_IOCTL(DIOCCHANGEADDR_error, x)
4313		PF_RULES_WLOCK();
4314		ruleset = pf_find_kruleset(pca->anchor);
4315		if (ruleset == NULL)
4316			ERROUT(EBUSY);
4317
4318		pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action,
4319		    pca->r_num, pca->r_last, 1, 1);
4320		if (pool == NULL)
4321			ERROUT(EBUSY);
4322
4323		if (pca->action != PF_CHANGE_REMOVE) {
4324			if (newpa->ifname[0]) {
4325				newpa->kif = pfi_kkif_attach(kif, newpa->ifname);
4326				pfi_kkif_ref(newpa->kif);
4327				kif = NULL;
4328			}
4329
4330			switch (newpa->addr.type) {
4331			case PF_ADDR_DYNIFTL:
4332				error = pfi_dynaddr_setup(&newpa->addr,
4333				    pca->af);
4334				break;
4335			case PF_ADDR_TABLE:
4336				newpa->addr.p.tbl = pfr_attach_table(ruleset,
4337				    newpa->addr.v.tblname);
4338				if (newpa->addr.p.tbl == NULL)
4339					error = ENOMEM;
4340				break;
4341			}
4342			if (error)
4343				goto DIOCCHANGEADDR_error;
4344		}
4345
4346		switch (pca->action) {
4347		case PF_CHANGE_ADD_HEAD:
4348			oldpa = TAILQ_FIRST(&pool->list);
4349			break;
4350		case PF_CHANGE_ADD_TAIL:
4351			oldpa = TAILQ_LAST(&pool->list, pf_kpalist);
4352			break;
4353		default:
4354			oldpa = TAILQ_FIRST(&pool->list);
4355			for (int i = 0; oldpa && i < pca->nr; i++)
4356				oldpa = TAILQ_NEXT(oldpa, entries);
4357
4358			if (oldpa == NULL)
4359				ERROUT(EINVAL);
4360		}
4361
4362		if (pca->action == PF_CHANGE_REMOVE) {
4363			TAILQ_REMOVE(&pool->list, oldpa, entries);
4364			switch (oldpa->addr.type) {
4365			case PF_ADDR_DYNIFTL:
4366				pfi_dynaddr_remove(oldpa->addr.p.dyn);
4367				break;
4368			case PF_ADDR_TABLE:
4369				pfr_detach_table(oldpa->addr.p.tbl);
4370				break;
4371			}
4372			if (oldpa->kif)
4373				pfi_kkif_unref(oldpa->kif);
4374			free(oldpa, M_PFRULE);
4375		} else {
4376			if (oldpa == NULL)
4377				TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
4378			else if (pca->action == PF_CHANGE_ADD_HEAD ||
4379			    pca->action == PF_CHANGE_ADD_BEFORE)
4380				TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
4381			else
4382				TAILQ_INSERT_AFTER(&pool->list, oldpa,
4383				    newpa, entries);
4384		}
4385
4386		pool->cur = TAILQ_FIRST(&pool->list);
4387		PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
4388		PF_RULES_WUNLOCK();
4389		break;
4390
4391#undef ERROUT
4392DIOCCHANGEADDR_error:
4393		if (newpa != NULL) {
4394			if (newpa->kif)
4395				pfi_kkif_unref(newpa->kif);
4396			free(newpa, M_PFRULE);
4397		}
4398		PF_RULES_WUNLOCK();
4399		pf_kkif_free(kif);
4400		break;
4401	}
4402
4403	case DIOCGETRULESETS: {
4404		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
4405		struct pf_kruleset	*ruleset;
4406		struct pf_kanchor	*anchor;
4407
4408		pr->path[sizeof(pr->path) - 1] = 0;
4409
4410		PF_RULES_RLOCK();
4411		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
4412			PF_RULES_RUNLOCK();
4413			error = ENOENT;
4414			break;
4415		}
4416		pr->nr = 0;
4417		if (ruleset->anchor == NULL) {
4418			/* XXX kludge for pf_main_ruleset */
4419			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
4420				if (anchor->parent == NULL)
4421					pr->nr++;
4422		} else {
4423			RB_FOREACH(anchor, pf_kanchor_node,
4424			    &ruleset->anchor->children)
4425				pr->nr++;
4426		}
4427		PF_RULES_RUNLOCK();
4428		break;
4429	}
4430
4431	case DIOCGETRULESET: {
4432		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
4433		struct pf_kruleset	*ruleset;
4434		struct pf_kanchor	*anchor;
4435		u_int32_t		 nr = 0;
4436
4437		pr->path[sizeof(pr->path) - 1] = 0;
4438
4439		PF_RULES_RLOCK();
4440		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
4441			PF_RULES_RUNLOCK();
4442			error = ENOENT;
4443			break;
4444		}
4445		pr->name[0] = 0;
4446		if (ruleset->anchor == NULL) {
4447			/* XXX kludge for pf_main_ruleset */
4448			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
4449				if (anchor->parent == NULL && nr++ == pr->nr) {
4450					strlcpy(pr->name, anchor->name,
4451					    sizeof(pr->name));
4452					break;
4453				}
4454		} else {
4455			RB_FOREACH(anchor, pf_kanchor_node,
4456			    &ruleset->anchor->children)
4457				if (nr++ == pr->nr) {
4458					strlcpy(pr->name, anchor->name,
4459					    sizeof(pr->name));
4460					break;
4461				}
4462		}
4463		if (!pr->name[0])
4464			error = EBUSY;
4465		PF_RULES_RUNLOCK();
4466		break;
4467	}
4468
4469	case DIOCRCLRTABLES: {
4470		struct pfioc_table *io = (struct pfioc_table *)addr;
4471
4472		if (io->pfrio_esize != 0) {
4473			error = ENODEV;
4474			break;
4475		}
4476		PF_RULES_WLOCK();
4477		error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
4478		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
4479		PF_RULES_WUNLOCK();
4480		break;
4481	}
4482
4483	case DIOCRADDTABLES: {
4484		struct pfioc_table *io = (struct pfioc_table *)addr;
4485		struct pfr_table *pfrts;
4486		size_t totlen;
4487
4488		if (io->pfrio_esize != sizeof(struct pfr_table)) {
4489			error = ENODEV;
4490			break;
4491		}
4492
4493		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
4494		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
4495			error = ENOMEM;
4496			break;
4497		}
4498
4499		totlen = io->pfrio_size * sizeof(struct pfr_table);
4500		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
4501		    M_TEMP, M_WAITOK);
4502		error = copyin(io->pfrio_buffer, pfrts, totlen);
4503		if (error) {
4504			free(pfrts, M_TEMP);
4505			break;
4506		}
4507		PF_RULES_WLOCK();
4508		error = pfr_add_tables(pfrts, io->pfrio_size,
4509		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4510		PF_RULES_WUNLOCK();
4511		free(pfrts, M_TEMP);
4512		break;
4513	}
4514
4515	case DIOCRDELTABLES: {
4516		struct pfioc_table *io = (struct pfioc_table *)addr;
4517		struct pfr_table *pfrts;
4518		size_t totlen;
4519
4520		if (io->pfrio_esize != sizeof(struct pfr_table)) {
4521			error = ENODEV;
4522			break;
4523		}
4524
4525		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
4526		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
4527			error = ENOMEM;
4528			break;
4529		}
4530
4531		totlen = io->pfrio_size * sizeof(struct pfr_table);
4532		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
4533		    M_TEMP, M_WAITOK);
4534		error = copyin(io->pfrio_buffer, pfrts, totlen);
4535		if (error) {
4536			free(pfrts, M_TEMP);
4537			break;
4538		}
4539		PF_RULES_WLOCK();
4540		error = pfr_del_tables(pfrts, io->pfrio_size,
4541		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4542		PF_RULES_WUNLOCK();
4543		free(pfrts, M_TEMP);
4544		break;
4545	}
4546
4547	case DIOCRGETTABLES: {
4548		struct pfioc_table *io = (struct pfioc_table *)addr;
4549		struct pfr_table *pfrts;
4550		size_t totlen;
4551		int n;
4552
4553		if (io->pfrio_esize != sizeof(struct pfr_table)) {
4554			error = ENODEV;
4555			break;
4556		}
4557		PF_RULES_RLOCK();
4558		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
4559		if (n < 0) {
4560			PF_RULES_RUNLOCK();
4561			error = EINVAL;
4562			break;
4563		}
4564		io->pfrio_size = min(io->pfrio_size, n);
4565
4566		totlen = io->pfrio_size * sizeof(struct pfr_table);
4567
4568		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
4569		    M_TEMP, M_NOWAIT | M_ZERO);
4570		if (pfrts == NULL) {
4571			error = ENOMEM;
4572			PF_RULES_RUNLOCK();
4573			break;
4574		}
4575		error = pfr_get_tables(&io->pfrio_table, pfrts,
4576		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4577		PF_RULES_RUNLOCK();
4578		if (error == 0)
4579			error = copyout(pfrts, io->pfrio_buffer, totlen);
4580		free(pfrts, M_TEMP);
4581		break;
4582	}
4583
4584	case DIOCRGETTSTATS: {
4585		struct pfioc_table *io = (struct pfioc_table *)addr;
4586		struct pfr_tstats *pfrtstats;
4587		size_t totlen;
4588		int n;
4589
4590		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
4591			error = ENODEV;
4592			break;
4593		}
4594		PF_TABLE_STATS_LOCK();
4595		PF_RULES_RLOCK();
4596		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
4597		if (n < 0) {
4598			PF_RULES_RUNLOCK();
4599			PF_TABLE_STATS_UNLOCK();
4600			error = EINVAL;
4601			break;
4602		}
4603		io->pfrio_size = min(io->pfrio_size, n);
4604
4605		totlen = io->pfrio_size * sizeof(struct pfr_tstats);
4606		pfrtstats = mallocarray(io->pfrio_size,
4607		    sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
4608		if (pfrtstats == NULL) {
4609			error = ENOMEM;
4610			PF_RULES_RUNLOCK();
4611			PF_TABLE_STATS_UNLOCK();
4612			break;
4613		}
4614		error = pfr_get_tstats(&io->pfrio_table, pfrtstats,
4615		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4616		PF_RULES_RUNLOCK();
4617		PF_TABLE_STATS_UNLOCK();
4618		if (error == 0)
4619			error = copyout(pfrtstats, io->pfrio_buffer, totlen);
4620		free(pfrtstats, M_TEMP);
4621		break;
4622	}
4623
4624	case DIOCRCLRTSTATS: {
4625		struct pfioc_table *io = (struct pfioc_table *)addr;
4626		struct pfr_table *pfrts;
4627		size_t totlen;
4628
4629		if (io->pfrio_esize != sizeof(struct pfr_table)) {
4630			error = ENODEV;
4631			break;
4632		}
4633
4634		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
4635		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
4636			/* We used to count tables and use the minimum required
4637			 * size, so we didn't fail on overly large requests.
4638			 * Keep doing so. */
4639			io->pfrio_size = pf_ioctl_maxcount;
4640			break;
4641		}
4642
4643		totlen = io->pfrio_size * sizeof(struct pfr_table);
4644		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
4645		    M_TEMP, M_WAITOK);
4646		error = copyin(io->pfrio_buffer, pfrts, totlen);
4647		if (error) {
4648			free(pfrts, M_TEMP);
4649			break;
4650		}
4651
4652		PF_TABLE_STATS_LOCK();
4653		PF_RULES_RLOCK();
4654		error = pfr_clr_tstats(pfrts, io->pfrio_size,
4655		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4656		PF_RULES_RUNLOCK();
4657		PF_TABLE_STATS_UNLOCK();
4658		free(pfrts, M_TEMP);
4659		break;
4660	}
4661
4662	case DIOCRSETTFLAGS: {
4663		struct pfioc_table *io = (struct pfioc_table *)addr;
4664		struct pfr_table *pfrts;
4665		size_t totlen;
4666		int n;
4667
4668		if (io->pfrio_esize != sizeof(struct pfr_table)) {
4669			error = ENODEV;
4670			break;
4671		}
4672
4673		PF_RULES_RLOCK();
4674		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
4675		if (n < 0) {
4676			PF_RULES_RUNLOCK();
4677			error = EINVAL;
4678			break;
4679		}
4680
4681		io->pfrio_size = min(io->pfrio_size, n);
4682		PF_RULES_RUNLOCK();
4683
4684		totlen = io->pfrio_size * sizeof(struct pfr_table);
4685		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
4686		    M_TEMP, M_WAITOK);
4687		error = copyin(io->pfrio_buffer, pfrts, totlen);
4688		if (error) {
4689			free(pfrts, M_TEMP);
4690			break;
4691		}
4692		PF_RULES_WLOCK();
4693		error = pfr_set_tflags(pfrts, io->pfrio_size,
4694		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
4695		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4696		PF_RULES_WUNLOCK();
4697		free(pfrts, M_TEMP);
4698		break;
4699	}
4700
4701	case DIOCRCLRADDRS: {
4702		struct pfioc_table *io = (struct pfioc_table *)addr;
4703
4704		if (io->pfrio_esize != 0) {
4705			error = ENODEV;
4706			break;
4707		}
4708		PF_RULES_WLOCK();
4709		error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
4710		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
4711		PF_RULES_WUNLOCK();
4712		break;
4713	}
4714
4715	case DIOCRADDADDRS: {
4716		struct pfioc_table *io = (struct pfioc_table *)addr;
4717		struct pfr_addr *pfras;
4718		size_t totlen;
4719
4720		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4721			error = ENODEV;
4722			break;
4723		}
4724		if (io->pfrio_size < 0 ||
4725		    io->pfrio_size > pf_ioctl_maxcount ||
4726		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4727			error = EINVAL;
4728			break;
4729		}
4730		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4731		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4732		    M_TEMP, M_WAITOK);
4733		error = copyin(io->pfrio_buffer, pfras, totlen);
4734		if (error) {
4735			free(pfras, M_TEMP);
4736			break;
4737		}
4738		PF_RULES_WLOCK();
4739		error = pfr_add_addrs(&io->pfrio_table, pfras,
4740		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
4741		    PFR_FLAG_USERIOCTL);
4742		PF_RULES_WUNLOCK();
4743		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4744			error = copyout(pfras, io->pfrio_buffer, totlen);
4745		free(pfras, M_TEMP);
4746		break;
4747	}
4748
4749	case DIOCRDELADDRS: {
4750		struct pfioc_table *io = (struct pfioc_table *)addr;
4751		struct pfr_addr *pfras;
4752		size_t totlen;
4753
4754		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4755			error = ENODEV;
4756			break;
4757		}
4758		if (io->pfrio_size < 0 ||
4759		    io->pfrio_size > pf_ioctl_maxcount ||
4760		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4761			error = EINVAL;
4762			break;
4763		}
4764		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4765		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4766		    M_TEMP, M_WAITOK);
4767		error = copyin(io->pfrio_buffer, pfras, totlen);
4768		if (error) {
4769			free(pfras, M_TEMP);
4770			break;
4771		}
4772		PF_RULES_WLOCK();
4773		error = pfr_del_addrs(&io->pfrio_table, pfras,
4774		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
4775		    PFR_FLAG_USERIOCTL);
4776		PF_RULES_WUNLOCK();
4777		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4778			error = copyout(pfras, io->pfrio_buffer, totlen);
4779		free(pfras, M_TEMP);
4780		break;
4781	}
4782
4783	case DIOCRSETADDRS: {
4784		struct pfioc_table *io = (struct pfioc_table *)addr;
4785		struct pfr_addr *pfras;
4786		size_t totlen, count;
4787
4788		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4789			error = ENODEV;
4790			break;
4791		}
4792		if (io->pfrio_size < 0 || io->pfrio_size2 < 0) {
4793			error = EINVAL;
4794			break;
4795		}
4796		count = max(io->pfrio_size, io->pfrio_size2);
4797		if (count > pf_ioctl_maxcount ||
4798		    WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) {
4799			error = EINVAL;
4800			break;
4801		}
4802		totlen = count * sizeof(struct pfr_addr);
4803		pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP,
4804		    M_WAITOK);
4805		error = copyin(io->pfrio_buffer, pfras, totlen);
4806		if (error) {
4807			free(pfras, M_TEMP);
4808			break;
4809		}
4810		PF_RULES_WLOCK();
4811		error = pfr_set_addrs(&io->pfrio_table, pfras,
4812		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
4813		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
4814		    PFR_FLAG_USERIOCTL, 0);
4815		PF_RULES_WUNLOCK();
4816		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4817			error = copyout(pfras, io->pfrio_buffer, totlen);
4818		free(pfras, M_TEMP);
4819		break;
4820	}
4821
4822	case DIOCRGETADDRS: {
4823		struct pfioc_table *io = (struct pfioc_table *)addr;
4824		struct pfr_addr *pfras;
4825		size_t totlen;
4826
4827		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4828			error = ENODEV;
4829			break;
4830		}
4831		if (io->pfrio_size < 0 ||
4832		    io->pfrio_size > pf_ioctl_maxcount ||
4833		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4834			error = EINVAL;
4835			break;
4836		}
4837		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4838		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4839		    M_TEMP, M_WAITOK | M_ZERO);
4840		PF_RULES_RLOCK();
4841		error = pfr_get_addrs(&io->pfrio_table, pfras,
4842		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4843		PF_RULES_RUNLOCK();
4844		if (error == 0)
4845			error = copyout(pfras, io->pfrio_buffer, totlen);
4846		free(pfras, M_TEMP);
4847		break;
4848	}
4849
4850	case DIOCRGETASTATS: {
4851		struct pfioc_table *io = (struct pfioc_table *)addr;
4852		struct pfr_astats *pfrastats;
4853		size_t totlen;
4854
4855		if (io->pfrio_esize != sizeof(struct pfr_astats)) {
4856			error = ENODEV;
4857			break;
4858		}
4859		if (io->pfrio_size < 0 ||
4860		    io->pfrio_size > pf_ioctl_maxcount ||
4861		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) {
4862			error = EINVAL;
4863			break;
4864		}
4865		totlen = io->pfrio_size * sizeof(struct pfr_astats);
4866		pfrastats = mallocarray(io->pfrio_size,
4867		    sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO);
4868		PF_RULES_RLOCK();
4869		error = pfr_get_astats(&io->pfrio_table, pfrastats,
4870		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4871		PF_RULES_RUNLOCK();
4872		if (error == 0)
4873			error = copyout(pfrastats, io->pfrio_buffer, totlen);
4874		free(pfrastats, M_TEMP);
4875		break;
4876	}
4877
4878	case DIOCRCLRASTATS: {
4879		struct pfioc_table *io = (struct pfioc_table *)addr;
4880		struct pfr_addr *pfras;
4881		size_t totlen;
4882
4883		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4884			error = ENODEV;
4885			break;
4886		}
4887		if (io->pfrio_size < 0 ||
4888		    io->pfrio_size > pf_ioctl_maxcount ||
4889		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4890			error = EINVAL;
4891			break;
4892		}
4893		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4894		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4895		    M_TEMP, M_WAITOK);
4896		error = copyin(io->pfrio_buffer, pfras, totlen);
4897		if (error) {
4898			free(pfras, M_TEMP);
4899			break;
4900		}
4901		PF_RULES_WLOCK();
4902		error = pfr_clr_astats(&io->pfrio_table, pfras,
4903		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
4904		    PFR_FLAG_USERIOCTL);
4905		PF_RULES_WUNLOCK();
4906		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4907			error = copyout(pfras, io->pfrio_buffer, totlen);
4908		free(pfras, M_TEMP);
4909		break;
4910	}
4911
4912	case DIOCRTSTADDRS: {
4913		struct pfioc_table *io = (struct pfioc_table *)addr;
4914		struct pfr_addr *pfras;
4915		size_t totlen;
4916
4917		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4918			error = ENODEV;
4919			break;
4920		}
4921		if (io->pfrio_size < 0 ||
4922		    io->pfrio_size > pf_ioctl_maxcount ||
4923		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4924			error = EINVAL;
4925			break;
4926		}
4927		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4928		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4929		    M_TEMP, M_WAITOK);
4930		error = copyin(io->pfrio_buffer, pfras, totlen);
4931		if (error) {
4932			free(pfras, M_TEMP);
4933			break;
4934		}
4935		PF_RULES_RLOCK();
4936		error = pfr_tst_addrs(&io->pfrio_table, pfras,
4937		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
4938		    PFR_FLAG_USERIOCTL);
4939		PF_RULES_RUNLOCK();
4940		if (error == 0)
4941			error = copyout(pfras, io->pfrio_buffer, totlen);
4942		free(pfras, M_TEMP);
4943		break;
4944	}
4945
4946	case DIOCRINADEFINE: {
4947		struct pfioc_table *io = (struct pfioc_table *)addr;
4948		struct pfr_addr *pfras;
4949		size_t totlen;
4950
4951		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4952			error = ENODEV;
4953			break;
4954		}
4955		if (io->pfrio_size < 0 ||
4956		    io->pfrio_size > pf_ioctl_maxcount ||
4957		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4958			error = EINVAL;
4959			break;
4960		}
4961		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4962		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4963		    M_TEMP, M_WAITOK);
4964		error = copyin(io->pfrio_buffer, pfras, totlen);
4965		if (error) {
4966			free(pfras, M_TEMP);
4967			break;
4968		}
4969		PF_RULES_WLOCK();
4970		error = pfr_ina_define(&io->pfrio_table, pfras,
4971		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
4972		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4973		PF_RULES_WUNLOCK();
4974		free(pfras, M_TEMP);
4975		break;
4976	}
4977
4978	case DIOCOSFPADD: {
4979		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
4980		PF_RULES_WLOCK();
4981		error = pf_osfp_add(io);
4982		PF_RULES_WUNLOCK();
4983		break;
4984	}
4985
4986	case DIOCOSFPGET: {
4987		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
4988		PF_RULES_RLOCK();
4989		error = pf_osfp_get(io);
4990		PF_RULES_RUNLOCK();
4991		break;
4992	}
4993
4994	case DIOCXBEGIN: {
4995		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
4996		struct pfioc_trans_e	*ioes, *ioe;
4997		size_t			 totlen;
4998		int			 i;
4999
5000		if (io->esize != sizeof(*ioe)) {
5001			error = ENODEV;
5002			break;
5003		}
5004		if (io->size < 0 ||
5005		    io->size > pf_ioctl_maxcount ||
5006		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
5007			error = EINVAL;
5008			break;
5009		}
5010		totlen = sizeof(struct pfioc_trans_e) * io->size;
5011		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
5012		    M_TEMP, M_WAITOK);
5013		error = copyin(io->array, ioes, totlen);
5014		if (error) {
5015			free(ioes, M_TEMP);
5016			break;
5017		}
5018		/* Ensure there's no more ethernet rules to clean up. */
5019		NET_EPOCH_DRAIN_CALLBACKS();
5020		PF_RULES_WLOCK();
5021		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5022			ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
5023			switch (ioe->rs_num) {
5024			case PF_RULESET_ETH:
5025				if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) {
5026					PF_RULES_WUNLOCK();
5027					free(ioes, M_TEMP);
5028					goto fail;
5029				}
5030				break;
5031#ifdef ALTQ
5032			case PF_RULESET_ALTQ:
5033				if (ioe->anchor[0]) {
5034					PF_RULES_WUNLOCK();
5035					free(ioes, M_TEMP);
5036					error = EINVAL;
5037					goto fail;
5038				}
5039				if ((error = pf_begin_altq(&ioe->ticket))) {
5040					PF_RULES_WUNLOCK();
5041					free(ioes, M_TEMP);
5042					goto fail;
5043				}
5044				break;
5045#endif /* ALTQ */
5046			case PF_RULESET_TABLE:
5047			    {
5048				struct pfr_table table;
5049
5050				bzero(&table, sizeof(table));
5051				strlcpy(table.pfrt_anchor, ioe->anchor,
5052				    sizeof(table.pfrt_anchor));
5053				if ((error = pfr_ina_begin(&table,
5054				    &ioe->ticket, NULL, 0))) {
5055					PF_RULES_WUNLOCK();
5056					free(ioes, M_TEMP);
5057					goto fail;
5058				}
5059				break;
5060			    }
5061			default:
5062				if ((error = pf_begin_rules(&ioe->ticket,
5063				    ioe->rs_num, ioe->anchor))) {
5064					PF_RULES_WUNLOCK();
5065					free(ioes, M_TEMP);
5066					goto fail;
5067				}
5068				break;
5069			}
5070		}
5071		PF_RULES_WUNLOCK();
5072		error = copyout(ioes, io->array, totlen);
5073		free(ioes, M_TEMP);
5074		break;
5075	}
5076
5077	case DIOCXROLLBACK: {
5078		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
5079		struct pfioc_trans_e	*ioe, *ioes;
5080		size_t			 totlen;
5081		int			 i;
5082
5083		if (io->esize != sizeof(*ioe)) {
5084			error = ENODEV;
5085			break;
5086		}
5087		if (io->size < 0 ||
5088		    io->size > pf_ioctl_maxcount ||
5089		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
5090			error = EINVAL;
5091			break;
5092		}
5093		totlen = sizeof(struct pfioc_trans_e) * io->size;
5094		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
5095		    M_TEMP, M_WAITOK);
5096		error = copyin(io->array, ioes, totlen);
5097		if (error) {
5098			free(ioes, M_TEMP);
5099			break;
5100		}
5101		PF_RULES_WLOCK();
5102		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5103			ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
5104			switch (ioe->rs_num) {
5105			case PF_RULESET_ETH:
5106				if ((error = pf_rollback_eth(ioe->ticket,
5107				    ioe->anchor))) {
5108					PF_RULES_WUNLOCK();
5109					free(ioes, M_TEMP);
5110					goto fail; /* really bad */
5111				}
5112				break;
5113#ifdef ALTQ
5114			case PF_RULESET_ALTQ:
5115				if (ioe->anchor[0]) {
5116					PF_RULES_WUNLOCK();
5117					free(ioes, M_TEMP);
5118					error = EINVAL;
5119					goto fail;
5120				}
5121				if ((error = pf_rollback_altq(ioe->ticket))) {
5122					PF_RULES_WUNLOCK();
5123					free(ioes, M_TEMP);
5124					goto fail; /* really bad */
5125				}
5126				break;
5127#endif /* ALTQ */
5128			case PF_RULESET_TABLE:
5129			    {
5130				struct pfr_table table;
5131
5132				bzero(&table, sizeof(table));
5133				strlcpy(table.pfrt_anchor, ioe->anchor,
5134				    sizeof(table.pfrt_anchor));
5135				if ((error = pfr_ina_rollback(&table,
5136				    ioe->ticket, NULL, 0))) {
5137					PF_RULES_WUNLOCK();
5138					free(ioes, M_TEMP);
5139					goto fail; /* really bad */
5140				}
5141				break;
5142			    }
5143			default:
5144				if ((error = pf_rollback_rules(ioe->ticket,
5145				    ioe->rs_num, ioe->anchor))) {
5146					PF_RULES_WUNLOCK();
5147					free(ioes, M_TEMP);
5148					goto fail; /* really bad */
5149				}
5150				break;
5151			}
5152		}
5153		PF_RULES_WUNLOCK();
5154		free(ioes, M_TEMP);
5155		break;
5156	}
5157
5158	case DIOCXCOMMIT: {
5159		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
5160		struct pfioc_trans_e	*ioe, *ioes;
5161		struct pf_kruleset	*rs;
5162		struct pf_keth_ruleset	*ers;
5163		size_t			 totlen;
5164		int			 i;
5165
5166		if (io->esize != sizeof(*ioe)) {
5167			error = ENODEV;
5168			break;
5169		}
5170
5171		if (io->size < 0 ||
5172		    io->size > pf_ioctl_maxcount ||
5173		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
5174			error = EINVAL;
5175			break;
5176		}
5177
5178		totlen = sizeof(struct pfioc_trans_e) * io->size;
5179		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
5180		    M_TEMP, M_WAITOK);
5181		error = copyin(io->array, ioes, totlen);
5182		if (error) {
5183			free(ioes, M_TEMP);
5184			break;
5185		}
5186		PF_RULES_WLOCK();
5187		/* First makes sure everything will succeed. */
5188		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5189			ioe->anchor[sizeof(ioe->anchor) - 1] = 0;
5190			switch (ioe->rs_num) {
5191			case PF_RULESET_ETH:
5192				ers = pf_find_keth_ruleset(ioe->anchor);
5193				if (ers == NULL || ioe->ticket == 0 ||
5194				    ioe->ticket != ers->inactive.ticket) {
5195					PF_RULES_WUNLOCK();
5196					free(ioes, M_TEMP);
5197					error = EINVAL;
5198					goto fail;
5199				}
5200				break;
5201#ifdef ALTQ
5202			case PF_RULESET_ALTQ:
5203				if (ioe->anchor[0]) {
5204					PF_RULES_WUNLOCK();
5205					free(ioes, M_TEMP);
5206					error = EINVAL;
5207					goto fail;
5208				}
5209				if (!V_altqs_inactive_open || ioe->ticket !=
5210				    V_ticket_altqs_inactive) {
5211					PF_RULES_WUNLOCK();
5212					free(ioes, M_TEMP);
5213					error = EBUSY;
5214					goto fail;
5215				}
5216				break;
5217#endif /* ALTQ */
5218			case PF_RULESET_TABLE:
5219				rs = pf_find_kruleset(ioe->anchor);
5220				if (rs == NULL || !rs->topen || ioe->ticket !=
5221				    rs->tticket) {
5222					PF_RULES_WUNLOCK();
5223					free(ioes, M_TEMP);
5224					error = EBUSY;
5225					goto fail;
5226				}
5227				break;
5228			default:
5229				if (ioe->rs_num < 0 || ioe->rs_num >=
5230				    PF_RULESET_MAX) {
5231					PF_RULES_WUNLOCK();
5232					free(ioes, M_TEMP);
5233					error = EINVAL;
5234					goto fail;
5235				}
5236				rs = pf_find_kruleset(ioe->anchor);
5237				if (rs == NULL ||
5238				    !rs->rules[ioe->rs_num].inactive.open ||
5239				    rs->rules[ioe->rs_num].inactive.ticket !=
5240				    ioe->ticket) {
5241					PF_RULES_WUNLOCK();
5242					free(ioes, M_TEMP);
5243					error = EBUSY;
5244					goto fail;
5245				}
5246				break;
5247			}
5248		}
5249		/* Now do the commit - no errors should happen here. */
5250		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5251			switch (ioe->rs_num) {
5252			case PF_RULESET_ETH:
5253				if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) {
5254					PF_RULES_WUNLOCK();
5255					free(ioes, M_TEMP);
5256					goto fail; /* really bad */
5257				}
5258				break;
5259#ifdef ALTQ
5260			case PF_RULESET_ALTQ:
5261				if ((error = pf_commit_altq(ioe->ticket))) {
5262					PF_RULES_WUNLOCK();
5263					free(ioes, M_TEMP);
5264					goto fail; /* really bad */
5265				}
5266				break;
5267#endif /* ALTQ */
5268			case PF_RULESET_TABLE:
5269			    {
5270				struct pfr_table table;
5271
5272				bzero(&table, sizeof(table));
5273				(void)strlcpy(table.pfrt_anchor, ioe->anchor,
5274				    sizeof(table.pfrt_anchor));
5275				if ((error = pfr_ina_commit(&table,
5276				    ioe->ticket, NULL, NULL, 0))) {
5277					PF_RULES_WUNLOCK();
5278					free(ioes, M_TEMP);
5279					goto fail; /* really bad */
5280				}
5281				break;
5282			    }
5283			default:
5284				if ((error = pf_commit_rules(ioe->ticket,
5285				    ioe->rs_num, ioe->anchor))) {
5286					PF_RULES_WUNLOCK();
5287					free(ioes, M_TEMP);
5288					goto fail; /* really bad */
5289				}
5290				break;
5291			}
5292		}
5293		PF_RULES_WUNLOCK();
5294
5295		/* Only hook into EtherNet taffic if we've got rules for it. */
5296		if (! TAILQ_EMPTY(V_pf_keth->active.rules))
5297			hook_pf_eth();
5298		else
5299			dehook_pf_eth();
5300
5301		free(ioes, M_TEMP);
5302		break;
5303	}
5304
5305	case DIOCGETSRCNODES: {
5306		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
5307		struct pf_srchash	*sh;
5308		struct pf_ksrc_node	*n;
5309		struct pf_src_node	*p, *pstore;
5310		uint32_t		 i, nr = 0;
5311
5312		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
5313				i++, sh++) {
5314			PF_HASHROW_LOCK(sh);
5315			LIST_FOREACH(n, &sh->nodes, entry)
5316				nr++;
5317			PF_HASHROW_UNLOCK(sh);
5318		}
5319
5320		psn->psn_len = min(psn->psn_len,
5321		    sizeof(struct pf_src_node) * nr);
5322
5323		if (psn->psn_len == 0) {
5324			psn->psn_len = sizeof(struct pf_src_node) * nr;
5325			break;
5326		}
5327
5328		nr = 0;
5329
5330		p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
5331		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
5332		    i++, sh++) {
5333		    PF_HASHROW_LOCK(sh);
5334		    LIST_FOREACH(n, &sh->nodes, entry) {
5335
5336			if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
5337				break;
5338
5339			pf_src_node_copy(n, p);
5340
5341			p++;
5342			nr++;
5343		    }
5344		    PF_HASHROW_UNLOCK(sh);
5345		}
5346		error = copyout(pstore, psn->psn_src_nodes,
5347		    sizeof(struct pf_src_node) * nr);
5348		if (error) {
5349			free(pstore, M_TEMP);
5350			break;
5351		}
5352		psn->psn_len = sizeof(struct pf_src_node) * nr;
5353		free(pstore, M_TEMP);
5354		break;
5355	}
5356
5357	case DIOCCLRSRCNODES: {
5358		pf_clear_srcnodes(NULL);
5359		pf_purge_expired_src_nodes();
5360		break;
5361	}
5362
5363	case DIOCKILLSRCNODES:
5364		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
5365		break;
5366
5367#ifdef COMPAT_FREEBSD13
5368	case DIOCKEEPCOUNTERS_FREEBSD13:
5369#endif
5370	case DIOCKEEPCOUNTERS:
5371		error = pf_keepcounters((struct pfioc_nv *)addr);
5372		break;
5373
5374	case DIOCGETSYNCOOKIES:
5375		error = pf_get_syncookies((struct pfioc_nv *)addr);
5376		break;
5377
5378	case DIOCSETSYNCOOKIES:
5379		error = pf_set_syncookies((struct pfioc_nv *)addr);
5380		break;
5381
5382	case DIOCSETHOSTID: {
5383		u_int32_t	*hostid = (u_int32_t *)addr;
5384
5385		PF_RULES_WLOCK();
5386		if (*hostid == 0)
5387			V_pf_status.hostid = arc4random();
5388		else
5389			V_pf_status.hostid = *hostid;
5390		PF_RULES_WUNLOCK();
5391		break;
5392	}
5393
5394	case DIOCOSFPFLUSH:
5395		PF_RULES_WLOCK();
5396		pf_osfp_flush();
5397		PF_RULES_WUNLOCK();
5398		break;
5399
5400	case DIOCIGETIFACES: {
5401		struct pfioc_iface *io = (struct pfioc_iface *)addr;
5402		struct pfi_kif *ifstore;
5403		size_t bufsiz;
5404
5405		if (io->pfiio_esize != sizeof(struct pfi_kif)) {
5406			error = ENODEV;
5407			break;
5408		}
5409
5410		if (io->pfiio_size < 0 ||
5411		    io->pfiio_size > pf_ioctl_maxcount ||
5412		    WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) {
5413			error = EINVAL;
5414			break;
5415		}
5416
5417		io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';
5418
5419		bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
5420		ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
5421		    M_TEMP, M_WAITOK | M_ZERO);
5422
5423		PF_RULES_RLOCK();
5424		pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
5425		PF_RULES_RUNLOCK();
5426		error = copyout(ifstore, io->pfiio_buffer, bufsiz);
5427		free(ifstore, M_TEMP);
5428		break;
5429	}
5430
5431	case DIOCSETIFFLAG: {
5432		struct pfioc_iface *io = (struct pfioc_iface *)addr;
5433
5434		io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';
5435
5436		PF_RULES_WLOCK();
5437		error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
5438		PF_RULES_WUNLOCK();
5439		break;
5440	}
5441
5442	case DIOCCLRIFFLAG: {
5443		struct pfioc_iface *io = (struct pfioc_iface *)addr;
5444
5445		io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';
5446
5447		PF_RULES_WLOCK();
5448		error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
5449		PF_RULES_WUNLOCK();
5450		break;
5451	}
5452
5453	case DIOCSETREASS: {
5454		u_int32_t	*reass = (u_int32_t *)addr;
5455
5456		V_pf_status.reass = *reass & (PF_REASS_ENABLED|PF_REASS_NODF);
5457		/* Removal of DF flag without reassembly enabled is not a
5458		 * valid combination. Disable reassembly in such case. */
5459		if (!(V_pf_status.reass & PF_REASS_ENABLED))
5460			V_pf_status.reass = 0;
5461		break;
5462	}
5463
5464	default:
5465		error = ENODEV;
5466		break;
5467	}
5468fail:
5469	CURVNET_RESTORE();
5470
5471#undef ERROUT_IOCTL
5472
5473	return (error);
5474}
5475
5476void
5477pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version)
5478{
5479	bzero(sp, sizeof(union pfsync_state_union));
5480
5481	/* copy from state key */
5482	sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
5483	sp->pfs_1301.key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
5484	sp->pfs_1301.key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
5485	sp->pfs_1301.key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
5486	sp->pfs_1301.key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
5487	sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
5488	sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
5489	sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
5490	sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto;
5491	sp->pfs_1301.af = st->key[PF_SK_WIRE]->af;
5492
5493	/* copy from state */
5494	strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname));
5495	bcopy(&st->rt_addr, &sp->pfs_1301.rt_addr, sizeof(sp->pfs_1301.rt_addr));
5496	sp->pfs_1301.creation = htonl(time_uptime - (st->creation / 1000));
5497	sp->pfs_1301.expire = pf_state_expires(st);
5498	if (sp->pfs_1301.expire <= time_uptime)
5499		sp->pfs_1301.expire = htonl(0);
5500	else
5501		sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime);
5502
5503	sp->pfs_1301.direction = st->direction;
5504	sp->pfs_1301.log = st->act.log;
5505	sp->pfs_1301.timeout = st->timeout;
5506
5507	switch (msg_version) {
5508		case PFSYNC_MSG_VERSION_1301:
5509			sp->pfs_1301.state_flags = st->state_flags;
5510			break;
5511		case PFSYNC_MSG_VERSION_1400:
5512			sp->pfs_1400.state_flags = htons(st->state_flags);
5513			sp->pfs_1400.qid = htons(st->act.qid);
5514			sp->pfs_1400.pqid = htons(st->act.pqid);
5515			sp->pfs_1400.dnpipe = htons(st->act.dnpipe);
5516			sp->pfs_1400.dnrpipe = htons(st->act.dnrpipe);
5517			sp->pfs_1400.rtableid = htonl(st->act.rtableid);
5518			sp->pfs_1400.min_ttl = st->act.min_ttl;
5519			sp->pfs_1400.set_tos = st->act.set_tos;
5520			sp->pfs_1400.max_mss = htons(st->act.max_mss);
5521			sp->pfs_1400.set_prio[0] = st->act.set_prio[0];
5522			sp->pfs_1400.set_prio[1] = st->act.set_prio[1];
5523			sp->pfs_1400.rt = st->rt;
5524			if (st->rt_kif)
5525				strlcpy(sp->pfs_1400.rt_ifname,
5526				    st->rt_kif->pfik_name,
5527				    sizeof(sp->pfs_1400.rt_ifname));
5528			break;
5529		default:
5530			panic("%s: Unsupported pfsync_msg_version %d",
5531			    __func__, msg_version);
5532	}
5533
5534	if (st->src_node)
5535		sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE;
5536	if (st->nat_src_node)
5537		sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE;
5538
5539	sp->pfs_1301.id = st->id;
5540	sp->pfs_1301.creatorid = st->creatorid;
5541	pf_state_peer_hton(&st->src, &sp->pfs_1301.src);
5542	pf_state_peer_hton(&st->dst, &sp->pfs_1301.dst);
5543
5544	if (st->rule.ptr == NULL)
5545		sp->pfs_1301.rule = htonl(-1);
5546	else
5547		sp->pfs_1301.rule = htonl(st->rule.ptr->nr);
5548	if (st->anchor.ptr == NULL)
5549		sp->pfs_1301.anchor = htonl(-1);
5550	else
5551		sp->pfs_1301.anchor = htonl(st->anchor.ptr->nr);
5552	if (st->nat_rule.ptr == NULL)
5553		sp->pfs_1301.nat_rule = htonl(-1);
5554	else
5555		sp->pfs_1301.nat_rule = htonl(st->nat_rule.ptr->nr);
5556
5557	pf_state_counter_hton(st->packets[0], sp->pfs_1301.packets[0]);
5558	pf_state_counter_hton(st->packets[1], sp->pfs_1301.packets[1]);
5559	pf_state_counter_hton(st->bytes[0], sp->pfs_1301.bytes[0]);
5560	pf_state_counter_hton(st->bytes[1], sp->pfs_1301.bytes[1]);
5561}
5562
5563void
5564pf_state_export(struct pf_state_export *sp, struct pf_kstate *st)
5565{
5566	bzero(sp, sizeof(*sp));
5567
5568	sp->version = PF_STATE_VERSION;
5569
5570	/* copy from state key */
5571	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
5572	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
5573	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
5574	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
5575	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
5576	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
5577	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
5578	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
5579	sp->proto = st->key[PF_SK_WIRE]->proto;
5580	sp->af = st->key[PF_SK_WIRE]->af;
5581
5582	/* copy from state */
5583	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
5584	strlcpy(sp->orig_ifname, st->orig_kif->pfik_name,
5585	    sizeof(sp->orig_ifname));
5586	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
5587	sp->creation = htonl(time_uptime - (st->creation / 1000));
5588	sp->expire = pf_state_expires(st);
5589	if (sp->expire <= time_uptime)
5590		sp->expire = htonl(0);
5591	else
5592		sp->expire = htonl(sp->expire - time_uptime);
5593
5594	sp->direction = st->direction;
5595	sp->log = st->act.log;
5596	sp->timeout = st->timeout;
5597	/* 8 bits for the old libpfctl, 16 bits for the new libpfctl */
5598	sp->state_flags_compat = st->state_flags;
5599	sp->state_flags = htons(st->state_flags);
5600	if (st->src_node)
5601		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
5602	if (st->nat_src_node)
5603		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
5604
5605	sp->id = st->id;
5606	sp->creatorid = st->creatorid;
5607	pf_state_peer_hton(&st->src, &sp->src);
5608	pf_state_peer_hton(&st->dst, &sp->dst);
5609
5610	if (st->rule.ptr == NULL)
5611		sp->rule = htonl(-1);
5612	else
5613		sp->rule = htonl(st->rule.ptr->nr);
5614	if (st->anchor.ptr == NULL)
5615		sp->anchor = htonl(-1);
5616	else
5617		sp->anchor = htonl(st->anchor.ptr->nr);
5618	if (st->nat_rule.ptr == NULL)
5619		sp->nat_rule = htonl(-1);
5620	else
5621		sp->nat_rule = htonl(st->nat_rule.ptr->nr);
5622
5623	sp->packets[0] = st->packets[0];
5624	sp->packets[1] = st->packets[1];
5625	sp->bytes[0] = st->bytes[0];
5626	sp->bytes[1] = st->bytes[1];
5627
5628	sp->qid = htons(st->act.qid);
5629	sp->pqid = htons(st->act.pqid);
5630	sp->dnpipe = htons(st->act.dnpipe);
5631	sp->dnrpipe = htons(st->act.dnrpipe);
5632	sp->rtableid = htonl(st->act.rtableid);
5633	sp->min_ttl = st->act.min_ttl;
5634	sp->set_tos = st->act.set_tos;
5635	sp->max_mss = htons(st->act.max_mss);
5636	sp->rt = st->rt;
5637	if (st->rt_kif)
5638		strlcpy(sp->rt_ifname, st->rt_kif->pfik_name,
5639		    sizeof(sp->rt_ifname));
5640	sp->set_prio[0] = st->act.set_prio[0];
5641	sp->set_prio[1] = st->act.set_prio[1];
5642
5643}
5644
5645static void
5646pf_tbladdr_copyout(struct pf_addr_wrap *aw)
5647{
5648	struct pfr_ktable *kt;
5649
5650	KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));
5651
5652	kt = aw->p.tbl;
5653	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
5654		kt = kt->pfrkt_root;
5655	aw->p.tbl = NULL;
5656	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
5657		kt->pfrkt_cnt : -1;
5658}
5659
5660static int
5661pf_add_status_counters(nvlist_t *nvl, const char *name, counter_u64_t *counters,
5662    size_t number, char **names)
5663{
5664	nvlist_t        *nvc;
5665
5666	nvc = nvlist_create(0);
5667	if (nvc == NULL)
5668		return (ENOMEM);
5669
5670	for (int i = 0; i < number; i++) {
5671		nvlist_append_number_array(nvc, "counters",
5672		    counter_u64_fetch(counters[i]));
5673		nvlist_append_string_array(nvc, "names",
5674		    names[i]);
5675		nvlist_append_number_array(nvc, "ids",
5676		    i);
5677	}
5678	nvlist_add_nvlist(nvl, name, nvc);
5679	nvlist_destroy(nvc);
5680
5681	return (0);
5682}
5683
5684static int
5685pf_getstatus(struct pfioc_nv *nv)
5686{
5687	nvlist_t        *nvl = NULL, *nvc = NULL;
5688	void            *nvlpacked = NULL;
5689	int              error;
5690	struct pf_status s;
5691	char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES;
5692	char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES;
5693	char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES;
5694	PF_RULES_RLOCK_TRACKER;
5695
5696#define ERROUT(x)      ERROUT_FUNCTION(errout, x)
5697
5698	PF_RULES_RLOCK();
5699
5700	nvl = nvlist_create(0);
5701	if (nvl == NULL)
5702		ERROUT(ENOMEM);
5703
5704	nvlist_add_bool(nvl, "running", V_pf_status.running);
5705	nvlist_add_number(nvl, "since", V_pf_status.since);
5706	nvlist_add_number(nvl, "debug", V_pf_status.debug);
5707	nvlist_add_number(nvl, "hostid", V_pf_status.hostid);
5708	nvlist_add_number(nvl, "states", V_pf_status.states);
5709	nvlist_add_number(nvl, "src_nodes", V_pf_status.src_nodes);
5710	nvlist_add_number(nvl, "reass", V_pf_status.reass);
5711	nvlist_add_bool(nvl, "syncookies_active",
5712	    V_pf_status.syncookies_active);
5713	nvlist_add_number(nvl, "halfopen_states", V_pf_status.states_halfopen);
5714
5715	/* counters */
5716	error = pf_add_status_counters(nvl, "counters", V_pf_status.counters,
5717	    PFRES_MAX, pf_reasons);
5718	if (error != 0)
5719		ERROUT(error);
5720
5721	/* lcounters */
5722	error = pf_add_status_counters(nvl, "lcounters", V_pf_status.lcounters,
5723	    KLCNT_MAX, pf_lcounter);
5724	if (error != 0)
5725		ERROUT(error);
5726
5727	/* fcounters */
5728	nvc = nvlist_create(0);
5729	if (nvc == NULL)
5730		ERROUT(ENOMEM);
5731
5732	for (int i = 0; i < FCNT_MAX; i++) {
5733		nvlist_append_number_array(nvc, "counters",
5734		    pf_counter_u64_fetch(&V_pf_status.fcounters[i]));
5735		nvlist_append_string_array(nvc, "names",
5736		    pf_fcounter[i]);
5737		nvlist_append_number_array(nvc, "ids",
5738		    i);
5739	}
5740	nvlist_add_nvlist(nvl, "fcounters", nvc);
5741	nvlist_destroy(nvc);
5742	nvc = NULL;
5743
5744	/* scounters */
5745	error = pf_add_status_counters(nvl, "scounters", V_pf_status.scounters,
5746	    SCNT_MAX, pf_fcounter);
5747	if (error != 0)
5748		ERROUT(error);
5749
5750	nvlist_add_string(nvl, "ifname", V_pf_status.ifname);
5751	nvlist_add_binary(nvl, "chksum", V_pf_status.pf_chksum,
5752	    PF_MD5_DIGEST_LENGTH);
5753
5754	pfi_update_status(V_pf_status.ifname, &s);
5755
5756	/* pcounters / bcounters */
5757	for (int i = 0; i < 2; i++) {
5758		for (int j = 0; j < 2; j++) {
5759			for (int k = 0; k < 2; k++) {
5760				nvlist_append_number_array(nvl, "pcounters",
5761				    s.pcounters[i][j][k]);
5762			}
5763			nvlist_append_number_array(nvl, "bcounters",
5764			    s.bcounters[i][j]);
5765		}
5766	}
5767
5768	nvlpacked = nvlist_pack(nvl, &nv->len);
5769	if (nvlpacked == NULL)
5770		ERROUT(ENOMEM);
5771
5772	if (nv->size == 0)
5773		ERROUT(0);
5774	else if (nv->size < nv->len)
5775		ERROUT(ENOSPC);
5776
5777	PF_RULES_RUNLOCK();
5778	error = copyout(nvlpacked, nv->data, nv->len);
5779	goto done;
5780
5781#undef ERROUT
5782errout:
5783	PF_RULES_RUNLOCK();
5784done:
5785	free(nvlpacked, M_NVLIST);
5786	nvlist_destroy(nvc);
5787	nvlist_destroy(nvl);
5788
5789	return (error);
5790}
5791
5792/*
5793 * XXX - Check for version mismatch!!!
5794 */
5795static void
5796pf_clear_all_states(void)
5797{
5798	struct epoch_tracker	 et;
5799	struct pf_kstate	*s;
5800	u_int i;
5801
5802	NET_EPOCH_ENTER(et);
5803	for (i = 0; i <= pf_hashmask; i++) {
5804		struct pf_idhash *ih = &V_pf_idhash[i];
5805relock:
5806		PF_HASHROW_LOCK(ih);
5807		LIST_FOREACH(s, &ih->states, entry) {
5808			s->timeout = PFTM_PURGE;
5809			/* Don't send out individual delete messages. */
5810			s->state_flags |= PFSTATE_NOSYNC;
5811			pf_unlink_state(s);
5812			goto relock;
5813		}
5814		PF_HASHROW_UNLOCK(ih);
5815	}
5816	NET_EPOCH_EXIT(et);
5817}
5818
5819static int
5820pf_clear_tables(void)
5821{
5822	struct pfioc_table io;
5823	int error;
5824
5825	bzero(&io, sizeof(io));
5826	io.pfrio_flags |= PFR_FLAG_ALLRSETS;
5827
5828	error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
5829	    io.pfrio_flags);
5830
5831	return (error);
5832}
5833
5834static void
5835pf_clear_srcnodes(struct pf_ksrc_node *n)
5836{
5837	struct pf_kstate *s;
5838	int i;
5839
5840	for (i = 0; i <= pf_hashmask; i++) {
5841		struct pf_idhash *ih = &V_pf_idhash[i];
5842
5843		PF_HASHROW_LOCK(ih);
5844		LIST_FOREACH(s, &ih->states, entry) {
5845			if (n == NULL || n == s->src_node)
5846				s->src_node = NULL;
5847			if (n == NULL || n == s->nat_src_node)
5848				s->nat_src_node = NULL;
5849		}
5850		PF_HASHROW_UNLOCK(ih);
5851	}
5852
5853	if (n == NULL) {
5854		struct pf_srchash *sh;
5855
5856		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
5857		    i++, sh++) {
5858			PF_HASHROW_LOCK(sh);
5859			LIST_FOREACH(n, &sh->nodes, entry) {
5860				n->expire = 1;
5861				n->states = 0;
5862			}
5863			PF_HASHROW_UNLOCK(sh);
5864		}
5865	} else {
5866		/* XXX: hash slot should already be locked here. */
5867		n->expire = 1;
5868		n->states = 0;
5869	}
5870}
5871
5872static void
5873pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
5874{
5875	struct pf_ksrc_node_list	 kill;
5876
5877	LIST_INIT(&kill);
5878	for (int i = 0; i <= pf_srchashmask; i++) {
5879		struct pf_srchash *sh = &V_pf_srchash[i];
5880		struct pf_ksrc_node *sn, *tmp;
5881
5882		PF_HASHROW_LOCK(sh);
5883		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
5884			if (PF_MATCHA(psnk->psnk_src.neg,
5885			      &psnk->psnk_src.addr.v.a.addr,
5886			      &psnk->psnk_src.addr.v.a.mask,
5887			      &sn->addr, sn->af) &&
5888			    PF_MATCHA(psnk->psnk_dst.neg,
5889			      &psnk->psnk_dst.addr.v.a.addr,
5890			      &psnk->psnk_dst.addr.v.a.mask,
5891			      &sn->raddr, sn->af)) {
5892				pf_unlink_src_node(sn);
5893				LIST_INSERT_HEAD(&kill, sn, entry);
5894				sn->expire = 1;
5895			}
5896		PF_HASHROW_UNLOCK(sh);
5897	}
5898
5899	for (int i = 0; i <= pf_hashmask; i++) {
5900		struct pf_idhash *ih = &V_pf_idhash[i];
5901		struct pf_kstate *s;
5902
5903		PF_HASHROW_LOCK(ih);
5904		LIST_FOREACH(s, &ih->states, entry) {
5905			if (s->src_node && s->src_node->expire == 1)
5906				s->src_node = NULL;
5907			if (s->nat_src_node && s->nat_src_node->expire == 1)
5908				s->nat_src_node = NULL;
5909		}
5910		PF_HASHROW_UNLOCK(ih);
5911	}
5912
5913	psnk->psnk_killed = pf_free_src_nodes(&kill);
5914}
5915
5916static int
5917pf_keepcounters(struct pfioc_nv *nv)
5918{
5919	nvlist_t	*nvl = NULL;
5920	void		*nvlpacked = NULL;
5921	int		 error = 0;
5922
5923#define	ERROUT(x)	ERROUT_FUNCTION(on_error, x)
5924
5925	if (nv->len > pf_ioctl_maxcount)
5926		ERROUT(ENOMEM);
5927
5928	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
5929	if (nvlpacked == NULL)
5930		ERROUT(ENOMEM);
5931
5932	error = copyin(nv->data, nvlpacked, nv->len);
5933	if (error)
5934		ERROUT(error);
5935
5936	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
5937	if (nvl == NULL)
5938		ERROUT(EBADMSG);
5939
5940	if (! nvlist_exists_bool(nvl, "keep_counters"))
5941		ERROUT(EBADMSG);
5942
5943	V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters");
5944
5945on_error:
5946	nvlist_destroy(nvl);
5947	free(nvlpacked, M_NVLIST);
5948	return (error);
5949}
5950
5951unsigned int
5952pf_clear_states(const struct pf_kstate_kill *kill)
5953{
5954	struct pf_state_key_cmp	 match_key;
5955	struct pf_kstate	*s;
5956	struct pfi_kkif	*kif;
5957	int		 idx;
5958	unsigned int	 killed = 0, dir;
5959
5960	NET_EPOCH_ASSERT();
5961
5962	for (unsigned int i = 0; i <= pf_hashmask; i++) {
5963		struct pf_idhash *ih = &V_pf_idhash[i];
5964
5965relock_DIOCCLRSTATES:
5966		PF_HASHROW_LOCK(ih);
5967		LIST_FOREACH(s, &ih->states, entry) {
5968			/* For floating states look at the original kif. */
5969			kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;
5970
5971			if (kill->psk_ifname[0] &&
5972			    strcmp(kill->psk_ifname,
5973			    kif->pfik_name))
5974				continue;
5975
5976			if (kill->psk_kill_match) {
5977				bzero(&match_key, sizeof(match_key));
5978
5979				if (s->direction == PF_OUT) {
5980					dir = PF_IN;
5981					idx = PF_SK_STACK;
5982				} else {
5983					dir = PF_OUT;
5984					idx = PF_SK_WIRE;
5985				}
5986
5987				match_key.af = s->key[idx]->af;
5988				match_key.proto = s->key[idx]->proto;
5989				PF_ACPY(&match_key.addr[0],
5990				    &s->key[idx]->addr[1], match_key.af);
5991				match_key.port[0] = s->key[idx]->port[1];
5992				PF_ACPY(&match_key.addr[1],
5993				    &s->key[idx]->addr[0], match_key.af);
5994				match_key.port[1] = s->key[idx]->port[0];
5995			}
5996
5997			/*
5998			 * Don't send out individual
5999			 * delete messages.
6000			 */
6001			s->state_flags |= PFSTATE_NOSYNC;
6002			pf_unlink_state(s);
6003			killed++;
6004
6005			if (kill->psk_kill_match)
6006				killed += pf_kill_matching_state(&match_key,
6007				    dir);
6008
6009			goto relock_DIOCCLRSTATES;
6010		}
6011		PF_HASHROW_UNLOCK(ih);
6012	}
6013
6014	if (V_pfsync_clear_states_ptr != NULL)
6015		V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname);
6016
6017	return (killed);
6018}
6019
6020void
6021pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed)
6022{
6023	struct pf_kstate	*s;
6024
6025	NET_EPOCH_ASSERT();
6026	if (kill->psk_pfcmp.id) {
6027		if (kill->psk_pfcmp.creatorid == 0)
6028			kill->psk_pfcmp.creatorid = V_pf_status.hostid;
6029		if ((s = pf_find_state_byid(kill->psk_pfcmp.id,
6030		    kill->psk_pfcmp.creatorid))) {
6031			pf_unlink_state(s);
6032			*killed = 1;
6033		}
6034		return;
6035	}
6036
6037	for (unsigned int i = 0; i <= pf_hashmask; i++)
6038		*killed += pf_killstates_row(kill, &V_pf_idhash[i]);
6039}
6040
6041static int
6042pf_killstates_nv(struct pfioc_nv *nv)
6043{
6044	struct pf_kstate_kill	 kill;
6045	struct epoch_tracker	 et;
6046	nvlist_t		*nvl = NULL;
6047	void			*nvlpacked = NULL;
6048	int			 error = 0;
6049	unsigned int		 killed = 0;
6050
6051#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)
6052
6053	if (nv->len > pf_ioctl_maxcount)
6054		ERROUT(ENOMEM);
6055
6056	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
6057	if (nvlpacked == NULL)
6058		ERROUT(ENOMEM);
6059
6060	error = copyin(nv->data, nvlpacked, nv->len);
6061	if (error)
6062		ERROUT(error);
6063
6064	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
6065	if (nvl == NULL)
6066		ERROUT(EBADMSG);
6067
6068	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
6069	if (error)
6070		ERROUT(error);
6071
6072	NET_EPOCH_ENTER(et);
6073	pf_killstates(&kill, &killed);
6074	NET_EPOCH_EXIT(et);
6075
6076	free(nvlpacked, M_NVLIST);
6077	nvlpacked = NULL;
6078	nvlist_destroy(nvl);
6079	nvl = nvlist_create(0);
6080	if (nvl == NULL)
6081		ERROUT(ENOMEM);
6082
6083	nvlist_add_number(nvl, "killed", killed);
6084
6085	nvlpacked = nvlist_pack(nvl, &nv->len);
6086	if (nvlpacked == NULL)
6087		ERROUT(ENOMEM);
6088
6089	if (nv->size == 0)
6090		ERROUT(0);
6091	else if (nv->size < nv->len)
6092		ERROUT(ENOSPC);
6093
6094	error = copyout(nvlpacked, nv->data, nv->len);
6095
6096on_error:
6097	nvlist_destroy(nvl);
6098	free(nvlpacked, M_NVLIST);
6099	return (error);
6100}
6101
6102static int
6103pf_clearstates_nv(struct pfioc_nv *nv)
6104{
6105	struct pf_kstate_kill	 kill;
6106	struct epoch_tracker	 et;
6107	nvlist_t		*nvl = NULL;
6108	void			*nvlpacked = NULL;
6109	int			 error = 0;
6110	unsigned int		 killed;
6111
6112#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)
6113
6114	if (nv->len > pf_ioctl_maxcount)
6115		ERROUT(ENOMEM);
6116
6117	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
6118	if (nvlpacked == NULL)
6119		ERROUT(ENOMEM);
6120
6121	error = copyin(nv->data, nvlpacked, nv->len);
6122	if (error)
6123		ERROUT(error);
6124
6125	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
6126	if (nvl == NULL)
6127		ERROUT(EBADMSG);
6128
6129	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
6130	if (error)
6131		ERROUT(error);
6132
6133	NET_EPOCH_ENTER(et);
6134	killed = pf_clear_states(&kill);
6135	NET_EPOCH_EXIT(et);
6136
6137	free(nvlpacked, M_NVLIST);
6138	nvlpacked = NULL;
6139	nvlist_destroy(nvl);
6140	nvl = nvlist_create(0);
6141	if (nvl == NULL)
6142		ERROUT(ENOMEM);
6143
6144	nvlist_add_number(nvl, "killed", killed);
6145
6146	nvlpacked = nvlist_pack(nvl, &nv->len);
6147	if (nvlpacked == NULL)
6148		ERROUT(ENOMEM);
6149
6150	if (nv->size == 0)
6151		ERROUT(0);
6152	else if (nv->size < nv->len)
6153		ERROUT(ENOSPC);
6154
6155	error = copyout(nvlpacked, nv->data, nv->len);
6156
6157#undef ERROUT
6158on_error:
6159	nvlist_destroy(nvl);
6160	free(nvlpacked, M_NVLIST);
6161	return (error);
6162}
6163
6164static int
6165pf_getstate(struct pfioc_nv *nv)
6166{
6167	nvlist_t		*nvl = NULL, *nvls;
6168	void			*nvlpacked = NULL;
6169	struct pf_kstate	*s = NULL;
6170	int			 error = 0;
6171	uint64_t		 id, creatorid;
6172
6173#define ERROUT(x)	ERROUT_FUNCTION(errout, x)
6174
6175	if (nv->len > pf_ioctl_maxcount)
6176		ERROUT(ENOMEM);
6177
6178	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
6179	if (nvlpacked == NULL)
6180		ERROUT(ENOMEM);
6181
6182	error = copyin(nv->data, nvlpacked, nv->len);
6183	if (error)
6184		ERROUT(error);
6185
6186	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
6187	if (nvl == NULL)
6188		ERROUT(EBADMSG);
6189
6190	PFNV_CHK(pf_nvuint64(nvl, "id", &id));
6191	PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid));
6192
6193	s = pf_find_state_byid(id, creatorid);
6194	if (s == NULL)
6195		ERROUT(ENOENT);
6196
6197	free(nvlpacked, M_NVLIST);
6198	nvlpacked = NULL;
6199	nvlist_destroy(nvl);
6200	nvl = nvlist_create(0);
6201	if (nvl == NULL)
6202		ERROUT(ENOMEM);
6203
6204	nvls = pf_state_to_nvstate(s);
6205	if (nvls == NULL)
6206		ERROUT(ENOMEM);
6207
6208	nvlist_add_nvlist(nvl, "state", nvls);
6209	nvlist_destroy(nvls);
6210
6211	nvlpacked = nvlist_pack(nvl, &nv->len);
6212	if (nvlpacked == NULL)
6213		ERROUT(ENOMEM);
6214
6215	if (nv->size == 0)
6216		ERROUT(0);
6217	else if (nv->size < nv->len)
6218		ERROUT(ENOSPC);
6219
6220	error = copyout(nvlpacked, nv->data, nv->len);
6221
6222#undef ERROUT
6223errout:
6224	if (s != NULL)
6225		PF_STATE_UNLOCK(s);
6226	free(nvlpacked, M_NVLIST);
6227	nvlist_destroy(nvl);
6228	return (error);
6229}
6230
6231/*
6232 * XXX - Check for version mismatch!!!
6233 */
6234
6235/*
6236 * Duplicate pfctl -Fa operation to get rid of as much as we can.
6237 */
6238static int
6239shutdown_pf(void)
6240{
6241	int error = 0;
6242	u_int32_t t[5];
6243	char nn = '\0';
6244	struct pf_kanchor *anchor;
6245	struct pf_keth_anchor *eth_anchor;
6246	int rs_num;
6247
6248	do {
6249		/* Unlink rules of all user defined anchors */
6250		RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) {
6251			/* Wildcard based anchors may not have a respective
6252			 * explicit anchor rule or they may be left empty
6253			 * without rules. It leads to anchor.refcnt=0, and the
6254			 * rest of the logic does not expect it. */
6255			if (anchor->refcnt == 0)
6256				anchor->refcnt = 1;
6257			for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) {
6258				if ((error = pf_begin_rules(&t[rs_num], rs_num,
6259				    anchor->path)) != 0) {
6260					DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: "
6261					    "anchor.path=%s rs_num=%d\n",
6262					    anchor->path, rs_num));
6263					goto error;	/* XXX: rollback? */
6264				}
6265			}
6266			for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) {
6267				error = pf_commit_rules(t[rs_num], rs_num,
6268				    anchor->path);
6269				MPASS(error == 0);
6270			}
6271		}
6272
6273		/* Unlink rules of all user defined ether anchors */
6274		RB_FOREACH(eth_anchor, pf_keth_anchor_global,
6275		    &V_pf_keth_anchors) {
6276			/* Wildcard based anchors may not have a respective
6277			 * explicit anchor rule or they may be left empty
6278			 * without rules. It leads to anchor.refcnt=0, and the
6279			 * rest of the logic does not expect it. */
6280			if (eth_anchor->refcnt == 0)
6281				eth_anchor->refcnt = 1;
6282			if ((error = pf_begin_eth(&t[0], eth_anchor->path))
6283			    != 0) {
6284				DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: eth "
6285				    "anchor.path=%s\n", eth_anchor->path));
6286				goto error;
6287			}
6288			error = pf_commit_eth(t[0], eth_anchor->path);
6289			MPASS(error == 0);
6290		}
6291
6292		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
6293		    != 0) {
6294			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
6295			break;
6296		}
6297		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
6298		    != 0) {
6299			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
6300			break;		/* XXX: rollback? */
6301		}
6302		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
6303		    != 0) {
6304			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
6305			break;		/* XXX: rollback? */
6306		}
6307		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
6308		    != 0) {
6309			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
6310			break;		/* XXX: rollback? */
6311		}
6312		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
6313		    != 0) {
6314			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
6315			break;		/* XXX: rollback? */
6316		}
6317
6318		error = pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
6319		MPASS(error == 0);
6320		error = pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
6321		MPASS(error == 0);
6322		error = pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
6323		MPASS(error == 0);
6324		error = pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
6325		MPASS(error == 0);
6326		error = pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
6327		MPASS(error == 0);
6328
6329		if ((error = pf_clear_tables()) != 0)
6330			break;
6331
6332		if ((error = pf_begin_eth(&t[0], &nn)) != 0) {
6333			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: eth\n"));
6334			break;
6335		}
6336		error = pf_commit_eth(t[0], &nn);
6337		MPASS(error == 0);
6338
6339#ifdef ALTQ
6340		if ((error = pf_begin_altq(&t[0])) != 0) {
6341			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
6342			break;
6343		}
6344		pf_commit_altq(t[0]);
6345#endif
6346
6347		pf_clear_all_states();
6348
6349		pf_clear_srcnodes(NULL);
6350
6351		/* status does not use malloced mem so no need to cleanup */
6352		/* fingerprints and interfaces have their own cleanup code */
6353	} while(0);
6354
6355error:
6356	return (error);
6357}
6358
6359static pfil_return_t
6360pf_check_return(int chk, struct mbuf **m)
6361{
6362
6363	switch (chk) {
6364	case PF_PASS:
6365		if (*m == NULL)
6366			return (PFIL_CONSUMED);
6367		else
6368			return (PFIL_PASS);
6369		break;
6370	default:
6371		if (*m != NULL) {
6372			m_freem(*m);
6373			*m = NULL;
6374		}
6375		return (PFIL_DROPPED);
6376	}
6377}
6378
6379static pfil_return_t
6380pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
6381    void *ruleset __unused, struct inpcb *inp)
6382{
6383	int chk;
6384
6385	chk = pf_test_eth(PF_IN, flags, ifp, m, inp);
6386
6387	return (pf_check_return(chk, m));
6388}
6389
6390static pfil_return_t
6391pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
6392    void *ruleset __unused, struct inpcb *inp)
6393{
6394	int chk;
6395
6396	chk = pf_test_eth(PF_OUT, flags, ifp, m, inp);
6397
6398	return (pf_check_return(chk, m));
6399}
6400
6401#ifdef INET
6402static pfil_return_t
6403pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
6404    void *ruleset __unused, struct inpcb *inp)
6405{
6406	int chk;
6407
6408	chk = pf_test(PF_IN, flags, ifp, m, inp, NULL);
6409
6410	return (pf_check_return(chk, m));
6411}
6412
6413static pfil_return_t
6414pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
6415    void *ruleset __unused,  struct inpcb *inp)
6416{
6417	int chk;
6418
6419	chk = pf_test(PF_OUT, flags, ifp, m, inp, NULL);
6420
6421	return (pf_check_return(chk, m));
6422}
6423#endif
6424
6425#ifdef INET6
6426static pfil_return_t
6427pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags,
6428    void *ruleset __unused,  struct inpcb *inp)
6429{
6430	int chk;
6431
6432	/*
6433	 * In case of loopback traffic IPv6 uses the real interface in
6434	 * order to support scoped addresses. In order to support stateful
6435	 * filtering we have change this to lo0 as it is the case in IPv4.
6436	 */
6437	CURVNET_SET(ifp->if_vnet);
6438	chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp,
6439	    m, inp, NULL);
6440	CURVNET_RESTORE();
6441
6442	return (pf_check_return(chk, m));
6443}
6444
6445static pfil_return_t
6446pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
6447    void *ruleset __unused,  struct inpcb *inp)
6448{
6449	int chk;
6450
6451	CURVNET_SET(ifp->if_vnet);
6452	chk = pf_test6(PF_OUT, flags, ifp, m, inp, NULL);
6453	CURVNET_RESTORE();
6454
6455	return (pf_check_return(chk, m));
6456}
6457#endif /* INET6 */
6458
6459VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook);
6460VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook);
6461#define	V_pf_eth_in_hook	VNET(pf_eth_in_hook)
6462#define	V_pf_eth_out_hook	VNET(pf_eth_out_hook)
6463
6464#ifdef INET
6465VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
6466VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
6467#define	V_pf_ip4_in_hook	VNET(pf_ip4_in_hook)
6468#define	V_pf_ip4_out_hook	VNET(pf_ip4_out_hook)
6469#endif
6470#ifdef INET6
6471VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook);
6472VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook);
6473#define	V_pf_ip6_in_hook	VNET(pf_ip6_in_hook)
6474#define	V_pf_ip6_out_hook	VNET(pf_ip6_out_hook)
6475#endif
6476
6477static void
6478hook_pf_eth(void)
6479{
6480	struct pfil_hook_args pha = {
6481		.pa_version = PFIL_VERSION,
6482		.pa_modname = "pf",
6483		.pa_type = PFIL_TYPE_ETHERNET,
6484	};
6485	struct pfil_link_args pla = {
6486		.pa_version = PFIL_VERSION,
6487	};
6488	int ret __diagused;
6489
6490	if (atomic_load_bool(&V_pf_pfil_eth_hooked))
6491		return;
6492
6493	pha.pa_mbuf_chk = pf_eth_check_in;
6494	pha.pa_flags = PFIL_IN;
6495	pha.pa_rulname = "eth-in";
6496	V_pf_eth_in_hook = pfil_add_hook(&pha);
6497	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
6498	pla.pa_head = V_link_pfil_head;
6499	pla.pa_hook = V_pf_eth_in_hook;
6500	ret = pfil_link(&pla);
6501	MPASS(ret == 0);
6502	pha.pa_mbuf_chk = pf_eth_check_out;
6503	pha.pa_flags = PFIL_OUT;
6504	pha.pa_rulname = "eth-out";
6505	V_pf_eth_out_hook = pfil_add_hook(&pha);
6506	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
6507	pla.pa_head = V_link_pfil_head;
6508	pla.pa_hook = V_pf_eth_out_hook;
6509	ret = pfil_link(&pla);
6510	MPASS(ret == 0);
6511
6512	atomic_store_bool(&V_pf_pfil_eth_hooked, true);
6513}
6514
6515static void
6516hook_pf(void)
6517{
6518	struct pfil_hook_args pha = {
6519		.pa_version = PFIL_VERSION,
6520		.pa_modname = "pf",
6521	};
6522	struct pfil_link_args pla = {
6523		.pa_version = PFIL_VERSION,
6524	};
6525	int ret __diagused;
6526
6527	if (atomic_load_bool(&V_pf_pfil_hooked))
6528		return;
6529
6530#ifdef INET
6531	pha.pa_type = PFIL_TYPE_IP4;
6532	pha.pa_mbuf_chk = pf_check_in;
6533	pha.pa_flags = PFIL_IN;
6534	pha.pa_rulname = "default-in";
6535	V_pf_ip4_in_hook = pfil_add_hook(&pha);
6536	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
6537	pla.pa_head = V_inet_pfil_head;
6538	pla.pa_hook = V_pf_ip4_in_hook;
6539	ret = pfil_link(&pla);
6540	MPASS(ret == 0);
6541	pha.pa_mbuf_chk = pf_check_out;
6542	pha.pa_flags = PFIL_OUT;
6543	pha.pa_rulname = "default-out";
6544	V_pf_ip4_out_hook = pfil_add_hook(&pha);
6545	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
6546	pla.pa_head = V_inet_pfil_head;
6547	pla.pa_hook = V_pf_ip4_out_hook;
6548	ret = pfil_link(&pla);
6549	MPASS(ret == 0);
6550	if (V_pf_filter_local) {
6551		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
6552		pla.pa_head = V_inet_local_pfil_head;
6553		pla.pa_hook = V_pf_ip4_out_hook;
6554		ret = pfil_link(&pla);
6555		MPASS(ret == 0);
6556	}
6557#endif
6558#ifdef INET6
6559	pha.pa_type = PFIL_TYPE_IP6;
6560	pha.pa_mbuf_chk = pf_check6_in;
6561	pha.pa_flags = PFIL_IN;
6562	pha.pa_rulname = "default-in6";
6563	V_pf_ip6_in_hook = pfil_add_hook(&pha);
6564	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
6565	pla.pa_head = V_inet6_pfil_head;
6566	pla.pa_hook = V_pf_ip6_in_hook;
6567	ret = pfil_link(&pla);
6568	MPASS(ret == 0);
6569	pha.pa_mbuf_chk = pf_check6_out;
6570	pha.pa_rulname = "default-out6";
6571	pha.pa_flags = PFIL_OUT;
6572	V_pf_ip6_out_hook = pfil_add_hook(&pha);
6573	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
6574	pla.pa_head = V_inet6_pfil_head;
6575	pla.pa_hook = V_pf_ip6_out_hook;
6576	ret = pfil_link(&pla);
6577	MPASS(ret == 0);
6578	if (V_pf_filter_local) {
6579		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
6580		pla.pa_head = V_inet6_local_pfil_head;
6581		pla.pa_hook = V_pf_ip6_out_hook;
6582		ret = pfil_link(&pla);
6583		MPASS(ret == 0);
6584	}
6585#endif
6586
6587	atomic_store_bool(&V_pf_pfil_hooked, true);
6588}
6589
6590static void
6591dehook_pf_eth(void)
6592{
6593
6594	if (!atomic_load_bool(&V_pf_pfil_eth_hooked))
6595		return;
6596
6597	pfil_remove_hook(V_pf_eth_in_hook);
6598	pfil_remove_hook(V_pf_eth_out_hook);
6599
6600	atomic_store_bool(&V_pf_pfil_eth_hooked, false);
6601}
6602
6603static void
6604dehook_pf(void)
6605{
6606
6607	if (!atomic_load_bool(&V_pf_pfil_hooked))
6608		return;
6609
6610#ifdef INET
6611	pfil_remove_hook(V_pf_ip4_in_hook);
6612	pfil_remove_hook(V_pf_ip4_out_hook);
6613#endif
6614#ifdef INET6
6615	pfil_remove_hook(V_pf_ip6_in_hook);
6616	pfil_remove_hook(V_pf_ip6_out_hook);
6617#endif
6618
6619	atomic_store_bool(&V_pf_pfil_hooked, false);
6620}
6621
6622static void
6623pf_load_vnet(void)
6624{
6625	V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname),
6626	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
6627
6628	rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE);
6629	sx_init(&V_pf_ioctl_lock, "pf ioctl");
6630
6631	pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize,
6632	    PF_RULE_TAG_HASH_SIZE_DEFAULT);
6633#ifdef ALTQ
6634	pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize,
6635	    PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
6636#endif
6637
6638	V_pf_keth = &V_pf_main_keth_anchor.ruleset;
6639
6640	pfattach_vnet();
6641	V_pf_vnet_active = 1;
6642}
6643
6644static int
6645pf_load(void)
6646{
6647	int error;
6648
6649	sx_init(&pf_end_lock, "pf end thread");
6650
6651	pf_mtag_initialize();
6652
6653	pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME);
6654	if (pf_dev == NULL)
6655		return (ENOMEM);
6656
6657	pf_end_threads = 0;
6658	error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge");
6659	if (error != 0)
6660		return (error);
6661
6662	pfi_initialize();
6663
6664	return (0);
6665}
6666
6667static void
6668pf_unload_vnet(void)
6669{
6670	int ret __diagused;
6671
6672	V_pf_vnet_active = 0;
6673	V_pf_status.running = 0;
6674	dehook_pf();
6675	dehook_pf_eth();
6676
6677	PF_RULES_WLOCK();
6678	pf_syncookies_cleanup();
6679	shutdown_pf();
6680	PF_RULES_WUNLOCK();
6681
6682	/* Make sure we've cleaned up ethernet rules before we continue. */
6683	NET_EPOCH_DRAIN_CALLBACKS();
6684
6685	ret = swi_remove(V_pf_swi_cookie);
6686	MPASS(ret == 0);
6687	ret = intr_event_destroy(V_pf_swi_ie);
6688	MPASS(ret == 0);
6689
6690	pf_unload_vnet_purge();
6691
6692	pf_normalize_cleanup();
6693	PF_RULES_WLOCK();
6694	pfi_cleanup_vnet();
6695	PF_RULES_WUNLOCK();
6696	pfr_cleanup();
6697	pf_osfp_flush();
6698	pf_cleanup();
6699	if (IS_DEFAULT_VNET(curvnet))
6700		pf_mtag_cleanup();
6701
6702	pf_cleanup_tagset(&V_pf_tags);
6703#ifdef ALTQ
6704	pf_cleanup_tagset(&V_pf_qids);
6705#endif
6706	uma_zdestroy(V_pf_tag_z);
6707
6708#ifdef PF_WANT_32_TO_64_COUNTER
6709	PF_RULES_WLOCK();
6710	LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
6711
6712	MPASS(LIST_EMPTY(&V_pf_allkiflist));
6713	MPASS(V_pf_allkifcount == 0);
6714
6715	LIST_REMOVE(&V_pf_default_rule, allrulelist);
6716	V_pf_allrulecount--;
6717	LIST_REMOVE(V_pf_rulemarker, allrulelist);
6718
6719	MPASS(LIST_EMPTY(&V_pf_allrulelist));
6720	MPASS(V_pf_allrulecount == 0);
6721
6722	PF_RULES_WUNLOCK();
6723
6724	free(V_pf_kifmarker, PFI_MTYPE);
6725	free(V_pf_rulemarker, M_PFRULE);
6726#endif
6727
6728	/* Free counters last as we updated them during shutdown. */
6729	pf_counter_u64_deinit(&V_pf_default_rule.evaluations);
6730	for (int i = 0; i < 2; i++) {
6731		pf_counter_u64_deinit(&V_pf_default_rule.packets[i]);
6732		pf_counter_u64_deinit(&V_pf_default_rule.bytes[i]);
6733	}
6734	counter_u64_free(V_pf_default_rule.states_cur);
6735	counter_u64_free(V_pf_default_rule.states_tot);
6736	counter_u64_free(V_pf_default_rule.src_nodes);
6737	uma_zfree_pcpu(pf_timestamp_pcpu_zone, V_pf_default_rule.timestamp);
6738
6739	for (int i = 0; i < PFRES_MAX; i++)
6740		counter_u64_free(V_pf_status.counters[i]);
6741	for (int i = 0; i < KLCNT_MAX; i++)
6742		counter_u64_free(V_pf_status.lcounters[i]);
6743	for (int i = 0; i < FCNT_MAX; i++)
6744		pf_counter_u64_deinit(&V_pf_status.fcounters[i]);
6745	for (int i = 0; i < SCNT_MAX; i++)
6746		counter_u64_free(V_pf_status.scounters[i]);
6747
6748	rm_destroy(&V_pf_rules_lock);
6749	sx_destroy(&V_pf_ioctl_lock);
6750}
6751
6752static void
6753pf_unload(void)
6754{
6755
6756	sx_xlock(&pf_end_lock);
6757	pf_end_threads = 1;
6758	while (pf_end_threads < 2) {
6759		wakeup_one(pf_purge_thread);
6760		sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0);
6761	}
6762	sx_xunlock(&pf_end_lock);
6763
6764	pf_nl_unregister();
6765
6766	if (pf_dev != NULL)
6767		destroy_dev(pf_dev);
6768
6769	pfi_cleanup();
6770
6771	sx_destroy(&pf_end_lock);
6772}
6773
6774static void
6775vnet_pf_init(void *unused __unused)
6776{
6777
6778	pf_load_vnet();
6779}
6780VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
6781    vnet_pf_init, NULL);
6782
6783static void
6784vnet_pf_uninit(const void *unused __unused)
6785{
6786
6787	pf_unload_vnet();
6788}
6789SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL);
6790VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
6791    vnet_pf_uninit, NULL);
6792
6793static int
6794pf_modevent(module_t mod, int type, void *data)
6795{
6796	int error = 0;
6797
6798	switch(type) {
6799	case MOD_LOAD:
6800		error = pf_load();
6801		pf_nl_register();
6802		break;
6803	case MOD_UNLOAD:
6804		/* Handled in SYSUNINIT(pf_unload) to ensure it's done after
6805		 * the vnet_pf_uninit()s */
6806		break;
6807	default:
6808		error = EINVAL;
6809		break;
6810	}
6811
6812	return (error);
6813}
6814
6815static moduledata_t pf_mod = {
6816	"pf",
6817	pf_modevent,
6818	0
6819};
6820
6821DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
6822MODULE_DEPEND(pf, netlink, 1, 1, 1);
6823MODULE_VERSION(pf, PF_MODVER);
6824