1244769Sglebius/*-
2126258Smlaier * Copyright (c) 2001 Daniel Hartmeier
3223637Sbz * Copyright (c) 2002 - 2008 Henning Brauer
4244769Sglebius * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
5126258Smlaier * All rights reserved.
6126258Smlaier *
7126258Smlaier * Redistribution and use in source and binary forms, with or without
8126258Smlaier * modification, are permitted provided that the following conditions
9126258Smlaier * are met:
10126258Smlaier *
11126258Smlaier *    - Redistributions of source code must retain the above copyright
12126258Smlaier *      notice, this list of conditions and the following disclaimer.
13126258Smlaier *    - Redistributions in binary form must reproduce the above
14126258Smlaier *      copyright notice, this list of conditions and the following
15126258Smlaier *      disclaimer in the documentation and/or other materials provided
16126258Smlaier *      with the distribution.
17126258Smlaier *
18126258Smlaier * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19126258Smlaier * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20126258Smlaier * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21126258Smlaier * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22126258Smlaier * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23126258Smlaier * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24126258Smlaier * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25126258Smlaier * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26126258Smlaier * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27126258Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28126258Smlaier * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29126258Smlaier * POSSIBILITY OF SUCH DAMAGE.
30126258Smlaier *
31126258Smlaier * Effort sponsored in part by the Defense Advanced Research Projects
32126258Smlaier * Agency (DARPA) and Air Force Research Laboratory, Air Force
33126258Smlaier * Materiel Command, USAF, under agreement number F30602-01-2-0537.
34126258Smlaier *
35244769Sglebius *	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
36126258Smlaier */
37126258Smlaier
38240233Sglebius#include <sys/cdefs.h>
39171168Smlaier__FBSDID("$FreeBSD: stable/10/sys/netpfil/pf/pf.c 335252 2018-06-16 11:42:27Z kp $");
40126261Smlaier
41240233Sglebius#include "opt_inet.h"
42240233Sglebius#include "opt_inet6.h"
43126261Smlaier#include "opt_bpf.h"
44126261Smlaier#include "opt_pf.h"
45153110Sru
46126258Smlaier#include <sys/param.h>
47240233Sglebius#include <sys/bus.h>
48240233Sglebius#include <sys/endian.h>
49240233Sglebius#include <sys/hash.h>
50240233Sglebius#include <sys/interrupt.h>
51240233Sglebius#include <sys/kernel.h>
52240233Sglebius#include <sys/kthread.h>
53240233Sglebius#include <sys/limits.h>
54126258Smlaier#include <sys/mbuf.h>
55240233Sglebius#include <sys/md5.h>
56240233Sglebius#include <sys/random.h>
57240233Sglebius#include <sys/refcount.h>
58126258Smlaier#include <sys/socket.h>
59126261Smlaier#include <sys/sysctl.h>
60240233Sglebius#include <sys/taskqueue.h>
61240233Sglebius#include <sys/ucred.h>
62126258Smlaier
63126258Smlaier#include <net/if.h>
64126258Smlaier#include <net/if_types.h>
65126258Smlaier#include <net/route.h>
66171168Smlaier#include <net/radix_mpath.h>
67240233Sglebius#include <net/vnet.h>
68126258Smlaier
69240233Sglebius#include <net/pfvar.h>
70240233Sglebius#include <net/if_pflog.h>
71240233Sglebius#include <net/if_pfsync.h>
72240233Sglebius
73240233Sglebius#include <netinet/in_pcb.h>
74126258Smlaier#include <netinet/in_var.h>
75126258Smlaier#include <netinet/ip.h>
76240233Sglebius#include <netinet/ip_fw.h>
77240233Sglebius#include <netinet/ip_icmp.h>
78240233Sglebius#include <netinet/icmp_var.h>
79126258Smlaier#include <netinet/ip_var.h>
80126258Smlaier#include <netinet/tcp.h>
81240233Sglebius#include <netinet/tcp_fsm.h>
82126258Smlaier#include <netinet/tcp_seq.h>
83126258Smlaier#include <netinet/tcp_timer.h>
84126258Smlaier#include <netinet/tcp_var.h>
85240233Sglebius#include <netinet/udp.h>
86126258Smlaier#include <netinet/udp_var.h>
87126258Smlaier
88240494Sglebius#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
89240494Sglebius
90126258Smlaier#ifdef INET6
91126258Smlaier#include <netinet/ip6.h>
92126258Smlaier#include <netinet/icmp6.h>
93126258Smlaier#include <netinet6/nd6.h>
94126261Smlaier#include <netinet6/ip6_var.h>
95126261Smlaier#include <netinet6/in6_pcb.h>
96126258Smlaier#endif /* INET6 */
97126258Smlaier
98126261Smlaier#include <machine/in_cksum.h>
99163606Srwatson#include <security/mac/mac_framework.h>
100126258Smlaier
/*
 * Debug printf, emitted only when V_pf_status.debug is at least (n).
 * (x) is a complete, parenthesized printf argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("fmt %d\n", value));
 * The body is wrapped in do/while (0) so that the macro is a single
 * statement and cannot capture an `else' that follows it.
 */
#define	DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
102126258Smlaier
103126258Smlaier/*
104126258Smlaier * Global variables
105126258Smlaier */
106126258Smlaier
107223637Sbz/* state tables */
108223637SbzVNET_DEFINE(struct pf_altqqueue,	 pf_altqs[2]);
109223637SbzVNET_DEFINE(struct pf_palist,		 pf_pabuf);
110223637SbzVNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
111223637SbzVNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
112270574SglebiusVNET_DEFINE(struct pf_kstatus,		 pf_status);
113223637Sbz
114223637SbzVNET_DEFINE(u_int32_t,			 ticket_altqs_active);
115223637SbzVNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
116223637SbzVNET_DEFINE(int,			 altqs_inactive_open);
117223637SbzVNET_DEFINE(u_int32_t,			 ticket_pabuf);
118223637Sbz
119223637SbzVNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
120223637Sbz#define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
121223637SbzVNET_DEFINE(u_char,			 pf_tcp_secret[16]);
122223637Sbz#define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
123223637SbzVNET_DEFINE(int,			 pf_tcp_secret_init);
124223637Sbz#define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
125223637SbzVNET_DEFINE(int,			 pf_tcp_iss_off);
126223637Sbz#define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)
127223637Sbz
128240233Sglebius/*
129240233Sglebius * Queue for pf_intr() sends.
130240233Sglebius */
131240233Sglebiusstatic MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
132240233Sglebiusstruct pf_send_entry {
133240233Sglebius	STAILQ_ENTRY(pf_send_entry)	pfse_next;
134240233Sglebius	struct mbuf			*pfse_m;
135240233Sglebius	enum {
136240233Sglebius		PFSE_IP,
137240233Sglebius		PFSE_IP6,
138240233Sglebius		PFSE_ICMP,
139240233Sglebius		PFSE_ICMP6,
140240233Sglebius	}				pfse_type;
141240233Sglebius	union {
142240233Sglebius		struct route		ro;
143240233Sglebius		struct {
144240233Sglebius			int		type;
145240233Sglebius			int		code;
146240233Sglebius			int		mtu;
147240233Sglebius		} icmpopts;
148240233Sglebius	} u;
149240233Sglebius#define	pfse_ro		u.ro
150240233Sglebius#define	pfse_icmp_type	u.icmpopts.type
151240233Sglebius#define	pfse_icmp_code	u.icmpopts.code
152240233Sglebius#define	pfse_icmp_mtu	u.icmpopts.mtu
153240233Sglebius};
154223637Sbz
155240233SglebiusSTAILQ_HEAD(pf_send_head, pf_send_entry);
156240233Sglebiusstatic VNET_DEFINE(struct pf_send_head, pf_sendqueue);
157240233Sglebius#define	V_pf_sendqueue	VNET(pf_sendqueue)
158126258Smlaier
159240233Sglebiusstatic struct mtx pf_sendqueue_mtx;
160240233Sglebius#define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
161240233Sglebius#define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)
162126258Smlaier
163240233Sglebius/*
164240811Sglebius * Queue for pf_overload_task() tasks.
165240233Sglebius */
166240811Sglebiusstruct pf_overload_entry {
167240811Sglebius	SLIST_ENTRY(pf_overload_entry)	next;
168240233Sglebius	struct pf_addr  		addr;
169240233Sglebius	sa_family_t			af;
170240233Sglebius	uint8_t				dir;
171240811Sglebius	struct pf_rule  		*rule;
172240233Sglebius};
173223637Sbz
174240811SglebiusSLIST_HEAD(pf_overload_head, pf_overload_entry);
175240811Sglebiusstatic VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
176240811Sglebius#define V_pf_overloadqueue	VNET(pf_overloadqueue)
177240811Sglebiusstatic VNET_DEFINE(struct task, pf_overloadtask);
178240811Sglebius#define	V_pf_overloadtask	VNET(pf_overloadtask)
179126261Smlaier
180240811Sglebiusstatic struct mtx pf_overloadqueue_mtx;
181240811Sglebius#define	PF_OVERLOADQ_LOCK()	mtx_lock(&pf_overloadqueue_mtx)
182240811Sglebius#define	PF_OVERLOADQ_UNLOCK()	mtx_unlock(&pf_overloadqueue_mtx)
183126258Smlaier
184240233SglebiusVNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
185240233Sglebiusstruct mtx pf_unlnkdrules_mtx;
186240233Sglebius
187240233Sglebiusstatic VNET_DEFINE(uma_zone_t,	pf_sources_z);
188240233Sglebius#define	V_pf_sources_z	VNET(pf_sources_z)
189265008Smmuma_zone_t		pf_mtag_z;
190240233SglebiusVNET_DEFINE(uma_zone_t,	 pf_state_z);
191240233SglebiusVNET_DEFINE(uma_zone_t,	 pf_state_key_z);
192240233Sglebius
193240233SglebiusVNET_DEFINE(uint64_t, pf_stateid[MAXCPU]);
/*
 * Bit layout of a 64-bit state id: the top PFID_CPUBITS bits are selected
 * by PFID_CPUMASK, all remaining low bits by PFID_MAXID.
 */
#define	PFID_CPUBITS	8
#define	PFID_CPUSHIFT	(sizeof(uint64_t) * NBBY - PFID_CPUBITS)
#define	PFID_CPUMASK							\
	((((uint64_t)1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT)
#define	PFID_MAXID	(~PFID_CPUMASK)
198283303SjhbCTASSERT((1 << PFID_CPUBITS) >= MAXCPU);
199240233Sglebius
200240233Sglebiusstatic void		 pf_src_tree_remove_state(struct pf_state *);
201240233Sglebiusstatic void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
202145836Smlaier			    u_int32_t);
203240233Sglebiusstatic void		 pf_add_threshold(struct pf_threshold *);
204240233Sglebiusstatic int		 pf_check_threshold(struct pf_threshold *);
205145836Smlaier
206289703Skpstatic void		 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
207126258Smlaier			    u_int16_t *, u_int16_t *, struct pf_addr *,
208126258Smlaier			    u_int16_t, u_int8_t, sa_family_t);
209240233Sglebiusstatic int		 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
210171168Smlaier			    struct tcphdr *, struct pf_state_peer *);
211240233Sglebiusstatic void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
212126258Smlaier			    struct pf_addr *, struct pf_addr *, u_int16_t,
213126258Smlaier			    u_int16_t *, u_int16_t *, u_int16_t *,
214126258Smlaier			    u_int16_t *, u_int8_t, sa_family_t);
215240233Sglebiusstatic void		 pf_send_tcp(struct mbuf *,
216162238Scsjp			    const struct pf_rule *, sa_family_t,
217126258Smlaier			    const struct pf_addr *, const struct pf_addr *,
218126258Smlaier			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
219145836Smlaier			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
220240233Sglebius			    u_int16_t, struct ifnet *);
221223637Sbzstatic void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
222126258Smlaier			    sa_family_t, struct pf_rule *);
223240233Sglebiusstatic void		 pf_detach_state(struct pf_state *);
224240233Sglebiusstatic int		 pf_state_key_attach(struct pf_state_key *,
225240233Sglebius			    struct pf_state_key *, struct pf_state *);
226240233Sglebiusstatic void		 pf_state_key_detach(struct pf_state *, int);
227240233Sglebiusstatic int		 pf_state_key_ctor(void *, int, void *, int);
228240233Sglebiusstatic u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
229240233Sglebiusstatic int		 pf_test_rule(struct pf_rule **, struct pf_state **,
230130613Smlaier			    int, struct pfi_kif *, struct mbuf *, int,
231240233Sglebius			    struct pf_pdesc *, struct pf_rule **,
232240233Sglebius			    struct pf_ruleset **, struct inpcb *);
233240233Sglebiusstatic int		 pf_create_state(struct pf_rule *, struct pf_rule *,
234223637Sbz			    struct pf_rule *, struct pf_pdesc *,
235223637Sbz			    struct pf_src_node *, struct pf_state_key *,
236223637Sbz			    struct pf_state_key *, struct mbuf *, int,
237223637Sbz			    u_int16_t, u_int16_t, int *, struct pfi_kif *,
238223637Sbz			    struct pf_state **, int, u_int16_t, u_int16_t,
239223637Sbz			    int);
240240233Sglebiusstatic int		 pf_test_fragment(struct pf_rule **, int,
241130613Smlaier			    struct pfi_kif *, struct mbuf *, void *,
242126258Smlaier			    struct pf_pdesc *, struct pf_rule **,
243126258Smlaier			    struct pf_ruleset **);
244240233Sglebiusstatic int		 pf_tcp_track_full(struct pf_state_peer *,
245200930Sdelphij			    struct pf_state_peer *, struct pf_state **,
246200930Sdelphij			    struct pfi_kif *, struct mbuf *, int,
247200930Sdelphij			    struct pf_pdesc *, u_short *, int *);
248240233Sglebiusstatic int		 pf_tcp_track_sloppy(struct pf_state_peer *,
249200930Sdelphij			    struct pf_state_peer *, struct pf_state **,
250200930Sdelphij			    struct pf_pdesc *, u_short *);
251240233Sglebiusstatic int		 pf_test_state_tcp(struct pf_state **, int,
252130613Smlaier			    struct pfi_kif *, struct mbuf *, int,
253126258Smlaier			    void *, struct pf_pdesc *, u_short *);
254240233Sglebiusstatic int		 pf_test_state_udp(struct pf_state **, int,
255130613Smlaier			    struct pfi_kif *, struct mbuf *, int,
256126258Smlaier			    void *, struct pf_pdesc *);
257240233Sglebiusstatic int		 pf_test_state_icmp(struct pf_state **, int,
258130613Smlaier			    struct pfi_kif *, struct mbuf *, int,
259145836Smlaier			    void *, struct pf_pdesc *, u_short *);
260240233Sglebiusstatic int		 pf_test_state_other(struct pf_state **, int,
261223637Sbz			    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
262240233Sglebiusstatic u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
263126258Smlaier			    sa_family_t);
264240233Sglebiusstatic u_int16_t	 pf_get_mss(struct mbuf *, int, u_int16_t,
265126258Smlaier			    sa_family_t);
266240233Sglebiusstatic u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
267231852Sbz				int, u_int16_t);
268240233Sglebiusstatic int		 pf_check_proto_cksum(struct mbuf *, int, int,
269126258Smlaier			    u_int8_t, sa_family_t);
270240233Sglebiusstatic void		 pf_print_state_parts(struct pf_state *,
271223637Sbz			    struct pf_state_key *, struct pf_state_key *);
272240233Sglebiusstatic int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
273126258Smlaier			    struct pf_addr_wrap *);
274240233Sglebiusstatic struct pf_state	*pf_find_state(struct pfi_kif *,
275240233Sglebius			    struct pf_state_key_cmp *, u_int);
276240233Sglebiusstatic int		 pf_src_connlimit(struct pf_state **);
277264454Smmstatic void		 pf_overload_task(void *v, int pending);
278240233Sglebiusstatic int		 pf_insert_src_node(struct pf_src_node **,
279240233Sglebius			    struct pf_rule *, struct pf_addr *, sa_family_t);
280241039Sglebiusstatic u_int		 pf_purge_expired_states(u_int, int);
281240233Sglebiusstatic void		 pf_purge_unlinked_rules(void);
282265008Smmstatic int		 pf_mtag_uminit(void *, int, int);
283240233Sglebiusstatic void		 pf_mtag_free(struct m_tag *);
284240233Sglebius#ifdef INET
285240233Sglebiusstatic void		 pf_route(struct mbuf **, struct pf_rule *, int,
286240233Sglebius			    struct ifnet *, struct pf_state *,
287335252Skp			    struct pf_pdesc *, struct inpcb *);
288240233Sglebius#endif /* INET */
289240233Sglebius#ifdef INET6
290240233Sglebiusstatic void		 pf_change_a6(struct pf_addr *, u_int16_t *,
291240233Sglebius			    struct pf_addr *, u_int8_t);
292240233Sglebiusstatic void		 pf_route6(struct mbuf **, struct pf_rule *, int,
293240233Sglebius			    struct ifnet *, struct pf_state *,
294335252Skp			    struct pf_pdesc *, struct inpcb *);
295240233Sglebius#endif /* INET6 */
296126258Smlaier
297126261Smlaierint in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
298126258Smlaier
299223637SbzVNET_DECLARE(int, pf_end_threads);
300171168Smlaier
301240233SglebiusVNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
302145836Smlaier
/*
 * True (1) when the packet descriptor has a pf mbuf tag attached and
 * that tag carries the PF_PACKET_LOOPED flag; otherwise 0.
 */
#define	PACKET_LOOPED(pd)						\
	((pd)->pf_mtag != NULL &&					\
	 ((pd)->pf_mtag->flags & PF_PACKET_LOOPED) != 0)
305126258Smlaier
/*
 * Look up the state for key (k) on interface (i) in direction (d) and
 * store it in (s).  This macro returns from the *calling* function:
 *  - PF_DROP when no state is found;
 *  - PF_PASS when the packet is flagged as looped;
 *  - PF_PASS when (d) is PF_OUT, the state's rule is route-to with
 *    direction out or reply-to with direction in, and the state's rt_kif
 *    is set and differs from (i).
 * Otherwise execution continues after the macro with (s) set.
 */
#define	STATE_LOOKUP(i, k, d, s, pd)					\
	do {								\
		(s) = pf_find_state((i), (k), (d));			\
		if ((s) == NULL)					\
			return (PF_DROP);				\
		if (PACKET_LOOPED(pd))					\
			return (PF_PASS);				\
		if ((d) != PF_OUT)					\
			break;						\
		if ((((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL && (s)->rt_kif != (i))		\
			return (PF_PASS);				\
	} while (0)
322126258Smlaier
/*
 * Interface a state created by rule (r) is bound to: (k) when the rule
 * has PFRULE_IFBOUND set, V_pfi_all otherwise.
 * The whole expansion is parenthesized so the conditional expression
 * cannot be split by operators surrounding the macro invocation.
 */
#define	BOUND_IFACE(r, k) \
	((((r)->rule_flag & PFRULE_IFBOUND) != 0) ? (k) : V_pfi_all)
325126258Smlaier
/*
 * Account for a new state (s): add 1 to states_cur and states_tot of the
 * state's rule, and of its anchor and NAT rule when those are set.
 * (s) is parenthesized at every use so that any pointer expression,
 * e.g. `*statep', can be passed safely.
 */
#define	STATE_INC_COUNTERS(s)						\
	do {								\
		counter_u64_add((s)->rule.ptr->states_cur, 1);		\
		counter_u64_add((s)->rule.ptr->states_tot, 1);		\
		if ((s)->anchor.ptr != NULL) {				\
			counter_u64_add((s)->anchor.ptr->states_cur, 1);\
			counter_u64_add((s)->anchor.ptr->states_tot, 1);\
		}							\
		if ((s)->nat_rule.ptr != NULL) {			\
			counter_u64_add((s)->nat_rule.ptr->states_cur, 1);\
			counter_u64_add((s)->nat_rule.ptr->states_tot, 1);\
		}							\
	} while (0)
339145836Smlaier
/*
 * Account for a removed state (s): subtract 1 from states_cur of the
 * state's NAT rule and anchor (when set) and of its rule.  states_tot is
 * not touched.
 * (s) is parenthesized at every use so that any pointer expression,
 * e.g. `*statep', can be passed safely.
 */
#define	STATE_DEC_COUNTERS(s)						\
	do {								\
		if ((s)->nat_rule.ptr != NULL)				\
			counter_u64_add((s)->nat_rule.ptr->states_cur, -1);\
		if ((s)->anchor.ptr != NULL)				\
			counter_u64_add((s)->anchor.ptr->states_cur, -1);\
		counter_u64_add((s)->rule.ptr->states_cur, -1);		\
	} while (0)
348145836Smlaier
349240233Sglebiusstatic MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
350240233SglebiusVNET_DEFINE(struct pf_keyhash *, pf_keyhash);
351240233SglebiusVNET_DEFINE(struct pf_idhash *, pf_idhash);
352240233SglebiusVNET_DEFINE(struct pf_srchash *, pf_srchash);
353223637Sbz
354240233SglebiusSYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
355223637Sbz
356273736Shselaskyu_long	pf_hashmask;
357273736Shselaskyu_long	pf_srchashmask;
358273736Shselaskystatic u_long	pf_hashsize;
359273736Shselaskystatic u_long	pf_srchashsize;
360130613Smlaier
361273736ShselaskySYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
362273736Shselasky    &pf_hashsize, 0, "Size of pf(4) states hashtable");
363273736ShselaskySYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
364273736Shselasky    &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable");
365171168Smlaier
366240233SglebiusVNET_DEFINE(void *, pf_swi_cookie);
367130613Smlaier
368240233SglebiusVNET_DEFINE(uint32_t, pf_hashseed);
369240233Sglebius#define	V_pf_hashseed	VNET(pf_hashseed)
370240233Sglebius
371284569Skpint
372284569Skppf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
373284569Skp{
374284569Skp
375284569Skp	switch (af) {
376284569Skp#ifdef INET
377284569Skp	case AF_INET:
378284569Skp		if (a->addr32[0] > b->addr32[0])
379284569Skp			return (1);
380284569Skp		if (a->addr32[0] < b->addr32[0])
381284569Skp			return (-1);
382284569Skp		break;
383284569Skp#endif /* INET */
384284569Skp#ifdef INET6
385284569Skp	case AF_INET6:
386284569Skp		if (a->addr32[3] > b->addr32[3])
387284569Skp			return (1);
388284569Skp		if (a->addr32[3] < b->addr32[3])
389284569Skp			return (-1);
390284569Skp		if (a->addr32[2] > b->addr32[2])
391284569Skp			return (1);
392284569Skp		if (a->addr32[2] < b->addr32[2])
393284569Skp			return (-1);
394284569Skp		if (a->addr32[1] > b->addr32[1])
395284569Skp			return (1);
396284569Skp		if (a->addr32[1] < b->addr32[1])
397284569Skp			return (-1);
398284569Skp		if (a->addr32[0] > b->addr32[0])
399284569Skp			return (1);
400284569Skp		if (a->addr32[0] < b->addr32[0])
401284569Skp			return (-1);
402284569Skp		break;
403284569Skp#endif /* INET6 */
404284569Skp	default:
405284569Skp		panic("%s: unknown address family %u", __func__, af);
406284569Skp	}
407284569Skp	return (0);
408284569Skp}
409284569Skp
410240233Sglebiusstatic __inline uint32_t
411240233Sglebiuspf_hashkey(struct pf_state_key *sk)
412126258Smlaier{
413240233Sglebius	uint32_t h;
414126258Smlaier
415274486Sgnn	h = murmur3_aligned_32((uint32_t *)sk,
416274486Sgnn			       sizeof(struct pf_state_key_cmp),
417274486Sgnn			       V_pf_hashseed);
418240233Sglebius
419273736Shselasky	return (h & pf_hashmask);
420130613Smlaier}
421130613Smlaier
422240736Sglebiusstatic __inline uint32_t
423240736Sglebiuspf_hashsrc(struct pf_addr *addr, sa_family_t af)
424240736Sglebius{
425240736Sglebius	uint32_t h;
426240736Sglebius
427240736Sglebius	switch (af) {
428240736Sglebius	case AF_INET:
429274486Sgnn		h = murmur3_aligned_32((uint32_t *)&addr->v4,
430274486Sgnn				       sizeof(addr->v4), V_pf_hashseed);
431240736Sglebius		break;
432240736Sglebius	case AF_INET6:
433274486Sgnn		h = murmur3_aligned_32((uint32_t *)&addr->v6,
434274486Sgnn				       sizeof(addr->v6), V_pf_hashseed);
435240736Sglebius		break;
436240736Sglebius	default:
437240736Sglebius		panic("%s: unknown address family %u", __func__, af);
438240736Sglebius	}
439240736Sglebius
440273736Shselasky	return (h & pf_srchashmask);
441240736Sglebius}
442240736Sglebius
#ifdef ALTQ
/*
 * Non-zero hash of a state: the state's address divided by its size,
 * XORed with the CRC32 of its src and dst peers.  A result of 0 is
 * replaced by 1.
 */
static int
pf_state_hash(struct pf_state *s)
{
	u_int32_t hv;

	hv = (intptr_t)s / sizeof(*s);
	hv ^= crc32(&s->src, sizeof(s->src));
	hv ^= crc32(&s->dst, sizeof(s->dst));

	return (hv != 0 ? hv : 1);
}
#endif
456298091Sloos
#ifdef INET6
/*
 * Copy an address of family `af' from `src' to `dst': one 32-bit word
 * for AF_INET (when compiled in), four for AF_INET6.  Any other family
 * leaves `dst' untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	u_int nwords;

	switch (af) {
#ifdef INET
	case AF_INET:
		nwords = 1;
		break;
#endif /* INET */
	case AF_INET6:
		nwords = 4;
		break;
	default:
		return;
	}

	for (u_int w = 0; w < nwords; w++)
		dst->addr32[w] = src->addr32[w];
}
#endif /* INET6 */
476126258Smlaier
477240233Sglebiusstatic void
478145836Smlaierpf_init_threshold(struct pf_threshold *threshold,
479145836Smlaier    u_int32_t limit, u_int32_t seconds)
480145836Smlaier{
481145836Smlaier	threshold->limit = limit * PF_THRESHOLD_MULT;
482145836Smlaier	threshold->seconds = seconds;
483145836Smlaier	threshold->count = 0;
484240233Sglebius	threshold->last = time_uptime;
485145836Smlaier}
486145836Smlaier
487240233Sglebiusstatic void
488145836Smlaierpf_add_threshold(struct pf_threshold *threshold)
489145836Smlaier{
490240233Sglebius	u_int32_t t = time_uptime, diff = t - threshold->last;
491145836Smlaier
492145836Smlaier	if (diff >= threshold->seconds)
493145836Smlaier		threshold->count = 0;
494145836Smlaier	else
495145836Smlaier		threshold->count -= threshold->count * diff /
496145836Smlaier		    threshold->seconds;
497145836Smlaier	threshold->count += PF_THRESHOLD_MULT;
498145836Smlaier	threshold->last = t;
499145836Smlaier}
500145836Smlaier
501240233Sglebiusstatic int
502145836Smlaierpf_check_threshold(struct pf_threshold *threshold)
503145836Smlaier{
504145836Smlaier	return (threshold->count > threshold->limit);
505145836Smlaier}
506145836Smlaier
507240233Sglebiusstatic int
508145836Smlaierpf_src_connlimit(struct pf_state **state)
509145836Smlaier{
510240811Sglebius	struct pf_overload_entry *pfoe;
511145836Smlaier	int bad = 0;
512145836Smlaier
513240233Sglebius	PF_STATE_LOCK_ASSERT(*state);
514240233Sglebius
515145836Smlaier	(*state)->src_node->conn++;
516171168Smlaier	(*state)->src.tcp_est = 1;
517145836Smlaier	pf_add_threshold(&(*state)->src_node->conn_rate);
518145836Smlaier
519145836Smlaier	if ((*state)->rule.ptr->max_src_conn &&
520145836Smlaier	    (*state)->rule.ptr->max_src_conn <
521145836Smlaier	    (*state)->src_node->conn) {
522270574Sglebius		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
523145836Smlaier		bad++;
524145836Smlaier	}
525145836Smlaier
526145836Smlaier	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
527145836Smlaier	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
528270574Sglebius		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
529145836Smlaier		bad++;
530145836Smlaier	}
531145836Smlaier
532145836Smlaier	if (!bad)
533145836Smlaier		return (0);
534145836Smlaier
535240233Sglebius	/* Kill this state. */
536240233Sglebius	(*state)->timeout = PFTM_PURGE;
537240233Sglebius	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
538145836Smlaier
539240233Sglebius	if ((*state)->rule.ptr->overload_tbl == NULL)
540240233Sglebius		return (1);
541145836Smlaier
542240811Sglebius	/* Schedule overloading and flushing task. */
543240811Sglebius	pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
544240811Sglebius	if (pfoe == NULL)
545240233Sglebius		return (1);	/* too bad :( */
546240233Sglebius
547240811Sglebius	bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
548240811Sglebius	pfoe->af = (*state)->key[PF_SK_WIRE]->af;
549240811Sglebius	pfoe->rule = (*state)->rule.ptr;
550240811Sglebius	pfoe->dir = (*state)->direction;
551240811Sglebius	PF_OVERLOADQ_LOCK();
552240811Sglebius	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
553240811Sglebius	PF_OVERLOADQ_UNLOCK();
554240811Sglebius	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
555240233Sglebius
556240233Sglebius	return (1);
557240233Sglebius}
558240233Sglebius
559240233Sglebiusstatic void
560264454Smmpf_overload_task(void *v, int pending)
561240233Sglebius{
562240811Sglebius	struct pf_overload_head queue;
563240811Sglebius	struct pfr_addr p;
564240811Sglebius	struct pf_overload_entry *pfoe, *pfoe1;
565240233Sglebius	uint32_t killed = 0;
566240233Sglebius
567264454Smm	CURVNET_SET((struct vnet *)v);
568264454Smm
569240811Sglebius	PF_OVERLOADQ_LOCK();
570264454Smm	queue = V_pf_overloadqueue;
571264454Smm	SLIST_INIT(&V_pf_overloadqueue);
572240811Sglebius	PF_OVERLOADQ_UNLOCK();
573240233Sglebius
574240811Sglebius	bzero(&p, sizeof(p));
575240811Sglebius	SLIST_FOREACH(pfoe, &queue, next) {
576270574Sglebius		counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
577240811Sglebius		if (V_pf_status.debug >= PF_DEBUG_MISC) {
578240811Sglebius			printf("%s: blocking address ", __func__);
579240811Sglebius			pf_print_host(&pfoe->addr, 0, pfoe->af);
580240811Sglebius			printf("\n");
581240811Sglebius		}
582240233Sglebius
583240811Sglebius		p.pfra_af = pfoe->af;
584240811Sglebius		switch (pfoe->af) {
585240811Sglebius#ifdef INET
586240811Sglebius		case AF_INET:
587240811Sglebius			p.pfra_net = 32;
588240811Sglebius			p.pfra_ip4addr = pfoe->addr.v4;
589240811Sglebius			break;
590240811Sglebius#endif
591240811Sglebius#ifdef INET6
592240811Sglebius		case AF_INET6:
593240811Sglebius			p.pfra_net = 128;
594240811Sglebius			p.pfra_ip6addr = pfoe->addr.v6;
595240811Sglebius			break;
596240811Sglebius#endif
597240811Sglebius		}
598240811Sglebius
599240811Sglebius		PF_RULES_WLOCK();
600240811Sglebius		pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
601240811Sglebius		PF_RULES_WUNLOCK();
602240811Sglebius	}
603240811Sglebius
604240811Sglebius	/*
605240811Sglebius	 * Remove those entries, that don't need flushing.
606240811Sglebius	 */
607240811Sglebius	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
608240811Sglebius		if (pfoe->rule->flush == 0) {
609240811Sglebius			SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
610240811Sglebius			free(pfoe, M_PFTEMP);
611240811Sglebius		} else
612270574Sglebius			counter_u64_add(
613270574Sglebius			    V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);
614240811Sglebius
615240811Sglebius	/* If nothing to flush, return. */
616264454Smm	if (SLIST_EMPTY(&queue)) {
617264454Smm		CURVNET_RESTORE();
618240811Sglebius		return;
619264454Smm	}
620240811Sglebius
621273736Shselasky	for (int i = 0; i <= pf_hashmask; i++) {
622240233Sglebius		struct pf_idhash *ih = &V_pf_idhash[i];
623240233Sglebius		struct pf_state_key *sk;
624240233Sglebius		struct pf_state *s;
625240233Sglebius
626240233Sglebius		PF_HASHROW_LOCK(ih);
627240233Sglebius		LIST_FOREACH(s, &ih->states, entry) {
628240233Sglebius		    sk = s->key[PF_SK_WIRE];
629240811Sglebius		    SLIST_FOREACH(pfoe, &queue, next)
630240811Sglebius			if (sk->af == pfoe->af &&
631240811Sglebius			    ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
632240811Sglebius			    pfoe->rule == s->rule.ptr) &&
633240811Sglebius			    ((pfoe->dir == PF_OUT &&
634240811Sglebius			    PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
635240811Sglebius			    (pfoe->dir == PF_IN &&
636240811Sglebius			    PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
637240233Sglebius				s->timeout = PFTM_PURGE;
638240233Sglebius				s->src.state = s->dst.state = TCPS_CLOSED;
639240233Sglebius				killed++;
640145836Smlaier			}
641145836Smlaier		}
642240233Sglebius		PF_HASHROW_UNLOCK(ih);
643145836Smlaier	}
644240811Sglebius	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
645240811Sglebius		free(pfoe, M_PFTEMP);
646240233Sglebius	if (V_pf_status.debug >= PF_DEBUG_MISC)
647240233Sglebius		printf("%s: %u states killed", __func__, killed);
648264454Smm
649264454Smm	CURVNET_RESTORE();
650240233Sglebius}
651145836Smlaier
652240233Sglebius/*
653240233Sglebius * Can return locked on failure, so that we can consistently
654240233Sglebius * allocate and insert a new one.
655240233Sglebius */
656240233Sglebiusstruct pf_src_node *
657240233Sglebiuspf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af,
658240233Sglebius	int returnlocked)
659240233Sglebius{
660240233Sglebius	struct pf_srchash *sh;
661240233Sglebius	struct pf_src_node *n;
662240233Sglebius
663270574Sglebius	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
664240233Sglebius
665240233Sglebius	sh = &V_pf_srchash[pf_hashsrc(src, af)];
666240233Sglebius	PF_HASHROW_LOCK(sh);
667240233Sglebius	LIST_FOREACH(n, &sh->nodes, entry)
668240233Sglebius		if (n->rule.ptr == rule && n->af == af &&
669240233Sglebius		    ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
670240233Sglebius		    (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
671240233Sglebius			break;
672285940Sglebius	if (n != NULL) {
673285940Sglebius		n->states++;
674240233Sglebius		PF_HASHROW_UNLOCK(sh);
675285940Sglebius	} else if (returnlocked == 0)
676285940Sglebius		PF_HASHROW_UNLOCK(sh);
677240233Sglebius
678240233Sglebius	return (n);
679145836Smlaier}
680145836Smlaier
681240233Sglebiusstatic int
682130613Smlaierpf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
683130613Smlaier    struct pf_addr *src, sa_family_t af)
684126258Smlaier{
685126258Smlaier
686240233Sglebius	KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK ||
687240233Sglebius	    rule->rpool.opts & PF_POOL_STICKYADDR),
688240233Sglebius	    ("%s for non-tracking rule %p", __func__, rule));
689240233Sglebius
690240233Sglebius	if (*sn == NULL)
691240233Sglebius		*sn = pf_find_src_node(src, rule, af, 1);
692240233Sglebius
693130613Smlaier	if (*sn == NULL) {
694240233Sglebius		struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)];
695240233Sglebius
696240233Sglebius		PF_HASHROW_ASSERT(sh);
697240233Sglebius
698130613Smlaier		if (!rule->max_src_nodes ||
699263029Sglebius		    counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes)
700240233Sglebius			(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
701145836Smlaier		else
702270574Sglebius			counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES],
703270574Sglebius			    1);
704240233Sglebius		if ((*sn) == NULL) {
705240233Sglebius			PF_HASHROW_UNLOCK(sh);
706130613Smlaier			return (-1);
707240233Sglebius		}
708145836Smlaier
709145836Smlaier		pf_init_threshold(&(*sn)->conn_rate,
710145836Smlaier		    rule->max_src_conn_rate.limit,
711145836Smlaier		    rule->max_src_conn_rate.seconds);
712145836Smlaier
713130613Smlaier		(*sn)->af = af;
714240233Sglebius		(*sn)->rule.ptr = rule;
715130613Smlaier		PF_ACPY(&(*sn)->addr, src, af);
716240233Sglebius		LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
717240233Sglebius		(*sn)->creation = time_uptime;
718130613Smlaier		(*sn)->ruletype = rule->action;
719285940Sglebius		(*sn)->states = 1;
720130613Smlaier		if ((*sn)->rule.ptr != NULL)
721263029Sglebius			counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
722240233Sglebius		PF_HASHROW_UNLOCK(sh);
723270574Sglebius		counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
724130613Smlaier	} else {
725130613Smlaier		if (rule->max_src_states &&
726145836Smlaier		    (*sn)->states >= rule->max_src_states) {
727270574Sglebius			counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
728270574Sglebius			    1);
729130613Smlaier			return (-1);
730145836Smlaier		}
731130613Smlaier	}
732130613Smlaier	return (0);
733130613Smlaier}
734126258Smlaier
735261019Sglebiusvoid
736285940Sglebiuspf_unlink_src_node(struct pf_src_node *src)
737240233Sglebius{
738223637Sbz
739285940Sglebius	PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]);
740240233Sglebius	LIST_REMOVE(src, entry);
741261019Sglebius	if (src->rule.ptr)
742263029Sglebius		counter_u64_add(src->rule.ptr->src_nodes, -1);
743261019Sglebius}
744240737Sglebius
745261019Sglebiusu_int
746261019Sglebiuspf_free_src_nodes(struct pf_src_node_list *head)
747261019Sglebius{
748261019Sglebius	struct pf_src_node *sn, *tmp;
749261019Sglebius	u_int count = 0;
750261019Sglebius
751261019Sglebius	LIST_FOREACH_SAFE(sn, head, entry, tmp) {
752285940Sglebius		uma_zfree(V_pf_sources_z, sn);
753261019Sglebius		count++;
754261019Sglebius	}
755261019Sglebius
756285940Sglebius	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);
757285940Sglebius
758261019Sglebius	return (count);
759261019Sglebius}
760261019Sglebius
761240233Sglebiusvoid
762265008Smmpf_mtag_initialize()
763265008Smm{
764265008Smm
765265008Smm	pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
766265008Smm	    sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
767265008Smm	    UMA_ALIGN_PTR, 0);
768265008Smm}
769265008Smm
770265008Smm/* Per-vnet data storage structures initialization. */
771265008Smmvoid
772240233Sglebiuspf_initialize()
773223637Sbz{
774240233Sglebius	struct pf_keyhash	*kh;
775240233Sglebius	struct pf_idhash	*ih;
776240233Sglebius	struct pf_srchash	*sh;
777240233Sglebius	u_int i;
778223637Sbz
779273736Shselasky	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &pf_hashsize);
780273736Shselasky	if (pf_hashsize == 0 || !powerof2(pf_hashsize))
781273736Shselasky		pf_hashsize = PF_HASHSIZ;
782273736Shselasky	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &pf_srchashsize);
783273736Shselasky	if (pf_srchashsize == 0 || !powerof2(pf_srchashsize))
784331117Skp		pf_srchashsize = PF_SRCHASHSIZ;
785240233Sglebius
786240233Sglebius	V_pf_hashseed = arc4random();
787240233Sglebius
788240233Sglebius	/* States and state keys storage. */
789240233Sglebius	V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state),
790240233Sglebius	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
791240233Sglebius	V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
792240233Sglebius	uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
793244347Spjd	uma_zone_set_warning(V_pf_state_z, "PF states limit reached");
794240233Sglebius
795240233Sglebius	V_pf_state_key_z = uma_zcreate("pf state keys",
796240233Sglebius	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
797240233Sglebius	    UMA_ALIGN_PTR, 0);
798331117Skp
799331117Skp	V_pf_keyhash = mallocarray(pf_hashsize, sizeof(struct pf_keyhash),
800331117Skp	    M_PFHASH, M_NOWAIT | M_ZERO);
801331117Skp	V_pf_idhash = mallocarray(pf_hashsize, sizeof(struct pf_idhash),
802331117Skp	    M_PFHASH, M_NOWAIT | M_ZERO);
803331117Skp	if (V_pf_keyhash == NULL || V_pf_idhash == NULL) {
804331117Skp		printf("pf: Unable to allocate memory for "
805331117Skp		    "state_hashsize %lu.\n", pf_hashsize);
806331117Skp
807331117Skp		free(V_pf_keyhash, M_PFHASH);
808331117Skp		free(V_pf_idhash, M_PFHASH);
809331117Skp
810331117Skp		pf_hashsize = PF_HASHSIZ;
811331117Skp		V_pf_keyhash = mallocarray(pf_hashsize,
812331117Skp		    sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO);
813331117Skp		V_pf_idhash = mallocarray(pf_hashsize,
814331117Skp		    sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO);
815331117Skp	}
816331117Skp
817273736Shselasky	pf_hashmask = pf_hashsize - 1;
818273736Shselasky	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
819240233Sglebius	    i++, kh++, ih++) {
820251681Sglebius		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
821240233Sglebius		mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
822223637Sbz	}
823223637Sbz
824240233Sglebius	/* Source nodes. */
825240233Sglebius	V_pf_sources_z = uma_zcreate("pf source nodes",
826240233Sglebius	    sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
827240233Sglebius	    0);
828240233Sglebius	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
829240233Sglebius	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
830244347Spjd	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
831331117Skp
832331117Skp	V_pf_srchash = mallocarray(pf_srchashsize,
833331117Skp	    sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO);
834331117Skp	if (V_pf_srchash == NULL) {
835331117Skp		printf("pf: Unable to allocate memory for "
836331117Skp		    "source_hashsize %lu.\n", pf_srchashsize);
837331117Skp
838331117Skp		pf_srchashsize = PF_SRCHASHSIZ;
839331117Skp		V_pf_srchash = mallocarray(pf_srchashsize,
840331117Skp		    sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO);
841331117Skp	}
842331117Skp
843273736Shselasky	pf_srchashmask = pf_srchashsize - 1;
844273736Shselasky	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++)
845240233Sglebius		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
846223637Sbz
847240233Sglebius	/* ALTQ */
848240233Sglebius	TAILQ_INIT(&V_pf_altqs[0]);
849240233Sglebius	TAILQ_INIT(&V_pf_altqs[1]);
850240233Sglebius	TAILQ_INIT(&V_pf_pabuf);
851240233Sglebius	V_pf_altqs_active = &V_pf_altqs[0];
852240233Sglebius	V_pf_altqs_inactive = &V_pf_altqs[1];
853240233Sglebius
854240233Sglebius
855240811Sglebius	/* Send & overload+flush queues. */
856240233Sglebius	STAILQ_INIT(&V_pf_sendqueue);
857240811Sglebius	SLIST_INIT(&V_pf_overloadqueue);
858264454Smm	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);
859240233Sglebius	mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
860240811Sglebius	mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
861240811Sglebius	    MTX_DEF);
862240233Sglebius
863240233Sglebius	/* Unlinked, but may be referenced rules. */
864240233Sglebius	TAILQ_INIT(&V_pf_unlinked_rules);
865240233Sglebius	mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
866223637Sbz}
867223637Sbz
868240233Sglebiusvoid
869265008Smmpf_mtag_cleanup()
870265008Smm{
871265008Smm
872265008Smm	uma_zdestroy(pf_mtag_z);
873265008Smm}
874265008Smm
875265008Smmvoid
876240233Sglebiuspf_cleanup()
877223637Sbz{
878240233Sglebius	struct pf_keyhash	*kh;
879240233Sglebius	struct pf_idhash	*ih;
880240233Sglebius	struct pf_srchash	*sh;
881240233Sglebius	struct pf_send_entry	*pfse, *next;
882240233Sglebius	u_int i;
883223637Sbz
884273736Shselasky	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
885240233Sglebius	    i++, kh++, ih++) {
886240233Sglebius		KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
887240233Sglebius		    __func__));
888240233Sglebius		KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
889240233Sglebius		    __func__));
890240233Sglebius		mtx_destroy(&kh->lock);
891240233Sglebius		mtx_destroy(&ih->lock);
892240233Sglebius	}
893240233Sglebius	free(V_pf_keyhash, M_PFHASH);
894240233Sglebius	free(V_pf_idhash, M_PFHASH);
895240233Sglebius
896273736Shselasky	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
897240233Sglebius		KASSERT(LIST_EMPTY(&sh->nodes),
898240233Sglebius		    ("%s: source node hash not empty", __func__));
899240233Sglebius		mtx_destroy(&sh->lock);
900240233Sglebius	}
901240233Sglebius	free(V_pf_srchash, M_PFHASH);
902240233Sglebius
903240233Sglebius	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
904240233Sglebius		m_freem(pfse->pfse_m);
905240233Sglebius		free(pfse, M_PFTEMP);
906240233Sglebius	}
907240233Sglebius
908240233Sglebius	mtx_destroy(&pf_sendqueue_mtx);
909240811Sglebius	mtx_destroy(&pf_overloadqueue_mtx);
910240233Sglebius	mtx_destroy(&pf_unlnkdrules_mtx);
911240233Sglebius
912240233Sglebius	uma_zdestroy(V_pf_sources_z);
913240233Sglebius	uma_zdestroy(V_pf_state_z);
914240233Sglebius	uma_zdestroy(V_pf_state_key_z);
915240233Sglebius}
916240233Sglebius
917240233Sglebiusstatic int
918265008Smmpf_mtag_uminit(void *mem, int size, int how)
919240233Sglebius{
920240233Sglebius	struct m_tag *t;
921240233Sglebius
922240233Sglebius	t = (struct m_tag *)mem;
923240233Sglebius	t->m_tag_cookie = MTAG_ABI_COMPAT;
924240233Sglebius	t->m_tag_id = PACKET_TAG_PF;
925240233Sglebius	t->m_tag_len = sizeof(struct pf_mtag);
926240233Sglebius	t->m_tag_free = pf_mtag_free;
927240233Sglebius
928223637Sbz	return (0);
929223637Sbz}
930223637Sbz
/*
 * Release a pf mbuf tag back to the pf mtag zone.  Installed as the
 * tag's m_tag_free handler by pf_mtag_uminit().
 */
static void
pf_mtag_free(struct m_tag *t)
{

	uma_zfree(pf_mtag_z, t);
}
937223637Sbz
938240233Sglebiusstruct pf_mtag *
939240233Sglebiuspf_get_mtag(struct mbuf *m)
940240233Sglebius{
941240233Sglebius	struct m_tag *mtag;
942240233Sglebius
943240233Sglebius	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
944240233Sglebius		return ((struct pf_mtag *)(mtag + 1));
945240233Sglebius
946265008Smm	mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
947240233Sglebius	if (mtag == NULL)
948240233Sglebius		return (NULL);
949240233Sglebius	bzero(mtag + 1, sizeof(struct pf_mtag));
950240233Sglebius	m_tag_prepend(m, mtag);
951240233Sglebius
952240233Sglebius	return ((struct pf_mtag *)(mtag + 1));
953240233Sglebius}
954240233Sglebius
955240233Sglebiusstatic int
956240233Sglebiuspf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
957240233Sglebius    struct pf_state *s)
958240233Sglebius{
959251681Sglebius	struct pf_keyhash	*khs, *khw, *kh;
960240233Sglebius	struct pf_state_key	*sk, *cur;
961240233Sglebius	struct pf_state		*si, *olds = NULL;
962240233Sglebius	int idx;
963240233Sglebius
964240233Sglebius	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
965240233Sglebius	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
966240233Sglebius	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
967240233Sglebius
968240233Sglebius	/*
969251681Sglebius	 * We need to lock hash slots of both keys. To avoid deadlock
970251681Sglebius	 * we always lock the slot with lower address first. Unlock order
971251681Sglebius	 * isn't important.
972251681Sglebius	 *
973251681Sglebius	 * We also need to lock ID hash slot before dropping key
974251681Sglebius	 * locks. On success we return with ID hash slot locked.
975251681Sglebius	 */
976251681Sglebius
977251681Sglebius	if (skw == sks) {
978251681Sglebius		khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
979251681Sglebius		PF_HASHROW_LOCK(khs);
980251681Sglebius	} else {
981251681Sglebius		khs = &V_pf_keyhash[pf_hashkey(sks)];
982251681Sglebius		khw = &V_pf_keyhash[pf_hashkey(skw)];
983251681Sglebius		if (khs == khw) {
984251681Sglebius			PF_HASHROW_LOCK(khs);
985251681Sglebius		} else if (khs < khw) {
986251681Sglebius			PF_HASHROW_LOCK(khs);
987251681Sglebius			PF_HASHROW_LOCK(khw);
988251681Sglebius		} else {
989251681Sglebius			PF_HASHROW_LOCK(khw);
990251681Sglebius			PF_HASHROW_LOCK(khs);
991251681Sglebius		}
992251681Sglebius	}
993251681Sglebius
994251681Sglebius#define	KEYS_UNLOCK()	do {			\
995251681Sglebius	if (khs != khw) {			\
996251681Sglebius		PF_HASHROW_UNLOCK(khs);		\
997251681Sglebius		PF_HASHROW_UNLOCK(khw);		\
998251681Sglebius	} else					\
999251681Sglebius		PF_HASHROW_UNLOCK(khs);		\
1000251681Sglebius} while (0)
1001251681Sglebius
1002251681Sglebius	/*
1003240233Sglebius	 * First run: start with wire key.
1004240233Sglebius	 */
1005240233Sglebius	sk = skw;
1006251681Sglebius	kh = khw;
1007240233Sglebius	idx = PF_SK_WIRE;
1008240233Sglebius
1009240233Sglebiuskeyattach:
1010240233Sglebius	LIST_FOREACH(cur, &kh->keys, entry)
1011240233Sglebius		if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
1012240233Sglebius			break;
1013240233Sglebius
1014240233Sglebius	if (cur != NULL) {
1015240233Sglebius		/* Key exists. Check for same kif, if none, add to key. */
1016240233Sglebius		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
1017240233Sglebius			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
1018240233Sglebius
1019240233Sglebius			PF_HASHROW_LOCK(ih);
1020240233Sglebius			if (si->kif == s->kif &&
1021240233Sglebius			    si->direction == s->direction) {
1022223637Sbz				if (sk->proto == IPPROTO_TCP &&
1023240233Sglebius				    si->src.state >= TCPS_FIN_WAIT_2 &&
1024240233Sglebius				    si->dst.state >= TCPS_FIN_WAIT_2) {
1025251681Sglebius					/*
1026251681Sglebius					 * New state matches an old >FIN_WAIT_2
1027251681Sglebius					 * state. We can't drop key hash locks,
1028251681Sglebius					 * thus we can't unlink it properly.
1029251681Sglebius					 *
1030251681Sglebius					 * As a workaround we drop it into
1031251681Sglebius					 * TCPS_CLOSED state, schedule purge
1032251681Sglebius					 * ASAP and push it into the very end
1033251681Sglebius					 * of the slot TAILQ, so that it won't
1034251681Sglebius					 * conflict with our new state.
1035251681Sglebius					 */
1036240233Sglebius					si->src.state = si->dst.state =
1037223637Sbz					    TCPS_CLOSED;
1038251681Sglebius					si->timeout = PFTM_PURGE;
1039240233Sglebius					olds = si;
1040223637Sbz				} else {
1041223637Sbz					if (V_pf_status.debug >= PF_DEBUG_MISC) {
1042223637Sbz						printf("pf: %s key attach "
1043223637Sbz						    "failed on %s: ",
1044223637Sbz						    (idx == PF_SK_WIRE) ?
1045223637Sbz						    "wire" : "stack",
1046223637Sbz						    s->kif->pfik_name);
1047223637Sbz						pf_print_state_parts(s,
1048223637Sbz						    (idx == PF_SK_WIRE) ?
1049223637Sbz						    sk : NULL,
1050223637Sbz						    (idx == PF_SK_STACK) ?
1051223637Sbz						    sk : NULL);
1052223637Sbz						printf(", existing: ");
1053240233Sglebius						pf_print_state_parts(si,
1054223637Sbz						    (idx == PF_SK_WIRE) ?
1055223637Sbz						    sk : NULL,
1056223637Sbz						    (idx == PF_SK_STACK) ?
1057223637Sbz						    sk : NULL);
1058223637Sbz						printf("\n");
1059223637Sbz					}
1060240233Sglebius					PF_HASHROW_UNLOCK(ih);
1061251681Sglebius					KEYS_UNLOCK();
1062240233Sglebius					uma_zfree(V_pf_state_key_z, sk);
1063240233Sglebius					if (idx == PF_SK_STACK)
1064240233Sglebius						pf_detach_state(s);
1065250522Sglebius					return (EEXIST); /* collision! */
1066223637Sbz				}
1067223637Sbz			}
1068240233Sglebius			PF_HASHROW_UNLOCK(ih);
1069240233Sglebius		}
1070240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
1071223637Sbz		s->key[idx] = cur;
1072240233Sglebius	} else {
1073240233Sglebius		LIST_INSERT_HEAD(&kh->keys, sk, entry);
1074223637Sbz		s->key[idx] = sk;
1075126258Smlaier	}
1076126258Smlaier
1077240233Sglebiusstateattach:
1078240233Sglebius	/* List is sorted, if-bound states before floating. */
1079223637Sbz	if (s->kif == V_pfi_all)
1080240233Sglebius		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
1081223637Sbz	else
1082240233Sglebius		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
1083223637Sbz
1084251681Sglebius	if (olds) {
1085251681Sglebius		TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
1086251681Sglebius		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
1087251681Sglebius		    key_list[idx]);
1088251681Sglebius		olds = NULL;
1089251681Sglebius	}
1090251681Sglebius
1091240233Sglebius	/*
1092240233Sglebius	 * Attach done. See how should we (or should not?)
1093240233Sglebius	 * attach a second key.
1094240233Sglebius	 */
1095240233Sglebius	if (sks == skw) {
1096240233Sglebius		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
1097240233Sglebius		idx = PF_SK_STACK;
1098240233Sglebius		sks = NULL;
1099240233Sglebius		goto stateattach;
1100240233Sglebius	} else if (sks != NULL) {
1101240233Sglebius		/*
1102240233Sglebius		 * Continue attaching with stack key.
1103240233Sglebius		 */
1104240233Sglebius		sk = sks;
1105251681Sglebius		kh = khs;
1106240233Sglebius		idx = PF_SK_STACK;
1107240233Sglebius		sks = NULL;
1108240233Sglebius		goto keyattach;
1109240233Sglebius	}
1110240233Sglebius
1111251681Sglebius	PF_STATE_LOCK(s);
1112251681Sglebius	KEYS_UNLOCK();
1113251681Sglebius
1114240233Sglebius	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
1115240233Sglebius	    ("%s failure", __func__));
1116240233Sglebius
1117223637Sbz	return (0);
1118251681Sglebius#undef	KEYS_UNLOCK
1119223637Sbz}
1120223637Sbz
1121240233Sglebiusstatic void
1122223637Sbzpf_detach_state(struct pf_state *s)
1123223637Sbz{
1124240233Sglebius	struct pf_state_key *sks = s->key[PF_SK_STACK];
1125240233Sglebius	struct pf_keyhash *kh;
1126223637Sbz
1127240233Sglebius	if (sks != NULL) {
1128240233Sglebius		kh = &V_pf_keyhash[pf_hashkey(sks)];
1129240233Sglebius		PF_HASHROW_LOCK(kh);
1130240233Sglebius		if (s->key[PF_SK_STACK] != NULL)
1131240233Sglebius			pf_state_key_detach(s, PF_SK_STACK);
1132240233Sglebius		/*
1133240233Sglebius		 * If both point to same key, then we are done.
1134240233Sglebius		 */
1135240233Sglebius		if (sks == s->key[PF_SK_WIRE]) {
1136240233Sglebius			pf_state_key_detach(s, PF_SK_WIRE);
1137240233Sglebius			PF_HASHROW_UNLOCK(kh);
1138240233Sglebius			return;
1139240233Sglebius		}
1140240233Sglebius		PF_HASHROW_UNLOCK(kh);
1141240233Sglebius	}
1142223637Sbz
1143240233Sglebius	if (s->key[PF_SK_WIRE] != NULL) {
1144240233Sglebius		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
1145240233Sglebius		PF_HASHROW_LOCK(kh);
1146240233Sglebius		if (s->key[PF_SK_WIRE] != NULL)
1147240233Sglebius			pf_state_key_detach(s, PF_SK_WIRE);
1148240233Sglebius		PF_HASHROW_UNLOCK(kh);
1149240233Sglebius	}
1150223637Sbz}
1151223637Sbz
1152240233Sglebiusstatic void
1153223637Sbzpf_state_key_detach(struct pf_state *s, int idx)
1154223637Sbz{
1155240233Sglebius	struct pf_state_key *sk = s->key[idx];
1156240233Sglebius#ifdef INVARIANTS
1157240233Sglebius	struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];
1158223637Sbz
1159240233Sglebius	PF_HASHROW_ASSERT(kh);
1160223637Sbz#endif
1161240233Sglebius	TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
1162240233Sglebius	s->key[idx] = NULL;
1163223637Sbz
1164240233Sglebius	if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
1165240233Sglebius		LIST_REMOVE(sk, entry);
1166240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
1167223637Sbz	}
1168223637Sbz}
1169223637Sbz
1170240233Sglebiusstatic int
1171240233Sglebiuspf_state_key_ctor(void *mem, int size, void *arg, int flags)
1172240233Sglebius{
1173240233Sglebius	struct pf_state_key *sk = mem;
1174240233Sglebius
1175240233Sglebius	bzero(sk, sizeof(struct pf_state_key_cmp));
1176240233Sglebius	TAILQ_INIT(&sk->states[PF_SK_WIRE]);
1177240233Sglebius	TAILQ_INIT(&sk->states[PF_SK_STACK]);
1178240233Sglebius
1179240233Sglebius	return (0);
1180240233Sglebius}
1181240233Sglebius
1182223637Sbzstruct pf_state_key *
1183240233Sglebiuspf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr,
1184240233Sglebius	struct pf_addr *daddr, u_int16_t sport, u_int16_t dport)
1185223637Sbz{
1186240233Sglebius	struct pf_state_key *sk;
1187223637Sbz
1188240233Sglebius	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
1189240233Sglebius	if (sk == NULL)
1190223637Sbz		return (NULL);
1191223637Sbz
1192240233Sglebius	PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af);
1193240233Sglebius	PF_ACPY(&sk->addr[pd->didx], daddr, pd->af);
1194240233Sglebius	sk->port[pd->sidx] = sport;
1195240233Sglebius	sk->port[pd->didx] = dport;
1196240233Sglebius	sk->proto = pd->proto;
1197240233Sglebius	sk->af = pd->af;
1198240233Sglebius
1199223637Sbz	return (sk);
1200223637Sbz}
1201223637Sbz
1202240233Sglebiusstruct pf_state_key *
1203240233Sglebiuspf_state_key_clone(struct pf_state_key *orig)
1204223637Sbz{
1205240233Sglebius	struct pf_state_key *sk;
1206223637Sbz
1207240233Sglebius	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
1208240233Sglebius	if (sk == NULL)
1209240233Sglebius		return (NULL);
1210223637Sbz
1211240233Sglebius	bcopy(orig, sk, sizeof(struct pf_state_key_cmp));
1212223637Sbz
1213240233Sglebius	return (sk);
1214223637Sbz}
1215223637Sbz
1216223637Sbzint
1217223637Sbzpf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
1218223637Sbz    struct pf_state_key *sks, struct pf_state *s)
1219223637Sbz{
1220240233Sglebius	struct pf_idhash *ih;
1221240233Sglebius	struct pf_state *cur;
1222250522Sglebius	int error;
1223223637Sbz
1224240233Sglebius	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
1225240233Sglebius	    ("%s: sks not pristine", __func__));
1226240233Sglebius	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
1227240233Sglebius	    ("%s: skw not pristine", __func__));
1228240233Sglebius	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
1229240233Sglebius
1230223637Sbz	s->kif = kif;
1231223637Sbz
1232223637Sbz	if (s->id == 0 && s->creatorid == 0) {
1233240233Sglebius		/* XXX: should be atomic, but probability of collision low */
1234240233Sglebius		if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID)
1235240233Sglebius			V_pf_stateid[curcpu] = 1;
1236240233Sglebius		s->id |= (uint64_t )curcpu << PFID_CPUSHIFT;
1237240233Sglebius		s->id = htobe64(s->id);
1238223637Sbz		s->creatorid = V_pf_status.hostid;
1239130613Smlaier	}
1240240233Sglebius
1241251681Sglebius	/* Returns with ID locked on success. */
1242250522Sglebius	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
1243250522Sglebius		return (error);
1244244184Sglebius
1245240233Sglebius	ih = &V_pf_idhash[PF_IDHASH(s)];
1246251681Sglebius	PF_HASHROW_ASSERT(ih);
1247240233Sglebius	LIST_FOREACH(cur, &ih->states, entry)
1248240233Sglebius		if (cur->id == s->id && cur->creatorid == s->creatorid)
1249240233Sglebius			break;
1250240233Sglebius
1251240233Sglebius	if (cur != NULL) {
1252240233Sglebius		PF_HASHROW_UNLOCK(ih);
1253223637Sbz		if (V_pf_status.debug >= PF_DEBUG_MISC) {
1254250521Sglebius			printf("pf: state ID collision: "
1255250312Sglebius			    "id: %016llx creatorid: %08x\n",
1256240233Sglebius			    (unsigned long long)be64toh(s->id),
1257240233Sglebius			    ntohl(s->creatorid));
1258130613Smlaier		}
1259223637Sbz		pf_detach_state(s);
1260250522Sglebius		return (EEXIST);
1261130613Smlaier	}
1262240233Sglebius	LIST_INSERT_HEAD(&ih->states, s, entry);
1263240233Sglebius	/* One for keys, one for ID hash. */
1264240233Sglebius	refcount_init(&s->refs, 2);
1265240233Sglebius
1266270574Sglebius	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
1267223637Sbz	if (pfsync_insert_state_ptr != NULL)
1268223637Sbz		pfsync_insert_state_ptr(s);
1269240233Sglebius
1270240233Sglebius	/* Returns locked. */
1271126258Smlaier	return (0);
1272126258Smlaier}
1273126258Smlaier
1274240233Sglebius/*
1275240233Sglebius * Find state by ID: returns with locked row on success.
1276240233Sglebius */
1277223637Sbzstruct pf_state *
1278240233Sglebiuspf_find_state_byid(uint64_t id, uint32_t creatorid)
1279223637Sbz{
1280240233Sglebius	struct pf_idhash *ih;
1281240233Sglebius	struct pf_state *s;
1282240233Sglebius
1283270574Sglebius	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
1284223637Sbz
1285273736Shselasky	ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))];
1286223637Sbz
1287240233Sglebius	PF_HASHROW_LOCK(ih);
1288240233Sglebius	LIST_FOREACH(s, &ih->states, entry)
1289240233Sglebius		if (s->id == id && s->creatorid == creatorid)
1290240233Sglebius			break;
1291223637Sbz
1292240233Sglebius	if (s == NULL)
1293240233Sglebius		PF_HASHROW_UNLOCK(ih);
1294240233Sglebius
1295240233Sglebius	return (s);
1296223637Sbz}
1297223637Sbz
1298240233Sglebius/*
1299240233Sglebius * Find state by key.
1300240233Sglebius * Returns with ID hash slot locked on success.
1301240233Sglebius */
1302240233Sglebiusstatic struct pf_state *
1303240233Sglebiuspf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
1304223637Sbz{
1305240233Sglebius	struct pf_keyhash	*kh;
1306223637Sbz	struct pf_state_key	*sk;
1307240233Sglebius	struct pf_state		*s;
1308240233Sglebius	int idx;
1309223637Sbz
1310270574Sglebius	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
1311223637Sbz
1312240233Sglebius	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
1313240233Sglebius
1314240233Sglebius	PF_HASHROW_LOCK(kh);
1315240233Sglebius	LIST_FOREACH(sk, &kh->keys, entry)
1316240233Sglebius		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
1317240233Sglebius			break;
1318240233Sglebius	if (sk == NULL) {
1319240233Sglebius		PF_HASHROW_UNLOCK(kh);
1320240233Sglebius		return (NULL);
1321223637Sbz	}
1322223637Sbz
1323240233Sglebius	idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
1324223637Sbz
1325240233Sglebius	/* List is sorted, if-bound states before floating ones. */
1326240233Sglebius	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
1327240233Sglebius		if (s->kif == V_pfi_all || s->kif == kif) {
1328240233Sglebius			PF_STATE_LOCK(s);
1329240233Sglebius			PF_HASHROW_UNLOCK(kh);
1330261018Sglebius			if (s->timeout >= PFTM_MAX) {
1331240233Sglebius				/*
1332261018Sglebius				 * State is either being processed by
1333261018Sglebius				 * pf_unlink_state() in an other thread, or
1334261018Sglebius				 * is scheduled for immediate expiry.
1335240233Sglebius				 */
1336240233Sglebius				PF_STATE_UNLOCK(s);
1337240233Sglebius				return (NULL);
1338240233Sglebius			}
1339240233Sglebius			return (s);
1340240233Sglebius		}
1341240233Sglebius	PF_HASHROW_UNLOCK(kh);
1342223637Sbz
1343223637Sbz	return (NULL);
1344223637Sbz}
1345223637Sbz
1346223637Sbzstruct pf_state *
1347223637Sbzpf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1348223637Sbz{
1349240233Sglebius	struct pf_keyhash	*kh;
1350223637Sbz	struct pf_state_key	*sk;
1351240233Sglebius	struct pf_state		*s, *ret = NULL;
1352240233Sglebius	int			 idx, inout = 0;
1353223637Sbz
1354270574Sglebius	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
1355223637Sbz
1356240233Sglebius	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
1357223637Sbz
1358240233Sglebius	PF_HASHROW_LOCK(kh);
1359240233Sglebius	LIST_FOREACH(sk, &kh->keys, entry)
1360240233Sglebius		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
1361240233Sglebius			break;
1362240233Sglebius	if (sk == NULL) {
1363240233Sglebius		PF_HASHROW_UNLOCK(kh);
1364240233Sglebius		return (NULL);
1365223637Sbz	}
1366240233Sglebius	switch (dir) {
1367240233Sglebius	case PF_IN:
1368240233Sglebius		idx = PF_SK_WIRE;
1369240233Sglebius		break;
1370240233Sglebius	case PF_OUT:
1371240233Sglebius		idx = PF_SK_STACK;
1372240233Sglebius		break;
1373240233Sglebius	case PF_INOUT:
1374240233Sglebius		idx = PF_SK_WIRE;
1375240233Sglebius		inout = 1;
1376240233Sglebius		break;
1377240233Sglebius	default:
1378240233Sglebius		panic("%s: dir %u", __func__, dir);
1379240233Sglebius	}
1380240233Sglebiussecond_run:
1381240233Sglebius	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
1382240233Sglebius		if (more == NULL) {
1383240233Sglebius			PF_HASHROW_UNLOCK(kh);
1384240233Sglebius			return (s);
1385240233Sglebius		}
1386240233Sglebius
1387240233Sglebius		if (ret)
1388240233Sglebius			(*more)++;
1389240233Sglebius		else
1390240233Sglebius			ret = s;
1391240233Sglebius	}
1392240233Sglebius	if (inout == 1) {
1393240233Sglebius		inout = 0;
1394240233Sglebius		idx = PF_SK_STACK;
1395240233Sglebius		goto second_run;
1396240233Sglebius	}
1397240233Sglebius	PF_HASHROW_UNLOCK(kh);
1398240233Sglebius
1399240233Sglebius	return (ret);
1400223637Sbz}
1401223637Sbz
1402223637Sbz/* END state table stuff */
1403223637Sbz
/*
 * Queue a send entry on the per-vnet send queue and schedule the pf
 * software interrupt; pf_intr() drains that queue.
 */
static void
pf_send(struct pf_send_entry *pfse)
{

	PF_SENDQ_LOCK();
	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
	PF_SENDQ_UNLOCK();
	/* The entry is queued; kick the handler outside the queue lock. */
	swi_sched(V_pf_swi_cookie, 0);
}
1413240233Sglebius
1414126258Smlaiervoid
1415240233Sglebiuspf_intr(void *v)
1416240233Sglebius{
1417240233Sglebius	struct pf_send_head queue;
1418240233Sglebius	struct pf_send_entry *pfse, *next;
1419240233Sglebius
1420240233Sglebius	CURVNET_SET((struct vnet *)v);
1421240233Sglebius
1422240233Sglebius	PF_SENDQ_LOCK();
1423240233Sglebius	queue = V_pf_sendqueue;
1424240233Sglebius	STAILQ_INIT(&V_pf_sendqueue);
1425240233Sglebius	PF_SENDQ_UNLOCK();
1426240233Sglebius
1427240233Sglebius	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
1428240233Sglebius		switch (pfse->pfse_type) {
1429240233Sglebius#ifdef INET
1430240233Sglebius		case PFSE_IP:
1431240233Sglebius			ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL);
1432240233Sglebius			break;
1433240233Sglebius		case PFSE_ICMP:
1434240233Sglebius			icmp_error(pfse->pfse_m, pfse->pfse_icmp_type,
1435240233Sglebius			    pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu);
1436240233Sglebius			break;
1437240233Sglebius#endif /* INET */
1438240233Sglebius#ifdef INET6
1439240233Sglebius		case PFSE_IP6:
1440240233Sglebius			ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL,
1441240233Sglebius			    NULL);
1442240233Sglebius			break;
1443240233Sglebius		case PFSE_ICMP6:
1444240233Sglebius			icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type,
1445240233Sglebius			    pfse->pfse_icmp_code, pfse->pfse_icmp_mtu);
1446240233Sglebius			break;
1447240233Sglebius#endif /* INET6 */
1448240233Sglebius		default:
1449240233Sglebius			panic("%s: unknown type", __func__);
1450240233Sglebius		}
1451240233Sglebius		free(pfse, M_PFTEMP);
1452240233Sglebius	}
1453240233Sglebius	CURVNET_RESTORE();
1454240233Sglebius}
1455240233Sglebius
/*
 * Body of the pf purge kernel thread.
 *
 * v is the vnet the thread works for; it is installed as the current
 * vnet for the lifetime of the loop.
 *
 * Each iteration sleeps on the pf_purge_thread channel (timeout hz / 10)
 * and then either
 *  - performs the final cleanup and exits when V_pf_end_threads is set, or
 *  - scans a slice of the state hash and, whenever the scan wraps back to
 *    row 0, also purges fragments, source nodes, unlinked rules and kifs.
 *
 * The function never returns: the loop is only left through kproc_exit().
 */
void
pf_purge_thread(void *v)
{
	/* Next state hash row to scan; 0 means "a full pass just finished". */
	u_int idx = 0;

	CURVNET_SET((struct vnet *)v);

	for (;;) {
		PF_RULES_RLOCK();
		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);

		if (V_pf_end_threads) {
			/*
			 * To cleanse up all kifs and rules we need
			 * two runs: first one clears reference flags,
			 * then pf_purge_expired_states() doesn't
			 * raise them, and then second run frees.
			 */
			PF_RULES_RUNLOCK();
			pf_purge_unlinked_rules();
			pfi_kif_purge();

			/*
			 * Now purge everything.
			 *
			 * NOTE(review): with maxcheck == pf_hashmask the scan
			 * in pf_purge_expired_states() appears to stop after
			 * rows 0 .. pf_hashmask - 1, i.e. one row short of the
			 * whole table - confirm whether that is intended.
			 */
			pf_purge_expired_states(0, pf_hashmask);
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes();

			/*
			 * Now all kifs & rules should be unreferenced,
			 * thus should be successfully freed.
			 */
			pf_purge_unlinked_rules();
			pfi_kif_purge();

			/*
			 * Announce success and exit.
			 */
			PF_RULES_RLOCK();
			V_pf_end_threads++;
			PF_RULES_RUNLOCK();
			wakeup(pf_purge_thread);
			kproc_exit(0);
		}
		PF_RULES_RUNLOCK();

		/*
		 * Process 1/interval fraction of the state table every run.
		 * NOTE(review): assumes timeout[PFTM_INTERVAL] is never 0
		 * (it is used as a divisor) - TODO confirm where it is set.
		 */
		idx = pf_purge_expired_states(idx, pf_hashmask /
			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));

		/* Purge other expired types every PFTM_INTERVAL seconds. */
		if (idx == 0) {
			/*
			 * Order is important:
			 * - states and src nodes reference rules
			 * - states and rules reference kifs
			 */
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes();
			pf_purge_unlinked_rules();
			pfi_kif_purge();
		}
	}
	/* not reached */
	CURVNET_RESTORE();
}
1523126258Smlaier
1524126258Smlaieru_int32_t
1525126258Smlaierpf_state_expires(const struct pf_state *state)
1526126258Smlaier{
1527126258Smlaier	u_int32_t	timeout;
1528126258Smlaier	u_int32_t	start;
1529126258Smlaier	u_int32_t	end;
1530126258Smlaier	u_int32_t	states;
1531126258Smlaier
1532126258Smlaier	/* handle all PFTM_* > PFTM_MAX here */
1533126258Smlaier	if (state->timeout == PFTM_PURGE)
1534240233Sglebius		return (time_uptime);
1535171168Smlaier	KASSERT(state->timeout != PFTM_UNLINKED,
1536171168Smlaier	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
1537240233Sglebius	KASSERT((state->timeout < PFTM_MAX),
1538126261Smlaier	    ("pf_state_expires: timeout > PFTM_MAX"));
1539126258Smlaier	timeout = state->rule.ptr->timeout[state->timeout];
1540126258Smlaier	if (!timeout)
1541223637Sbz		timeout = V_pf_default_rule.timeout[state->timeout];
1542126258Smlaier	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1543126258Smlaier	if (start) {
1544126258Smlaier		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1545263029Sglebius		states = counter_u64_fetch(state->rule.ptr->states_cur);
1546126258Smlaier	} else {
1547223637Sbz		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1548223637Sbz		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1549223637Sbz		states = V_pf_status.states;
1550126258Smlaier	}
1551126258Smlaier	if (end && states > start && start < end) {
1552126258Smlaier		if (states < end)
1553126258Smlaier			return (state->expire + timeout * (end - states) /
1554126258Smlaier			    (end - start));
1555126258Smlaier		else
1556240233Sglebius			return (time_uptime);
1557126258Smlaier	}
1558126258Smlaier	return (state->expire + timeout);
1559126258Smlaier}
1560126258Smlaier
1561126258Smlaiervoid
1562240233Sglebiuspf_purge_expired_src_nodes()
1563126258Smlaier{
1564261019Sglebius	struct pf_src_node_list	 freelist;
1565240233Sglebius	struct pf_srchash	*sh;
1566240233Sglebius	struct pf_src_node	*cur, *next;
1567240233Sglebius	int i;
1568126258Smlaier
1569261019Sglebius	LIST_INIT(&freelist);
1570273736Shselasky	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
1571240233Sglebius	    PF_HASHROW_LOCK(sh);
1572240233Sglebius	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
1573261018Sglebius		if (cur->states == 0 && cur->expire <= time_uptime) {
1574285940Sglebius			pf_unlink_src_node(cur);
1575261019Sglebius			LIST_INSERT_HEAD(&freelist, cur, entry);
1576240233Sglebius		} else if (cur->rule.ptr != NULL)
1577240233Sglebius			cur->rule.ptr->rule_flag |= PFRULE_REFS;
1578240233Sglebius	    PF_HASHROW_UNLOCK(sh);
1579223637Sbz	}
1580261019Sglebius
1581261019Sglebius	pf_free_src_nodes(&freelist);
1582270574Sglebius
1583270574Sglebius	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
1584130613Smlaier}
1585126258Smlaier
1586240233Sglebiusstatic void
1587130613Smlaierpf_src_tree_remove_state(struct pf_state *s)
1588130613Smlaier{
1589285940Sglebius	struct pf_src_node *sn;
1590285940Sglebius	struct pf_srchash *sh;
1591285940Sglebius	uint32_t timeout;
1592126258Smlaier
1593285940Sglebius	timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ?
1594285940Sglebius	    s->rule.ptr->timeout[PFTM_SRC_NODE] :
1595285940Sglebius	    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1596285940Sglebius
1597130613Smlaier	if (s->src_node != NULL) {
1598285940Sglebius		sn = s->src_node;
1599285940Sglebius		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
1600285940Sglebius	    	PF_HASHROW_LOCK(sh);
1601223637Sbz		if (s->src.tcp_est)
1602285940Sglebius			--sn->conn;
1603285940Sglebius		if (--sn->states == 0)
1604285940Sglebius			sn->expire = time_uptime + timeout;
1605285940Sglebius	    	PF_HASHROW_UNLOCK(sh);
1606130613Smlaier	}
1607130613Smlaier	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1608285940Sglebius		sn = s->nat_src_node;
1609285940Sglebius		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
1610285940Sglebius	    	PF_HASHROW_LOCK(sh);
1611285940Sglebius		if (--sn->states == 0)
1612285940Sglebius			sn->expire = time_uptime + timeout;
1613285940Sglebius	    	PF_HASHROW_UNLOCK(sh);
1614130613Smlaier	}
1615130613Smlaier	s->src_node = s->nat_src_node = NULL;
1616130613Smlaier}
1617126258Smlaier
/*
 * Unlink and potentially free a state. Function may be
 * called with ID hash row locked, but always returns
 * unlocked, since it needs to go through key hash locking.
 *
 * s      state to unlink.
 * flags  PF_ENTER_LOCKED when the caller already holds the ID hash row
 *        lock of s; otherwise the row is locked here.
 *
 * Returns 0 when the state was already marked PFTM_UNLINKED, otherwise
 * the result of pf_release_state().
 */
int
pf_unlink_state(struct pf_state *s, u_int flags)
{
	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];

	if ((flags & PF_ENTER_LOCKED) == 0)
		PF_HASHROW_LOCK(ih);
	else
		PF_HASHROW_ASSERT(ih);

	if (s->timeout == PFTM_UNLINKED) {
		/*
		 * State is being processed
		 * by pf_unlink_state() in
		 * another thread.
		 */
		PF_HASHROW_UNLOCK(ih);
		return (0);	/* XXXGL: undefined actually */
	}

	/* A state still in PF_TCPS_PROXY_DST gets a RST|ACK sent for it. */
	if (s->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
		    &s->key[PF_SK_WIRE]->addr[1],
		    &s->key[PF_SK_WIRE]->addr[0],
		    s->key[PF_SK_WIRE]->port[1],
		    s->key[PF_SK_WIRE]->port[0],
		    s->src.seqhi, s->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
	}

	/* Take the state off its ID hash row and off its source nodes. */
	LIST_REMOVE(s, entry);
	pf_src_tree_remove_state(s);

	/* Notify pfsync if its hook is installed. */
	if (pfsync_delete_state_ptr != NULL)
		pfsync_delete_state_ptr(s);

	STATE_DEC_COUNTERS(s);

	/* From here on other threads see the state as already unlinked. */
	s->timeout = PFTM_UNLINKED;

	PF_HASHROW_UNLOCK(ih);

	/* Key hash detach happens after the ID hash row lock is dropped. */
	pf_detach_state(s);
	/* NOTE(review): presumably the reference held for the hash linkage. */
	refcount_release(&s->refs);

	return (pf_release_state(s));
}
1671171168Smlaier
1672171168Smlaiervoid
1673171168Smlaierpf_free_state(struct pf_state *cur)
1674171168Smlaier{
1675223637Sbz
1676240233Sglebius	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
1677240233Sglebius	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
1678240233Sglebius	    cur->timeout));
1679243944Sglebius
1680145836Smlaier	pf_normalize_tcp_cleanup(cur);
1681240233Sglebius	uma_zfree(V_pf_state_z, cur);
1682270574Sglebius	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
1683145836Smlaier}
1684145836Smlaier
/*
 * Called only from pf_purge_thread(), thus serialized.
 *
 * Scan up to maxcheck rows of the state ID hash, starting at row i.
 * Expired states are unlinked; for every state that stays, the rules
 * (rule, nat_rule, anchor) and interfaces (kif, rt_kif) it uses are
 * flagged as referenced.
 *
 * Returns the row index at which the next call should continue, or 0
 * once the end of the hash has been reached.  V_pf_status.states is
 * refreshed from the UMA zone on entry and before returning.
 */
static u_int
pf_purge_expired_states(u_int i, int maxcheck)
{
	struct pf_idhash *ih;
	struct pf_state *s;

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	/*
	 * Go through hash and unlink states that expire now.
	 */
	while (maxcheck > 0) {

		ih = &V_pf_idhash[i];
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (pf_state_expires(s) <= time_uptime) {
				/*
				 * pf_unlink_state() returns with the row
				 * unlocked, so the walk cannot continue:
				 * re-lock and rescan this row from its head.
				 */
				V_pf_status.states -=
				    pf_unlink_state(s, PF_ENTER_LOCKED);
				goto relock;
			}
			/* Mark everything this live state still uses. */
			s->rule.ptr->rule_flag |= PFRULE_REFS;
			if (s->nat_rule.ptr != NULL)
				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
			if (s->anchor.ptr != NULL)
				s->anchor.ptr->rule_flag |= PFRULE_REFS;
			s->kif->pfik_flags |= PFI_IFLAG_REFS;
			if (s->rt_kif)
				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
		}
		PF_HASHROW_UNLOCK(ih);

		/* Return when we hit end of hash. */
		if (++i > pf_hashmask) {
			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
			return (0);
		}

		maxcheck--;
	}

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	return (i);
}
1734126258Smlaier
1735240233Sglebiusstatic void
1736240233Sglebiuspf_purge_unlinked_rules()
1737126258Smlaier{
1738240233Sglebius	struct pf_rulequeue tmpq;
1739240233Sglebius	struct pf_rule *r, *r1;
1740126258Smlaier
1741240233Sglebius	/*
1742240811Sglebius	 * If we have overloading task pending, then we'd
1743240811Sglebius	 * better skip purging this time. There is a tiny
1744240811Sglebius	 * probability that overloading task references
1745240811Sglebius	 * an already unlinked rule.
1746240811Sglebius	 */
1747240811Sglebius	PF_OVERLOADQ_LOCK();
1748240811Sglebius	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
1749240811Sglebius		PF_OVERLOADQ_UNLOCK();
1750240811Sglebius		return;
1751240811Sglebius	}
1752240811Sglebius	PF_OVERLOADQ_UNLOCK();
1753240811Sglebius
1754240811Sglebius	/*
1755240233Sglebius	 * Do naive mark-and-sweep garbage collecting of old rules.
1756240233Sglebius	 * Reference flag is raised by pf_purge_expired_states()
1757240233Sglebius	 * and pf_purge_expired_src_nodes().
1758240233Sglebius	 *
1759240233Sglebius	 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
1760240233Sglebius	 * use a temporary queue.
1761240233Sglebius	 */
1762240233Sglebius	TAILQ_INIT(&tmpq);
1763240233Sglebius	PF_UNLNKDRULES_LOCK();
1764240233Sglebius	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
1765240233Sglebius		if (!(r->rule_flag & PFRULE_REFS)) {
1766240233Sglebius			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
1767240233Sglebius			TAILQ_INSERT_TAIL(&tmpq, r, entries);
1768240233Sglebius		} else
1769240233Sglebius			r->rule_flag &= ~PFRULE_REFS;
1770240233Sglebius	}
1771240233Sglebius	PF_UNLNKDRULES_UNLOCK();
1772126258Smlaier
1773240233Sglebius	if (!TAILQ_EMPTY(&tmpq)) {
1774240233Sglebius		PF_RULES_WLOCK();
1775240233Sglebius		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
1776240233Sglebius			TAILQ_REMOVE(&tmpq, r, entries);
1777240233Sglebius			pf_free_rule(r);
1778240233Sglebius		}
1779240233Sglebius		PF_RULES_WUNLOCK();
1780240233Sglebius	}
1781126258Smlaier}
1782126258Smlaier
/*
 * Print an address and, when non-zero, a port.
 *
 * addr  address to print
 * p     port in network byte order; 0 suppresses the port
 * af    AF_INET prints "a.b.c.d[:port]"; AF_INET6 prints the address with
 *       its longest run of zero groups collapsed, followed by "[port]".
 *       Any other family prints nothing.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t host = ntohl(addr->addr32[0]);

		printf("%u.%u.%u.%u", (host>>24)&255, (host>>16)&255,
		    (host>>8)&255, host&255);
		if (p) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t group;
		/* 255 means "no run"; run_* is the current, best_* the longest. */
		u_int8_t i, run_lo, run_hi, best_lo, best_hi;

		run_lo = run_hi = best_lo = best_hi = 255;

		/* Pass 1: locate the longest run of all-zero 16-bit groups. */
		for (i = 0; i < 8; i++) {
			if (addr->addr16[i] == 0) {
				if (run_lo == 255)
					run_lo = i;
				run_hi = i;
				continue;
			}
			if ((run_hi - run_lo) > (best_hi - best_lo)) {
				best_lo = run_lo;
				best_hi = run_hi;
			}
			run_lo = run_hi = 255;
		}
		/* A run that reaches the last group is closed here. */
		if ((run_hi - run_lo) > (best_hi - best_lo)) {
			best_lo = run_lo;
			best_hi = run_hi;
		}

		/* Pass 2: print the groups, collapsing the chosen run. */
		for (i = 0; i < 8; i++) {
			if (i < best_lo || i > best_hi) {
				group = ntohs(addr->addr16[i]);
				printf("%x", group);
				if (i < 7)
					printf(":");
				continue;
			}
			if (i == 0)
				printf(":");
			if (i == best_hi)
				printf(":");
		}
		if (p) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
1845126258Smlaier
/*
 * Print a description of state s, using the keys stored in the state
 * itself (no key overrides are passed to pf_print_state_parts()).
 */
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}
1851223637Sbz
1852240233Sglebiusstatic void
1853223637Sbzpf_print_state_parts(struct pf_state *s,
1854223637Sbz    struct pf_state_key *skwp, struct pf_state_key *sksp)
1855223637Sbz{
1856223637Sbz	struct pf_state_key *skw, *sks;
1857223637Sbz	u_int8_t proto, dir;
1858223637Sbz
1859223637Sbz	/* Do our best to fill these, but they're skipped if NULL */
1860223637Sbz	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1861223637Sbz	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1862223637Sbz	proto = skw ? skw->proto : (sks ? sks->proto : 0);
1863223637Sbz	dir = s ? s->direction : 0;
1864223637Sbz
1865223637Sbz	switch (proto) {
1866223637Sbz	case IPPROTO_IPV4:
1867223637Sbz		printf("IPv4");
1868223637Sbz		break;
1869223637Sbz	case IPPROTO_IPV6:
1870223637Sbz		printf("IPv6");
1871223637Sbz		break;
1872126258Smlaier	case IPPROTO_TCP:
1873223637Sbz		printf("TCP");
1874126258Smlaier		break;
1875126258Smlaier	case IPPROTO_UDP:
1876223637Sbz		printf("UDP");
1877126258Smlaier		break;
1878126258Smlaier	case IPPROTO_ICMP:
1879223637Sbz		printf("ICMP");
1880126258Smlaier		break;
1881126258Smlaier	case IPPROTO_ICMPV6:
1882223637Sbz		printf("ICMPv6");
1883126258Smlaier		break;
1884126258Smlaier	default:
1885223637Sbz		printf("%u", skw->proto);
1886126258Smlaier		break;
1887126258Smlaier	}
1888223637Sbz	switch (dir) {
1889223637Sbz	case PF_IN:
1890223637Sbz		printf(" in");
1891223637Sbz		break;
1892223637Sbz	case PF_OUT:
1893223637Sbz		printf(" out");
1894223637Sbz		break;
1895223637Sbz	}
1896223637Sbz	if (skw) {
1897223637Sbz		printf(" wire: ");
1898223637Sbz		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1899223637Sbz		printf(" ");
1900223637Sbz		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1901223637Sbz	}
1902223637Sbz	if (sks) {
1903223637Sbz		printf(" stack: ");
1904223637Sbz		if (sks != skw) {
1905223637Sbz			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1906223637Sbz			printf(" ");
1907223637Sbz			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1908223637Sbz		} else
1909223637Sbz			printf("-");
1910223637Sbz	}
1911223637Sbz	if (s) {
1912223637Sbz		if (proto == IPPROTO_TCP) {
1913223637Sbz			printf(" [lo=%u high=%u win=%u modulator=%u",
1914223637Sbz			    s->src.seqlo, s->src.seqhi,
1915223637Sbz			    s->src.max_win, s->src.seqdiff);
1916223637Sbz			if (s->src.wscale && s->dst.wscale)
1917223637Sbz				printf(" wscale=%u",
1918223637Sbz				    s->src.wscale & PF_WSCALE_MASK);
1919223637Sbz			printf("]");
1920223637Sbz			printf(" [lo=%u high=%u win=%u modulator=%u",
1921223637Sbz			    s->dst.seqlo, s->dst.seqhi,
1922223637Sbz			    s->dst.max_win, s->dst.seqdiff);
1923223637Sbz			if (s->src.wscale && s->dst.wscale)
1924223637Sbz				printf(" wscale=%u",
1925223637Sbz				s->dst.wscale & PF_WSCALE_MASK);
1926223637Sbz			printf("]");
1927223637Sbz		}
1928223637Sbz		printf(" %u:%u", s->src.state, s->dst.state);
1929223637Sbz	}
1930126258Smlaier}
1931126258Smlaier
1932126258Smlaiervoid
1933126258Smlaierpf_print_flags(u_int8_t f)
1934126258Smlaier{
1935126258Smlaier	if (f)
1936126258Smlaier		printf(" ");
1937126258Smlaier	if (f & TH_FIN)
1938126258Smlaier		printf("F");
1939126258Smlaier	if (f & TH_SYN)
1940126258Smlaier		printf("S");
1941126258Smlaier	if (f & TH_RST)
1942126258Smlaier		printf("R");
1943126258Smlaier	if (f & TH_PUSH)
1944126258Smlaier		printf("P");
1945126258Smlaier	if (f & TH_ACK)
1946126258Smlaier		printf("A");
1947126258Smlaier	if (f & TH_URG)
1948126258Smlaier		printf("U");
1949126258Smlaier	if (f & TH_ECE)
1950126258Smlaier		printf("E");
1951126258Smlaier	if (f & TH_CWR)
1952126258Smlaier		printf("W");
1953126258Smlaier}
1954126258Smlaier
1955126258Smlaier#define	PF_SET_SKIP_STEPS(i)					\
1956126258Smlaier	do {							\
1957126258Smlaier		while (head[i] != cur) {			\
1958126258Smlaier			head[i]->skip[i].ptr = cur;		\
1959126258Smlaier			head[i] = TAILQ_NEXT(head[i], entries);	\
1960126258Smlaier		}						\
1961126258Smlaier	} while (0)
1962126258Smlaier
1963126258Smlaiervoid
1964126258Smlaierpf_calc_skip_steps(struct pf_rulequeue *rules)
1965126258Smlaier{
1966126258Smlaier	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1967126258Smlaier	int i;
1968126258Smlaier
1969126258Smlaier	cur = TAILQ_FIRST(rules);
1970126258Smlaier	prev = cur;
1971126258Smlaier	for (i = 0; i < PF_SKIP_COUNT; ++i)
1972126258Smlaier		head[i] = cur;
1973126258Smlaier	while (cur != NULL) {
1974126258Smlaier
1975130613Smlaier		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1976126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1977126258Smlaier		if (cur->direction != prev->direction)
1978126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1979126258Smlaier		if (cur->af != prev->af)
1980126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1981126258Smlaier		if (cur->proto != prev->proto)
1982126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1983145836Smlaier		if (cur->src.neg != prev->src.neg ||
1984126258Smlaier		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1985126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1986126258Smlaier		if (cur->src.port[0] != prev->src.port[0] ||
1987126258Smlaier		    cur->src.port[1] != prev->src.port[1] ||
1988126258Smlaier		    cur->src.port_op != prev->src.port_op)
1989126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1990145836Smlaier		if (cur->dst.neg != prev->dst.neg ||
1991126258Smlaier		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1992126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1993126258Smlaier		if (cur->dst.port[0] != prev->dst.port[0] ||
1994126258Smlaier		    cur->dst.port[1] != prev->dst.port[1] ||
1995126258Smlaier		    cur->dst.port_op != prev->dst.port_op)
1996126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1997126258Smlaier
1998126258Smlaier		prev = cur;
1999126258Smlaier		cur = TAILQ_NEXT(cur, entries);
2000126258Smlaier	}
2001126258Smlaier	for (i = 0; i < PF_SKIP_COUNT; ++i)
2002126258Smlaier		PF_SET_SKIP_STEPS(i);
2003126258Smlaier}
2004126258Smlaier
2005240233Sglebiusstatic int
2006126258Smlaierpf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2007126258Smlaier{
2008126258Smlaier	if (aw1->type != aw2->type)
2009126258Smlaier		return (1);
2010126258Smlaier	switch (aw1->type) {
2011126258Smlaier	case PF_ADDR_ADDRMASK:
2012223637Sbz	case PF_ADDR_RANGE:
2013303850Skp		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
2014126258Smlaier			return (1);
2015303850Skp		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
2016126258Smlaier			return (1);
2017126258Smlaier		return (0);
2018126258Smlaier	case PF_ADDR_DYNIFTL:
2019130613Smlaier		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
2020126258Smlaier	case PF_ADDR_NOROUTE:
2021171168Smlaier	case PF_ADDR_URPFFAILED:
2022126258Smlaier		return (0);
2023126258Smlaier	case PF_ADDR_TABLE:
2024126258Smlaier		return (aw1->p.tbl != aw2->p.tbl);
2025126258Smlaier	default:
2026126258Smlaier		printf("invalid address type: %d\n", aw1->type);
2027126258Smlaier		return (1);
2028126258Smlaier	}
2029126258Smlaier}
2030126258Smlaier
2031289703Skp/**
2032289703Skp * Checksum updates are a little complicated because the checksum in the TCP/UDP
2033289703Skp * header isn't always a full checksum. In some cases (i.e. output) it's a
2034289703Skp * pseudo-header checksum, which is a partial checksum over src/dst IP
2035289703Skp * addresses, protocol number and length.
2036289703Skp *
2037289703Skp * That means we have the following cases:
2038289703Skp *  * Input or forwarding: we don't have TSO, the checksum fields are full
2039289703Skp *  	checksums, we need to update the checksum whenever we change anything.
2040289703Skp *  * Output (i.e. the checksum is a pseudo-header checksum):
2041289703Skp *  	x The field being updated is src/dst address or affects the length of
2042289703Skp *  	the packet. We need to update the pseudo-header checksum (note that this
2043289703Skp *  	checksum is not ones' complement).
2044289703Skp *  	x Some other field is being modified (e.g. src/dst port numbers): We
2045289703Skp *  	don't have to update anything.
2046289703Skp **/
/*
 * Incrementally adjust a 16-bit Internet checksum after one 16-bit word
 * of the covered data changed from "old" to "new".
 *
 * cksum  current checksum
 * old    previous value of the word
 * new    new value of the word
 * udp    non-zero for UDP semantics: a checksum of 0 is returned
 *        unchanged, and a result of 0 is returned as 0xFFFF (in UDP a
 *        zero checksum field means "no checksum").
 *
 * Returns the adjusted checksum.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	/* Apply the difference, then fold the carry/borrow back in. */
	sum = cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
2061126258Smlaier
2062289703Skpu_int16_t
2063289703Skppf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
2064289703Skp        u_int16_t new, u_int8_t udp)
2065289703Skp{
2066289703Skp	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2067289703Skp		return (cksum);
2068289703Skp
2069289703Skp	return (pf_cksum_fixup(cksum, old, new, udp));
2070289703Skp}
2071289703Skp
2072240233Sglebiusstatic void
2073289703Skppf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
2074289703Skp        u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
2075289703Skp        sa_family_t af)
2076126258Smlaier{
2077126258Smlaier	struct pf_addr	ao;
2078126258Smlaier	u_int16_t	po = *p;
2079126258Smlaier
2080126258Smlaier	PF_ACPY(&ao, a, af);
2081126258Smlaier	PF_ACPY(a, an, af);
2082126258Smlaier
2083289703Skp	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2084289703Skp		*pc = ~*pc;
2085289703Skp
2086126258Smlaier	*p = pn;
2087126258Smlaier
2088126258Smlaier	switch (af) {
2089126258Smlaier#ifdef INET
2090126258Smlaier	case AF_INET:
2091126258Smlaier		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2092126258Smlaier		    ao.addr16[0], an->addr16[0], 0),
2093126258Smlaier		    ao.addr16[1], an->addr16[1], 0);
2094126258Smlaier		*p = pn;
2095289703Skp
2096289703Skp		*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
2097126258Smlaier		    ao.addr16[0], an->addr16[0], u),
2098289703Skp		    ao.addr16[1], an->addr16[1], u);
2099289703Skp
2100289703Skp		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
2101126258Smlaier		break;
2102126258Smlaier#endif /* INET */
2103126258Smlaier#ifdef INET6
2104126258Smlaier	case AF_INET6:
2105126258Smlaier		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2106126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2107289703Skp		    pf_cksum_fixup(pf_cksum_fixup(*pc,
2108126258Smlaier		    ao.addr16[0], an->addr16[0], u),
2109126258Smlaier		    ao.addr16[1], an->addr16[1], u),
2110126258Smlaier		    ao.addr16[2], an->addr16[2], u),
2111126258Smlaier		    ao.addr16[3], an->addr16[3], u),
2112126258Smlaier		    ao.addr16[4], an->addr16[4], u),
2113126258Smlaier		    ao.addr16[5], an->addr16[5], u),
2114126258Smlaier		    ao.addr16[6], an->addr16[6], u),
2115289703Skp		    ao.addr16[7], an->addr16[7], u);
2116289703Skp
2117289703Skp		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
2118126258Smlaier		break;
2119126258Smlaier#endif /* INET6 */
2120126258Smlaier	}
2121289703Skp
2122289703Skp	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
2123289703Skp	    CSUM_DELAY_DATA_IPV6)) {
2124289703Skp		*pc = ~*pc;
2125289703Skp		if (! *pc)
2126289703Skp			*pc = 0xffff;
2127289703Skp	}
2128126258Smlaier}
2129126258Smlaier
/*
 * Replace the 32-bit value at a with an and fix checksum *c accordingly.
 * a is a void pointer and is accessed through memcpy(), so it carries no
 * alignment requirement.  u is handed to pf_cksum_fixup() as its UDP flag.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	/* Upper 16 bits first, then the lower 16 bits. */
	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
}
2141126258Smlaier
/*
 * Replace the 32-bit value at a with an and fix the protocol checksum *c
 * through pf_proto_cksum_fixup(), which leaves the checksum alone when
 * the mbuf m carries a delayed-checksum flag.  a is accessed through
 * memcpy(); udp is passed on as the UDP flag.
 */
void
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	/* Upper 16 bits first, then the lower 16 bits. */
	sum = pf_proto_cksum_fixup(m, *c, prev >> 16, an >> 16, udp);
	*c = pf_proto_cksum_fixup(m, sum, prev & 0xffff, an & 0xffff, udp);
}
2154289703Skp
#ifdef INET6
/*
 * Replace the IPv6 address at a with an and fix checksum *c for each of
 * the eight 16-bit words of the address, in order.  u is handed to
 * pf_cksum_fixup() as its UDP flag.
 */
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	prev;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&prev, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, prev.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
2177126258Smlaier
2178240233Sglebiusstatic void
2179126258Smlaierpf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
2180126258Smlaier    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
2181126258Smlaier    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
2182126258Smlaier{
2183126258Smlaier	struct pf_addr	oia, ooa;
2184126258Smlaier
2185126258Smlaier	PF_ACPY(&oia, ia, af);
2186223637Sbz	if (oa)
2187223637Sbz		PF_ACPY(&ooa, oa, af);
2188126258Smlaier
2189126258Smlaier	/* Change inner protocol port, fix inner protocol checksum. */
2190126258Smlaier	if (ip != NULL) {
2191126258Smlaier		u_int16_t	oip = *ip;
2192223637Sbz		u_int32_t	opc;
2193126258Smlaier
2194126258Smlaier		if (pc != NULL)
2195126258Smlaier			opc = *pc;
2196126258Smlaier		*ip = np;
2197126258Smlaier		if (pc != NULL)
2198126258Smlaier			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
2199126258Smlaier		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
2200126258Smlaier		if (pc != NULL)
2201126258Smlaier			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
2202126258Smlaier	}
2203126258Smlaier	/* Change inner ip address, fix inner ip and icmp checksums. */
2204126258Smlaier	PF_ACPY(ia, na, af);
2205126258Smlaier	switch (af) {
2206126258Smlaier#ifdef INET
2207126258Smlaier	case AF_INET: {
2208126258Smlaier		u_int32_t	 oh2c = *h2c;
2209126258Smlaier
2210126258Smlaier		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
2211126258Smlaier		    oia.addr16[0], ia->addr16[0], 0),
2212126258Smlaier		    oia.addr16[1], ia->addr16[1], 0);
2213126258Smlaier		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2214126258Smlaier		    oia.addr16[0], ia->addr16[0], 0),
2215126258Smlaier		    oia.addr16[1], ia->addr16[1], 0);
2216126258Smlaier		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
2217126258Smlaier		break;
2218126258Smlaier	}
2219126258Smlaier#endif /* INET */
2220126258Smlaier#ifdef INET6
2221126258Smlaier	case AF_INET6:
2222126258Smlaier		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2223126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2224126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(*ic,
2225126258Smlaier		    oia.addr16[0], ia->addr16[0], u),
2226126258Smlaier		    oia.addr16[1], ia->addr16[1], u),
2227126258Smlaier		    oia.addr16[2], ia->addr16[2], u),
2228126258Smlaier		    oia.addr16[3], ia->addr16[3], u),
2229126258Smlaier		    oia.addr16[4], ia->addr16[4], u),
2230126258Smlaier		    oia.addr16[5], ia->addr16[5], u),
2231126258Smlaier		    oia.addr16[6], ia->addr16[6], u),
2232126258Smlaier		    oia.addr16[7], ia->addr16[7], u);
2233126258Smlaier		break;
2234126258Smlaier#endif /* INET6 */
2235126258Smlaier	}
2236223637Sbz	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
2237223637Sbz	if (oa) {
2238223637Sbz		PF_ACPY(oa, na, af);
2239223637Sbz		switch (af) {
2240126258Smlaier#ifdef INET
2241223637Sbz		case AF_INET:
2242223637Sbz			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
2243223637Sbz			    ooa.addr16[0], oa->addr16[0], 0),
2244223637Sbz			    ooa.addr16[1], oa->addr16[1], 0);
2245223637Sbz			break;
2246126258Smlaier#endif /* INET */
2247126258Smlaier#ifdef INET6
2248223637Sbz		case AF_INET6:
2249223637Sbz			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2250223637Sbz			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2251223637Sbz			    pf_cksum_fixup(pf_cksum_fixup(*ic,
2252223637Sbz			    ooa.addr16[0], oa->addr16[0], u),
2253223637Sbz			    ooa.addr16[1], oa->addr16[1], u),
2254223637Sbz			    ooa.addr16[2], oa->addr16[2], u),
2255223637Sbz			    ooa.addr16[3], oa->addr16[3], u),
2256223637Sbz			    ooa.addr16[4], oa->addr16[4], u),
2257223637Sbz			    ooa.addr16[5], oa->addr16[5], u),
2258223637Sbz			    ooa.addr16[6], oa->addr16[6], u),
2259223637Sbz			    ooa.addr16[7], oa->addr16[7], u);
2260223637Sbz			break;
2261126258Smlaier#endif /* INET6 */
2262223637Sbz		}
2263126258Smlaier	}
2264126258Smlaier}
2265126258Smlaier
2266171168Smlaier
/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 *
 * m    packet
 * off  offset of the TCP header within m
 * pd   packet descriptor; only pd->af is used here
 * th   TCP header; th_off gives the header length, th_sum is fixed up
 * dst  peer whose seqdiff is subtracted from every SACK edge
 *
 * Returns 1 when a SACK option was rewritten and copied back into the
 * mbuf, 0 otherwise (including when the options could not be pulled).
 */
static int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* hlen: option bytes still to parse; thoptlen: total option bytes. */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	/* Local copy of the options; opt walks through it. */
	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

	/* Smallest SACK option: 2 header bytes plus one block. */
#define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return 0;

	/* Stop once the remainder is too short to hold a SACK option. */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			/* Single-byte options: no length field to honour. */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			/* Never trust a length that runs past the options. */
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				/* Rewrite each complete block in place. */
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					/* memcpy: blocks may be unaligned. */
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_proto_a(m, &sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) - dst->seqdiff), 0);
					pf_change_proto_a(m, &sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) - dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = 1;
			}
			/* FALLTHROUGH */
		default:
			/* A length below 2 would stall the walk; force 2. */
			if (olen < 2)
				olen = 2;
			hlen -= olen;
			opt += olen;
		}
	}

	/* Write the whole (modified) option area back into the packet. */
	if (copyback)
		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
	return (copyback);
}
2321171168Smlaier
2322240233Sglebiusstatic void
2323162238Scsjppf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
2324126258Smlaier    const struct pf_addr *saddr, const struct pf_addr *daddr,
2325126258Smlaier    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2326145836Smlaier    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2327240233Sglebius    u_int16_t rtag, struct ifnet *ifp)
2328126258Smlaier{
2329240233Sglebius	struct pf_send_entry *pfse;
2330126258Smlaier	struct mbuf	*m;
2331171168Smlaier	int		 len, tlen;
2332126258Smlaier#ifdef INET
2333240233Sglebius	struct ip	*h = NULL;
2334126258Smlaier#endif /* INET */
2335126258Smlaier#ifdef INET6
2336240233Sglebius	struct ip6_hdr	*h6 = NULL;
2337126258Smlaier#endif /* INET6 */
2338171168Smlaier	struct tcphdr	*th;
2339171168Smlaier	char		*opt;
2340223637Sbz	struct pf_mtag  *pf_mtag;
2341126258Smlaier
2342171168Smlaier	len = 0;
2343171168Smlaier	th = NULL;
2344171168Smlaier
2345126258Smlaier	/* maximum segment size tcp option */
2346126258Smlaier	tlen = sizeof(struct tcphdr);
2347126258Smlaier	if (mss)
2348126258Smlaier		tlen += 4;
2349126258Smlaier
2350126258Smlaier	switch (af) {
2351126258Smlaier#ifdef INET
2352126258Smlaier	case AF_INET:
2353126258Smlaier		len = sizeof(struct ip) + tlen;
2354126258Smlaier		break;
2355126258Smlaier#endif /* INET */
2356126258Smlaier#ifdef INET6
2357126258Smlaier	case AF_INET6:
2358126258Smlaier		len = sizeof(struct ip6_hdr) + tlen;
2359126258Smlaier		break;
2360126258Smlaier#endif /* INET6 */
2361240233Sglebius	default:
2362240233Sglebius		panic("%s: unsupported af %d", __func__, af);
2363126258Smlaier	}
2364126258Smlaier
2365240233Sglebius	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
2366240233Sglebius	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
2367240233Sglebius	if (pfse == NULL)
2368132280Smlaier		return;
2369248324Sglebius	m = m_gethdr(M_NOWAIT, MT_DATA);
2370240233Sglebius	if (m == NULL) {
2371240233Sglebius		free(pfse, M_PFTEMP);
2372240233Sglebius		return;
2373240233Sglebius	}
2374162238Scsjp#ifdef MAC
2375223637Sbz	mac_netinet_firewall_send(m);
2376162238Scsjp#endif
2377171168Smlaier	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2378240233Sglebius		free(pfse, M_PFTEMP);
2379171168Smlaier		m_freem(m);
2380171168Smlaier		return;
2381171168Smlaier	}
2382171168Smlaier	if (tag)
2383145836Smlaier		m->m_flags |= M_SKIP_FIREWALL;
2384223637Sbz	pf_mtag->tag = rtag;
2385145836Smlaier
2386171168Smlaier	if (r != NULL && r->rtableid >= 0)
2387178888Sjulian		M_SETFIB(m, r->rtableid);
2388223637Sbz
2389126258Smlaier#ifdef ALTQ
2390126258Smlaier	if (r != NULL && r->qid) {
2391171168Smlaier		pf_mtag->qid = r->qid;
2392223637Sbz
2393171168Smlaier		/* add hints for ecn */
2394171168Smlaier		pf_mtag->hdr = mtod(m, struct ip *);
2395126258Smlaier	}
2396145836Smlaier#endif /* ALTQ */
2397126258Smlaier	m->m_data += max_linkhdr;
2398126258Smlaier	m->m_pkthdr.len = m->m_len = len;
2399126258Smlaier	m->m_pkthdr.rcvif = NULL;
2400126258Smlaier	bzero(m->m_data, len);
2401126258Smlaier	switch (af) {
2402126258Smlaier#ifdef INET
2403126258Smlaier	case AF_INET:
2404126258Smlaier		h = mtod(m, struct ip *);
2405126258Smlaier
2406126258Smlaier		/* IP header fields included in the TCP checksum */
2407126258Smlaier		h->ip_p = IPPROTO_TCP;
2408126258Smlaier		h->ip_len = htons(tlen);
2409126258Smlaier		h->ip_src.s_addr = saddr->v4.s_addr;
2410126258Smlaier		h->ip_dst.s_addr = daddr->v4.s_addr;
2411126258Smlaier
2412126258Smlaier		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
2413126258Smlaier		break;
2414126258Smlaier#endif /* INET */
2415126258Smlaier#ifdef INET6
2416126258Smlaier	case AF_INET6:
2417126258Smlaier		h6 = mtod(m, struct ip6_hdr *);
2418126258Smlaier
2419126258Smlaier		/* IP header fields included in the TCP checksum */
2420126258Smlaier		h6->ip6_nxt = IPPROTO_TCP;
2421126258Smlaier		h6->ip6_plen = htons(tlen);
2422126258Smlaier		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
2423126258Smlaier		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
2424126258Smlaier
2425126258Smlaier		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
2426126258Smlaier		break;
2427126258Smlaier#endif /* INET6 */
2428126258Smlaier	}
2429126258Smlaier
2430126258Smlaier	/* TCP header */
2431126258Smlaier	th->th_sport = sport;
2432126258Smlaier	th->th_dport = dport;
2433126258Smlaier	th->th_seq = htonl(seq);
2434126258Smlaier	th->th_ack = htonl(ack);
2435126258Smlaier	th->th_off = tlen >> 2;
2436126258Smlaier	th->th_flags = flags;
2437126258Smlaier	th->th_win = htons(win);
2438126258Smlaier
2439126258Smlaier	if (mss) {
2440126258Smlaier		opt = (char *)(th + 1);
2441126258Smlaier		opt[0] = TCPOPT_MAXSEG;
2442126258Smlaier		opt[1] = 4;
2443126258Smlaier		HTONS(mss);
2444126258Smlaier		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2445126258Smlaier	}
2446126258Smlaier
2447126258Smlaier	switch (af) {
2448126258Smlaier#ifdef INET
2449126258Smlaier	case AF_INET:
2450126258Smlaier		/* TCP checksum */
2451126258Smlaier		th->th_sum = in_cksum(m, len);
2452126258Smlaier
2453126258Smlaier		/* Finish the IP header */
2454126258Smlaier		h->ip_v = 4;
2455126258Smlaier		h->ip_hl = sizeof(*h) >> 2;
2456126258Smlaier		h->ip_tos = IPTOS_LOWDELAY;
2457241913Sglebius		h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
2458241913Sglebius		h->ip_len = htons(len);
2459223637Sbz		h->ip_ttl = ttl ? ttl : V_ip_defttl;
2460126258Smlaier		h->ip_sum = 0;
2461145836Smlaier
2462240233Sglebius		pfse->pfse_type = PFSE_IP;
2463126258Smlaier		break;
2464126258Smlaier#endif /* INET */
2465126258Smlaier#ifdef INET6
2466126258Smlaier	case AF_INET6:
2467126258Smlaier		/* TCP checksum */
2468126258Smlaier		th->th_sum = in6_cksum(m, IPPROTO_TCP,
2469126258Smlaier		    sizeof(struct ip6_hdr), tlen);
2470126258Smlaier
2471126258Smlaier		h6->ip6_vfc |= IPV6_VERSION;
2472126258Smlaier		h6->ip6_hlim = IPV6_DEFHLIM;
2473126258Smlaier
2474240233Sglebius		pfse->pfse_type = PFSE_IP6;
2475126258Smlaier		break;
2476126258Smlaier#endif /* INET6 */
2477126258Smlaier	}
2478240233Sglebius	pfse->pfse_m = m;
2479240233Sglebius	pf_send(pfse);
2480126258Smlaier}
2481126258Smlaier
2482223637Sbzstatic void
2483126258Smlaierpf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2484126258Smlaier    struct pf_rule *r)
2485126258Smlaier{
2486240233Sglebius	struct pf_send_entry *pfse;
2487240233Sglebius	struct mbuf *m0;
2488223637Sbz	struct pf_mtag *pf_mtag;
2489126258Smlaier
2490240233Sglebius	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
2491240233Sglebius	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
2492240233Sglebius	if (pfse == NULL)
2493132280Smlaier		return;
2494240233Sglebius
2495240233Sglebius	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
2496240233Sglebius		free(pfse, M_PFTEMP);
2497223637Sbz		return;
2498240233Sglebius	}
2499223637Sbz
2500240233Sglebius	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2501240233Sglebius		free(pfse, M_PFTEMP);
2502126258Smlaier		return;
2503240233Sglebius	}
2504171168Smlaier	/* XXX: revisit */
2505171168Smlaier	m0->m_flags |= M_SKIP_FIREWALL;
2506126258Smlaier
2507171168Smlaier	if (r->rtableid >= 0)
2508178888Sjulian		M_SETFIB(m0, r->rtableid);
2509171168Smlaier
2510126258Smlaier#ifdef ALTQ
2511126258Smlaier	if (r->qid) {
2512171168Smlaier		pf_mtag->qid = r->qid;
2513171168Smlaier		/* add hints for ecn */
2514171168Smlaier		pf_mtag->hdr = mtod(m0, struct ip *);
2515126258Smlaier	}
2516145836Smlaier#endif /* ALTQ */
2517126258Smlaier
2518126258Smlaier	switch (af) {
2519126258Smlaier#ifdef INET
2520126258Smlaier	case AF_INET:
2521240233Sglebius		pfse->pfse_type = PFSE_ICMP;
2522126258Smlaier		break;
2523126258Smlaier#endif /* INET */
2524126258Smlaier#ifdef INET6
2525126258Smlaier	case AF_INET6:
2526240233Sglebius		pfse->pfse_type = PFSE_ICMP6;
2527126258Smlaier		break;
2528126258Smlaier#endif /* INET6 */
2529126258Smlaier	}
2530240233Sglebius	pfse->pfse_m = m0;
2531240233Sglebius	pfse->pfse_icmp_type = type;
2532240233Sglebius	pfse->pfse_icmp_code = code;
2533240233Sglebius	pf_send(pfse);
2534126258Smlaier}
2535126258Smlaier
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; a non-zero n inverts the result.  For an address family that
 * is not handled here the addresses count as not equal.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* Bits that differ must all lie outside the mask. */
		equal = ((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) == 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	w;

		equal = 1;
		for (w = 0; w < 4; w++) {
			if (((a->addr32[w] ^ b->addr32[w]) &
			    m->addr32[w]) != 0) {
				equal = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
2581126258Smlaier
/*
 * Range check on addresses: return 1 if b <= a <= e, otherwise 0.
 *
 * Addresses are compared as big-endian numbers, i.e. each 32 bit word is
 * converted to host order and, for IPv6, the most significant word is
 * compared first.  An address family not handled here yields 1.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t	val = ntohl(a->addr32[0]);

		if (!(val >= ntohl(b->addr32[0]) &&
		    val <= ntohl(e->addr32[0])))
			return (0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int32_t	val, bound;
		int		i;

		/* check a >= b: the first differing word decides */
		for (i = 0; i < 4; ++i) {
			val = ntohl(a->addr32[i]);
			bound = ntohl(b->addr32[i]);
			if (val < bound)
				return (0);
			if (val > bound)
				break;
		}
		/* check a <= e: the first differing word decides */
		for (i = 0; i < 4; ++i) {
			val = ntohl(a->addr32[i]);
			bound = ntohl(e->addr32[i]);
			if (val > bound)
				return (0);
			if (val < bound)
				break;
		}
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
2619223637Sbz
2620240233Sglebiusstatic int
2621126258Smlaierpf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2622126258Smlaier{
2623126258Smlaier	switch (op) {
2624126258Smlaier	case PF_OP_IRG:
2625126258Smlaier		return ((p > a1) && (p < a2));
2626126258Smlaier	case PF_OP_XRG:
2627126258Smlaier		return ((p < a1) || (p > a2));
2628126258Smlaier	case PF_OP_RRG:
2629126258Smlaier		return ((p >= a1) && (p <= a2));
2630126258Smlaier	case PF_OP_EQ:
2631126258Smlaier		return (p == a1);
2632126258Smlaier	case PF_OP_NE:
2633126258Smlaier		return (p != a1);
2634126258Smlaier	case PF_OP_LT:
2635126258Smlaier		return (p < a1);
2636126258Smlaier	case PF_OP_LE:
2637126258Smlaier		return (p <= a1);
2638126258Smlaier	case PF_OP_GT:
2639126258Smlaier		return (p > a1);
2640126258Smlaier	case PF_OP_GE:
2641126258Smlaier		return (p >= a1);
2642126258Smlaier	}
2643126258Smlaier	return (0); /* never reached */
2644126258Smlaier}
2645126258Smlaier
/*
 * Port flavour of pf_match(): a1, a2 and p arrive in network byte order
 * and are converted to host order before the numeric comparison.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	const u_int16_t	lo = ntohs(a1);
	const u_int16_t	hi = ntohs(a2);
	const u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
2654126258Smlaier
2655240233Sglebiusstatic int
2656126258Smlaierpf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2657126258Smlaier{
2658126258Smlaier	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2659126258Smlaier		return (0);
2660126258Smlaier	return (pf_match(op, a1, a2, u));
2661126258Smlaier}
2662126258Smlaier
2663240233Sglebiusstatic int
2664126258Smlaierpf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2665126258Smlaier{
2666126258Smlaier	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2667126258Smlaier		return (0);
2668126258Smlaier	return (pf_match(op, a1, a2, g));
2669126258Smlaier}
2670126258Smlaier
2671223637Sbzint
2672240233Sglebiuspf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag)
2673171168Smlaier{
2674171168Smlaier	if (*tag == -1)
2675240233Sglebius		*tag = mtag;
2676171168Smlaier
2677126258Smlaier	return ((!r->match_tag_not && r->match_tag == *tag) ||
2678126258Smlaier	    (r->match_tag_not && r->match_tag != *tag));
2679126258Smlaier}
2680126258Smlaier
2681126258Smlaierint
2682240233Sglebiuspf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag)
2683126258Smlaier{
2684126258Smlaier
2685240233Sglebius	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
2686126258Smlaier
2687240233Sglebius	if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL))
2688240233Sglebius		return (ENOMEM);
2689240233Sglebius
2690240233Sglebius	pd->pf_mtag->tag = tag;
2691240233Sglebius
2692126258Smlaier	return (0);
2693126258Smlaier}
2694126258Smlaier
/*
 * Anchor evaluation stack.  pf_step_into_anchor() refuses to push once
 * the depth reaches PF_ANCHOR_STACKSIZE.
 */
#define	PF_ANCHOR_STACKSIZE	32
struct pf_anchor_stackframe {
	struct pf_ruleset	*rs;	/* ruleset to return to */
	struct pf_rule		*r;	/* XXX: + match bit */
	struct pf_anchor	*child;	/* current child of a wildcard anchor */
};

/*
 * XXX: We rely on malloc(9) returning pointer aligned addresses.
 *
 * The least significant bit of the frame's rule pointer doubles as a
 * "matched" flag, so the pointer must always be read through
 * PF_ANCHOR_RULE(), which masks the flag off again.
 */
#define	PF_ANCHORSTACK_MATCH	0x00000001
#define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)

/* Non-zero if the match flag is set on frame f. */
#define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
/*
 * Rule pointer of frame f with the flag bit stripped.  The expansion is
 * fully parenthesized so it can be used as an operand of any operator
 * (e.g. PF_ANCHOR_RULE(f)->quick or sizeof).
 */
#define	PF_ANCHOR_RULE(f)	((struct pf_rule *)			\
				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK))
/* Set the match flag on frame f. */
#define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 			\
				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
} while (0)
2714240641Sglebius
2715223637Sbzvoid
2716240641Sglebiuspf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth,
2717240641Sglebius    struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
2718240641Sglebius    int *match)
2719145836Smlaier{
2720145836Smlaier	struct pf_anchor_stackframe	*f;
2721126258Smlaier
2722240233Sglebius	PF_RULES_RASSERT();
2723240233Sglebius
2724171168Smlaier	if (match)
2725171168Smlaier		*match = 0;
2726240641Sglebius	if (*depth >= PF_ANCHOR_STACKSIZE) {
2727240641Sglebius		printf("%s: anchor stack overflow on %s\n",
2728240641Sglebius		    __func__, (*r)->anchor->name);
2729145836Smlaier		*r = TAILQ_NEXT(*r, entries);
2730145836Smlaier		return;
2731145836Smlaier	} else if (*depth == 0 && a != NULL)
2732145836Smlaier		*a = *r;
2733240641Sglebius	f = stack + (*depth)++;
2734145836Smlaier	f->rs = *rs;
2735145836Smlaier	f->r = *r;
2736145836Smlaier	if ((*r)->anchor_wildcard) {
2737240641Sglebius		struct pf_anchor_node *parent = &(*r)->anchor->children;
2738240641Sglebius
2739240641Sglebius		if ((f->child = RB_MIN(pf_anchor_node, parent)) == NULL) {
2740145836Smlaier			*r = NULL;
2741145836Smlaier			return;
2742145836Smlaier		}
2743145836Smlaier		*rs = &f->child->ruleset;
2744145836Smlaier	} else {
2745145836Smlaier		f->child = NULL;
2746145836Smlaier		*rs = &(*r)->anchor->ruleset;
2747145836Smlaier	}
2748145836Smlaier	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2749145836Smlaier}
2750126258Smlaier
2751171168Smlaierint
2752240641Sglebiuspf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth,
2753240641Sglebius    struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
2754240641Sglebius    int *match)
2755145836Smlaier{
2756145836Smlaier	struct pf_anchor_stackframe	*f;
2757240641Sglebius	struct pf_rule *fr;
2758171168Smlaier	int quick = 0;
2759145836Smlaier
2760240233Sglebius	PF_RULES_RASSERT();
2761240233Sglebius
2762145836Smlaier	do {
2763145836Smlaier		if (*depth <= 0)
2764145836Smlaier			break;
2765240641Sglebius		f = stack + *depth - 1;
2766240641Sglebius		fr = PF_ANCHOR_RULE(f);
2767240641Sglebius		if (f->child != NULL) {
2768240641Sglebius			struct pf_anchor_node *parent;
2769240641Sglebius
2770240641Sglebius			/*
2771240641Sglebius			 * This block traverses through
2772240641Sglebius			 * a wildcard anchor.
2773240641Sglebius			 */
2774240641Sglebius			parent = &fr->anchor->children;
2775240641Sglebius			if (match != NULL && *match) {
2776240641Sglebius				/*
2777240641Sglebius				 * If any of "*" matched, then
2778240641Sglebius				 * "foo/ *" matched, mark frame
2779240641Sglebius				 * appropriately.
2780240641Sglebius				 */
2781240641Sglebius				PF_ANCHOR_SET_MATCH(f);
2782171168Smlaier				*match = 0;
2783171168Smlaier			}
2784240641Sglebius			f->child = RB_NEXT(pf_anchor_node, parent, f->child);
2785145836Smlaier			if (f->child != NULL) {
2786145836Smlaier				*rs = &f->child->ruleset;
2787145836Smlaier				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2788145836Smlaier				if (*r == NULL)
2789145836Smlaier					continue;
2790145836Smlaier				else
2791145836Smlaier					break;
2792145836Smlaier			}
2793145836Smlaier		}
2794145836Smlaier		(*depth)--;
2795145836Smlaier		if (*depth == 0 && a != NULL)
2796145836Smlaier			*a = NULL;
2797145836Smlaier		*rs = f->rs;
2798240641Sglebius		if (PF_ANCHOR_MATCH(f) || (match != NULL && *match))
2799240641Sglebius			quick = fr->quick;
2800240641Sglebius		*r = TAILQ_NEXT(fr, entries);
2801145836Smlaier	} while (*r == NULL);
2802171168Smlaier
2803171168Smlaier	return (quick);
2804145836Smlaier}
2805145836Smlaier
2806126258Smlaier#ifdef INET6
2807126258Smlaiervoid
2808126258Smlaierpf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2809126258Smlaier    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2810126258Smlaier{
2811126258Smlaier	switch (af) {
2812126258Smlaier#ifdef INET
2813126258Smlaier	case AF_INET:
2814126258Smlaier		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2815126258Smlaier		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2816126258Smlaier		break;
2817126258Smlaier#endif /* INET */
2818126258Smlaier	case AF_INET6:
2819126258Smlaier		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2820126258Smlaier		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2821126258Smlaier		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2822126258Smlaier		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2823126258Smlaier		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2824126258Smlaier		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2825126258Smlaier		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2826126258Smlaier		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2827126258Smlaier		break;
2828126258Smlaier	}
2829126258Smlaier}
2830126258Smlaier
2831126258Smlaiervoid
2832130613Smlaierpf_addr_inc(struct pf_addr *addr, sa_family_t af)
2833126258Smlaier{
2834126258Smlaier	switch (af) {
2835126258Smlaier#ifdef INET
2836126258Smlaier	case AF_INET:
2837126258Smlaier		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2838126258Smlaier		break;
2839126258Smlaier#endif /* INET */
2840126258Smlaier	case AF_INET6:
2841126258Smlaier		if (addr->addr32[3] == 0xffffffff) {
2842126258Smlaier			addr->addr32[3] = 0;
2843126258Smlaier			if (addr->addr32[2] == 0xffffffff) {
2844126258Smlaier				addr->addr32[2] = 0;
2845126258Smlaier				if (addr->addr32[1] == 0xffffffff) {
2846126258Smlaier					addr->addr32[1] = 0;
2847126258Smlaier					addr->addr32[0] =
2848126258Smlaier					    htonl(ntohl(addr->addr32[0]) + 1);
2849126258Smlaier				} else
2850126258Smlaier					addr->addr32[1] =
2851126258Smlaier					    htonl(ntohl(addr->addr32[1]) + 1);
2852126258Smlaier			} else
2853126258Smlaier				addr->addr32[2] =
2854126258Smlaier				    htonl(ntohl(addr->addr32[2]) + 1);
2855126258Smlaier		} else
2856126258Smlaier			addr->addr32[3] =
2857126258Smlaier			    htonl(ntohl(addr->addr32[3]) + 1);
2858126258Smlaier		break;
2859126258Smlaier	}
2860126258Smlaier}
2861126258Smlaier#endif /* INET6 */
2862126258Smlaier
2863126258Smlaierint
2864240233Sglebiuspf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
2865126258Smlaier{
2866126258Smlaier	struct pf_addr		*saddr, *daddr;
2867126258Smlaier	u_int16_t		 sport, dport;
2868126261Smlaier	struct inpcbinfo	*pi;
2869126258Smlaier	struct inpcb		*inp;
2870126258Smlaier
2871171168Smlaier	pd->lookup.uid = UID_MAX;
2872171168Smlaier	pd->lookup.gid = GID_MAX;
2873223637Sbz
2874130613Smlaier	switch (pd->proto) {
2875126258Smlaier	case IPPROTO_TCP:
2876171168Smlaier		if (pd->hdr.tcp == NULL)
2877171168Smlaier			return (-1);
2878126258Smlaier		sport = pd->hdr.tcp->th_sport;
2879126258Smlaier		dport = pd->hdr.tcp->th_dport;
2880181803Sbz		pi = &V_tcbinfo;
2881126258Smlaier		break;
2882126258Smlaier	case IPPROTO_UDP:
2883171168Smlaier		if (pd->hdr.udp == NULL)
2884171168Smlaier			return (-1);
2885126258Smlaier		sport = pd->hdr.udp->uh_sport;
2886126258Smlaier		dport = pd->hdr.udp->uh_dport;
2887181803Sbz		pi = &V_udbinfo;
2888126258Smlaier		break;
2889126258Smlaier	default:
2890171168Smlaier		return (-1);
2891126258Smlaier	}
2892126258Smlaier	if (direction == PF_IN) {
2893126258Smlaier		saddr = pd->src;
2894126258Smlaier		daddr = pd->dst;
2895126258Smlaier	} else {
2896126258Smlaier		u_int16_t	p;
2897126258Smlaier
2898126258Smlaier		p = sport;
2899126258Smlaier		sport = dport;
2900126258Smlaier		dport = p;
2901126258Smlaier		saddr = pd->dst;
2902126258Smlaier		daddr = pd->src;
2903126258Smlaier	}
2904130613Smlaier	switch (pd->af) {
2905145836Smlaier#ifdef INET
2906126258Smlaier	case AF_INET:
2907240233Sglebius		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
2908240233Sglebius		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
2909126261Smlaier		if (inp == NULL) {
2910240233Sglebius			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
2911222488Srwatson			   daddr->v4, dport, INPLOOKUP_WILDCARD |
2912240233Sglebius			   INPLOOKUP_RLOCKPCB, NULL, m);
2913222488Srwatson			if (inp == NULL)
2914171168Smlaier				return (-1);
2915126261Smlaier		}
2916126258Smlaier		break;
2917145836Smlaier#endif /* INET */
2918126258Smlaier#ifdef INET6
2919126258Smlaier	case AF_INET6:
2920240233Sglebius		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
2921240233Sglebius		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
2922126261Smlaier		if (inp == NULL) {
2923240233Sglebius			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
2924222488Srwatson			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
2925240233Sglebius			    INPLOOKUP_RLOCKPCB, NULL, m);
2926222488Srwatson			if (inp == NULL)
2927171168Smlaier				return (-1);
2928126261Smlaier		}
2929126258Smlaier		break;
2930126258Smlaier#endif /* INET6 */
2931126258Smlaier
2932126258Smlaier	default:
2933171168Smlaier		return (-1);
2934126258Smlaier	}
2935222488Srwatson	INP_RLOCK_ASSERT(inp);
2936183606Sbz	pd->lookup.uid = inp->inp_cred->cr_uid;
2937183606Sbz	pd->lookup.gid = inp->inp_cred->cr_groups[0];
2938222488Srwatson	INP_RUNLOCK(inp);
2939240233Sglebius
2940126258Smlaier	return (1);
2941126258Smlaier}
2942126258Smlaier
2943240233Sglebiusstatic u_int8_t
2944126258Smlaierpf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2945126258Smlaier{
2946126258Smlaier	int		 hlen;
2947126258Smlaier	u_int8_t	 hdr[60];
2948126258Smlaier	u_int8_t	*opt, optlen;
2949126258Smlaier	u_int8_t	 wscale = 0;
2950126258Smlaier
2951126258Smlaier	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2952126258Smlaier	if (hlen <= sizeof(struct tcphdr))
2953126258Smlaier		return (0);
2954126258Smlaier	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2955126258Smlaier		return (0);
2956126258Smlaier	opt = hdr + sizeof(struct tcphdr);
2957126258Smlaier	hlen -= sizeof(struct tcphdr);
2958126258Smlaier	while (hlen >= 3) {
2959126258Smlaier		switch (*opt) {
2960126258Smlaier		case TCPOPT_EOL:
2961126258Smlaier		case TCPOPT_NOP:
2962126258Smlaier			++opt;
2963126258Smlaier			--hlen;
2964126258Smlaier			break;
2965126258Smlaier		case TCPOPT_WINDOW:
2966126258Smlaier			wscale = opt[2];
2967126258Smlaier			if (wscale > TCP_MAX_WINSHIFT)
2968126258Smlaier				wscale = TCP_MAX_WINSHIFT;
2969126258Smlaier			wscale |= PF_WSCALE_FLAG;
2970130613Smlaier			/* FALLTHROUGH */
2971126258Smlaier		default:
2972126258Smlaier			optlen = opt[1];
2973126258Smlaier			if (optlen < 2)
2974126258Smlaier				optlen = 2;
2975126258Smlaier			hlen -= optlen;
2976126258Smlaier			opt += optlen;
2977130613Smlaier			break;
2978126258Smlaier		}
2979126258Smlaier	}
2980126258Smlaier	return (wscale);
2981126258Smlaier}
2982126258Smlaier
2983240233Sglebiusstatic u_int16_t
2984126258Smlaierpf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2985126258Smlaier{
2986126258Smlaier	int		 hlen;
2987126258Smlaier	u_int8_t	 hdr[60];
2988126258Smlaier	u_int8_t	*opt, optlen;
2989181803Sbz	u_int16_t	 mss = V_tcp_mssdflt;
2990126258Smlaier
2991126258Smlaier	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2992126258Smlaier	if (hlen <= sizeof(struct tcphdr))
2993126258Smlaier		return (0);
2994126258Smlaier	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2995126258Smlaier		return (0);
2996126258Smlaier	opt = hdr + sizeof(struct tcphdr);
2997126258Smlaier	hlen -= sizeof(struct tcphdr);
2998126258Smlaier	while (hlen >= TCPOLEN_MAXSEG) {
2999126258Smlaier		switch (*opt) {
3000126258Smlaier		case TCPOPT_EOL:
3001126258Smlaier		case TCPOPT_NOP:
3002126258Smlaier			++opt;
3003126258Smlaier			--hlen;
3004126258Smlaier			break;
3005126258Smlaier		case TCPOPT_MAXSEG:
3006126258Smlaier			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3007145030Sglebius			NTOHS(mss);
3008130613Smlaier			/* FALLTHROUGH */
3009126258Smlaier		default:
3010126258Smlaier			optlen = opt[1];
3011126258Smlaier			if (optlen < 2)
3012126258Smlaier				optlen = 2;
3013126258Smlaier			hlen -= optlen;
3014126258Smlaier			opt += optlen;
3015130613Smlaier			break;
3016126258Smlaier		}
3017126258Smlaier	}
3018126258Smlaier	return (mss);
3019126258Smlaier}
3020126258Smlaier
3021240233Sglebiusstatic u_int16_t
3022231852Sbzpf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
3023126258Smlaier{
3024126258Smlaier#ifdef INET
3025126258Smlaier	struct sockaddr_in	*dst;
3026126258Smlaier	struct route		 ro;
3027126258Smlaier#endif /* INET */
3028126258Smlaier#ifdef INET6
3029126258Smlaier	struct sockaddr_in6	*dst6;
3030126258Smlaier	struct route_in6	 ro6;
3031126258Smlaier#endif /* INET6 */
3032126258Smlaier	struct rtentry		*rt = NULL;
3033223637Sbz	int			 hlen = 0;
3034181803Sbz	u_int16_t		 mss = V_tcp_mssdflt;
3035126258Smlaier
3036126258Smlaier	switch (af) {
3037126258Smlaier#ifdef INET
3038126258Smlaier	case AF_INET:
3039126258Smlaier		hlen = sizeof(struct ip);
3040126258Smlaier		bzero(&ro, sizeof(ro));
3041126258Smlaier		dst = (struct sockaddr_in *)&ro.ro_dst;
3042126258Smlaier		dst->sin_family = AF_INET;
3043126258Smlaier		dst->sin_len = sizeof(*dst);
3044126258Smlaier		dst->sin_addr = addr->v4;
3045231852Sbz		in_rtalloc_ign(&ro, 0, rtableid);
3046126258Smlaier		rt = ro.ro_rt;
3047126258Smlaier		break;
3048126258Smlaier#endif /* INET */
3049126258Smlaier#ifdef INET6
3050126258Smlaier	case AF_INET6:
3051126258Smlaier		hlen = sizeof(struct ip6_hdr);
3052126258Smlaier		bzero(&ro6, sizeof(ro6));
3053126258Smlaier		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
3054126258Smlaier		dst6->sin6_family = AF_INET6;
3055126258Smlaier		dst6->sin6_len = sizeof(*dst6);
3056126258Smlaier		dst6->sin6_addr = addr->v6;
3057231852Sbz		in6_rtalloc_ign(&ro6, 0, rtableid);
3058126258Smlaier		rt = ro6.ro_rt;
3059126258Smlaier		break;
3060126258Smlaier#endif /* INET6 */
3061126258Smlaier	}
3062126258Smlaier
3063126258Smlaier	if (rt && rt->rt_ifp) {
3064126258Smlaier		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
3065181803Sbz		mss = max(V_tcp_mssdflt, mss);
3066126258Smlaier		RTFREE(rt);
3067126258Smlaier	}
3068126258Smlaier	mss = min(mss, offer);
3069126258Smlaier	mss = max(mss, 64);		/* sanity - at least max opt space */
3070126258Smlaier	return (mss);
3071126258Smlaier}
3072126258Smlaier
3073240233Sglebiusstatic u_int32_t
3074223637Sbzpf_tcp_iss(struct pf_pdesc *pd)
3075223637Sbz{
3076223637Sbz	MD5_CTX ctx;
3077223637Sbz	u_int32_t digest[4];
3078223637Sbz
3079223637Sbz	if (V_pf_tcp_secret_init == 0) {
3080223637Sbz		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
3081223637Sbz		MD5Init(&V_pf_tcp_secret_ctx);
3082223637Sbz		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
3083223637Sbz		    sizeof(V_pf_tcp_secret));
3084223637Sbz		V_pf_tcp_secret_init = 1;
3085223637Sbz	}
3086223637Sbz
3087223637Sbz	ctx = V_pf_tcp_secret_ctx;
3088223637Sbz
3089223637Sbz	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
3090223637Sbz	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
3091223637Sbz	if (pd->af == AF_INET6) {
3092223637Sbz		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3093223637Sbz		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3094223637Sbz	} else {
3095223637Sbz		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3096223637Sbz		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
3097223637Sbz	}
3098223637Sbz	MD5Final((u_char *)digest, &ctx);
3099223637Sbz	V_pf_tcp_iss_off += 4096;
3100223637Sbz#define	ISN_RANDOM_INCREMENT (4096 - 1)
3101223637Sbz	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
3102223637Sbz	    V_pf_tcp_iss_off);
3103223637Sbz#undef	ISN_RANDOM_INCREMENT
3104223637Sbz}
3105223637Sbz
3106240233Sglebiusstatic int
3107223637Sbzpf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3108240233Sglebius    struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
3109240233Sglebius    struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp)
3110126258Smlaier{
3111130613Smlaier	struct pf_rule		*nr = NULL;
3112240233Sglebius	struct pf_addr		* const saddr = pd->src;
3113240233Sglebius	struct pf_addr		* const daddr = pd->dst;
3114126258Smlaier	sa_family_t		 af = pd->af;
3115126258Smlaier	struct pf_rule		*r, *a = NULL;
3116126258Smlaier	struct pf_ruleset	*ruleset = NULL;
3117130613Smlaier	struct pf_src_node	*nsn = NULL;
3118223637Sbz	struct tcphdr		*th = pd->hdr.tcp;
3119223637Sbz	struct pf_state_key	*sk = NULL, *nk = NULL;
3120126258Smlaier	u_short			 reason;
3121223637Sbz	int			 rewrite = 0, hdrlen = 0;
3122171168Smlaier	int			 tag = -1, rtableid = -1;
3123145836Smlaier	int			 asd = 0;
3124171168Smlaier	int			 match = 0;
3125223637Sbz	int			 state_icmp = 0;
3126223637Sbz	u_int16_t		 sport = 0, dport = 0;
3127223637Sbz	u_int16_t		 bproto_sum = 0, bip_sum = 0;
3128223637Sbz	u_int8_t		 icmptype = 0, icmpcode = 0;
3129240641Sglebius	struct pf_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
3130126258Smlaier
3131240233Sglebius	PF_RULES_RASSERT();
3132223637Sbz
3133240233Sglebius	if (inp != NULL) {
3134240233Sglebius		INP_LOCK_ASSERT(inp);
3135240233Sglebius		pd->lookup.uid = inp->inp_cred->cr_uid;
3136240233Sglebius		pd->lookup.gid = inp->inp_cred->cr_groups[0];
3137240233Sglebius		pd->lookup.done = 1;
3138145836Smlaier	}
3139145836Smlaier
3140223637Sbz	switch (pd->proto) {
3141223637Sbz	case IPPROTO_TCP:
3142223637Sbz		sport = th->th_sport;
3143223637Sbz		dport = th->th_dport;
3144223637Sbz		hdrlen = sizeof(*th);
3145223637Sbz		break;
3146223637Sbz	case IPPROTO_UDP:
3147223637Sbz		sport = pd->hdr.udp->uh_sport;
3148223637Sbz		dport = pd->hdr.udp->uh_dport;
3149223637Sbz		hdrlen = sizeof(*pd->hdr.udp);
3150223637Sbz		break;
3151223637Sbz#ifdef INET
3152223637Sbz	case IPPROTO_ICMP:
3153223637Sbz		if (pd->af != AF_INET)
3154223637Sbz			break;
3155223637Sbz		sport = dport = pd->hdr.icmp->icmp_id;
3156223637Sbz		hdrlen = sizeof(*pd->hdr.icmp);
3157223637Sbz		icmptype = pd->hdr.icmp->icmp_type;
3158223637Sbz		icmpcode = pd->hdr.icmp->icmp_code;
3159223637Sbz
3160223637Sbz		if (icmptype == ICMP_UNREACH ||
3161223637Sbz		    icmptype == ICMP_SOURCEQUENCH ||
3162223637Sbz		    icmptype == ICMP_REDIRECT ||
3163223637Sbz		    icmptype == ICMP_TIMXCEED ||
3164223637Sbz		    icmptype == ICMP_PARAMPROB)
3165223637Sbz			state_icmp++;
3166223637Sbz		break;
3167223637Sbz#endif /* INET */
3168223637Sbz#ifdef INET6
3169223637Sbz	case IPPROTO_ICMPV6:
3170223637Sbz		if (af != AF_INET6)
3171223637Sbz			break;
3172223637Sbz		sport = dport = pd->hdr.icmp6->icmp6_id;
3173223637Sbz		hdrlen = sizeof(*pd->hdr.icmp6);
3174223637Sbz		icmptype = pd->hdr.icmp6->icmp6_type;
3175223637Sbz		icmpcode = pd->hdr.icmp6->icmp6_code;
3176223637Sbz
3177223637Sbz		if (icmptype == ICMP6_DST_UNREACH ||
3178223637Sbz		    icmptype == ICMP6_PACKET_TOO_BIG ||
3179223637Sbz		    icmptype == ICMP6_TIME_EXCEEDED ||
3180223637Sbz		    icmptype == ICMP6_PARAM_PROB)
3181223637Sbz			state_icmp++;
3182223637Sbz		break;
3183223637Sbz#endif /* INET6 */
3184223637Sbz	default:
3185223637Sbz		sport = dport = hdrlen = 0;
3186223637Sbz		break;
3187223637Sbz	}
3188223637Sbz
3189126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3190126258Smlaier
3191223637Sbz	/* check packet for BINAT/NAT/RDR */
3192240233Sglebius	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
3193240641Sglebius	    &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
3194240233Sglebius		KASSERT(sk != NULL, ("%s: null sk", __func__));
3195240233Sglebius		KASSERT(nk != NULL, ("%s: null nk", __func__));
3196223637Sbz
3197223637Sbz		if (pd->ip_sum)
3198223637Sbz			bip_sum = *pd->ip_sum;
3199223637Sbz
3200223637Sbz		switch (pd->proto) {
3201223637Sbz		case IPPROTO_TCP:
3202223637Sbz			bproto_sum = th->th_sum;
3203223637Sbz			pd->proto_sum = &th->th_sum;
3204223637Sbz
3205223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3206223637Sbz			    nk->port[pd->sidx] != sport) {
3207289703Skp				pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
3208223637Sbz				    &th->th_sum, &nk->addr[pd->sidx],
3209223637Sbz				    nk->port[pd->sidx], 0, af);
3210223637Sbz				pd->sport = &th->th_sport;
3211223637Sbz				sport = th->th_sport;
3212223637Sbz			}
3213223637Sbz
3214223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3215223637Sbz			    nk->port[pd->didx] != dport) {
3216289703Skp				pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
3217223637Sbz				    &th->th_sum, &nk->addr[pd->didx],
3218223637Sbz				    nk->port[pd->didx], 0, af);
3219223637Sbz				dport = th->th_dport;
3220223637Sbz				pd->dport = &th->th_dport;
3221223637Sbz			}
3222126258Smlaier			rewrite++;
3223223637Sbz			break;
3224223637Sbz		case IPPROTO_UDP:
3225223637Sbz			bproto_sum = pd->hdr.udp->uh_sum;
3226223637Sbz			pd->proto_sum = &pd->hdr.udp->uh_sum;
3227223637Sbz
3228223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3229223637Sbz			    nk->port[pd->sidx] != sport) {
3230289703Skp				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
3231223637Sbz				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3232223637Sbz				    &nk->addr[pd->sidx],
3233223637Sbz				    nk->port[pd->sidx], 1, af);
3234223637Sbz				sport = pd->hdr.udp->uh_sport;
3235223637Sbz				pd->sport = &pd->hdr.udp->uh_sport;
3236223637Sbz			}
3237223637Sbz
3238223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3239223637Sbz			    nk->port[pd->didx] != dport) {
3240289703Skp				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
3241223637Sbz				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3242223637Sbz				    &nk->addr[pd->didx],
3243223637Sbz				    nk->port[pd->didx], 1, af);
3244223637Sbz				dport = pd->hdr.udp->uh_dport;
3245223637Sbz				pd->dport = &pd->hdr.udp->uh_dport;
3246223637Sbz			}
3247223637Sbz			rewrite++;
3248223637Sbz			break;
3249223637Sbz#ifdef INET
3250223637Sbz		case IPPROTO_ICMP:
3251223637Sbz			nk->port[0] = nk->port[1];
3252223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
3253223637Sbz				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3254223637Sbz				    nk->addr[pd->sidx].v4.s_addr, 0);
3255223637Sbz
3256223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3257223637Sbz				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3258223637Sbz				    nk->addr[pd->didx].v4.s_addr, 0);
3259223637Sbz
3260223637Sbz			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
3261223637Sbz				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3262223637Sbz				    pd->hdr.icmp->icmp_cksum, sport,
3263223637Sbz				    nk->port[1], 0);
3264223637Sbz				pd->hdr.icmp->icmp_id = nk->port[1];
3265223637Sbz				pd->sport = &pd->hdr.icmp->icmp_id;
3266223637Sbz			}
3267223637Sbz			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3268223637Sbz			break;
3269223637Sbz#endif /* INET */
3270223637Sbz#ifdef INET6
3271223637Sbz		case IPPROTO_ICMPV6:
3272223637Sbz			nk->port[0] = nk->port[1];
3273223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
3274223637Sbz				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3275223637Sbz				    &nk->addr[pd->sidx], 0);
3276223637Sbz
3277223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3278223637Sbz				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3279223637Sbz				    &nk->addr[pd->didx], 0);
3280223637Sbz			rewrite++;
3281223637Sbz			break;
3282223637Sbz#endif /* INET */
3283223637Sbz		default:
3284223637Sbz			switch (af) {
3285223637Sbz#ifdef INET
3286223637Sbz			case AF_INET:
3287223637Sbz				if (PF_ANEQ(saddr,
3288223637Sbz				    &nk->addr[pd->sidx], AF_INET))
3289223637Sbz					pf_change_a(&saddr->v4.s_addr,
3290223637Sbz					    pd->ip_sum,
3291223637Sbz					    nk->addr[pd->sidx].v4.s_addr, 0);
3292223637Sbz
3293223637Sbz				if (PF_ANEQ(daddr,
3294223637Sbz				    &nk->addr[pd->didx], AF_INET))
3295223637Sbz					pf_change_a(&daddr->v4.s_addr,
3296223637Sbz					    pd->ip_sum,
3297223637Sbz					    nk->addr[pd->didx].v4.s_addr, 0);
3298223637Sbz				break;
3299223637Sbz#endif /* INET */
3300223637Sbz#ifdef INET6
3301223637Sbz			case AF_INET6:
3302223637Sbz				if (PF_ANEQ(saddr,
3303223637Sbz				    &nk->addr[pd->sidx], AF_INET6))
3304223637Sbz					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
3305223637Sbz
3306223637Sbz				if (PF_ANEQ(daddr,
3307223637Sbz				    &nk->addr[pd->didx], AF_INET6))
3308223637Sbz					PF_ACPY(saddr, &nk->addr[pd->didx], af);
3309223637Sbz				break;
3310223637Sbz#endif /* INET */
3311223637Sbz			}
3312223637Sbz			break;
3313126258Smlaier		}
3314223637Sbz		if (nr->natpass)
3315223637Sbz			r = NULL;
3316223637Sbz		pd->nat_rule = nr;
3317126258Smlaier	}
3318126258Smlaier
3319126258Smlaier	while (r != NULL) {
3320126258Smlaier		r->evaluations++;
3321171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3322126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
3323126258Smlaier		else if (r->direction && r->direction != direction)
3324126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
3325126258Smlaier		else if (r->af && r->af != af)
3326126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
3327223637Sbz		else if (r->proto && r->proto != pd->proto)
3328126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
3329171168Smlaier		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3330231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
3331126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3332223637Sbz		/* tcp/udp only. port_op always 0 in other cases */
3333126258Smlaier		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3334223637Sbz		    r->src.port[0], r->src.port[1], sport))
3335126258Smlaier			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3336171168Smlaier		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3337231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
3338126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3339223637Sbz		/* tcp/udp only. port_op always 0 in other cases */
3340126258Smlaier		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3341223637Sbz		    r->dst.port[0], r->dst.port[1], dport))
3342126258Smlaier			r = r->skip[PF_SKIP_DST_PORT].ptr;
3343223637Sbz		/* icmp only. type always 0 in other cases */
3344223637Sbz		else if (r->type && r->type != icmptype + 1)
3345223637Sbz			r = TAILQ_NEXT(r, entries);
3346223637Sbz		/* icmp only. type always 0 in other cases */
3347223637Sbz		else if (r->code && r->code != icmpcode + 1)
3348223637Sbz			r = TAILQ_NEXT(r, entries);
3349171168Smlaier		else if (r->tos && !(r->tos == pd->tos))
3350126258Smlaier			r = TAILQ_NEXT(r, entries);
3351126258Smlaier		else if (r->rule_flag & PFRULE_FRAGMENT)
3352126258Smlaier			r = TAILQ_NEXT(r, entries);
3353223637Sbz		else if (pd->proto == IPPROTO_TCP &&
3354223637Sbz		    (r->flagset & th->th_flags) != r->flags)
3355126258Smlaier			r = TAILQ_NEXT(r, entries);
3356223637Sbz		/* tcp/udp only. uid.op always 0 in other cases */
3357171168Smlaier		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3358240233Sglebius		    pf_socket_lookup(direction, pd, m), 1)) &&
3359126258Smlaier		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3360171168Smlaier		    pd->lookup.uid))
3361126258Smlaier			r = TAILQ_NEXT(r, entries);
3362223637Sbz		/* tcp/udp only. gid.op always 0 in other cases */
3363171168Smlaier		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3364240233Sglebius		    pf_socket_lookup(direction, pd, m), 1)) &&
3365126258Smlaier		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3366171168Smlaier		    pd->lookup.gid))
3367126258Smlaier			r = TAILQ_NEXT(r, entries);
3368223637Sbz		else if (r->prob &&
3369223637Sbz		    r->prob <= arc4random())
3370126258Smlaier			r = TAILQ_NEXT(r, entries);
3371240233Sglebius		else if (r->match_tag && !pf_match_tag(m, r, &tag,
3372240233Sglebius		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
3373126258Smlaier			r = TAILQ_NEXT(r, entries);
3374223637Sbz		else if (r->os_fingerprint != PF_OSFP_ANY &&
3375223637Sbz		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3376223637Sbz		    pf_osfp_fingerprint(pd, m, off, th),
3377223637Sbz		    r->os_fingerprint)))
3378126258Smlaier			r = TAILQ_NEXT(r, entries);
3379126258Smlaier		else {
3380126258Smlaier			if (r->tag)
3381126258Smlaier				tag = r->tag;
3382171168Smlaier			if (r->rtableid >= 0)
3383171168Smlaier				rtableid = r->rtableid;
3384126258Smlaier			if (r->anchor == NULL) {
3385171168Smlaier				match = 1;
3386126258Smlaier				*rm = r;
3387126258Smlaier				*am = a;
3388126258Smlaier				*rsm = ruleset;
3389126258Smlaier				if ((*rm)->quick)
3390126258Smlaier					break;
3391126258Smlaier				r = TAILQ_NEXT(r, entries);
3392126258Smlaier			} else
3393240641Sglebius				pf_step_into_anchor(anchor_stack, &asd,
3394240641Sglebius				    &ruleset, PF_RULESET_FILTER, &r, &a,
3395240641Sglebius				    &match);
3396126258Smlaier		}
3397240641Sglebius		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
3398240641Sglebius		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
3399171168Smlaier			break;
3400126258Smlaier	}
3401126258Smlaier	r = *rm;
3402126258Smlaier	a = *am;
3403126258Smlaier	ruleset = *rsm;
3404126258Smlaier
3405126258Smlaier	REASON_SET(&reason, PFRES_MATCH);
3406126258Smlaier
3407223637Sbz	if (r->log || (nr != NULL && nr->log)) {
3408126258Smlaier		if (rewrite)
3409223637Sbz			m_copyback(m, off, hdrlen, pd->hdr.any);
3410240233Sglebius		PFLOG_PACKET(kif, m, af, direction, reason, r->log ? r : nr, a,
3411240233Sglebius		    ruleset, pd, 1);
3412126258Smlaier	}
3413126258Smlaier
3414126258Smlaier	if ((r->action == PF_DROP) &&
3415126258Smlaier	    ((r->rule_flag & PFRULE_RETURNRST) ||
3416126258Smlaier	    (r->rule_flag & PFRULE_RETURNICMP) ||
3417126258Smlaier	    (r->rule_flag & PFRULE_RETURN))) {
3418126258Smlaier		/* undo NAT changes, if they have taken place */
3419130613Smlaier		if (nr != NULL) {
3420223637Sbz			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3421223637Sbz			PF_ACPY(daddr, &sk->addr[pd->didx], af);
3422223637Sbz			if (pd->sport)
3423223637Sbz				*pd->sport = sk->port[pd->sidx];
3424223637Sbz			if (pd->dport)
3425223637Sbz				*pd->dport = sk->port[pd->didx];
3426223637Sbz			if (pd->proto_sum)
3427223637Sbz				*pd->proto_sum = bproto_sum;
3428223637Sbz			if (pd->ip_sum)
3429223637Sbz				*pd->ip_sum = bip_sum;
3430223637Sbz			m_copyback(m, off, hdrlen, pd->hdr.any);
3431126258Smlaier		}
3432223637Sbz		if (pd->proto == IPPROTO_TCP &&
3433223637Sbz		    ((r->rule_flag & PFRULE_RETURNRST) ||
3434126258Smlaier		    (r->rule_flag & PFRULE_RETURN)) &&
3435126258Smlaier		    !(th->th_flags & TH_RST)) {
3436223637Sbz			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
3437223637Sbz			int		 len = 0;
3438223637Sbz#ifdef INET
3439223637Sbz			struct ip	*h4;
3440223637Sbz#endif
3441223637Sbz#ifdef INET6
3442223637Sbz			struct ip6_hdr	*h6;
3443223637Sbz#endif
3444126258Smlaier
3445223637Sbz			switch (af) {
3446223637Sbz#ifdef INET
3447223637Sbz			case AF_INET:
3448223637Sbz				h4 = mtod(m, struct ip *);
3449223637Sbz				len = ntohs(h4->ip_len) - off;
3450223637Sbz				break;
3451223637Sbz#endif
3452223637Sbz#ifdef INET6
3453223637Sbz			case AF_INET6:
3454223637Sbz				h6 = mtod(m, struct ip6_hdr *);
3455223637Sbz				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
3456223637Sbz				break;
3457223637Sbz#endif
3458223637Sbz			}
3459223637Sbz
3460223637Sbz			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
3461223637Sbz				REASON_SET(&reason, PFRES_PROTCKSUM);
3462223637Sbz			else {
3463223637Sbz				if (th->th_flags & TH_SYN)
3464223637Sbz					ack++;
3465223637Sbz				if (th->th_flags & TH_FIN)
3466223637Sbz					ack++;
3467223637Sbz				pf_send_tcp(m, r, af, pd->dst,
3468223637Sbz				    pd->src, th->th_dport, th->th_sport,
3469223637Sbz				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3470240233Sglebius				    r->return_ttl, 1, 0, kif->pfik_ifp);
3471223637Sbz			}
3472223637Sbz		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3473223637Sbz		    r->return_icmp)
3474126258Smlaier			pf_send_icmp(m, r->return_icmp >> 8,
3475126258Smlaier			    r->return_icmp & 255, af, r);
3476223637Sbz		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3477223637Sbz		    r->return_icmp6)
3478126258Smlaier			pf_send_icmp(m, r->return_icmp6 >> 8,
3479126258Smlaier			    r->return_icmp6 & 255, af, r);
3480126258Smlaier	}
3481126258Smlaier
3482126258Smlaier	if (r->action == PF_DROP)
3483223637Sbz		goto cleanup;
3484126258Smlaier
3485240233Sglebius	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
3486126258Smlaier		REASON_SET(&reason, PFRES_MEMORY);
3487223637Sbz		goto cleanup;
3488126258Smlaier	}
3489240233Sglebius	if (rtableid >= 0)
3490240233Sglebius		M_SETFIB(m, rtableid);
3491126258Smlaier
3492223637Sbz	if (!state_icmp && (r->keep_state || nr != NULL ||
3493223637Sbz	    (pd->flags & PFDESC_TCP_NORM))) {
3494223637Sbz		int action;
3495240233Sglebius		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
3496240233Sglebius		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
3497240233Sglebius		    hdrlen);
3498223637Sbz		if (action != PF_PASS)
3499223637Sbz			return (action);
3500223637Sbz	} else {
3501223637Sbz		if (sk != NULL)
3502240233Sglebius			uma_zfree(V_pf_state_key_z, sk);
3503223637Sbz		if (nk != NULL)
3504240233Sglebius			uma_zfree(V_pf_state_key_z, nk);
3505223637Sbz	}
3506126258Smlaier
3507223637Sbz	/* copy back packet headers if we performed NAT operations */
3508223637Sbz	if (rewrite)
3509223637Sbz		m_copyback(m, off, hdrlen, pd->hdr.any);
3510130613Smlaier
3511240233Sglebius	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
3512240233Sglebius	    direction == PF_OUT &&
3513240233Sglebius	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
3514223637Sbz		/*
3515223637Sbz		 * We want the state created, but we dont
3516223637Sbz		 * want to send this in case a partner
3517223637Sbz		 * firewall has to know about it to allow
3518223637Sbz		 * replies through it.
3519223637Sbz		 */
3520240233Sglebius		return (PF_DEFER);
3521223637Sbz
3522223637Sbz	return (PF_PASS);
3523223637Sbz
3524130613Smlaiercleanup:
3525223637Sbz	if (sk != NULL)
3526240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
3527223637Sbz	if (nk != NULL)
3528240233Sglebius		uma_zfree(V_pf_state_key_z, nk);
3529223637Sbz	return (PF_DROP);
3530223637Sbz}
3531126258Smlaier
3532240233Sglebiusstatic int
3533223637Sbzpf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
3534240233Sglebius    struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk,
3535240233Sglebius    struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
3536240233Sglebius    u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm,
3537240233Sglebius    int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
3538223637Sbz{
3539223637Sbz	struct pf_state		*s = NULL;
3540223637Sbz	struct pf_src_node	*sn = NULL;
3541223637Sbz	struct tcphdr		*th = pd->hdr.tcp;
3542223637Sbz	u_int16_t		 mss = V_tcp_mssdflt;
3543223637Sbz	u_short			 reason;
3544223637Sbz
3545223637Sbz	/* check maximums */
3546263029Sglebius	if (r->max_states &&
3547263029Sglebius	    (counter_u64_fetch(r->states_cur) >= r->max_states)) {
3548270574Sglebius		counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
3549223637Sbz		REASON_SET(&reason, PFRES_MAXSTATES);
3550316641Skp		goto csfailed;
3551223637Sbz	}
3552223637Sbz	/* src node for filter rule */
3553223637Sbz	if ((r->rule_flag & PFRULE_SRCTRACK ||
3554223637Sbz	    r->rpool.opts & PF_POOL_STICKYADDR) &&
3555223637Sbz	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
3556223637Sbz		REASON_SET(&reason, PFRES_SRCLIMIT);
3557223637Sbz		goto csfailed;
3558223637Sbz	}
3559223637Sbz	/* src node for translation rule */
3560223637Sbz	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3561223637Sbz	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
3562223637Sbz		REASON_SET(&reason, PFRES_SRCLIMIT);
3563223637Sbz		goto csfailed;
3564223637Sbz	}
3565240233Sglebius	s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO);
3566223637Sbz	if (s == NULL) {
3567223637Sbz		REASON_SET(&reason, PFRES_MEMORY);
3568223637Sbz		goto csfailed;
3569223637Sbz	}
3570223637Sbz	s->rule.ptr = r;
3571223637Sbz	s->nat_rule.ptr = nr;
3572223637Sbz	s->anchor.ptr = a;
3573223637Sbz	STATE_INC_COUNTERS(s);
3574223637Sbz	if (r->allow_opts)
3575223637Sbz		s->state_flags |= PFSTATE_ALLOWOPTS;
3576223637Sbz	if (r->rule_flag & PFRULE_STATESLOPPY)
3577223637Sbz		s->state_flags |= PFSTATE_SLOPPY;
3578223637Sbz	s->log = r->log & PF_LOG_ALL;
3579223637Sbz	s->sync_state = PFSYNC_S_NONE;
3580223637Sbz	if (nr != NULL)
3581223637Sbz		s->log |= nr->log & PF_LOG_ALL;
3582223637Sbz	switch (pd->proto) {
3583223637Sbz	case IPPROTO_TCP:
3584126258Smlaier		s->src.seqlo = ntohl(th->th_seq);
3585223637Sbz		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
3586126258Smlaier		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3587126258Smlaier		    r->keep_state == PF_STATE_MODULATE) {
3588126258Smlaier			/* Generate sequence number modulator */
3589223637Sbz			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
3590223637Sbz			    0)
3591223637Sbz				s->src.seqdiff = 1;
3592289703Skp			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
3593126258Smlaier			    htonl(s->src.seqlo + s->src.seqdiff), 0);
3594223637Sbz			*rewrite = 1;
3595126258Smlaier		} else
3596126258Smlaier			s->src.seqdiff = 0;
3597126258Smlaier		if (th->th_flags & TH_SYN) {
3598126258Smlaier			s->src.seqhi++;
3599223637Sbz			s->src.wscale = pf_get_wscale(m, off,
3600223637Sbz			    th->th_off, pd->af);
3601126258Smlaier		}
3602126258Smlaier		s->src.max_win = MAX(ntohs(th->th_win), 1);
3603126258Smlaier		if (s->src.wscale & PF_WSCALE_MASK) {
3604126258Smlaier			/* Remove scale factor from initial window */
3605126258Smlaier			int win = s->src.max_win;
3606126258Smlaier			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3607126258Smlaier			s->src.max_win = (win - 1) >>
3608126258Smlaier			    (s->src.wscale & PF_WSCALE_MASK);
3609126258Smlaier		}
3610126258Smlaier		if (th->th_flags & TH_FIN)
3611126258Smlaier			s->src.seqhi++;
3612126258Smlaier		s->dst.seqhi = 1;
3613126258Smlaier		s->dst.max_win = 1;
3614126258Smlaier		s->src.state = TCPS_SYN_SENT;
3615126258Smlaier		s->dst.state = TCPS_CLOSED;
3616126258Smlaier		s->timeout = PFTM_TCP_FIRST_PACKET;
3617223637Sbz		break;
3618223637Sbz	case IPPROTO_UDP:
3619223637Sbz		s->src.state = PFUDPS_SINGLE;
3620223637Sbz		s->dst.state = PFUDPS_NO_TRAFFIC;
3621223637Sbz		s->timeout = PFTM_UDP_FIRST_PACKET;
3622223637Sbz		break;
3623223637Sbz	case IPPROTO_ICMP:
3624223637Sbz#ifdef INET6
3625223637Sbz	case IPPROTO_ICMPV6:
3626223637Sbz#endif
3627223637Sbz		s->timeout = PFTM_ICMP_FIRST_PACKET;
3628223637Sbz		break;
3629223637Sbz	default:
3630223637Sbz		s->src.state = PFOTHERS_SINGLE;
3631223637Sbz		s->dst.state = PFOTHERS_NO_TRAFFIC;
3632223637Sbz		s->timeout = PFTM_OTHER_FIRST_PACKET;
3633223637Sbz	}
3634223637Sbz
3635270576Sglebius	if (r->rt && r->rt != PF_FASTROUTE) {
3636270576Sglebius		if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) {
3637270925Sglebius			REASON_SET(&reason, PFRES_BADSTATE);
3638270576Sglebius			pf_src_tree_remove_state(s);
3639270576Sglebius			STATE_DEC_COUNTERS(s);
3640270576Sglebius			uma_zfree(V_pf_state_z, s);
3641270576Sglebius			goto csfailed;
3642270576Sglebius		}
3643270576Sglebius		s->rt_kif = r->rpool.cur->kif;
3644270576Sglebius	}
3645270576Sglebius
3646240233Sglebius	s->creation = time_uptime;
3647240233Sglebius	s->expire = time_uptime;
3648223637Sbz
3649285940Sglebius	if (sn != NULL)
3650223637Sbz		s->src_node = sn;
3651223637Sbz	if (nsn != NULL) {
3652223637Sbz		/* XXX We only modify one side for now. */
3653223637Sbz		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
3654223637Sbz		s->nat_src_node = nsn;
3655223637Sbz	}
3656223637Sbz	if (pd->proto == IPPROTO_TCP) {
3657126258Smlaier		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3658126258Smlaier		    off, pd, th, &s->src, &s->dst)) {
3659126258Smlaier			REASON_SET(&reason, PFRES_MEMORY);
3660130613Smlaier			pf_src_tree_remove_state(s);
3661145836Smlaier			STATE_DEC_COUNTERS(s);
3662240233Sglebius			uma_zfree(V_pf_state_z, s);
3663126258Smlaier			return (PF_DROP);
3664126258Smlaier		}
3665126258Smlaier		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3666145836Smlaier		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3667223637Sbz		    &s->src, &s->dst, rewrite)) {
3668145836Smlaier			/* This really shouldn't happen!!! */
3669145836Smlaier			DPFPRINTF(PF_DEBUG_URGENT,
3670145836Smlaier			    ("pf_normalize_tcp_stateful failed on first pkt"));
3671126258Smlaier			pf_normalize_tcp_cleanup(s);
3672130613Smlaier			pf_src_tree_remove_state(s);
3673145836Smlaier			STATE_DEC_COUNTERS(s);
3674240233Sglebius			uma_zfree(V_pf_state_z, s);
3675223637Sbz			return (PF_DROP);
3676126258Smlaier		}
3677126258Smlaier	}
3678223637Sbz	s->direction = pd->dir;
3679126258Smlaier
3680240233Sglebius	/*
3681240233Sglebius	 * sk/nk could already been setup by pf_get_translation().
3682240233Sglebius	 */
3683240233Sglebius	if (nr == NULL) {
3684240233Sglebius		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
3685240233Sglebius		    __func__, nr, sk, nk));
3686240233Sglebius		sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
3687240233Sglebius		if (sk == NULL)
3688240233Sglebius			goto csfailed;
3689240233Sglebius		nk = sk;
3690240233Sglebius	} else
3691240233Sglebius		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
3692240233Sglebius		    __func__, nr, sk, nk));
3693126258Smlaier
3694240233Sglebius	/* Swap sk/nk for PF_OUT. */
3695240233Sglebius	if (pf_state_insert(BOUND_IFACE(r, kif),
3696240233Sglebius	    (pd->dir == PF_IN) ? sk : nk,
3697240233Sglebius	    (pd->dir == PF_IN) ? nk : sk, s)) {
3698223637Sbz		if (pd->proto == IPPROTO_TCP)
3699223637Sbz			pf_normalize_tcp_cleanup(s);
3700223637Sbz		REASON_SET(&reason, PFRES_STATEINS);
3701223637Sbz		pf_src_tree_remove_state(s);
3702223637Sbz		STATE_DEC_COUNTERS(s);
3703240233Sglebius		uma_zfree(V_pf_state_z, s);
3704223637Sbz		return (PF_DROP);
3705223637Sbz	} else
3706223637Sbz		*sm = s;
3707126258Smlaier
3708240233Sglebius	if (tag > 0)
3709223637Sbz		s->tag = tag;
3710223637Sbz	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
3711223637Sbz	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
3712223637Sbz		s->src.state = PF_TCPS_PROXY_SRC;
3713223637Sbz		/* undo NAT changes, if they have taken place */
3714223637Sbz		if (nr != NULL) {
3715223637Sbz			struct pf_state_key *skt = s->key[PF_SK_WIRE];
3716223637Sbz			if (pd->dir == PF_OUT)
3717223637Sbz				skt = s->key[PF_SK_STACK];
3718223637Sbz			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
3719223637Sbz			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
3720223637Sbz			if (pd->sport)
3721223637Sbz				*pd->sport = skt->port[pd->sidx];
3722223637Sbz			if (pd->dport)
3723223637Sbz				*pd->dport = skt->port[pd->didx];
3724223637Sbz			if (pd->proto_sum)
3725223637Sbz				*pd->proto_sum = bproto_sum;
3726223637Sbz			if (pd->ip_sum)
3727223637Sbz				*pd->ip_sum = bip_sum;
3728223637Sbz			m_copyback(m, off, hdrlen, pd->hdr.any);
3729223637Sbz		}
3730223637Sbz		s->src.seqhi = htonl(arc4random());
3731223637Sbz		/* Find mss option */
3732231852Sbz		int rtid = M_GETFIB(m);
3733223637Sbz		mss = pf_get_mss(m, off, th->th_off, pd->af);
3734231852Sbz		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
3735231852Sbz		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
3736223637Sbz		s->src.mss = mss;
3737223637Sbz		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
3738223637Sbz		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3739240233Sglebius		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
3740223637Sbz		REASON_SET(&reason, PFRES_SYNPROXY);
3741223637Sbz		return (PF_SYNPROXY_DROP);
3742171168Smlaier	}
3743165631Smlaier
3744223637Sbz	return (PF_PASS);
3745126258Smlaier
3746223637Sbzcsfailed:
3747223637Sbz	if (sk != NULL)
3748240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
3749223637Sbz	if (nk != NULL)
3750240233Sglebius		uma_zfree(V_pf_state_key_z, nk);
3751223637Sbz
3752285940Sglebius	if (sn != NULL) {
3753285940Sglebius		struct pf_srchash *sh;
3754285940Sglebius
3755285940Sglebius		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
3756285940Sglebius		PF_HASHROW_LOCK(sh);
3757285940Sglebius		if (--sn->states == 0 && sn->expire == 0) {
3758285940Sglebius			pf_unlink_src_node(sn);
3759285940Sglebius			uma_zfree(V_pf_sources_z, sn);
3760285940Sglebius			counter_u64_add(
3761285940Sglebius			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
3762285940Sglebius		}
3763285940Sglebius		PF_HASHROW_UNLOCK(sh);
3764261019Sglebius	}
3765240737Sglebius
3766285940Sglebius	if (nsn != sn && nsn != NULL) {
3767285940Sglebius		struct pf_srchash *sh;
3768285940Sglebius
3769285940Sglebius		sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
3770285940Sglebius		PF_HASHROW_LOCK(sh);
3771286004Sglebius		if (--nsn->states == 0 && nsn->expire == 0) {
3772285940Sglebius			pf_unlink_src_node(nsn);
3773285940Sglebius			uma_zfree(V_pf_sources_z, nsn);
3774285940Sglebius			counter_u64_add(
3775285940Sglebius			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
3776285940Sglebius		}
3777285940Sglebius		PF_HASHROW_UNLOCK(sh);
3778261019Sglebius	}
3779240737Sglebius
3780223637Sbz	return (PF_DROP);
3781126258Smlaier}
3782126258Smlaier
3783240233Sglebiusstatic int
3784130613Smlaierpf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3785126258Smlaier    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3786126258Smlaier    struct pf_ruleset **rsm)
3787126258Smlaier{
3788126258Smlaier	struct pf_rule		*r, *a = NULL;
3789126258Smlaier	struct pf_ruleset	*ruleset = NULL;
3790126258Smlaier	sa_family_t		 af = pd->af;
3791126258Smlaier	u_short			 reason;
3792126258Smlaier	int			 tag = -1;
3793145836Smlaier	int			 asd = 0;
3794171168Smlaier	int			 match = 0;
3795240641Sglebius	struct pf_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
3796126258Smlaier
3797240233Sglebius	PF_RULES_RASSERT();
3798240233Sglebius
3799126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3800126258Smlaier	while (r != NULL) {
3801126258Smlaier		r->evaluations++;
3802171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3803126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
3804126258Smlaier		else if (r->direction && r->direction != direction)
3805126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
3806126258Smlaier		else if (r->af && r->af != af)
3807126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
3808126258Smlaier		else if (r->proto && r->proto != pd->proto)
3809126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
3810171168Smlaier		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3811231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
3812126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3813171168Smlaier		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3814231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
3815126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3816171168Smlaier		else if (r->tos && !(r->tos == pd->tos))
3817126258Smlaier			r = TAILQ_NEXT(r, entries);
3818173815Smlaier		else if (r->os_fingerprint != PF_OSFP_ANY)
3819126258Smlaier			r = TAILQ_NEXT(r, entries);
3820173815Smlaier		else if (pd->proto == IPPROTO_UDP &&
3821173815Smlaier		    (r->src.port_op || r->dst.port_op))
3822173815Smlaier			r = TAILQ_NEXT(r, entries);
3823173815Smlaier		else if (pd->proto == IPPROTO_TCP &&
3824173815Smlaier		    (r->src.port_op || r->dst.port_op || r->flagset))
3825173815Smlaier			r = TAILQ_NEXT(r, entries);
3826173815Smlaier		else if ((pd->proto == IPPROTO_ICMP ||
3827173815Smlaier		    pd->proto == IPPROTO_ICMPV6) &&
3828173815Smlaier		    (r->type || r->code))
3829173815Smlaier			r = TAILQ_NEXT(r, entries);
3830223637Sbz		else if (r->prob && r->prob <=
3831223637Sbz		    (arc4random() % (UINT_MAX - 1) + 1))
3832126258Smlaier			r = TAILQ_NEXT(r, entries);
3833240233Sglebius		else if (r->match_tag && !pf_match_tag(m, r, &tag,
3834240233Sglebius		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
3835126258Smlaier			r = TAILQ_NEXT(r, entries);
3836126258Smlaier		else {
3837126258Smlaier			if (r->anchor == NULL) {
3838171168Smlaier				match = 1;
3839126258Smlaier				*rm = r;
3840126258Smlaier				*am = a;
3841126258Smlaier				*rsm = ruleset;
3842126258Smlaier				if ((*rm)->quick)
3843126258Smlaier					break;
3844126258Smlaier				r = TAILQ_NEXT(r, entries);
3845126258Smlaier			} else
3846240641Sglebius				pf_step_into_anchor(anchor_stack, &asd,
3847240641Sglebius				    &ruleset, PF_RULESET_FILTER, &r, &a,
3848240641Sglebius				    &match);
3849126258Smlaier		}
3850240641Sglebius		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
3851240641Sglebius		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
3852171168Smlaier			break;
3853126258Smlaier	}
3854126258Smlaier	r = *rm;
3855126258Smlaier	a = *am;
3856126258Smlaier	ruleset = *rsm;
3857126258Smlaier
3858126258Smlaier	REASON_SET(&reason, PFRES_MATCH);
3859130613Smlaier
3860126258Smlaier	if (r->log)
3861240233Sglebius		PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd,
3862240233Sglebius		    1);
3863126258Smlaier
3864126258Smlaier	if (r->action != PF_PASS)
3865126258Smlaier		return (PF_DROP);
3866126258Smlaier
3867240233Sglebius	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
3868126258Smlaier		REASON_SET(&reason, PFRES_MEMORY);
3869126258Smlaier		return (PF_DROP);
3870126258Smlaier	}
3871126258Smlaier
3872126258Smlaier	return (PF_PASS);
3873126258Smlaier}
3874126258Smlaier
/*
 * pf_tcp_track_full: strict TCP sequence-window tracking for one segment.
 *
 * src is the state peer for the end that sent this segment and dst is the
 * opposite peer (the caller selects them by comparing the packet direction
 * with the state's direction).  The segment's sequence range and ACK number
 * are checked against the windows recorded in both peers.  When the segment
 * is accepted, the peers' seqlo/seqhi/max_win and TCP states are updated;
 * the strict-match branch also refreshes the state's expire and timeout.
 *
 * When a sequence modulator (seqdiff) is in effect, th_seq/th_ack in the
 * header pointed to by pd->hdr.tcp are rewritten and *copyback is set to 1.
 * *copyback is also set when pf_modulate_sack() returns non-zero, and the
 * pointer is handed on to pf_normalize_tcp_stateful().
 *
 * Returns PF_PASS when the segment is accepted (strict or "loose" match) and
 * PF_DROP otherwise.  The drop paths in this function set *reason to
 * PFRES_MEMORY, PFRES_SRCLIMIT or PFRES_BADSTATE; pf_normalize_tcp_stateful()
 * receives reason as well.
 */
3875240233Sglebiusstatic int
3876200930Sdelphijpf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
3877200930Sdelphij	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
3878200930Sdelphij	struct pf_pdesc *pd, u_short *reason, int *copyback)
3879126258Smlaier{
3880223637Sbz	struct tcphdr		*th = pd->hdr.tcp;
3881223637Sbz	u_int16_t		 win = ntohs(th->th_win);
3882223637Sbz	u_int32_t		 ack, end, seq, orig_seq;
3883223637Sbz	u_int8_t		 sws, dws;
3884223637Sbz	int			 ackskew;
3885126258Smlaier
	/*
	 * Window-scale shift counts are only used once both peers have a
	 * scale recorded, and never for SYN segments; otherwise both are 0.
	 */
3886126258Smlaier	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3887126258Smlaier		sws = src->wscale & PF_WSCALE_MASK;
3888126258Smlaier		dws = dst->wscale & PF_WSCALE_MASK;
3889126258Smlaier	} else
3890126258Smlaier		sws = dws = 0;
3891126258Smlaier
3892126258Smlaier	/*
3893126258Smlaier	 * Sequence tracking algorithm from Guido van Rooij's paper:
3894126258Smlaier	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
3895126258Smlaier	 *	tcp_filtering.ps
3896126258Smlaier	 */
3897126258Smlaier
	/* orig_seq keeps the on-wire value; seq may be relaxed further down. */
3898145836Smlaier	orig_seq = seq = ntohl(th->th_seq);
3899126258Smlaier	if (src->seqlo == 0) {
3900126258Smlaier		/* First packet from this end. Set its state */
3901126258Smlaier
3902126258Smlaier		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3903126258Smlaier		    src->scrub == NULL) {
3904126258Smlaier			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3905126258Smlaier				REASON_SET(reason, PFRES_MEMORY);
3906126258Smlaier				return (PF_DROP);
3907126258Smlaier			}
3908126258Smlaier		}
3909126258Smlaier
3910126258Smlaier		/* Deferred generation of sequence number modulator */
3911126258Smlaier		if (dst->seqdiff && !src->seqdiff) {
3912223637Sbz			/* use random iss for the TCP server */
			/* Loop until the modulator is non-zero (0 means "none"). */
3913223637Sbz			while ((src->seqdiff = arc4random() - seq) == 0)
3914126258Smlaier				;
3915126258Smlaier			ack = ntohl(th->th_ack) - dst->seqdiff;
3916289703Skp			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
3917126258Smlaier			    src->seqdiff), 0);
3918289703Skp			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
3919200930Sdelphij			*copyback = 1;
3920126258Smlaier		} else {
3921126258Smlaier			ack = ntohl(th->th_ack);
3922126258Smlaier		}
3923126258Smlaier
		/*
		 * end is the sequence number just past this segment; SYN and
		 * FIN each add one to it.
		 * NOTE(review): pd->p_len is presumably the TCP payload
		 * length -- confirm where pd is filled in.
		 */
3924126258Smlaier		end = seq + pd->p_len;
3925126258Smlaier		if (th->th_flags & TH_SYN) {
3926126258Smlaier			end++;
3927126258Smlaier			if (dst->wscale & PF_WSCALE_FLAG) {
3928126258Smlaier				src->wscale = pf_get_wscale(m, off, th->th_off,
3929126258Smlaier				    pd->af);
3930126258Smlaier				if (src->wscale & PF_WSCALE_FLAG) {
3931126258Smlaier					/* Remove scale factor from initial
3932126258Smlaier					 * window */
3933126258Smlaier					sws = src->wscale & PF_WSCALE_MASK;
3934126258Smlaier					win = ((u_int32_t)win + (1 << sws) - 1)
3935126258Smlaier					    >> sws;
3936126258Smlaier					dws = dst->wscale & PF_WSCALE_MASK;
3937126258Smlaier				} else {
3938126258Smlaier					/* fixup other window */
3939126258Smlaier					dst->max_win <<= dst->wscale &
3940126258Smlaier					    PF_WSCALE_MASK;
3941126258Smlaier					/* in case of a retrans SYN|ACK */
3942126258Smlaier					dst->wscale = 0;
3943126258Smlaier				}
3944126258Smlaier			}
3945126258Smlaier		}
3946126258Smlaier		if (th->th_flags & TH_FIN)
3947126258Smlaier			end++;
3948126258Smlaier
3949126258Smlaier		src->seqlo = seq;
3950126258Smlaier		if (src->state < TCPS_SYN_SENT)
3951126258Smlaier			src->state = TCPS_SYN_SENT;
3952126258Smlaier
3953126258Smlaier		/*
3954126258Smlaier		 * May need to slide the window (seqhi may have been set by
3955126258Smlaier		 * the crappy stack check or if we picked up the connection
3956126258Smlaier		 * after establishment)
3957126258Smlaier		 */
3958126258Smlaier		if (src->seqhi == 1 ||
3959126258Smlaier		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
3960126258Smlaier			src->seqhi = end + MAX(1, dst->max_win << dws);
3961126258Smlaier		if (win > src->max_win)
3962126258Smlaier			src->max_win = win;
3963126258Smlaier
3964126258Smlaier	} else {
		/* Undo the peer's modulation so ack is in the peer's own space. */
3965126258Smlaier		ack = ntohl(th->th_ack) - dst->seqdiff;
3966126258Smlaier		if (src->seqdiff) {
3967126258Smlaier			/* Modulate sequence numbers */
3968289703Skp			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
3969126258Smlaier			    src->seqdiff), 0);
3970289703Skp			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
3971200930Sdelphij			*copyback = 1;
3972126258Smlaier		}
3973126258Smlaier		end = seq + pd->p_len;
3974126258Smlaier		if (th->th_flags & TH_SYN)
3975126258Smlaier			end++;
3976126258Smlaier		if (th->th_flags & TH_FIN)
3977126258Smlaier			end++;
3978126258Smlaier	}
3979126258Smlaier
3980126258Smlaier	if ((th->th_flags & TH_ACK) == 0) {
3981126258Smlaier		/* Let it pass through the ack skew check */
3982126258Smlaier		ack = dst->seqlo;
3983126258Smlaier	} else if ((ack == 0 &&
3984126258Smlaier	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
3985126258Smlaier	    /* broken tcp stacks do not set ack */
3986126258Smlaier	    (dst->state < TCPS_SYN_SENT)) {
3987126258Smlaier		/*
3988126258Smlaier		 * Many stacks (ours included) will set the ACK number in an
3989126258Smlaier		 * FIN|ACK if the SYN times out -- no sequence to ACK.
3990126258Smlaier		 */
3991126258Smlaier		ack = dst->seqlo;
3992126258Smlaier	}
3993126258Smlaier
3994126258Smlaier	if (seq == end) {
3995126258Smlaier		/* Ease sequencing restrictions on no data packets */
3996126258Smlaier		seq = src->seqlo;
3997126258Smlaier		end = seq;
3998126258Smlaier	}
3999126258Smlaier
	/*
	 * ackskew > 0: the ACK is behind what dst has sent so far;
	 * ackskew < 0: the ACK is ahead of it.
	 */
4000126258Smlaier	ackskew = dst->seqlo - ack;
4001126258Smlaier
4002171168Smlaier
4003171168Smlaier	/*
4004171168Smlaier	 * Need to demodulate the sequence numbers in any TCP SACK options
4005171168Smlaier	 * (Selective ACK). We could optionally validate the SACK values
4006171168Smlaier	 * against the current ACK window, either forwards or backwards, but
4007171168Smlaier	 * I'm not confident that SACK has been implemented properly
4008171168Smlaier	 * everywhere. It wouldn't surprise me if several stacks accidentally
4009171168Smlaier	 * SACK too far backwards of previously ACKed data. There really aren't
4010171168Smlaier	 * any security implications of bad SACKing unless the target stack
4011171168Smlaier	 * doesn't validate the option length correctly. Someone trying to
4012171168Smlaier	 * spoof into a TCP connection won't bother blindly sending SACK
4013171168Smlaier	 * options anyway.
4014171168Smlaier	 */
	/* Only look for options when the header is longer than the base header. */
4015171168Smlaier	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4016171168Smlaier		if (pf_modulate_sack(m, off, pd, th, dst))
4017200930Sdelphij			*copyback = 1;
4018171168Smlaier	}
4019171168Smlaier
4020171168Smlaier
	/*
	 * Strict match.  In the condition below each explanatory comment
	 * follows the clause it describes.
	 */
4021223637Sbz#define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4022126258Smlaier	if (SEQ_GEQ(src->seqhi, end) &&
4023126258Smlaier	    /* Last octet inside other's window space */
4024126258Smlaier	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4025126258Smlaier	    /* Retrans: not more than one window back */
4026126258Smlaier	    (ackskew >= -MAXACKWINDOW) &&
4027126258Smlaier	    /* Acking not more than one reassembled fragment backwards */
4028145836Smlaier	    (ackskew <= (MAXACKWINDOW << sws)) &&
4029126258Smlaier	    /* Acking not more than one window forward */
4030145836Smlaier	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4031223637Sbz	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
4032223637Sbz	    (pd->flags & PFDESC_IP_REAS) == 0)) {
4033171168Smlaier	    /* Require an exact/+1 sequence match on resets when possible */
4034126258Smlaier
4035145836Smlaier		if (dst->scrub || src->scrub) {
4036145836Smlaier			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4037200930Sdelphij			    *state, src, dst, copyback))
4038145836Smlaier				return (PF_DROP);
4039145836Smlaier		}
4040145836Smlaier
4041126258Smlaier		/* update max window */
4042126258Smlaier		if (src->max_win < win)
4043126258Smlaier			src->max_win = win;
4044126258Smlaier		/* synchronize sequencing */
4045126258Smlaier		if (SEQ_GT(end, src->seqlo))
4046126258Smlaier			src->seqlo = end;
4047126258Smlaier		/* slide the window of what the other end can send */
4048126258Smlaier		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4049126258Smlaier			dst->seqhi = ack + MAX((win << sws), 1);
4050126258Smlaier
4051126258Smlaier
4052126258Smlaier		/* update states */
4053126258Smlaier		if (th->th_flags & TH_SYN)
4054126258Smlaier			if (src->state < TCPS_SYN_SENT)
4055126258Smlaier				src->state = TCPS_SYN_SENT;
4056126258Smlaier		if (th->th_flags & TH_FIN)
4057126258Smlaier			if (src->state < TCPS_CLOSING)
4058126258Smlaier				src->state = TCPS_CLOSING;
4059126258Smlaier		if (th->th_flags & TH_ACK) {
4060145836Smlaier			if (dst->state == TCPS_SYN_SENT) {
4061126258Smlaier				dst->state = TCPS_ESTABLISHED;
				/*
				 * Both peers established: apply the source
				 * node's connection limit, if there is one.
				 */
4062145836Smlaier				if (src->state == TCPS_ESTABLISHED &&
4063145836Smlaier				    (*state)->src_node != NULL &&
4064145836Smlaier				    pf_src_connlimit(state)) {
4065145836Smlaier					REASON_SET(reason, PFRES_SRCLIMIT);
4066145836Smlaier					return (PF_DROP);
4067145836Smlaier				}
4068145836Smlaier			} else if (dst->state == TCPS_CLOSING)
4069126258Smlaier				dst->state = TCPS_FIN_WAIT_2;
4070126258Smlaier		}
4071126258Smlaier		if (th->th_flags & TH_RST)
4072126258Smlaier			src->state = dst->state = TCPS_TIME_WAIT;
4073126258Smlaier
4074126258Smlaier		/* update expire time */
		/* The order of the tests below decides which timeout wins. */
4075240233Sglebius		(*state)->expire = time_uptime;
4076126258Smlaier		if (src->state >= TCPS_FIN_WAIT_2 &&
4077126258Smlaier		    dst->state >= TCPS_FIN_WAIT_2)
4078126258Smlaier			(*state)->timeout = PFTM_TCP_CLOSED;
4079171168Smlaier		else if (src->state >= TCPS_CLOSING &&
4080171168Smlaier		    dst->state >= TCPS_CLOSING)
4081126258Smlaier			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4082126258Smlaier		else if (src->state < TCPS_ESTABLISHED ||
4083126258Smlaier		    dst->state < TCPS_ESTABLISHED)
4084126258Smlaier			(*state)->timeout = PFTM_TCP_OPENING;
4085126258Smlaier		else if (src->state >= TCPS_CLOSING ||
4086126258Smlaier		    dst->state >= TCPS_CLOSING)
4087126258Smlaier			(*state)->timeout = PFTM_TCP_CLOSING;
4088126258Smlaier		else
4089126258Smlaier			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4090126258Smlaier
4091126258Smlaier		/* Fall through to PASS packet */
4092126258Smlaier
4093126258Smlaier	} else if ((dst->state < TCPS_SYN_SENT ||
4094126258Smlaier		dst->state >= TCPS_FIN_WAIT_2 ||
4095126258Smlaier		src->state >= TCPS_FIN_WAIT_2) &&
4096126258Smlaier	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4097126258Smlaier	    /* Within a window forward of the originating packet */
4098126258Smlaier	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4099126258Smlaier	    /* Within a window backward of the originating packet */
4100126258Smlaier
4101126258Smlaier		/*
4102126258Smlaier		 * This currently handles three situations:
4103126258Smlaier		 *  1) Stupid stacks will shotgun SYNs before their peer
4104126258Smlaier		 *     replies.
4105126258Smlaier		 *  2) When PF catches an already established stream (the
4106126258Smlaier		 *     firewall rebooted, the state table was flushed, routes
4107126258Smlaier		 *     changed...)
4108126258Smlaier		 *  3) Packets get funky immediately after the connection
4109126258Smlaier		 *     closes (this should catch Solaris spurious ACK|FINs
4110126258Smlaier		 *     that web servers like to spew after a close)
4111126258Smlaier		 *
4112126258Smlaier		 * This must be a little more careful than the above code
4113126258Smlaier		 * since packet floods will also be caught here. We don't
4114126258Smlaier		 * update the TTL here to mitigate the damage of a packet
4115126258Smlaier		 * flood and so the same code can handle awkward establishment
4116126258Smlaier		 * and a loosened connection close.
4117126258Smlaier		 * In the establishment case, a correct peer response will
4118126258Smlaier		 * validate the connection, go through the normal state code
4119126258Smlaier		 * and keep updating the state TTL.
4120126258Smlaier		 */
4121126258Smlaier
4122223637Sbz		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4123126258Smlaier			printf("pf: loose state match: ");
4124126258Smlaier			pf_print_state(*state);
4125126258Smlaier			pf_print_flags(th->th_flags);
4126171168Smlaier			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4127223637Sbz			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
4128223637Sbz			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
4129223637Sbz			    (unsigned long long)(*state)->packets[1],
4130223637Sbz			    pd->dir == PF_IN ? "in" : "out",
4131223637Sbz			    pd->dir == (*state)->direction ? "fwd" : "rev");
4132126258Smlaier		}
4133126258Smlaier
4134145836Smlaier		if (dst->scrub || src->scrub) {
4135145836Smlaier			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4136200930Sdelphij			    *state, src, dst, copyback))
4137145836Smlaier				return (PF_DROP);
4138145836Smlaier		}
4139145836Smlaier
4140126258Smlaier		/* update max window */
4141126258Smlaier		if (src->max_win < win)
4142126258Smlaier			src->max_win = win;
4143126258Smlaier		/* synchronize sequencing */
4144126258Smlaier		if (SEQ_GT(end, src->seqlo))
4145126258Smlaier			src->seqlo = end;
4146126258Smlaier		/* slide the window of what the other end can send */
4147126258Smlaier		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4148126258Smlaier			dst->seqhi = ack + MAX((win << sws), 1);
4149126258Smlaier
4150126258Smlaier		/*
4151126258Smlaier		 * Cannot set dst->seqhi here since this could be a shotgunned
4152126258Smlaier		 * SYN and not an already established connection.
4153126258Smlaier		 */
4154126258Smlaier
		/* Unlike the strict branch, expire/timeout are left untouched. */
4155126258Smlaier		if (th->th_flags & TH_FIN)
4156126258Smlaier			if (src->state < TCPS_CLOSING)
4157126258Smlaier				src->state = TCPS_CLOSING;
4158126258Smlaier		if (th->th_flags & TH_RST)
4159126258Smlaier			src->state = dst->state = TCPS_TIME_WAIT;
4160126258Smlaier
4161126258Smlaier		/* Fall through to PASS packet */
4162126258Smlaier
4163126258Smlaier	} else {
4164126258Smlaier		if ((*state)->dst.state == TCPS_SYN_SENT &&
4165126258Smlaier		    (*state)->src.state == TCPS_SYN_SENT) {
4166126258Smlaier			/* Send RST for state mismatches during handshake */
4167145836Smlaier			if (!(th->th_flags & TH_RST))
4168223637Sbz				pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4169126258Smlaier				    pd->dst, pd->src, th->th_dport,
4170145836Smlaier				    th->th_sport, ntohl(th->th_ack), 0,
4171145836Smlaier				    TH_RST, 0, 0,
4172171168Smlaier				    (*state)->rule.ptr->return_ttl, 1, 0,
4173240233Sglebius				    kif->pfik_ifp);
			/*
			 * Reset the sender's tracking so its next packet takes
			 * the "first packet from this end" path above
			 * (seqlo == 0, seqhi == 1).
			 */
4174126258Smlaier			src->seqlo = 0;
4175126258Smlaier			src->seqhi = 1;
4176126258Smlaier			src->max_win = 1;
4177223637Sbz		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
4178126258Smlaier			printf("pf: BAD state: ");
4179126258Smlaier			pf_print_state(*state);
4180126258Smlaier			pf_print_flags(th->th_flags);
4181171168Smlaier			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4182171168Smlaier			    "pkts=%llu:%llu dir=%s,%s\n",
4183171168Smlaier			    seq, orig_seq, ack, pd->p_len, ackskew,
4184171168Smlaier			    (unsigned long long)(*state)->packets[0],
4185171168Smlaier			    (unsigned long long)(*state)->packets[1],
4186223637Sbz			    pd->dir == PF_IN ? "in" : "out",
4187223637Sbz			    pd->dir == (*state)->direction ? "fwd" : "rev");
			/*
			 * Digits 1-4 mark which strict-match clause failed,
			 * 5-6 which loose-match window clause failed; a blank
			 * means the clause held.
			 */
4188126258Smlaier			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4189126258Smlaier			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4190126258Smlaier			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4191126258Smlaier			    ' ': '2',
4192126258Smlaier			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4193126258Smlaier			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4194126258Smlaier			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4195126258Smlaier			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4196126258Smlaier		}
4197145836Smlaier		REASON_SET(reason, PFRES_BADSTATE);
4198126258Smlaier		return (PF_DROP);
4199126258Smlaier	}
4200126258Smlaier
4201200930Sdelphij	return (PF_PASS);
4202200930Sdelphij}
4203126258Smlaier
4204240233Sglebiusstatic int
4205200930Sdelphijpf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
4206200930Sdelphij	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
4207200930Sdelphij{
4208200930Sdelphij	struct tcphdr		*th = pd->hdr.tcp;
4209200930Sdelphij
4210200930Sdelphij	if (th->th_flags & TH_SYN)
4211200930Sdelphij		if (src->state < TCPS_SYN_SENT)
4212200930Sdelphij			src->state = TCPS_SYN_SENT;
4213200930Sdelphij	if (th->th_flags & TH_FIN)
4214200930Sdelphij		if (src->state < TCPS_CLOSING)
4215200930Sdelphij			src->state = TCPS_CLOSING;
4216200930Sdelphij	if (th->th_flags & TH_ACK) {
4217200930Sdelphij		if (dst->state == TCPS_SYN_SENT) {
4218200930Sdelphij			dst->state = TCPS_ESTABLISHED;
4219200930Sdelphij			if (src->state == TCPS_ESTABLISHED &&
4220200930Sdelphij			    (*state)->src_node != NULL &&
4221200930Sdelphij			    pf_src_connlimit(state)) {
4222200930Sdelphij				REASON_SET(reason, PFRES_SRCLIMIT);
4223200930Sdelphij				return (PF_DROP);
4224200930Sdelphij			}
4225200930Sdelphij		} else if (dst->state == TCPS_CLOSING) {
4226200930Sdelphij			dst->state = TCPS_FIN_WAIT_2;
4227200930Sdelphij		} else if (src->state == TCPS_SYN_SENT &&
4228200930Sdelphij		    dst->state < TCPS_SYN_SENT) {
4229200930Sdelphij			/*
4230200930Sdelphij			 * Handle a special sloppy case where we only see one
4231200930Sdelphij			 * half of the connection. If there is a ACK after
4232200930Sdelphij			 * the initial SYN without ever seeing a packet from
4233200930Sdelphij			 * the destination, set the connection to established.
4234200930Sdelphij			 */
4235200930Sdelphij			dst->state = src->state = TCPS_ESTABLISHED;
4236200930Sdelphij			if ((*state)->src_node != NULL &&
4237200930Sdelphij			    pf_src_connlimit(state)) {
4238200930Sdelphij				REASON_SET(reason, PFRES_SRCLIMIT);
4239200930Sdelphij				return (PF_DROP);
4240200930Sdelphij			}
4241200930Sdelphij		} else if (src->state == TCPS_CLOSING &&
4242200930Sdelphij		    dst->state == TCPS_ESTABLISHED &&
4243200930Sdelphij		    dst->seqlo == 0) {
4244200930Sdelphij			/*
4245200930Sdelphij			 * Handle the closing of half connections where we
4246200930Sdelphij			 * don't see the full bidirectional FIN/ACK+ACK
4247200930Sdelphij			 * handshake.
4248200930Sdelphij			 */
4249200930Sdelphij			dst->state = TCPS_CLOSING;
4250200930Sdelphij		}
4251200930Sdelphij	}
4252200930Sdelphij	if (th->th_flags & TH_RST)
4253200930Sdelphij		src->state = dst->state = TCPS_TIME_WAIT;
4254200930Sdelphij
4255200930Sdelphij	/* update expire time */
4256240233Sglebius	(*state)->expire = time_uptime;
4257200930Sdelphij	if (src->state >= TCPS_FIN_WAIT_2 &&
4258200930Sdelphij	    dst->state >= TCPS_FIN_WAIT_2)
4259200930Sdelphij		(*state)->timeout = PFTM_TCP_CLOSED;
4260200930Sdelphij	else if (src->state >= TCPS_CLOSING &&
4261200930Sdelphij	    dst->state >= TCPS_CLOSING)
4262200930Sdelphij		(*state)->timeout = PFTM_TCP_FIN_WAIT;
4263200930Sdelphij	else if (src->state < TCPS_ESTABLISHED ||
4264200930Sdelphij	    dst->state < TCPS_ESTABLISHED)
4265200930Sdelphij		(*state)->timeout = PFTM_TCP_OPENING;
4266200930Sdelphij	else if (src->state >= TCPS_CLOSING ||
4267200930Sdelphij	    dst->state >= TCPS_CLOSING)
4268200930Sdelphij		(*state)->timeout = PFTM_TCP_CLOSING;
4269200930Sdelphij	else
4270200930Sdelphij		(*state)->timeout = PFTM_TCP_ESTABLISHED;
4271200930Sdelphij
4272200930Sdelphij	return (PF_PASS);
4273200930Sdelphij}
4274200930Sdelphij
/*
 * pf_test_state_tcp: match a TCP packet against the state table and advance
 * the matching state.
 *
 * A lookup key is built from the packet's addresses and ports (slots swapped
 * for packets that are not PF_IN) and handed to STATE_LOOKUP().  Then:
 *   - while the state is in PF_TCPS_PROXY_SRC or PF_TCPS_PROXY_DST the
 *     synproxy handshake is driven from here and the packet is not passed;
 *   - a bare SYN for a state whose peers are both at or beyond
 *     TCPS_FIN_WAIT_2 unlinks the state, clears *state and drops the packet;
 *   - otherwise pf_tcp_track_sloppy() or pf_tcp_track_full() is run,
 *     depending on PFSTATE_SLOPPY;
 *   - addresses/ports are rewritten when the state's wire and stack keys
 *     differ, and the TCP header is copied back into the mbuf if anything
 *     in it was changed.
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.  *reason is set on the
 * synproxy paths and by the tracking helpers' drop paths; the state-reuse
 * drop leaves it untouched.  The h argument is not referenced in this body.
 *
 * NOTE(review): STATE_LOOKUP() is a macro defined elsewhere; it presumably
 * returns from this function on its own when no usable state is found --
 * confirm against its definition.
 */
4275240233Sglebiusstatic int
4276200930Sdelphijpf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4277200930Sdelphij    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4278200930Sdelphij    u_short *reason)
4279200930Sdelphij{
4280223637Sbz	struct pf_state_key_cmp	 key;
4281200930Sdelphij	struct tcphdr		*th = pd->hdr.tcp;
4282200930Sdelphij	int			 copyback = 0;
4283200930Sdelphij	struct pf_state_peer	*src, *dst;
4284223637Sbz	struct pf_state_key	*sk;
4285200930Sdelphij
4286240233Sglebius	bzero(&key, sizeof(key));
4287200930Sdelphij	key.af = pd->af;
4288200930Sdelphij	key.proto = IPPROTO_TCP;
4289223637Sbz	if (direction == PF_IN)	{	/* wire side, straight */
4290223637Sbz		PF_ACPY(&key.addr[0], pd->src, key.af);
4291223637Sbz		PF_ACPY(&key.addr[1], pd->dst, key.af);
4292223637Sbz		key.port[0] = th->th_sport;
4293223637Sbz		key.port[1] = th->th_dport;
4294223637Sbz	} else {			/* stack side, reverse */
4295223637Sbz		PF_ACPY(&key.addr[1], pd->src, key.af);
4296223637Sbz		PF_ACPY(&key.addr[0], pd->dst, key.af);
4297223637Sbz		key.port[1] = th->th_sport;
4298223637Sbz		key.port[0] = th->th_dport;
4299200930Sdelphij	}
4300200930Sdelphij
4301240233Sglebius	STATE_LOOKUP(kif, &key, direction, *state, pd);
4302200930Sdelphij
	/* src is the peer that sent this packet, dst the opposite peer. */
4303200930Sdelphij	if (direction == (*state)->direction) {
4304200930Sdelphij		src = &(*state)->src;
4305200930Sdelphij		dst = &(*state)->dst;
4306200930Sdelphij	} else {
4307200930Sdelphij		src = &(*state)->dst;
4308200930Sdelphij		dst = &(*state)->src;
4309200930Sdelphij	}
4310200930Sdelphij
4311223637Sbz	sk = (*state)->key[pd->didx];
4312223637Sbz
	/*
	 * Synproxy, first phase: only packets in the state's own direction
	 * are handled.  A SYN with the recorded sequence number is answered
	 * with a SYN|ACK generated here; a matching ACK moves the state on to
	 * PF_TCPS_PROXY_DST (subject to the source node's connection limit).
	 */
4313200930Sdelphij	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4314200930Sdelphij		if (direction != (*state)->direction) {
4315200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4316200930Sdelphij			return (PF_SYNPROXY_DROP);
4317200930Sdelphij		}
4318200930Sdelphij		if (th->th_flags & TH_SYN) {
4319200930Sdelphij			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4320200930Sdelphij				REASON_SET(reason, PFRES_SYNPROXY);
4321200930Sdelphij				return (PF_DROP);
4322200930Sdelphij			}
4323200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4324200930Sdelphij			    pd->src, th->th_dport, th->th_sport,
4325200930Sdelphij			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4326240233Sglebius			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL);
4327200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4328200930Sdelphij			return (PF_SYNPROXY_DROP);
4329200930Sdelphij		} else if (!(th->th_flags & TH_ACK) ||
4330200930Sdelphij		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4331200930Sdelphij		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4332200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4333200930Sdelphij			return (PF_DROP);
4334200930Sdelphij		} else if ((*state)->src_node != NULL &&
4335200930Sdelphij		    pf_src_connlimit(state)) {
4336200930Sdelphij			REASON_SET(reason, PFRES_SRCLIMIT);
4337200930Sdelphij			return (PF_DROP);
4338200930Sdelphij		} else
4339200930Sdelphij			(*state)->src.state = PF_TCPS_PROXY_DST;
4340200930Sdelphij	}
	/*
	 * Synproxy, second phase.  This is a separate "if", not an "else":
	 * the ACK that completed the first phase above is processed here in
	 * the same call.  A packet in the state's direction triggers a SYN
	 * built from the state key; a SYN|ACK from the other direction is
	 * acknowledged, ACKs are sent both ways, the seqdiff modulators and
	 * windows are derived, and both peers become established.  Every
	 * path in this block returns without passing the packet.
	 */
4341200930Sdelphij	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
4342200930Sdelphij		if (direction == (*state)->direction) {
4343200930Sdelphij			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4344200930Sdelphij			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4345200930Sdelphij			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4346200930Sdelphij				REASON_SET(reason, PFRES_SYNPROXY);
4347200930Sdelphij				return (PF_DROP);
4348200930Sdelphij			}
4349200930Sdelphij			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			/* seqhi == 1: no initial sequence number chosen yet. */
4350200930Sdelphij			if ((*state)->dst.seqhi == 1)
4351200930Sdelphij				(*state)->dst.seqhi = htonl(arc4random());
4352200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4353223637Sbz			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4354223637Sbz			    sk->port[pd->sidx], sk->port[pd->didx],
4355200930Sdelphij			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4356240233Sglebius			    (*state)->src.mss, 0, 0, (*state)->tag, NULL);
4357200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4358200930Sdelphij			return (PF_SYNPROXY_DROP);
4359200930Sdelphij		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4360200930Sdelphij		    (TH_SYN|TH_ACK)) ||
4361200930Sdelphij		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4362200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4363200930Sdelphij			return (PF_DROP);
4364200930Sdelphij		} else {
4365200930Sdelphij			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4366200930Sdelphij			(*state)->dst.seqlo = ntohl(th->th_seq);
4367200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4368200930Sdelphij			    pd->src, th->th_dport, th->th_sport,
4369200930Sdelphij			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4370200930Sdelphij			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4371240233Sglebius			    (*state)->tag, NULL);
4372200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4373223637Sbz			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4374223637Sbz			    sk->port[pd->sidx], sk->port[pd->didx],
4375200930Sdelphij			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4376240233Sglebius			    TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL);
			/*
			 * The modulators must be computed before seqhi is
			 * overwritten below; they use the old seqhi values.
			 */
4377200930Sdelphij			(*state)->src.seqdiff = (*state)->dst.seqhi -
4378200930Sdelphij			    (*state)->src.seqlo;
4379200930Sdelphij			(*state)->dst.seqdiff = (*state)->src.seqhi -
4380200930Sdelphij			    (*state)->dst.seqlo;
4381200930Sdelphij			(*state)->src.seqhi = (*state)->src.seqlo +
4382200930Sdelphij			    (*state)->dst.max_win;
4383200930Sdelphij			(*state)->dst.seqhi = (*state)->dst.seqlo +
4384200930Sdelphij			    (*state)->src.max_win;
4385200930Sdelphij			(*state)->src.wscale = (*state)->dst.wscale = 0;
4386200930Sdelphij			(*state)->src.state = (*state)->dst.state =
4387200930Sdelphij			    TCPS_ESTABLISHED;
4388200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4389200930Sdelphij			return (PF_SYNPROXY_DROP);
4390200930Sdelphij		}
4391200930Sdelphij	}
4392200930Sdelphij
	/*
	 * A new SYN (without ACK) on a state whose peers have both reached
	 * TCPS_FIN_WAIT_2 or later: retire the old state and drop this SYN.
	 * *state is cleared, so the caller must not use it afterwards.
	 */
4393200930Sdelphij	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
4394200930Sdelphij	    dst->state >= TCPS_FIN_WAIT_2 &&
4395200930Sdelphij	    src->state >= TCPS_FIN_WAIT_2) {
4396223637Sbz		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4397200930Sdelphij			printf("pf: state reuse ");
4398200930Sdelphij			pf_print_state(*state);
4399200930Sdelphij			pf_print_flags(th->th_flags);
4400200930Sdelphij			printf("\n");
4401200930Sdelphij		}
4402200930Sdelphij		/* XXX make sure it's the same direction ?? */
4403200930Sdelphij		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
4404240233Sglebius		pf_unlink_state(*state, PF_ENTER_LOCKED);
4405200930Sdelphij		*state = NULL;
4406200930Sdelphij		return (PF_DROP);
4407200930Sdelphij	}
4408200930Sdelphij
	/* Flag-only tracking for sloppy states, full window tracking otherwise. */
4409200930Sdelphij	if ((*state)->state_flags & PFSTATE_SLOPPY) {
4410200930Sdelphij		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
4411200930Sdelphij			return (PF_DROP);
4412200930Sdelphij	} else {
4413200930Sdelphij		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
4414200930Sdelphij		    &copyback) == PF_DROP)
4415200930Sdelphij			return (PF_DROP);
4416200930Sdelphij	}
4417200930Sdelphij
4418126258Smlaier	/* translate source/destination address, if necessary */
4419223637Sbz	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4420223637Sbz		struct pf_state_key *nk = (*state)->key[pd->didx];
4421223637Sbz
4422223637Sbz		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4423223637Sbz		    nk->port[pd->sidx] != th->th_sport)
4424289703Skp			pf_change_ap(m, pd->src, &th->th_sport,
4425289703Skp			    pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
4426223637Sbz			    nk->port[pd->sidx], 0, pd->af);
4427223637Sbz
4428223637Sbz		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4429223637Sbz		    nk->port[pd->didx] != th->th_dport)
4430289703Skp			pf_change_ap(m, pd->dst, &th->th_dport,
4431289703Skp			    pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
4432223637Sbz			    nk->port[pd->didx], 0, pd->af);
		/* Set whenever the keys differ, even if neither rewrite ran. */
4433223637Sbz		copyback = 1;
4434126258Smlaier	}
4435126258Smlaier
4436223637Sbz	/* Copyback sequence modulation or stateful scrub changes if needed */
4437223637Sbz	if (copyback)
4438223637Sbz		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4439223637Sbz
4440126258Smlaier	return (PF_PASS);
4441126258Smlaier}
4442126258Smlaier
4443240233Sglebiusstatic int
4444130613Smlaierpf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4445130613Smlaier    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4446126258Smlaier{
4447126258Smlaier	struct pf_state_peer	*src, *dst;
4448223637Sbz	struct pf_state_key_cmp	 key;
4449126258Smlaier	struct udphdr		*uh = pd->hdr.udp;
4450126258Smlaier
4451240233Sglebius	bzero(&key, sizeof(key));
4452126258Smlaier	key.af = pd->af;
4453126258Smlaier	key.proto = IPPROTO_UDP;
4454223637Sbz	if (direction == PF_IN)	{	/* wire side, straight */
4455223637Sbz		PF_ACPY(&key.addr[0], pd->src, key.af);
4456223637Sbz		PF_ACPY(&key.addr[1], pd->dst, key.af);
4457223637Sbz		key.port[0] = uh->uh_sport;
4458223637Sbz		key.port[1] = uh->uh_dport;
4459223637Sbz	} else {			/* stack side, reverse */
4460223637Sbz		PF_ACPY(&key.addr[1], pd->src, key.af);
4461223637Sbz		PF_ACPY(&key.addr[0], pd->dst, key.af);
4462223637Sbz		key.port[1] = uh->uh_sport;
4463223637Sbz		key.port[0] = uh->uh_dport;
4464130613Smlaier	}
4465126258Smlaier
4466240233Sglebius	STATE_LOOKUP(kif, &key, direction, *state, pd);
4467126258Smlaier
4468126258Smlaier	if (direction == (*state)->direction) {
4469126258Smlaier		src = &(*state)->src;
4470126258Smlaier		dst = &(*state)->dst;
4471126258Smlaier	} else {
4472126258Smlaier		src = &(*state)->dst;
4473126258Smlaier		dst = &(*state)->src;
4474126258Smlaier	}
4475126258Smlaier
4476126258Smlaier	/* update states */
4477126258Smlaier	if (src->state < PFUDPS_SINGLE)
4478126258Smlaier		src->state = PFUDPS_SINGLE;
4479126258Smlaier	if (dst->state == PFUDPS_SINGLE)
4480126258Smlaier		dst->state = PFUDPS_MULTIPLE;
4481126258Smlaier
4482126258Smlaier	/* update expire time */
4483240233Sglebius	(*state)->expire = time_uptime;
4484126258Smlaier	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4485126258Smlaier		(*state)->timeout = PFTM_UDP_MULTIPLE;
4486126258Smlaier	else
4487126258Smlaier		(*state)->timeout = PFTM_UDP_SINGLE;
4488126258Smlaier
4489126258Smlaier	/* translate source/destination address, if necessary */
4490223637Sbz	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4491223637Sbz		struct pf_state_key *nk = (*state)->key[pd->didx];
4492223637Sbz
4493223637Sbz		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4494223637Sbz		    nk->port[pd->sidx] != uh->uh_sport)
4495289703Skp			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
4496223637Sbz			    &uh->uh_sum, &nk->addr[pd->sidx],
4497223637Sbz			    nk->port[pd->sidx], 1, pd->af);
4498223637Sbz
4499223637Sbz		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4500223637Sbz		    nk->port[pd->didx] != uh->uh_dport)
4501289703Skp			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
4502223637Sbz			    &uh->uh_sum, &nk->addr[pd->didx],
4503223637Sbz			    nk->port[pd->didx], 1, pd->af);
4504126261Smlaier		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4505126258Smlaier	}
4506126258Smlaier
4507126258Smlaier	return (PF_PASS);
4508126258Smlaier}
4509126258Smlaier
4510240233Sglebiusstatic int
4511130613Smlaierpf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4512145836Smlaier    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4513126258Smlaier{
4514223637Sbz	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
4515223637Sbz	u_int16_t	 icmpid = 0, *icmpsum;
4516223637Sbz	u_int8_t	 icmptype;
4517130613Smlaier	int		 state_icmp = 0;
4518223637Sbz	struct pf_state_key_cmp key;
4519126258Smlaier
4520240233Sglebius	bzero(&key, sizeof(key));
4521126258Smlaier	switch (pd->proto) {
4522126258Smlaier#ifdef INET
4523126258Smlaier	case IPPROTO_ICMP:
4524126258Smlaier		icmptype = pd->hdr.icmp->icmp_type;
4525126258Smlaier		icmpid = pd->hdr.icmp->icmp_id;
4526126258Smlaier		icmpsum = &pd->hdr.icmp->icmp_cksum;
4527126258Smlaier
4528126258Smlaier		if (icmptype == ICMP_UNREACH ||
4529126258Smlaier		    icmptype == ICMP_SOURCEQUENCH ||
4530126258Smlaier		    icmptype == ICMP_REDIRECT ||
4531126258Smlaier		    icmptype == ICMP_TIMXCEED ||
4532126258Smlaier		    icmptype == ICMP_PARAMPROB)
4533126258Smlaier			state_icmp++;
4534126258Smlaier		break;
4535126258Smlaier#endif /* INET */
4536126258Smlaier#ifdef INET6
4537126258Smlaier	case IPPROTO_ICMPV6:
4538126258Smlaier		icmptype = pd->hdr.icmp6->icmp6_type;
4539126258Smlaier		icmpid = pd->hdr.icmp6->icmp6_id;
4540126258Smlaier		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4541126258Smlaier
4542126258Smlaier		if (icmptype == ICMP6_DST_UNREACH ||
4543126258Smlaier		    icmptype == ICMP6_PACKET_TOO_BIG ||
4544126258Smlaier		    icmptype == ICMP6_TIME_EXCEEDED ||
4545126258Smlaier		    icmptype == ICMP6_PARAM_PROB)
4546126258Smlaier			state_icmp++;
4547126258Smlaier		break;
4548126258Smlaier#endif /* INET6 */
4549126258Smlaier	}
4550126258Smlaier
4551126258Smlaier	if (!state_icmp) {
4552126258Smlaier
4553126258Smlaier		/*
4554126258Smlaier		 * ICMP query/reply message not related to a TCP/UDP packet.
4555126258Smlaier		 * Search for an ICMP state.
4556126258Smlaier		 */
4557126258Smlaier		key.af = pd->af;
4558126258Smlaier		key.proto = pd->proto;
4559223637Sbz		key.port[0] = key.port[1] = icmpid;
4560223637Sbz		if (direction == PF_IN)	{	/* wire side, straight */
4561223637Sbz			PF_ACPY(&key.addr[0], pd->src, key.af);
4562223637Sbz			PF_ACPY(&key.addr[1], pd->dst, key.af);
4563223637Sbz		} else {			/* stack side, reverse */
4564223637Sbz			PF_ACPY(&key.addr[1], pd->src, key.af);
4565223637Sbz			PF_ACPY(&key.addr[0], pd->dst, key.af);
4566130613Smlaier		}
4567126258Smlaier
4568240233Sglebius		STATE_LOOKUP(kif, &key, direction, *state, pd);
4569126258Smlaier
4570240233Sglebius		(*state)->expire = time_uptime;
4571126258Smlaier		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4572126258Smlaier
4573126258Smlaier		/* translate source/destination address, if necessary */
4574223637Sbz		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4575223637Sbz			struct pf_state_key *nk = (*state)->key[pd->didx];
4576223637Sbz
4577223637Sbz			switch (pd->af) {
4578126258Smlaier#ifdef INET
4579223637Sbz			case AF_INET:
4580223637Sbz				if (PF_ANEQ(pd->src,
4581223637Sbz				    &nk->addr[pd->sidx], AF_INET))
4582126258Smlaier					pf_change_a(&saddr->v4.s_addr,
4583126258Smlaier					    pd->ip_sum,
4584223637Sbz					    nk->addr[pd->sidx].v4.s_addr, 0);
4585223637Sbz
4586223637Sbz				if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
4587223637Sbz				    AF_INET))
4588223637Sbz					pf_change_a(&daddr->v4.s_addr,
4589223637Sbz					    pd->ip_sum,
4590223637Sbz					    nk->addr[pd->didx].v4.s_addr, 0);
4591223637Sbz
4592223637Sbz				if (nk->port[0] !=
4593223637Sbz				    pd->hdr.icmp->icmp_id) {
4594149884Smlaier					pd->hdr.icmp->icmp_cksum =
4595149884Smlaier					    pf_cksum_fixup(
4596149884Smlaier					    pd->hdr.icmp->icmp_cksum, icmpid,
4597223637Sbz					    nk->port[pd->sidx], 0);
4598149884Smlaier					pd->hdr.icmp->icmp_id =
4599223637Sbz					    nk->port[pd->sidx];
4600223637Sbz				}
4601223637Sbz
4602223637Sbz				m_copyback(m, off, ICMP_MINLEN,
4603240233Sglebius				    (caddr_t )pd->hdr.icmp);
4604223637Sbz				break;
4605126258Smlaier#endif /* INET */
4606126258Smlaier#ifdef INET6
4607223637Sbz			case AF_INET6:
4608223637Sbz				if (PF_ANEQ(pd->src,
4609223637Sbz				    &nk->addr[pd->sidx], AF_INET6))
4610126258Smlaier					pf_change_a6(saddr,
4611126258Smlaier					    &pd->hdr.icmp6->icmp6_cksum,
4612223637Sbz					    &nk->addr[pd->sidx], 0);
4613223637Sbz
4614223637Sbz				if (PF_ANEQ(pd->dst,
4615223637Sbz				    &nk->addr[pd->didx], AF_INET6))
4616126258Smlaier					pf_change_a6(daddr,
4617126258Smlaier					    &pd->hdr.icmp6->icmp6_cksum,
4618223637Sbz					    &nk->addr[pd->didx], 0);
4619223637Sbz
4620240233Sglebius				m_copyback(m, off, sizeof(struct icmp6_hdr),
4621240233Sglebius				    (caddr_t )pd->hdr.icmp6);
4622223637Sbz				break;
4623126258Smlaier#endif /* INET6 */
4624126258Smlaier			}
4625126258Smlaier		}
4626126258Smlaier		return (PF_PASS);
4627126258Smlaier
4628126258Smlaier	} else {
4629126258Smlaier		/*
4630126258Smlaier		 * ICMP error message in response to a TCP/UDP packet.
4631126258Smlaier		 * Extract the inner TCP/UDP header and search for that state.
4632126258Smlaier		 */
4633126258Smlaier
4634126258Smlaier		struct pf_pdesc	pd2;
4635223637Sbz		bzero(&pd2, sizeof pd2);
4636126258Smlaier#ifdef INET
4637126258Smlaier		struct ip	h2;
4638126258Smlaier#endif /* INET */
4639126258Smlaier#ifdef INET6
4640126258Smlaier		struct ip6_hdr	h2_6;
4641126258Smlaier		int		terminal = 0;
4642126258Smlaier#endif /* INET6 */
4643223637Sbz		int		ipoff2 = 0;
4644223637Sbz		int		off2 = 0;
4645126258Smlaier
4646126258Smlaier		pd2.af = pd->af;
4647223637Sbz		/* Payload packet is from the opposite direction. */
4648223637Sbz		pd2.sidx = (direction == PF_IN) ? 1 : 0;
4649223637Sbz		pd2.didx = (direction == PF_IN) ? 0 : 1;
4650126258Smlaier		switch (pd->af) {
4651126258Smlaier#ifdef INET
4652126258Smlaier		case AF_INET:
4653126258Smlaier			/* offset of h2 in mbuf chain */
4654126258Smlaier			ipoff2 = off + ICMP_MINLEN;
4655126258Smlaier
4656126258Smlaier			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4657145836Smlaier			    NULL, reason, pd2.af)) {
4658126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4659126258Smlaier				    ("pf: ICMP error message too short "
4660126258Smlaier				    "(ip)\n"));
4661126258Smlaier				return (PF_DROP);
4662126258Smlaier			}
4663126258Smlaier			/*
4664126258Smlaier			 * ICMP error messages don't refer to non-first
4665126258Smlaier			 * fragments
4666126258Smlaier			 */
4667145836Smlaier			if (h2.ip_off & htons(IP_OFFMASK)) {
4668145836Smlaier				REASON_SET(reason, PFRES_FRAG);
4669126258Smlaier				return (PF_DROP);
4670145836Smlaier			}
4671126258Smlaier
4672126258Smlaier			/* offset of protocol header that follows h2 */
4673126258Smlaier			off2 = ipoff2 + (h2.ip_hl << 2);
4674126258Smlaier
4675126258Smlaier			pd2.proto = h2.ip_p;
4676126258Smlaier			pd2.src = (struct pf_addr *)&h2.ip_src;
4677126258Smlaier			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4678126258Smlaier			pd2.ip_sum = &h2.ip_sum;
4679126258Smlaier			break;
4680126258Smlaier#endif /* INET */
4681126258Smlaier#ifdef INET6
4682126258Smlaier		case AF_INET6:
4683126258Smlaier			ipoff2 = off + sizeof(struct icmp6_hdr);
4684126258Smlaier
4685126258Smlaier			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4686145836Smlaier			    NULL, reason, pd2.af)) {
4687126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4688126258Smlaier				    ("pf: ICMP error message too short "
4689126258Smlaier				    "(ip6)\n"));
4690126258Smlaier				return (PF_DROP);
4691126258Smlaier			}
4692126258Smlaier			pd2.proto = h2_6.ip6_nxt;
4693126258Smlaier			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4694126258Smlaier			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4695126258Smlaier			pd2.ip_sum = NULL;
4696126258Smlaier			off2 = ipoff2 + sizeof(h2_6);
4697126258Smlaier			do {
4698126258Smlaier				switch (pd2.proto) {
4699126258Smlaier				case IPPROTO_FRAGMENT:
4700126258Smlaier					/*
4701126258Smlaier					 * ICMPv6 error messages for
4702126258Smlaier					 * non-first fragments
4703126258Smlaier					 */
4704145836Smlaier					REASON_SET(reason, PFRES_FRAG);
4705126258Smlaier					return (PF_DROP);
4706126258Smlaier				case IPPROTO_AH:
4707126258Smlaier				case IPPROTO_HOPOPTS:
4708126258Smlaier				case IPPROTO_ROUTING:
4709126258Smlaier				case IPPROTO_DSTOPTS: {
4710126258Smlaier					/* get next header and header length */
4711126258Smlaier					struct ip6_ext opt6;
4712126258Smlaier
4713126258Smlaier					if (!pf_pull_hdr(m, off2, &opt6,
4714145836Smlaier					    sizeof(opt6), NULL, reason,
4715145836Smlaier					    pd2.af)) {
4716126258Smlaier						DPFPRINTF(PF_DEBUG_MISC,
4717126258Smlaier						    ("pf: ICMPv6 short opt\n"));
4718126258Smlaier						return (PF_DROP);
4719126258Smlaier					}
4720126258Smlaier					if (pd2.proto == IPPROTO_AH)
4721126258Smlaier						off2 += (opt6.ip6e_len + 2) * 4;
4722126258Smlaier					else
4723126258Smlaier						off2 += (opt6.ip6e_len + 1) * 8;
4724126258Smlaier					pd2.proto = opt6.ip6e_nxt;
4725126258Smlaier					/* goto the next header */
4726126258Smlaier					break;
4727126258Smlaier				}
4728126258Smlaier				default:
4729126258Smlaier					terminal++;
4730126258Smlaier					break;
4731126258Smlaier				}
4732126258Smlaier			} while (!terminal);
4733126258Smlaier			break;
4734126258Smlaier#endif /* INET6 */
4735126258Smlaier		}
4736126258Smlaier
4737126258Smlaier		switch (pd2.proto) {
4738126258Smlaier		case IPPROTO_TCP: {
4739126258Smlaier			struct tcphdr		 th;
4740126258Smlaier			u_int32_t		 seq;
4741126258Smlaier			struct pf_state_peer	*src, *dst;
4742126258Smlaier			u_int8_t		 dws;
4743128129Smlaier			int			 copyback = 0;
4744126258Smlaier
4745126258Smlaier			/*
4746126258Smlaier			 * Only the first 8 bytes of the TCP header can be
4747126258Smlaier			 * expected. Don't access any TCP header fields after
4748126258Smlaier			 * th_seq, an ackskew test is not possible.
4749126258Smlaier			 */
4750145836Smlaier			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
4751145836Smlaier			    pd2.af)) {
4752126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4753126258Smlaier				    ("pf: ICMP error message too short "
4754126258Smlaier				    "(tcp)\n"));
4755126258Smlaier				return (PF_DROP);
4756126258Smlaier			}
4757126258Smlaier
4758126258Smlaier			key.af = pd2.af;
4759126258Smlaier			key.proto = IPPROTO_TCP;
4760223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4761223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4762223637Sbz			key.port[pd2.sidx] = th.th_sport;
4763223637Sbz			key.port[pd2.didx] = th.th_dport;
4764126258Smlaier
4765240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4766126258Smlaier
4767126258Smlaier			if (direction == (*state)->direction) {
4768126258Smlaier				src = &(*state)->dst;
4769126258Smlaier				dst = &(*state)->src;
4770126258Smlaier			} else {
4771126258Smlaier				src = &(*state)->src;
4772126258Smlaier				dst = &(*state)->dst;
4773126258Smlaier			}
4774126258Smlaier
4775171929Sdhartmei			if (src->wscale && dst->wscale)
4776126258Smlaier				dws = dst->wscale & PF_WSCALE_MASK;
4777126258Smlaier			else
4778126258Smlaier				dws = 0;
4779126258Smlaier
4780126258Smlaier			/* Demodulate sequence number */
4781126258Smlaier			seq = ntohl(th.th_seq) - src->seqdiff;
4782128129Smlaier			if (src->seqdiff) {
4783128129Smlaier				pf_change_a(&th.th_seq, icmpsum,
4784126258Smlaier				    htonl(seq), 0);
4785128129Smlaier				copyback = 1;
4786128129Smlaier			}
4787126258Smlaier
4788200930Sdelphij			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
4789200930Sdelphij			    (!SEQ_GEQ(src->seqhi, seq) ||
4790200930Sdelphij			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
4791223637Sbz				if (V_pf_status.debug >= PF_DEBUG_MISC) {
4792126258Smlaier					printf("pf: BAD ICMP %d:%d ",
4793126258Smlaier					    icmptype, pd->hdr.icmp->icmp_code);
4794126258Smlaier					pf_print_host(pd->src, 0, pd->af);
4795126258Smlaier					printf(" -> ");
4796126258Smlaier					pf_print_host(pd->dst, 0, pd->af);
4797126258Smlaier					printf(" state: ");
4798126258Smlaier					pf_print_state(*state);
4799126258Smlaier					printf(" seq=%u\n", seq);
4800126258Smlaier				}
4801145836Smlaier				REASON_SET(reason, PFRES_BADSTATE);
4802126258Smlaier				return (PF_DROP);
4803223637Sbz			} else {
4804223637Sbz				if (V_pf_status.debug >= PF_DEBUG_MISC) {
4805223637Sbz					printf("pf: OK ICMP %d:%d ",
4806223637Sbz					    icmptype, pd->hdr.icmp->icmp_code);
4807223637Sbz					pf_print_host(pd->src, 0, pd->af);
4808223637Sbz					printf(" -> ");
4809223637Sbz					pf_print_host(pd->dst, 0, pd->af);
4810223637Sbz					printf(" state: ");
4811223637Sbz					pf_print_state(*state);
4812223637Sbz					printf(" seq=%u\n", seq);
4813223637Sbz				}
4814126258Smlaier			}
4815126258Smlaier
4816223637Sbz			/* translate source/destination address, if necessary */
4817223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4818223637Sbz			    (*state)->key[PF_SK_STACK]) {
4819223637Sbz				struct pf_state_key *nk =
4820223637Sbz				    (*state)->key[pd->didx];
4821223637Sbz
4822223637Sbz				if (PF_ANEQ(pd2.src,
4823223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4824223637Sbz				    nk->port[pd2.sidx] != th.th_sport)
4825126258Smlaier					pf_change_icmp(pd2.src, &th.th_sport,
4826223637Sbz					    daddr, &nk->addr[pd2.sidx],
4827223637Sbz					    nk->port[pd2.sidx], NULL,
4828126258Smlaier					    pd2.ip_sum, icmpsum,
4829126258Smlaier					    pd->ip_sum, 0, pd2.af);
4830223637Sbz
4831223637Sbz				if (PF_ANEQ(pd2.dst,
4832223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4833223637Sbz				    nk->port[pd2.didx] != th.th_dport)
4834126258Smlaier					pf_change_icmp(pd2.dst, &th.th_dport,
4835300979Skp					    saddr, &nk->addr[pd2.didx],
4836223637Sbz					    nk->port[pd2.didx], NULL,
4837126258Smlaier					    pd2.ip_sum, icmpsum,
4838126258Smlaier					    pd->ip_sum, 0, pd2.af);
4839128129Smlaier				copyback = 1;
4840128129Smlaier			}
4841128129Smlaier
4842128129Smlaier			if (copyback) {
4843126258Smlaier				switch (pd2.af) {
4844126258Smlaier#ifdef INET
4845126258Smlaier				case AF_INET:
4846126258Smlaier					m_copyback(m, off, ICMP_MINLEN,
4847240233Sglebius					    (caddr_t )pd->hdr.icmp);
4848126258Smlaier					m_copyback(m, ipoff2, sizeof(h2),
4849240233Sglebius					    (caddr_t )&h2);
4850126258Smlaier					break;
4851126258Smlaier#endif /* INET */
4852126258Smlaier#ifdef INET6
4853126258Smlaier				case AF_INET6:
4854126258Smlaier					m_copyback(m, off,
4855126258Smlaier					    sizeof(struct icmp6_hdr),
4856240233Sglebius					    (caddr_t )pd->hdr.icmp6);
4857126258Smlaier					m_copyback(m, ipoff2, sizeof(h2_6),
4858240233Sglebius					    (caddr_t )&h2_6);
4859126258Smlaier					break;
4860126258Smlaier#endif /* INET6 */
4861126258Smlaier				}
4862126261Smlaier				m_copyback(m, off2, 8, (caddr_t)&th);
4863126258Smlaier			}
4864126258Smlaier
4865126258Smlaier			return (PF_PASS);
4866126258Smlaier			break;
4867126258Smlaier		}
4868126258Smlaier		case IPPROTO_UDP: {
4869126258Smlaier			struct udphdr		uh;
4870126258Smlaier
4871126258Smlaier			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4872145836Smlaier			    NULL, reason, pd2.af)) {
4873126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4874126258Smlaier				    ("pf: ICMP error message too short "
4875126258Smlaier				    "(udp)\n"));
4876126258Smlaier				return (PF_DROP);
4877126258Smlaier			}
4878126258Smlaier
4879126258Smlaier			key.af = pd2.af;
4880126258Smlaier			key.proto = IPPROTO_UDP;
4881223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4882223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4883223637Sbz			key.port[pd2.sidx] = uh.uh_sport;
4884223637Sbz			key.port[pd2.didx] = uh.uh_dport;
4885126258Smlaier
4886240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4887126258Smlaier
4888223637Sbz			/* translate source/destination address, if necessary */
4889223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4890223637Sbz			    (*state)->key[PF_SK_STACK]) {
4891223637Sbz				struct pf_state_key *nk =
4892223637Sbz				    (*state)->key[pd->didx];
4893223637Sbz
4894223637Sbz				if (PF_ANEQ(pd2.src,
4895223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4896223637Sbz				    nk->port[pd2.sidx] != uh.uh_sport)
4897126258Smlaier					pf_change_icmp(pd2.src, &uh.uh_sport,
4898223637Sbz					    daddr, &nk->addr[pd2.sidx],
4899223637Sbz					    nk->port[pd2.sidx], &uh.uh_sum,
4900126258Smlaier					    pd2.ip_sum, icmpsum,
4901126258Smlaier					    pd->ip_sum, 1, pd2.af);
4902223637Sbz
4903223637Sbz				if (PF_ANEQ(pd2.dst,
4904223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4905223637Sbz				    nk->port[pd2.didx] != uh.uh_dport)
4906126258Smlaier					pf_change_icmp(pd2.dst, &uh.uh_dport,
4907300979Skp					    saddr, &nk->addr[pd2.didx],
4908223637Sbz					    nk->port[pd2.didx], &uh.uh_sum,
4909126258Smlaier					    pd2.ip_sum, icmpsum,
4910126258Smlaier					    pd->ip_sum, 1, pd2.af);
4911223637Sbz
4912126258Smlaier				switch (pd2.af) {
4913126258Smlaier#ifdef INET
4914126258Smlaier				case AF_INET:
4915126258Smlaier					m_copyback(m, off, ICMP_MINLEN,
4916240233Sglebius					    (caddr_t )pd->hdr.icmp);
4917223637Sbz					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4918126258Smlaier					break;
4919126258Smlaier#endif /* INET */
4920126258Smlaier#ifdef INET6
4921126258Smlaier				case AF_INET6:
4922126258Smlaier					m_copyback(m, off,
4923126258Smlaier					    sizeof(struct icmp6_hdr),
4924240233Sglebius					    (caddr_t )pd->hdr.icmp6);
4925126258Smlaier					m_copyback(m, ipoff2, sizeof(h2_6),
4926240233Sglebius					    (caddr_t )&h2_6);
4927126258Smlaier					break;
4928126258Smlaier#endif /* INET6 */
4929126258Smlaier				}
4930223637Sbz				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
4931126258Smlaier			}
4932126258Smlaier			return (PF_PASS);
4933126258Smlaier			break;
4934126258Smlaier		}
4935126258Smlaier#ifdef INET
4936126258Smlaier		case IPPROTO_ICMP: {
4937126258Smlaier			struct icmp		iih;
4938126258Smlaier
4939126258Smlaier			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4940145836Smlaier			    NULL, reason, pd2.af)) {
4941126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4942126258Smlaier				    ("pf: ICMP error message too short i"
4943126258Smlaier				    "(icmp)\n"));
4944126258Smlaier				return (PF_DROP);
4945126258Smlaier			}
4946126258Smlaier
4947126258Smlaier			key.af = pd2.af;
4948126258Smlaier			key.proto = IPPROTO_ICMP;
4949223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4950223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4951223637Sbz			key.port[0] = key.port[1] = iih.icmp_id;
4952126258Smlaier
4953240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4954126258Smlaier
4955223637Sbz			/* translate source/destination address, if necessary */
4956223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4957223637Sbz			    (*state)->key[PF_SK_STACK]) {
4958223637Sbz				struct pf_state_key *nk =
4959223637Sbz				    (*state)->key[pd->didx];
4960223637Sbz
4961223637Sbz				if (PF_ANEQ(pd2.src,
4962223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4963223637Sbz				    nk->port[pd2.sidx] != iih.icmp_id)
4964126258Smlaier					pf_change_icmp(pd2.src, &iih.icmp_id,
4965223637Sbz					    daddr, &nk->addr[pd2.sidx],
4966223637Sbz					    nk->port[pd2.sidx], NULL,
4967126258Smlaier					    pd2.ip_sum, icmpsum,
4968126258Smlaier					    pd->ip_sum, 0, AF_INET);
4969223637Sbz
4970223637Sbz				if (PF_ANEQ(pd2.dst,
4971223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4972223637Sbz				    nk->port[pd2.didx] != iih.icmp_id)
4973126258Smlaier					pf_change_icmp(pd2.dst, &iih.icmp_id,
4974300979Skp					    saddr, &nk->addr[pd2.didx],
4975223637Sbz					    nk->port[pd2.didx], NULL,
4976126258Smlaier					    pd2.ip_sum, icmpsum,
4977126258Smlaier					    pd->ip_sum, 0, AF_INET);
4978223637Sbz
4979223637Sbz				m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
4980223637Sbz				m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4981223637Sbz				m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
4982126258Smlaier			}
4983126258Smlaier			return (PF_PASS);
4984126258Smlaier			break;
4985126258Smlaier		}
4986126258Smlaier#endif /* INET */
4987126258Smlaier#ifdef INET6
4988126258Smlaier		case IPPROTO_ICMPV6: {
4989126258Smlaier			struct icmp6_hdr	iih;
4990126258Smlaier
4991126258Smlaier			if (!pf_pull_hdr(m, off2, &iih,
4992145836Smlaier			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
4993126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4994126258Smlaier				    ("pf: ICMP error message too short "
4995126258Smlaier				    "(icmp6)\n"));
4996126258Smlaier				return (PF_DROP);
4997126258Smlaier			}
4998126258Smlaier
4999126258Smlaier			key.af = pd2.af;
5000126258Smlaier			key.proto = IPPROTO_ICMPV6;
5001223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5002223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5003223637Sbz			key.port[0] = key.port[1] = iih.icmp6_id;
5004126258Smlaier
5005240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
5006126258Smlaier
5007223637Sbz			/* translate source/destination address, if necessary */
5008223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
5009223637Sbz			    (*state)->key[PF_SK_STACK]) {
5010223637Sbz				struct pf_state_key *nk =
5011223637Sbz				    (*state)->key[pd->didx];
5012223637Sbz
5013223637Sbz				if (PF_ANEQ(pd2.src,
5014223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
5015223637Sbz				    nk->port[pd2.sidx] != iih.icmp6_id)
5016126258Smlaier					pf_change_icmp(pd2.src, &iih.icmp6_id,
5017223637Sbz					    daddr, &nk->addr[pd2.sidx],
5018223637Sbz					    nk->port[pd2.sidx], NULL,
5019126258Smlaier					    pd2.ip_sum, icmpsum,
5020126258Smlaier					    pd->ip_sum, 0, AF_INET6);
5021223637Sbz
5022223637Sbz				if (PF_ANEQ(pd2.dst,
5023223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
5024223637Sbz				    nk->port[pd2.didx] != iih.icmp6_id)
5025126258Smlaier					pf_change_icmp(pd2.dst, &iih.icmp6_id,
5026300979Skp					    saddr, &nk->addr[pd2.didx],
5027223637Sbz					    nk->port[pd2.didx], NULL,
5028126258Smlaier					    pd2.ip_sum, icmpsum,
5029126258Smlaier					    pd->ip_sum, 0, AF_INET6);
5030223637Sbz
5031126258Smlaier				m_copyback(m, off, sizeof(struct icmp6_hdr),
5032126261Smlaier				    (caddr_t)pd->hdr.icmp6);
5033223637Sbz				m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
5034126258Smlaier				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5035126261Smlaier				    (caddr_t)&iih);
5036126258Smlaier			}
5037126258Smlaier			return (PF_PASS);
5038126258Smlaier			break;
5039126258Smlaier		}
5040126258Smlaier#endif /* INET6 */
5041126258Smlaier		default: {
5042126258Smlaier			key.af = pd2.af;
5043126258Smlaier			key.proto = pd2.proto;
5044223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5045223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5046223637Sbz			key.port[0] = key.port[1] = 0;
5047126258Smlaier
5048240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
5049126258Smlaier
5050223637Sbz			/* translate source/destination address, if necessary */
5051223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
5052223637Sbz			    (*state)->key[PF_SK_STACK]) {
5053223637Sbz				struct pf_state_key *nk =
5054223637Sbz				    (*state)->key[pd->didx];
5055223637Sbz
5056223637Sbz				if (PF_ANEQ(pd2.src,
5057223637Sbz				    &nk->addr[pd2.sidx], pd2.af))
5058223637Sbz					pf_change_icmp(pd2.src, NULL, daddr,
5059223637Sbz					    &nk->addr[pd2.sidx], 0, NULL,
5060126258Smlaier					    pd2.ip_sum, icmpsum,
5061126258Smlaier					    pd->ip_sum, 0, pd2.af);
5062223637Sbz
5063223637Sbz				if (PF_ANEQ(pd2.dst,
5064223637Sbz				    &nk->addr[pd2.didx], pd2.af))
5065300979Skp					pf_change_icmp(pd2.dst, NULL, saddr,
5066223637Sbz					    &nk->addr[pd2.didx], 0, NULL,
5067126258Smlaier					    pd2.ip_sum, icmpsum,
5068126258Smlaier					    pd->ip_sum, 0, pd2.af);
5069223637Sbz
5070126258Smlaier				switch (pd2.af) {
5071126258Smlaier#ifdef INET
5072126258Smlaier				case AF_INET:
5073126258Smlaier					m_copyback(m, off, ICMP_MINLEN,
5074126261Smlaier					    (caddr_t)pd->hdr.icmp);
5075223637Sbz					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5076126258Smlaier					break;
5077126258Smlaier#endif /* INET */
5078126258Smlaier#ifdef INET6
5079126258Smlaier				case AF_INET6:
5080126258Smlaier					m_copyback(m, off,
5081126258Smlaier					    sizeof(struct icmp6_hdr),
5082240233Sglebius					    (caddr_t )pd->hdr.icmp6);
5083126258Smlaier					m_copyback(m, ipoff2, sizeof(h2_6),
5084240233Sglebius					    (caddr_t )&h2_6);
5085126258Smlaier					break;
5086126258Smlaier#endif /* INET6 */
5087126258Smlaier				}
5088126258Smlaier			}
5089126258Smlaier			return (PF_PASS);
5090126258Smlaier			break;
5091126258Smlaier		}
5092126258Smlaier		}
5093126258Smlaier	}
5094126258Smlaier}
5095126258Smlaier
5096240233Sglebiusstatic int
5097130613Smlaierpf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5098223637Sbz    struct mbuf *m, struct pf_pdesc *pd)
5099126258Smlaier{
5100126258Smlaier	struct pf_state_peer	*src, *dst;
5101223637Sbz	struct pf_state_key_cmp	 key;
5102126258Smlaier
5103240233Sglebius	bzero(&key, sizeof(key));
5104126258Smlaier	key.af = pd->af;
5105126258Smlaier	key.proto = pd->proto;
5106130613Smlaier	if (direction == PF_IN)	{
5107223637Sbz		PF_ACPY(&key.addr[0], pd->src, key.af);
5108223637Sbz		PF_ACPY(&key.addr[1], pd->dst, key.af);
5109223637Sbz		key.port[0] = key.port[1] = 0;
5110130613Smlaier	} else {
5111223637Sbz		PF_ACPY(&key.addr[1], pd->src, key.af);
5112223637Sbz		PF_ACPY(&key.addr[0], pd->dst, key.af);
5113223637Sbz		key.port[1] = key.port[0] = 0;
5114130613Smlaier	}
5115126258Smlaier
5116240233Sglebius	STATE_LOOKUP(kif, &key, direction, *state, pd);
5117126258Smlaier
5118126258Smlaier	if (direction == (*state)->direction) {
5119126258Smlaier		src = &(*state)->src;
5120126258Smlaier		dst = &(*state)->dst;
5121126258Smlaier	} else {
5122126258Smlaier		src = &(*state)->dst;
5123126258Smlaier		dst = &(*state)->src;
5124126258Smlaier	}
5125126258Smlaier
5126126258Smlaier	/* update states */
5127126258Smlaier	if (src->state < PFOTHERS_SINGLE)
5128126258Smlaier		src->state = PFOTHERS_SINGLE;
5129126258Smlaier	if (dst->state == PFOTHERS_SINGLE)
5130126258Smlaier		dst->state = PFOTHERS_MULTIPLE;
5131126258Smlaier
5132126258Smlaier	/* update expire time */
5133240233Sglebius	(*state)->expire = time_uptime;
5134126258Smlaier	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5135126258Smlaier		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5136126258Smlaier	else
5137126258Smlaier		(*state)->timeout = PFTM_OTHER_SINGLE;
5138126258Smlaier
5139126258Smlaier	/* translate source/destination address, if necessary */
5140223637Sbz	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5141223637Sbz		struct pf_state_key *nk = (*state)->key[pd->didx];
5142223637Sbz
5143240233Sglebius		KASSERT(nk, ("%s: nk is null", __func__));
5144240233Sglebius		KASSERT(pd, ("%s: pd is null", __func__));
5145240233Sglebius		KASSERT(pd->src, ("%s: pd->src is null", __func__));
5146240233Sglebius		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
5147223637Sbz		switch (pd->af) {
5148126258Smlaier#ifdef INET
5149223637Sbz		case AF_INET:
5150223637Sbz			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5151126258Smlaier				pf_change_a(&pd->src->v4.s_addr,
5152223637Sbz				    pd->ip_sum,
5153223637Sbz				    nk->addr[pd->sidx].v4.s_addr,
5154126258Smlaier				    0);
5155223637Sbz
5156223637Sbz
5157223637Sbz			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5158126258Smlaier				pf_change_a(&pd->dst->v4.s_addr,
5159223637Sbz				    pd->ip_sum,
5160223637Sbz				    nk->addr[pd->didx].v4.s_addr,
5161126258Smlaier				    0);
5162223637Sbz
5163126258Smlaier				break;
5164126258Smlaier#endif /* INET */
5165126258Smlaier#ifdef INET6
5166223637Sbz		case AF_INET6:
5167223637Sbz			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5168223637Sbz				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
5169223637Sbz
5170223637Sbz			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5171223637Sbz				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
5172126258Smlaier#endif /* INET6 */
5173223637Sbz		}
5174126258Smlaier	}
5175126258Smlaier	return (PF_PASS);
5176126258Smlaier}
5177126258Smlaier
5178126258Smlaier/*
5179126258Smlaier * ipoff and off are measured from the start of the mbuf chain.
5180126258Smlaier * h must be at "ipoff" on the mbuf chain.
5181126258Smlaier */
5182126258Smlaiervoid *
5183126258Smlaierpf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5184126258Smlaier    u_short *actionp, u_short *reasonp, sa_family_t af)
5185126258Smlaier{
5186126258Smlaier	switch (af) {
5187126258Smlaier#ifdef INET
5188126258Smlaier	case AF_INET: {
5189126258Smlaier		struct ip	*h = mtod(m, struct ip *);
5190126258Smlaier		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5191126258Smlaier
5192126258Smlaier		if (fragoff) {
5193126258Smlaier			if (fragoff >= len)
5194126258Smlaier				ACTION_SET(actionp, PF_PASS);
5195126258Smlaier			else {
5196126258Smlaier				ACTION_SET(actionp, PF_DROP);
5197126258Smlaier				REASON_SET(reasonp, PFRES_FRAG);
5198126258Smlaier			}
5199126258Smlaier			return (NULL);
5200126258Smlaier		}
5201130613Smlaier		if (m->m_pkthdr.len < off + len ||
5202130613Smlaier		    ntohs(h->ip_len) < off + len) {
5203126258Smlaier			ACTION_SET(actionp, PF_DROP);
5204126258Smlaier			REASON_SET(reasonp, PFRES_SHORT);
5205126258Smlaier			return (NULL);
5206126258Smlaier		}
5207126258Smlaier		break;
5208126258Smlaier	}
5209126258Smlaier#endif /* INET */
5210126258Smlaier#ifdef INET6
5211126258Smlaier	case AF_INET6: {
5212126258Smlaier		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5213126258Smlaier
5214126258Smlaier		if (m->m_pkthdr.len < off + len ||
5215126258Smlaier		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5216126258Smlaier		    (unsigned)(off + len)) {
5217126258Smlaier			ACTION_SET(actionp, PF_DROP);
5218126258Smlaier			REASON_SET(reasonp, PFRES_SHORT);
5219126258Smlaier			return (NULL);
5220126258Smlaier		}
5221126258Smlaier		break;
5222126258Smlaier	}
5223126258Smlaier#endif /* INET6 */
5224126258Smlaier	}
5225126258Smlaier	m_copydata(m, off, len, p);
5226126258Smlaier	return (p);
5227126258Smlaier}
5228126258Smlaier
5229126258Smlaierint
5230231852Sbzpf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
5231231852Sbz    int rtableid)
5232126258Smlaier{
5233223637Sbz#ifdef RADIX_MPATH
5234223637Sbz	struct radix_node_head	*rnh;
5235223637Sbz#endif
5236126258Smlaier	struct sockaddr_in	*dst;
5237171168Smlaier	int			 ret = 1;
5238171168Smlaier	int			 check_mpath;
5239145836Smlaier#ifdef INET6
5240145836Smlaier	struct sockaddr_in6	*dst6;
5241145836Smlaier	struct route_in6	 ro;
5242145836Smlaier#else
5243126258Smlaier	struct route		 ro;
5244145836Smlaier#endif
5245171168Smlaier	struct radix_node	*rn;
5246171168Smlaier	struct rtentry		*rt;
5247171168Smlaier	struct ifnet		*ifp;
5248126258Smlaier
5249171168Smlaier	check_mpath = 0;
5250223637Sbz#ifdef RADIX_MPATH
5251223637Sbz	/* XXX: stick to table 0 for now */
5252223637Sbz	rnh = rt_tables_get_rnh(0, af);
5253223637Sbz	if (rnh != NULL && rn_mpath_capable(rnh))
5254223637Sbz		check_mpath = 1;
5255223637Sbz#endif
5256126258Smlaier	bzero(&ro, sizeof(ro));
5257145836Smlaier	switch (af) {
5258145836Smlaier	case AF_INET:
5259145836Smlaier		dst = satosin(&ro.ro_dst);
5260145836Smlaier		dst->sin_family = AF_INET;
5261145836Smlaier		dst->sin_len = sizeof(*dst);
5262145836Smlaier		dst->sin_addr = addr->v4;
5263145836Smlaier		break;
5264145836Smlaier#ifdef INET6
5265145836Smlaier	case AF_INET6:
5266223637Sbz		/*
5267223637Sbz		 * Skip check for addresses with embedded interface scope,
5268223637Sbz		 * as they would always match anyway.
5269223637Sbz		 */
5270223637Sbz		if (IN6_IS_SCOPE_EMBED(&addr->v6))
5271223637Sbz			goto out;
5272145836Smlaier		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5273145836Smlaier		dst6->sin6_family = AF_INET6;
5274145836Smlaier		dst6->sin6_len = sizeof(*dst6);
5275145836Smlaier		dst6->sin6_addr = addr->v6;
5276145836Smlaier		break;
5277145836Smlaier#endif /* INET6 */
5278145836Smlaier	default:
5279145836Smlaier		return (0);
5280145836Smlaier	}
5281145836Smlaier
5282171168Smlaier	/* Skip checks for ipsec interfaces */
5283171168Smlaier	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5284171168Smlaier		goto out;
5285171168Smlaier
5286231852Sbz	switch (af) {
5287231852Sbz#ifdef INET6
5288231852Sbz	case AF_INET6:
5289231852Sbz		in6_rtalloc_ign(&ro, 0, rtableid);
5290231852Sbz		break;
5291231852Sbz#endif
5292222529Sbz#ifdef INET
5293231852Sbz	case AF_INET:
5294231852Sbz		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
5295231852Sbz		break;
5296222529Sbz#endif
5297231852Sbz	default:
5298231852Sbz		rtalloc_ign((struct route *)&ro, 0);	/* No/default FIB. */
5299231852Sbz		break;
5300231852Sbz	}
5301126258Smlaier
5302126258Smlaier	if (ro.ro_rt != NULL) {
5303171168Smlaier		/* No interface given, this is a no-route check */
5304171168Smlaier		if (kif == NULL)
5305171168Smlaier			goto out;
5306171168Smlaier
5307171168Smlaier		if (kif->pfik_ifp == NULL) {
5308171168Smlaier			ret = 0;
5309171168Smlaier			goto out;
5310171168Smlaier		}
5311171168Smlaier
5312171168Smlaier		/* Perform uRPF check if passed input interface */
5313171168Smlaier		ret = 0;
5314171168Smlaier		rn = (struct radix_node *)ro.ro_rt;
5315171168Smlaier		do {
5316171168Smlaier			rt = (struct rtentry *)rn;
5317240233Sglebius			ifp = rt->rt_ifp;
5318171168Smlaier
5319171168Smlaier			if (kif->pfik_ifp == ifp)
5320171168Smlaier				ret = 1;
5321223637Sbz#ifdef RADIX_MPATH
5322171168Smlaier			rn = rn_mpath_next(rn);
5323171168Smlaier#endif
5324171168Smlaier		} while (check_mpath == 1 && rn != NULL && ret == 0);
5325171168Smlaier	} else
5326171168Smlaier		ret = 0;
5327171168Smlaierout:
5328171168Smlaier	if (ro.ro_rt != NULL)
5329126258Smlaier		RTFREE(ro.ro_rt);
5330171168Smlaier	return (ret);
5331145836Smlaier}
5332145836Smlaier
5333222529Sbz#ifdef INET
5334240233Sglebiusstatic void
5335126258Smlaierpf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5336335252Skp    struct pf_state *s, struct pf_pdesc *pd, struct inpcb *inp)
5337126258Smlaier{
5338126258Smlaier	struct mbuf		*m0, *m1;
5339240233Sglebius	struct sockaddr_in	dst;
5340126258Smlaier	struct ip		*ip;
5341126258Smlaier	struct ifnet		*ifp = NULL;
5342126258Smlaier	struct pf_addr		 naddr;
5343130613Smlaier	struct pf_src_node	*sn = NULL;
5344126258Smlaier	int			 error = 0;
5345242161Sglebius	uint16_t		 ip_len, ip_off;
5346126258Smlaier
5347240233Sglebius	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
5348240233Sglebius	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
5349240233Sglebius	    __func__));
5350126258Smlaier
5351240233Sglebius	if ((pd->pf_mtag == NULL &&
5352240233Sglebius	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
5353240233Sglebius	    pd->pf_mtag->routed++ > 3) {
5354171168Smlaier		m0 = *m;
5355171168Smlaier		*m = NULL;
5356240233Sglebius		goto bad_locked;
5357132303Smlaier	}
5358132303Smlaier
5359126258Smlaier	if (r->rt == PF_DUPTO) {
5360240233Sglebius		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
5361240233Sglebius			if (s)
5362240233Sglebius				PF_STATE_UNLOCK(s);
5363126258Smlaier			return;
5364240233Sglebius		}
5365126258Smlaier	} else {
5366240233Sglebius		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
5367240233Sglebius			if (s)
5368240233Sglebius				PF_STATE_UNLOCK(s);
5369126258Smlaier			return;
5370240233Sglebius		}
5371126258Smlaier		m0 = *m;
5372126258Smlaier	}
5373126258Smlaier
5374126258Smlaier	ip = mtod(m0, struct ip *);
5375126258Smlaier
5376240233Sglebius	bzero(&dst, sizeof(dst));
5377240233Sglebius	dst.sin_family = AF_INET;
5378240233Sglebius	dst.sin_len = sizeof(dst);
5379240233Sglebius	dst.sin_addr = ip->ip_dst;
5380126258Smlaier
5381126258Smlaier	if (r->rt == PF_FASTROUTE) {
5382240233Sglebius		struct rtentry *rt;
5383240233Sglebius
5384240233Sglebius		if (s)
5385240233Sglebius			PF_STATE_UNLOCK(s);
5386240233Sglebius		rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0));
5387240233Sglebius		if (rt == NULL) {
5388196039Srwatson			KMOD_IPSTAT_INC(ips_noroute);
5389240233Sglebius			error = EHOSTUNREACH;
5390126258Smlaier			goto bad;
5391126258Smlaier		}
5392126258Smlaier
5393240233Sglebius		ifp = rt->rt_ifp;
5394263478Sglebius		counter_u64_add(rt->rt_pksent, 1);
5395126258Smlaier
5396240233Sglebius		if (rt->rt_flags & RTF_GATEWAY)
5397240233Sglebius			bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst));
5398240233Sglebius		RTFREE_LOCKED(rt);
5399126258Smlaier	} else {
5400145836Smlaier		if (TAILQ_EMPTY(&r->rpool.list)) {
5401145836Smlaier			DPFPRINTF(PF_DEBUG_URGENT,
5402240233Sglebius			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
5403240233Sglebius			goto bad_locked;
5404145836Smlaier		}
5405126258Smlaier		if (s == NULL) {
5406130613Smlaier			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5407130613Smlaier			    &naddr, NULL, &sn);
5408126258Smlaier			if (!PF_AZERO(&naddr, AF_INET))
5409240233Sglebius				dst.sin_addr.s_addr = naddr.v4.s_addr;
5410130613Smlaier			ifp = r->rpool.cur->kif ?
5411130613Smlaier			    r->rpool.cur->kif->pfik_ifp : NULL;
5412126258Smlaier		} else {
5413126258Smlaier			if (!PF_AZERO(&s->rt_addr, AF_INET))
5414240233Sglebius				dst.sin_addr.s_addr =
5415126258Smlaier				    s->rt_addr.v4.s_addr;
5416130613Smlaier			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5417240233Sglebius			PF_STATE_UNLOCK(s);
5418126258Smlaier		}
5419126258Smlaier	}
5420126258Smlaier	if (ifp == NULL)
5421126258Smlaier		goto bad;
5422126258Smlaier
5423130639Smlaier	if (oifp != ifp) {
5424335252Skp		if (pf_test(PF_OUT, ifp, &m0, inp) != PF_PASS)
5425126258Smlaier			goto bad;
5426126258Smlaier		else if (m0 == NULL)
5427126258Smlaier			goto done;
5428145836Smlaier		if (m0->m_len < sizeof(struct ip)) {
5429145836Smlaier			DPFPRINTF(PF_DEBUG_URGENT,
5430240233Sglebius			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
5431145836Smlaier			goto bad;
5432145836Smlaier		}
5433126258Smlaier		ip = mtod(m0, struct ip *);
5434126258Smlaier	}
5435126258Smlaier
5436240233Sglebius	if (ifp->if_flags & IFF_LOOPBACK)
5437240233Sglebius		m0->m_flags |= M_SKIP_FIREWALL;
5438240233Sglebius
5439241344Sglebius	ip_len = ntohs(ip->ip_len);
5440241344Sglebius	ip_off = ntohs(ip->ip_off);
5441240233Sglebius
5442240233Sglebius	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
5443126261Smlaier	m0->m_pkthdr.csum_flags |= CSUM_IP;
5444242161Sglebius	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
5445126261Smlaier		in_delayed_cksum(m0);
5446242161Sglebius		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
5447126261Smlaier	}
5448240233Sglebius#ifdef SCTP
5449242161Sglebius	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
5450240233Sglebius		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
5451242161Sglebius		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
5452240233Sglebius	}
5453240233Sglebius#endif
5454126261Smlaier
5455130613Smlaier	/*
5456240233Sglebius	 * If small enough for interface, or the interface will take
5457240233Sglebius	 * care of the fragmentation for us, we can just send directly.
5458130613Smlaier	 */
5459241344Sglebius	if (ip_len <= ifp->if_mtu ||
5460240233Sglebius	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
5461241344Sglebius	    ((ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
5462223637Sbz		ip->ip_sum = 0;
5463242161Sglebius		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
5464126258Smlaier			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5465242161Sglebius			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
5466242161Sglebius		}
5467254523Sandre		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */
5468240233Sglebius		error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
5469126258Smlaier		goto done;
5470126258Smlaier	}
5471223637Sbz
5472240233Sglebius	/* Balk when DF bit is set or the interface didn't support TSO. */
5473241344Sglebius	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
5474240233Sglebius		error = EMSGSIZE;
5475196039Srwatson		KMOD_IPSTAT_INC(ips_cantfrag);
5476126258Smlaier		if (r->rt != PF_DUPTO) {
5477126258Smlaier			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5478145886Smlaier			    ifp->if_mtu);
5479126258Smlaier			goto done;
5480126258Smlaier		} else
5481126258Smlaier			goto bad;
5482126258Smlaier	}
5483126258Smlaier
5484242161Sglebius	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
5485240233Sglebius	if (error)
5486126258Smlaier		goto bad;
5487126258Smlaier
5488240233Sglebius	for (; m0; m0 = m1) {
5489126258Smlaier		m1 = m0->m_nextpkt;
5490240233Sglebius		m0->m_nextpkt = NULL;
5491126261Smlaier		if (error == 0) {
5492254523Sandre			m_clrprotoflags(m0);
5493240233Sglebius			error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
5494126261Smlaier		} else
5495126258Smlaier			m_freem(m0);
5496126258Smlaier	}
5497126258Smlaier
5498126258Smlaier	if (error == 0)
5499196039Srwatson		KMOD_IPSTAT_INC(ips_fragmented);
5500126258Smlaier
5501126258Smlaierdone:
5502126258Smlaier	if (r->rt != PF_DUPTO)
5503126258Smlaier		*m = NULL;
5504126258Smlaier	return;
5505126258Smlaier
5506240233Sglebiusbad_locked:
5507240233Sglebius	if (s)
5508240233Sglebius		PF_STATE_UNLOCK(s);
5509126258Smlaierbad:
5510126258Smlaier	m_freem(m0);
5511126258Smlaier	goto done;
5512126258Smlaier}
5513126258Smlaier#endif /* INET */
5514126258Smlaier
5515126258Smlaier#ifdef INET6
5516240233Sglebiusstatic void
5517126258Smlaierpf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5518335252Skp    struct pf_state *s, struct pf_pdesc *pd, struct inpcb *inp)
5519126258Smlaier{
5520126258Smlaier	struct mbuf		*m0;
5521240233Sglebius	struct sockaddr_in6	dst;
5522126258Smlaier	struct ip6_hdr		*ip6;
5523126258Smlaier	struct ifnet		*ifp = NULL;
5524126258Smlaier	struct pf_addr		 naddr;
5525130613Smlaier	struct pf_src_node	*sn = NULL;
5526126258Smlaier
5527240233Sglebius	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
5528240233Sglebius	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
5529240233Sglebius	    __func__));
5530126258Smlaier
5531240233Sglebius	if ((pd->pf_mtag == NULL &&
5532240233Sglebius	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
5533240233Sglebius	    pd->pf_mtag->routed++ > 3) {
5534171168Smlaier		m0 = *m;
5535171168Smlaier		*m = NULL;
5536240233Sglebius		goto bad_locked;
5537132303Smlaier	}
5538132303Smlaier
5539126258Smlaier	if (r->rt == PF_DUPTO) {
5540240233Sglebius		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
5541240233Sglebius			if (s)
5542240233Sglebius				PF_STATE_UNLOCK(s);
5543126258Smlaier			return;
5544240233Sglebius		}
5545126258Smlaier	} else {
5546240233Sglebius		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
5547240233Sglebius			if (s)
5548240233Sglebius				PF_STATE_UNLOCK(s);
5549126258Smlaier			return;
5550240233Sglebius		}
5551126258Smlaier		m0 = *m;
5552126258Smlaier	}
5553126258Smlaier
5554126258Smlaier	ip6 = mtod(m0, struct ip6_hdr *);
5555126258Smlaier
5556240233Sglebius	bzero(&dst, sizeof(dst));
5557240233Sglebius	dst.sin6_family = AF_INET6;
5558240233Sglebius	dst.sin6_len = sizeof(dst);
5559240233Sglebius	dst.sin6_addr = ip6->ip6_dst;
5560126258Smlaier
5561171168Smlaier	/* Cheat. XXX why only in the v6 case??? */
5562126258Smlaier	if (r->rt == PF_FASTROUTE) {
5563240233Sglebius		if (s)
5564240233Sglebius			PF_STATE_UNLOCK(s);
5565132280Smlaier		m0->m_flags |= M_SKIP_FIREWALL;
5566126261Smlaier		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5567280251Sae		*m = NULL;
5568126258Smlaier		return;
5569126258Smlaier	}
5570126258Smlaier
5571145836Smlaier	if (TAILQ_EMPTY(&r->rpool.list)) {
5572145836Smlaier		DPFPRINTF(PF_DEBUG_URGENT,
5573240233Sglebius		    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
5574240233Sglebius		goto bad_locked;
5575145836Smlaier	}
5576126258Smlaier	if (s == NULL) {
5577130613Smlaier		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5578130613Smlaier		    &naddr, NULL, &sn);
5579126258Smlaier		if (!PF_AZERO(&naddr, AF_INET6))
5580240233Sglebius			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5581126258Smlaier			    &naddr, AF_INET6);
5582130613Smlaier		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5583126258Smlaier	} else {
5584126258Smlaier		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5585240233Sglebius			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5586126258Smlaier			    &s->rt_addr, AF_INET6);
5587130613Smlaier		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5588126258Smlaier	}
5589240233Sglebius
5590240233Sglebius	if (s)
5591240233Sglebius		PF_STATE_UNLOCK(s);
5592240233Sglebius
5593126258Smlaier	if (ifp == NULL)
5594126258Smlaier		goto bad;
5595126258Smlaier
5596126258Smlaier	if (oifp != ifp) {
5597335252Skp		if (pf_test6(PF_FWD, ifp, &m0, inp) != PF_PASS)
5598132303Smlaier			goto bad;
5599132303Smlaier		else if (m0 == NULL)
5600132303Smlaier			goto done;
5601145836Smlaier		if (m0->m_len < sizeof(struct ip6_hdr)) {
5602145836Smlaier			DPFPRINTF(PF_DEBUG_URGENT,
5603240233Sglebius			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
5604240233Sglebius			    __func__));
5605145836Smlaier			goto bad;
5606145836Smlaier		}
5607132303Smlaier		ip6 = mtod(m0, struct ip6_hdr *);
5608126258Smlaier	}
5609126258Smlaier
5610240233Sglebius	if (ifp->if_flags & IFF_LOOPBACK)
5611240233Sglebius		m0->m_flags |= M_SKIP_FIREWALL;
5612240233Sglebius
5613290669Skp	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
5614290669Skp	    ~ifp->if_hwassist) {
5615290669Skp		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
5616290669Skp		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
5617290669Skp		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
5618290669Skp	}
5619290669Skp
5620126258Smlaier	/*
5621126258Smlaier	 * If the packet is too large for the outgoing interface,
5622126258Smlaier	 * send back an icmp6 error.
5623126258Smlaier	 */
5624240233Sglebius	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
5625240233Sglebius		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5626240233Sglebius	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
5627240233Sglebius		nd6_output(ifp, ifp, m0, &dst, NULL);
5628240233Sglebius	else {
5629126258Smlaier		in6_ifstat_inc(ifp, ifs6_in_toobig);
5630126258Smlaier		if (r->rt != PF_DUPTO)
5631126258Smlaier			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5632126258Smlaier		else
5633126258Smlaier			goto bad;
5634126258Smlaier	}
5635126258Smlaier
5636126258Smlaierdone:
5637126258Smlaier	if (r->rt != PF_DUPTO)
5638126258Smlaier		*m = NULL;
5639126258Smlaier	return;
5640126258Smlaier
5641240233Sglebiusbad_locked:
5642240233Sglebius	if (s)
5643240233Sglebius		PF_STATE_UNLOCK(s);
5644126258Smlaierbad:
5645126258Smlaier	m_freem(m0);
5646126258Smlaier	goto done;
5647126258Smlaier}
5648126258Smlaier#endif /* INET6 */
5649126258Smlaier
5650126258Smlaier/*
5651132566Smlaier * FreeBSD supports cksum offloads for the following drivers.
5652137413Sru *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
5653132566Smlaier *   ti(4), txp(4), xl(4)
5654132566Smlaier *
5655132566Smlaier * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
5656132566Smlaier *  network driver performed cksum including pseudo header, need to verify
5657132566Smlaier *   csum_data
5658132566Smlaier * CSUM_DATA_VALID :
5659132566Smlaier *  network driver performed cksum, needs to additional pseudo header
5660132566Smlaier *  cksum computation with partial csum_data(i.e. lack of H/W support for
5661132566Smlaier *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
5662132566Smlaier *
5663132566Smlaier * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
5664132566Smlaier * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
5665132566Smlaier * TCP/UDP layer.
5666132566Smlaier * Also, set csum_data to 0xffff to force cksum validation.
5667126261Smlaier */
5668240233Sglebiusstatic int
5669126261Smlaierpf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5670126261Smlaier{
5671126261Smlaier	u_int16_t sum = 0;
5672126261Smlaier	int hw_assist = 0;
5673126261Smlaier	struct ip *ip;
5674126261Smlaier
5675126261Smlaier	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5676126261Smlaier		return (1);
5677126261Smlaier	if (m->m_pkthdr.len < off + len)
5678126261Smlaier		return (1);
5679126261Smlaier
5680126261Smlaier	switch (p) {
5681126261Smlaier	case IPPROTO_TCP:
5682126261Smlaier		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5683126261Smlaier			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5684126261Smlaier				sum = m->m_pkthdr.csum_data;
5685126261Smlaier			} else {
5686223637Sbz				ip = mtod(m, struct ip *);
5687126261Smlaier				sum = in_pseudo(ip->ip_src.s_addr,
5688240233Sglebius				ip->ip_dst.s_addr, htonl((u_short)len +
5689223637Sbz				m->m_pkthdr.csum_data + IPPROTO_TCP));
5690126261Smlaier			}
5691126261Smlaier			sum ^= 0xffff;
5692126261Smlaier			++hw_assist;
5693126261Smlaier		}
5694126261Smlaier		break;
5695126261Smlaier	case IPPROTO_UDP:
5696126261Smlaier		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5697126261Smlaier			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5698126261Smlaier				sum = m->m_pkthdr.csum_data;
5699126261Smlaier			} else {
5700223637Sbz				ip = mtod(m, struct ip *);
5701126261Smlaier				sum = in_pseudo(ip->ip_src.s_addr,
5702223637Sbz				ip->ip_dst.s_addr, htonl((u_short)len +
5703223637Sbz				m->m_pkthdr.csum_data + IPPROTO_UDP));
5704126261Smlaier			}
5705126261Smlaier			sum ^= 0xffff;
5706126261Smlaier			++hw_assist;
5707223637Sbz		}
5708126261Smlaier		break;
5709126261Smlaier	case IPPROTO_ICMP:
5710126261Smlaier#ifdef INET6
5711126261Smlaier	case IPPROTO_ICMPV6:
5712126261Smlaier#endif /* INET6 */
5713126261Smlaier		break;
5714126261Smlaier	default:
5715126261Smlaier		return (1);
5716126261Smlaier	}
5717126261Smlaier
5718126261Smlaier	if (!hw_assist) {
5719126261Smlaier		switch (af) {
5720126261Smlaier		case AF_INET:
5721126261Smlaier			if (p == IPPROTO_ICMP) {
5722126261Smlaier				if (m->m_len < off)
5723126261Smlaier					return (1);
5724126261Smlaier				m->m_data += off;
5725126261Smlaier				m->m_len -= off;
5726126261Smlaier				sum = in_cksum(m, len);
5727126261Smlaier				m->m_data -= off;
5728126261Smlaier				m->m_len += off;
5729126261Smlaier			} else {
5730126261Smlaier				if (m->m_len < sizeof(struct ip))
5731126261Smlaier					return (1);
5732126261Smlaier				sum = in4_cksum(m, p, off, len);
5733126261Smlaier			}
5734126261Smlaier			break;
5735126261Smlaier#ifdef INET6
5736126261Smlaier		case AF_INET6:
5737126261Smlaier			if (m->m_len < sizeof(struct ip6_hdr))
5738126261Smlaier				return (1);
5739126261Smlaier			sum = in6_cksum(m, p, off, len);
5740126261Smlaier			break;
5741126261Smlaier#endif /* INET6 */
5742126261Smlaier		default:
5743126261Smlaier			return (1);
5744126261Smlaier		}
5745126261Smlaier	}
5746126261Smlaier	if (sum) {
5747126261Smlaier		switch (p) {
5748126261Smlaier		case IPPROTO_TCP:
5749183550Szec		    {
5750196039Srwatson			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
5751126261Smlaier			break;
5752183550Szec		    }
5753126261Smlaier		case IPPROTO_UDP:
5754183550Szec		    {
5755196039Srwatson			KMOD_UDPSTAT_INC(udps_badsum);
5756126261Smlaier			break;
5757183550Szec		    }
5758222529Sbz#ifdef INET
5759126261Smlaier		case IPPROTO_ICMP:
5760183550Szec		    {
5761196039Srwatson			KMOD_ICMPSTAT_INC(icps_checksum);
5762126261Smlaier			break;
5763183550Szec		    }
5764222529Sbz#endif
5765126261Smlaier#ifdef INET6
5766126261Smlaier		case IPPROTO_ICMPV6:
5767183550Szec		    {
5768196039Srwatson			KMOD_ICMP6STAT_INC(icp6s_checksum);
5769126261Smlaier			break;
5770183550Szec		    }
5771126261Smlaier#endif /* INET6 */
5772126261Smlaier		}
5773126261Smlaier		return (1);
5774132566Smlaier	} else {
5775132566Smlaier		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
5776132566Smlaier			m->m_pkthdr.csum_flags |=
5777132566Smlaier			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5778132566Smlaier			m->m_pkthdr.csum_data = 0xffff;
5779132566Smlaier		}
5780126261Smlaier	}
5781126261Smlaier	return (0);
5782126261Smlaier}
5783223637Sbz
5784126258Smlaier
5785145836Smlaier#ifdef INET
5786126258Smlaierint
5787240233Sglebiuspf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
5788126258Smlaier{
5789130613Smlaier	struct pfi_kif		*kif;
5790130613Smlaier	u_short			 action, reason = 0, log = 0;
5791130613Smlaier	struct mbuf		*m = *m0;
5792223637Sbz	struct ip		*h = NULL;
5793223637Sbz	struct m_tag		*ipfwtag;
5794223637Sbz	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
5795130613Smlaier	struct pf_state		*s = NULL;
5796130613Smlaier	struct pf_ruleset	*ruleset = NULL;
5797130613Smlaier	struct pf_pdesc		 pd;
5798130613Smlaier	int			 off, dirndx, pqid = 0;
5799126258Smlaier
5800240233Sglebius	M_ASSERTPKTHDR(m);
5801240233Sglebius
5802223637Sbz	if (!V_pf_status.running)
5803171168Smlaier		return (PF_PASS);
5804126258Smlaier
5805171168Smlaier	memset(&pd, 0, sizeof(pd));
5806145836Smlaier
5807240233Sglebius	kif = (struct pfi_kif *)ifp->if_pf_kif;
5808240233Sglebius
5809130613Smlaier	if (kif == NULL) {
5810145836Smlaier		DPFPRINTF(PF_DEBUG_URGENT,
5811145836Smlaier		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
5812130613Smlaier		return (PF_DROP);
5813130613Smlaier	}
5814223637Sbz	if (kif->pfik_flags & PFI_IFLAG_SKIP)
5815145836Smlaier		return (PF_PASS);
5816130613Smlaier
5817240233Sglebius	if (m->m_flags & M_SKIP_FIREWALL)
5818240233Sglebius		return (PF_PASS);
5819126258Smlaier
5820240233Sglebius	pd.pf_mtag = pf_find_mtag(m);
5821240233Sglebius
5822240233Sglebius	PF_RULES_RLOCK();
5823240233Sglebius
5824223637Sbz	if (ip_divert_ptr != NULL &&
5825223637Sbz	    ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
5826223637Sbz		struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
5827223637Sbz		if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
5828240233Sglebius			if (pd.pf_mtag == NULL &&
5829240233Sglebius			    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
5830240233Sglebius				action = PF_DROP;
5831240233Sglebius				goto done;
5832240233Sglebius			}
5833223637Sbz			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
5834223637Sbz			m_tag_delete(m, ipfwtag);
5835223637Sbz		}
5836240233Sglebius		if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
5837223637Sbz			m->m_flags |= M_FASTFWD_OURS;
5838223637Sbz			pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
5839223637Sbz		}
5840240233Sglebius	} else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
5841240233Sglebius		/* We do IP header normalization and packet reassembly here */
5842126258Smlaier		action = PF_DROP;
5843126258Smlaier		goto done;
5844126258Smlaier	}
5845223637Sbz	m = *m0;	/* pf_normalize messes with m0 */
5846126258Smlaier	h = mtod(m, struct ip *);
5847126258Smlaier
5848126258Smlaier	off = h->ip_hl << 2;
5849240233Sglebius	if (off < (int)sizeof(struct ip)) {
5850126258Smlaier		action = PF_DROP;
5851126258Smlaier		REASON_SET(&reason, PFRES_SHORT);
5852126258Smlaier		log = 1;
5853126258Smlaier		goto done;
5854126258Smlaier	}
5855126258Smlaier
5856126258Smlaier	pd.src = (struct pf_addr *)&h->ip_src;
5857126258Smlaier	pd.dst = (struct pf_addr *)&h->ip_dst;
5858223637Sbz	pd.sport = pd.dport = NULL;
5859126258Smlaier	pd.ip_sum = &h->ip_sum;
5860223637Sbz	pd.proto_sum = NULL;
5861126258Smlaier	pd.proto = h->ip_p;
5862223637Sbz	pd.dir = dir;
5863223637Sbz	pd.sidx = (dir == PF_IN) ? 0 : 1;
5864223637Sbz	pd.didx = (dir == PF_IN) ? 1 : 0;
5865126258Smlaier	pd.af = AF_INET;
5866126258Smlaier	pd.tos = h->ip_tos;
5867126258Smlaier	pd.tot_len = ntohs(h->ip_len);
5868126258Smlaier
5869126258Smlaier	/* handle fragments that didn't get reassembled by normalization */
5870126258Smlaier	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5871130613Smlaier		action = pf_test_fragment(&r, dir, kif, m, h,
5872126258Smlaier		    &pd, &a, &ruleset);
5873126258Smlaier		goto done;
5874126258Smlaier	}
5875126258Smlaier
5876126258Smlaier	switch (h->ip_p) {
5877126258Smlaier
5878126258Smlaier	case IPPROTO_TCP: {
5879126258Smlaier		struct tcphdr	th;
5880126258Smlaier
5881126258Smlaier		pd.hdr.tcp = &th;
5882126258Smlaier		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5883126258Smlaier		    &action, &reason, AF_INET)) {
5884126258Smlaier			log = action != PF_PASS;
5885126258Smlaier			goto done;
5886126258Smlaier		}
5887126258Smlaier		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5888126258Smlaier		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5889126258Smlaier			pqid = 1;
5890130613Smlaier		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5891126258Smlaier		if (action == PF_DROP)
5892130613Smlaier			goto done;
5893130613Smlaier		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5894126258Smlaier		    &reason);
5895126258Smlaier		if (action == PF_PASS) {
5896223637Sbz			if (pfsync_update_state_ptr != NULL)
5897223637Sbz				pfsync_update_state_ptr(s);
5898126258Smlaier			r = s->rule.ptr;
5899130613Smlaier			a = s->anchor.ptr;
5900126258Smlaier			log = s->log;
5901126258Smlaier		} else if (s == NULL)
5902240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5903240233Sglebius			    &a, &ruleset, inp);
5904126258Smlaier		break;
5905126258Smlaier	}
5906126258Smlaier
5907126258Smlaier	case IPPROTO_UDP: {
5908126258Smlaier		struct udphdr	uh;
5909126258Smlaier
5910126258Smlaier		pd.hdr.udp = &uh;
5911126258Smlaier		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5912126258Smlaier		    &action, &reason, AF_INET)) {
5913126258Smlaier			log = action != PF_PASS;
5914126258Smlaier			goto done;
5915126258Smlaier		}
5916130613Smlaier		if (uh.uh_dport == 0 ||
5917130613Smlaier		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5918130613Smlaier		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5919130613Smlaier			action = PF_DROP;
5920171168Smlaier			REASON_SET(&reason, PFRES_SHORT);
5921130613Smlaier			goto done;
5922130613Smlaier		}
5923130613Smlaier		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5924126258Smlaier		if (action == PF_PASS) {
5925223637Sbz			if (pfsync_update_state_ptr != NULL)
5926223637Sbz				pfsync_update_state_ptr(s);
5927126258Smlaier			r = s->rule.ptr;
5928126258Smlaier			a = s->anchor.ptr;
5929126258Smlaier			log = s->log;
5930126258Smlaier		} else if (s == NULL)
5931240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5932240233Sglebius			    &a, &ruleset, inp);
5933126258Smlaier		break;
5934126258Smlaier	}
5935126258Smlaier
5936126258Smlaier	case IPPROTO_ICMP: {
5937126258Smlaier		struct icmp	ih;
5938126258Smlaier
5939126258Smlaier		pd.hdr.icmp = &ih;
5940126258Smlaier		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5941126258Smlaier		    &action, &reason, AF_INET)) {
5942126258Smlaier			log = action != PF_PASS;
5943126258Smlaier			goto done;
5944126258Smlaier		}
5945145836Smlaier		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
5946145836Smlaier		    &reason);
5947126258Smlaier		if (action == PF_PASS) {
5948223637Sbz			if (pfsync_update_state_ptr != NULL)
5949223637Sbz				pfsync_update_state_ptr(s);
5950126258Smlaier			r = s->rule.ptr;
5951126258Smlaier			a = s->anchor.ptr;
5952126258Smlaier			log = s->log;
5953126258Smlaier		} else if (s == NULL)
5954240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5955240233Sglebius			    &a, &ruleset, inp);
5956126258Smlaier		break;
5957126258Smlaier	}
5958126258Smlaier
5959223637Sbz#ifdef INET6
5960223637Sbz	case IPPROTO_ICMPV6: {
5961223637Sbz		action = PF_DROP;
5962223637Sbz		DPFPRINTF(PF_DEBUG_MISC,
5963223637Sbz		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
5964223637Sbz		goto done;
5965223637Sbz	}
5966223637Sbz#endif
5967223637Sbz
5968126258Smlaier	default:
5969223637Sbz		action = pf_test_state_other(&s, dir, kif, m, &pd);
5970126258Smlaier		if (action == PF_PASS) {
5971223637Sbz			if (pfsync_update_state_ptr != NULL)
5972223637Sbz				pfsync_update_state_ptr(s);
5973126258Smlaier			r = s->rule.ptr;
5974126258Smlaier			a = s->anchor.ptr;
5975126258Smlaier			log = s->log;
5976126258Smlaier		} else if (s == NULL)
5977240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5978240233Sglebius			    &a, &ruleset, inp);
5979126258Smlaier		break;
5980126258Smlaier	}
5981126258Smlaier
5982126258Smlaierdone:
5983240233Sglebius	PF_RULES_RUNLOCK();
5984126258Smlaier	if (action == PF_PASS && h->ip_hl > 5 &&
5985200930Sdelphij	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
5986126258Smlaier		action = PF_DROP;
5987145836Smlaier		REASON_SET(&reason, PFRES_IPOPTIONS);
5988286125Sgarga		log = r->log;
5989126258Smlaier		DPFPRINTF(PF_DEBUG_MISC,
5990126258Smlaier		    ("pf: dropping packet with ip options\n"));
5991126258Smlaier	}
5992126258Smlaier
5993240233Sglebius	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
5994240233Sglebius		action = PF_DROP;
5995240233Sglebius		REASON_SET(&reason, PFRES_MEMORY);
5996240233Sglebius	}
5997240233Sglebius	if (r->rtableid >= 0)
5998240233Sglebius		M_SETFIB(m, r->rtableid);
5999145836Smlaier
6000126258Smlaier#ifdef ALTQ
6001126258Smlaier	if (action == PF_PASS && r->qid) {
6002240233Sglebius		if (pd.pf_mtag == NULL &&
6003240233Sglebius		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
6004240233Sglebius			action = PF_DROP;
6005240233Sglebius			REASON_SET(&reason, PFRES_MEMORY);
6006285941Sglebius		} else {
6007298091Sloos			if (s != NULL)
6008298091Sloos				pd.pf_mtag->qid_hash = pf_state_hash(s);
6009285941Sglebius			if (pqid || (pd.tos & IPTOS_LOWDELAY))
6010285941Sglebius				pd.pf_mtag->qid = r->pqid;
6011285941Sglebius			else
6012285941Sglebius				pd.pf_mtag->qid = r->qid;
6013285941Sglebius			/* Add hints for ecn. */
6014285941Sglebius			pd.pf_mtag->hdr = h;
6015240233Sglebius		}
6016223637Sbz
6017126258Smlaier	}
6018145836Smlaier#endif /* ALTQ */
6019126258Smlaier
6020130613Smlaier	/*
6021130613Smlaier	 * connections redirected to loopback should not match sockets
6022130613Smlaier	 * bound specifically to loopback due to security implications,
6023130613Smlaier	 * see tcp_input() and in_pcblookup_listen().
6024130613Smlaier	 */
6025130613Smlaier	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6026130613Smlaier	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6027130613Smlaier	    (s->nat_rule.ptr->action == PF_RDR ||
6028130613Smlaier	    s->nat_rule.ptr->action == PF_BINAT) &&
6029171168Smlaier	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
6030223637Sbz		m->m_flags |= M_SKIP_FIREWALL;
6031171168Smlaier
6032240233Sglebius	if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL &&
6033240233Sglebius	    !PACKET_LOOPED(&pd)) {
6034223637Sbz
6035223637Sbz		ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
6036240233Sglebius		    sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
6037223637Sbz		if (ipfwtag != NULL) {
6038225171Sbz			((struct ipfw_rule_ref *)(ipfwtag+1))->info =
6039225171Sbz			    ntohs(r->divert.port);
6040223637Sbz			((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
6041223637Sbz
6042240233Sglebius			if (s)
6043240233Sglebius				PF_STATE_UNLOCK(s);
6044240233Sglebius
6045223637Sbz			m_tag_prepend(m, ipfwtag);
6046223637Sbz			if (m->m_flags & M_FASTFWD_OURS) {
6047240233Sglebius				if (pd.pf_mtag == NULL &&
6048240233Sglebius				    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
6049240233Sglebius					action = PF_DROP;
6050240233Sglebius					REASON_SET(&reason, PFRES_MEMORY);
6051240233Sglebius					log = 1;
6052240233Sglebius					DPFPRINTF(PF_DEBUG_MISC,
6053240233Sglebius					    ("pf: failed to allocate tag\n"));
6054285941Sglebius				} else {
6055285941Sglebius					pd.pf_mtag->flags |=
6056285941Sglebius					    PF_FASTFWD_OURS_PRESENT;
6057285941Sglebius					m->m_flags &= ~M_FASTFWD_OURS;
6058240233Sglebius				}
6059223637Sbz			}
6060240233Sglebius			ip_divert_ptr(*m0, dir ==  PF_IN ? DIR_IN : DIR_OUT);
6061240233Sglebius			*m0 = NULL;
6062223637Sbz
6063223637Sbz			return (action);
6064223637Sbz		} else {
6065223637Sbz			/* XXX: ipfw has the same behaviour! */
6066223637Sbz			action = PF_DROP;
6067223637Sbz			REASON_SET(&reason, PFRES_MEMORY);
6068223637Sbz			log = 1;
6069223637Sbz			DPFPRINTF(PF_DEBUG_MISC,
6070223637Sbz			    ("pf: failed to allocate divert tag\n"));
6071223637Sbz		}
6072223637Sbz	}
6073223637Sbz
6074171168Smlaier	if (log) {
6075171168Smlaier		struct pf_rule *lr;
6076171168Smlaier
6077171168Smlaier		if (s != NULL && s->nat_rule.ptr != NULL &&
6078171168Smlaier		    s->nat_rule.ptr->log & PF_LOG_ALL)
6079171168Smlaier			lr = s->nat_rule.ptr;
6080171168Smlaier		else
6081171168Smlaier			lr = r;
6082240233Sglebius		PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd,
6083240233Sglebius		    (s == NULL));
6084130613Smlaier	}
6085130613Smlaier
6086130613Smlaier	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6087130613Smlaier	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6088130613Smlaier
6089130613Smlaier	if (action == PF_PASS || r->action == PF_DROP) {
6090171168Smlaier		dirndx = (dir == PF_OUT);
6091171168Smlaier		r->packets[dirndx]++;
6092171168Smlaier		r->bytes[dirndx] += pd.tot_len;
6093130613Smlaier		if (a != NULL) {
6094171168Smlaier			a->packets[dirndx]++;
6095171168Smlaier			a->bytes[dirndx] += pd.tot_len;
6096130613Smlaier		}
6097130613Smlaier		if (s != NULL) {
6098130613Smlaier			if (s->nat_rule.ptr != NULL) {
6099171168Smlaier				s->nat_rule.ptr->packets[dirndx]++;
6100171168Smlaier				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6101130613Smlaier			}
6102130613Smlaier			if (s->src_node != NULL) {
6103171168Smlaier				s->src_node->packets[dirndx]++;
6104171168Smlaier				s->src_node->bytes[dirndx] += pd.tot_len;
6105130613Smlaier			}
6106130613Smlaier			if (s->nat_src_node != NULL) {
6107171168Smlaier				s->nat_src_node->packets[dirndx]++;
6108171168Smlaier				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6109130613Smlaier			}
6110171168Smlaier			dirndx = (dir == s->direction) ? 0 : 1;
6111171168Smlaier			s->packets[dirndx]++;
6112171168Smlaier			s->bytes[dirndx] += pd.tot_len;
6113130613Smlaier		}
6114130613Smlaier		tr = r;
6115130613Smlaier		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6116223637Sbz		if (nr != NULL && r == &V_pf_default_rule)
6117223637Sbz			tr = nr;
6118130613Smlaier		if (tr->src.addr.type == PF_ADDR_TABLE)
6119223637Sbz			pfr_update_stats(tr->src.addr.p.tbl,
6120223637Sbz			    (s == NULL) ? pd.src :
6121223637Sbz			    &s->key[(s->direction == PF_IN)]->
6122223637Sbz				addr[(s->direction == PF_OUT)],
6123223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
6124223637Sbz			    r->action == PF_PASS, tr->src.neg);
6125130613Smlaier		if (tr->dst.addr.type == PF_ADDR_TABLE)
6126223637Sbz			pfr_update_stats(tr->dst.addr.p.tbl,
6127223637Sbz			    (s == NULL) ? pd.dst :
6128223637Sbz			    &s->key[(s->direction == PF_IN)]->
6129223637Sbz				addr[(s->direction == PF_IN)],
6130223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
6131223637Sbz			    r->action == PF_PASS, tr->dst.neg);
6132130613Smlaier	}
6133130613Smlaier
6134223637Sbz	switch (action) {
6135223637Sbz	case PF_SYNPROXY_DROP:
6136126258Smlaier		m_freem(*m0);
6137223637Sbz	case PF_DEFER:
6138126258Smlaier		*m0 = NULL;
6139126258Smlaier		action = PF_PASS;
6140223637Sbz		break;
6141271306Sglebius	case PF_DROP:
6142271306Sglebius		m_freem(*m0);
6143271306Sglebius		*m0 = NULL;
6144271306Sglebius		break;
6145223637Sbz	default:
6146240233Sglebius		/* pf_route() returns unlocked. */
6147240233Sglebius		if (r->rt) {
6148335252Skp			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
6149240233Sglebius			return (action);
6150240233Sglebius		}
6151223637Sbz		break;
6152223637Sbz	}
6153240233Sglebius	if (s)
6154240233Sglebius		PF_STATE_UNLOCK(s);
6155240233Sglebius
6156126258Smlaier	return (action);
6157126258Smlaier}
6158126258Smlaier#endif /* INET */
6159126258Smlaier
6160126258Smlaier#ifdef INET6
6161126258Smlaierint
6162240233Sglebiuspf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
6163126258Smlaier{
6164130613Smlaier	struct pfi_kif		*kif;
6165130613Smlaier	u_short			 action, reason = 0, log = 0;
6166171168Smlaier	struct mbuf		*m = *m0, *n = NULL;
6167284571Skp	struct m_tag		*mtag;
6168223637Sbz	struct ip6_hdr		*h = NULL;
6169223637Sbz	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
6170130613Smlaier	struct pf_state		*s = NULL;
6171130613Smlaier	struct pf_ruleset	*ruleset = NULL;
6172130613Smlaier	struct pf_pdesc		 pd;
6173169843Sdhartmei	int			 off, terminal = 0, dirndx, rh_cnt = 0;
6174284571Skp	int			 fwdir = dir;
6175126258Smlaier
6176240233Sglebius	M_ASSERTPKTHDR(m);
6177240233Sglebius
6178287680Skp	/* Detect packet forwarding.
6179287680Skp	 * If the input interface is different from the output interface we're
6180287680Skp	 * forwarding.
6181287680Skp	 * We do need to be careful about bridges. If the
6182287680Skp	 * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a
6183287680Skp	 * bridge, so if the input interface is a bridge member and the output
6184297429Skp	 * interface is its bridge or a member of the same bridge we're not
6185297429Skp	 * actually forwarding but bridging.
6186287680Skp	 */
6187297429Skp	if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif &&
6188297429Skp	    (m->m_pkthdr.rcvif->if_bridge == NULL ||
6189297429Skp	    (m->m_pkthdr.rcvif->if_bridge != ifp->if_softc &&
6190297429Skp	    m->m_pkthdr.rcvif->if_bridge != ifp->if_bridge)))
6191284571Skp		fwdir = PF_FWD;
6192284571Skp
6193316000Skp	if (dir == PF_FWD)
6194316000Skp		dir = PF_OUT;
6195316000Skp
6196240233Sglebius	if (!V_pf_status.running)
6197126258Smlaier		return (PF_PASS);
6198126258Smlaier
6199171168Smlaier	memset(&pd, 0, sizeof(pd));
6200240233Sglebius	pd.pf_mtag = pf_find_mtag(m);
6201145836Smlaier
6202240233Sglebius	if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED)
6203240233Sglebius		return (PF_PASS);
6204240233Sglebius
6205240233Sglebius	kif = (struct pfi_kif *)ifp->if_pf_kif;
6206130613Smlaier	if (kif == NULL) {
6207145836Smlaier		DPFPRINTF(PF_DEBUG_URGENT,
6208145836Smlaier		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
6209130613Smlaier		return (PF_DROP);
6210130613Smlaier	}
6211223637Sbz	if (kif->pfik_flags & PFI_IFLAG_SKIP)
6212145836Smlaier		return (PF_PASS);
6213130613Smlaier
6214270575Sglebius	if (m->m_flags & M_SKIP_FIREWALL)
6215270575Sglebius		return (PF_PASS);
6216270575Sglebius
6217240233Sglebius	PF_RULES_RLOCK();
6218223637Sbz
6219126258Smlaier	/* We do IP header normalization and packet reassembly here */
6220145836Smlaier	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
6221126258Smlaier		action = PF_DROP;
6222126258Smlaier		goto done;
6223126258Smlaier	}
6224223637Sbz	m = *m0;	/* pf_normalize messes with m0 */
6225126258Smlaier	h = mtod(m, struct ip6_hdr *);
6226126258Smlaier
6227169843Sdhartmei#if 1
6228169843Sdhartmei	/*
6229169843Sdhartmei	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
6230169843Sdhartmei	 * will do something bad, so drop the packet for now.
6231169843Sdhartmei	 */
6232169843Sdhartmei	if (htons(h->ip6_plen) == 0) {
6233169843Sdhartmei		action = PF_DROP;
6234169843Sdhartmei		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
6235169843Sdhartmei		goto done;
6236169843Sdhartmei	}
6237169843Sdhartmei#endif
6238169843Sdhartmei
6239126258Smlaier	pd.src = (struct pf_addr *)&h->ip6_src;
6240126258Smlaier	pd.dst = (struct pf_addr *)&h->ip6_dst;
6241223637Sbz	pd.sport = pd.dport = NULL;
6242126258Smlaier	pd.ip_sum = NULL;
6243223637Sbz	pd.proto_sum = NULL;
6244223637Sbz	pd.dir = dir;
6245223637Sbz	pd.sidx = (dir == PF_IN) ? 0 : 1;
6246223637Sbz	pd.didx = (dir == PF_IN) ? 1 : 0;
6247126258Smlaier	pd.af = AF_INET6;
6248126258Smlaier	pd.tos = 0;
6249126258Smlaier	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6250126258Smlaier
6251126258Smlaier	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6252126258Smlaier	pd.proto = h->ip6_nxt;
6253126258Smlaier	do {
6254126258Smlaier		switch (pd.proto) {
6255126258Smlaier		case IPPROTO_FRAGMENT:
6256130613Smlaier			action = pf_test_fragment(&r, dir, kif, m, h,
6257126258Smlaier			    &pd, &a, &ruleset);
6258126258Smlaier			if (action == PF_DROP)
6259126258Smlaier				REASON_SET(&reason, PFRES_FRAG);
6260126258Smlaier			goto done;
6261169843Sdhartmei		case IPPROTO_ROUTING: {
6262169843Sdhartmei			struct ip6_rthdr rthdr;
6263169843Sdhartmei
6264169843Sdhartmei			if (rh_cnt++) {
6265169843Sdhartmei				DPFPRINTF(PF_DEBUG_MISC,
6266169843Sdhartmei				    ("pf: IPv6 more than one rthdr\n"));
6267169843Sdhartmei				action = PF_DROP;
6268169843Sdhartmei				REASON_SET(&reason, PFRES_IPOPTIONS);
6269169843Sdhartmei				log = 1;
6270169843Sdhartmei				goto done;
6271169843Sdhartmei			}
6272169843Sdhartmei			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
6273169843Sdhartmei			    &reason, pd.af)) {
6274169843Sdhartmei				DPFPRINTF(PF_DEBUG_MISC,
6275169843Sdhartmei				    ("pf: IPv6 short rthdr\n"));
6276169843Sdhartmei				action = PF_DROP;
6277169843Sdhartmei				REASON_SET(&reason, PFRES_SHORT);
6278169843Sdhartmei				log = 1;
6279169843Sdhartmei				goto done;
6280169843Sdhartmei			}
6281169843Sdhartmei			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6282169843Sdhartmei				DPFPRINTF(PF_DEBUG_MISC,
6283169843Sdhartmei				    ("pf: IPv6 rthdr0\n"));
6284169843Sdhartmei				action = PF_DROP;
6285169843Sdhartmei				REASON_SET(&reason, PFRES_IPOPTIONS);
6286169843Sdhartmei				log = 1;
6287169843Sdhartmei				goto done;
6288169843Sdhartmei			}
6289223637Sbz			/* FALLTHROUGH */
6290169843Sdhartmei		}
6291126258Smlaier		case IPPROTO_AH:
6292126258Smlaier		case IPPROTO_HOPOPTS:
6293126258Smlaier		case IPPROTO_DSTOPTS: {
6294126258Smlaier			/* get next header and header length */
6295126258Smlaier			struct ip6_ext	opt6;
6296126258Smlaier
6297126258Smlaier			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6298145836Smlaier			    NULL, &reason, pd.af)) {
6299126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
6300126258Smlaier				    ("pf: IPv6 short opt\n"));
6301126258Smlaier				action = PF_DROP;
6302126258Smlaier				log = 1;
6303126258Smlaier				goto done;
6304126258Smlaier			}
6305126258Smlaier			if (pd.proto == IPPROTO_AH)
6306126258Smlaier				off += (opt6.ip6e_len + 2) * 4;
6307126258Smlaier			else
6308126258Smlaier				off += (opt6.ip6e_len + 1) * 8;
6309126258Smlaier			pd.proto = opt6.ip6e_nxt;
6310126258Smlaier			/* goto the next header */
6311126258Smlaier			break;
6312126258Smlaier		}
6313126258Smlaier		default:
6314126258Smlaier			terminal++;
6315126258Smlaier			break;
6316126258Smlaier		}
6317126258Smlaier	} while (!terminal);
6318126258Smlaier
6319171168Smlaier	/* if there's no routing header, use unmodified mbuf for checksumming */
6320171168Smlaier	if (!n)
6321171168Smlaier		n = m;
6322171168Smlaier
6323126258Smlaier	switch (pd.proto) {
6324126258Smlaier
6325126258Smlaier	case IPPROTO_TCP: {
6326126258Smlaier		struct tcphdr	th;
6327126258Smlaier
6328126258Smlaier		pd.hdr.tcp = &th;
6329126258Smlaier		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6330126258Smlaier		    &action, &reason, AF_INET6)) {
6331126258Smlaier			log = action != PF_PASS;
6332126258Smlaier			goto done;
6333126258Smlaier		}
6334126258Smlaier		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6335130613Smlaier		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6336126258Smlaier		if (action == PF_DROP)
6337130613Smlaier			goto done;
6338130613Smlaier		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6339126258Smlaier		    &reason);
6340126258Smlaier		if (action == PF_PASS) {
6341223637Sbz			if (pfsync_update_state_ptr != NULL)
6342223637Sbz				pfsync_update_state_ptr(s);
6343126258Smlaier			r = s->rule.ptr;
6344130613Smlaier			a = s->anchor.ptr;
6345126258Smlaier			log = s->log;
6346126258Smlaier		} else if (s == NULL)
6347240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6348240233Sglebius			    &a, &ruleset, inp);
6349126258Smlaier		break;
6350126258Smlaier	}
6351126258Smlaier
6352126258Smlaier	case IPPROTO_UDP: {
6353126258Smlaier		struct udphdr	uh;
6354126258Smlaier
6355126258Smlaier		pd.hdr.udp = &uh;
6356126258Smlaier		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6357126258Smlaier		    &action, &reason, AF_INET6)) {
6358126258Smlaier			log = action != PF_PASS;
6359126258Smlaier			goto done;
6360126258Smlaier		}
6361130613Smlaier		if (uh.uh_dport == 0 ||
6362130613Smlaier		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6363130613Smlaier		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6364130613Smlaier			action = PF_DROP;
6365171168Smlaier			REASON_SET(&reason, PFRES_SHORT);
6366130613Smlaier			goto done;
6367130613Smlaier		}
6368130613Smlaier		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6369126258Smlaier		if (action == PF_PASS) {
6370223637Sbz			if (pfsync_update_state_ptr != NULL)
6371223637Sbz				pfsync_update_state_ptr(s);
6372126258Smlaier			r = s->rule.ptr;
6373130613Smlaier			a = s->anchor.ptr;
6374126258Smlaier			log = s->log;
6375126258Smlaier		} else if (s == NULL)
6376240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6377240233Sglebius			    &a, &ruleset, inp);
6378126258Smlaier		break;
6379126258Smlaier	}
6380126258Smlaier
6381223637Sbz	case IPPROTO_ICMP: {
6382223637Sbz		action = PF_DROP;
6383223637Sbz		DPFPRINTF(PF_DEBUG_MISC,
6384223637Sbz		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
6385223637Sbz		goto done;
6386223637Sbz	}
6387223637Sbz
6388126258Smlaier	case IPPROTO_ICMPV6: {
6389126258Smlaier		struct icmp6_hdr	ih;
6390126258Smlaier
6391126258Smlaier		pd.hdr.icmp6 = &ih;
6392126258Smlaier		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6393126258Smlaier		    &action, &reason, AF_INET6)) {
6394126258Smlaier			log = action != PF_PASS;
6395126258Smlaier			goto done;
6396126258Smlaier		}
6397130613Smlaier		action = pf_test_state_icmp(&s, dir, kif,
6398145836Smlaier		    m, off, h, &pd, &reason);
6399126258Smlaier		if (action == PF_PASS) {
6400223637Sbz			if (pfsync_update_state_ptr != NULL)
6401223637Sbz				pfsync_update_state_ptr(s);
6402126258Smlaier			r = s->rule.ptr;
6403130613Smlaier			a = s->anchor.ptr;
6404126258Smlaier			log = s->log;
6405126258Smlaier		} else if (s == NULL)
6406240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6407240233Sglebius			    &a, &ruleset, inp);
6408126258Smlaier		break;
6409126258Smlaier	}
6410126258Smlaier
6411126258Smlaier	default:
6412223637Sbz		action = pf_test_state_other(&s, dir, kif, m, &pd);
6413130613Smlaier		if (action == PF_PASS) {
6414223637Sbz			if (pfsync_update_state_ptr != NULL)
6415223637Sbz				pfsync_update_state_ptr(s);
6416130613Smlaier			r = s->rule.ptr;
6417130613Smlaier			a = s->anchor.ptr;
6418130613Smlaier			log = s->log;
6419130613Smlaier		} else if (s == NULL)
6420240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6421240233Sglebius			    &a, &ruleset, inp);
6422126258Smlaier		break;
6423126258Smlaier	}
6424126258Smlaier
6425126258Smlaierdone:
6426240233Sglebius	PF_RULES_RUNLOCK();
6427223637Sbz	if (n != m) {
6428223637Sbz		m_freem(n);
6429223637Sbz		n = NULL;
6430223637Sbz	}
6431223637Sbz
6432169843Sdhartmei	/* handle dangerous IPv6 extension headers. */
6433169843Sdhartmei	if (action == PF_PASS && rh_cnt &&
6434200930Sdelphij	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
6435169843Sdhartmei		action = PF_DROP;
6436169843Sdhartmei		REASON_SET(&reason, PFRES_IPOPTIONS);
6437286125Sgarga		log = r->log;
6438169843Sdhartmei		DPFPRINTF(PF_DEBUG_MISC,
6439169843Sdhartmei		    ("pf: dropping packet with dangerous v6 headers\n"));
6440169843Sdhartmei	}
6441126258Smlaier
6442240233Sglebius	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
6443240233Sglebius		action = PF_DROP;
6444240233Sglebius		REASON_SET(&reason, PFRES_MEMORY);
6445240233Sglebius	}
6446240233Sglebius	if (r->rtableid >= 0)
6447240233Sglebius		M_SETFIB(m, r->rtableid);
6448145836Smlaier
6449126258Smlaier#ifdef ALTQ
6450126258Smlaier	if (action == PF_PASS && r->qid) {
6451240233Sglebius		if (pd.pf_mtag == NULL &&
6452240233Sglebius		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
6453240233Sglebius			action = PF_DROP;
6454240233Sglebius			REASON_SET(&reason, PFRES_MEMORY);
6455285941Sglebius		} else {
6456298091Sloos			if (s != NULL)
6457298091Sloos				pd.pf_mtag->qid_hash = pf_state_hash(s);
6458285941Sglebius			if (pd.tos & IPTOS_LOWDELAY)
6459285941Sglebius				pd.pf_mtag->qid = r->pqid;
6460285941Sglebius			else
6461285941Sglebius				pd.pf_mtag->qid = r->qid;
6462285941Sglebius			/* Add hints for ecn. */
6463285941Sglebius			pd.pf_mtag->hdr = h;
6464240233Sglebius		}
6465126258Smlaier	}
6466145836Smlaier#endif /* ALTQ */
6467126258Smlaier
6468130613Smlaier	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6469130613Smlaier	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6470130613Smlaier	    (s->nat_rule.ptr->action == PF_RDR ||
6471130613Smlaier	    s->nat_rule.ptr->action == PF_BINAT) &&
6472171168Smlaier	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
6473223637Sbz		m->m_flags |= M_SKIP_FIREWALL;
6474171168Smlaier
6475223637Sbz	/* XXX: Anybody working on it?! */
6476223637Sbz	if (r->divert.port)
6477223637Sbz		printf("pf: divert(9) is not supported for IPv6\n");
6478223637Sbz
6479171168Smlaier	if (log) {
6480171168Smlaier		struct pf_rule *lr;
6481171168Smlaier
6482171168Smlaier		if (s != NULL && s->nat_rule.ptr != NULL &&
6483171168Smlaier		    s->nat_rule.ptr->log & PF_LOG_ALL)
6484171168Smlaier			lr = s->nat_rule.ptr;
6485171168Smlaier		else
6486171168Smlaier			lr = r;
6487240233Sglebius		PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset,
6488240233Sglebius		    &pd, (s == NULL));
6489130613Smlaier	}
6490130613Smlaier
6491130613Smlaier	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6492130613Smlaier	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6493130613Smlaier
6494130613Smlaier	if (action == PF_PASS || r->action == PF_DROP) {
6495171168Smlaier		dirndx = (dir == PF_OUT);
6496171168Smlaier		r->packets[dirndx]++;
6497171168Smlaier		r->bytes[dirndx] += pd.tot_len;
6498130613Smlaier		if (a != NULL) {
6499171168Smlaier			a->packets[dirndx]++;
6500171168Smlaier			a->bytes[dirndx] += pd.tot_len;
6501130613Smlaier		}
6502130613Smlaier		if (s != NULL) {
6503130613Smlaier			if (s->nat_rule.ptr != NULL) {
6504171168Smlaier				s->nat_rule.ptr->packets[dirndx]++;
6505171168Smlaier				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6506130613Smlaier			}
6507130613Smlaier			if (s->src_node != NULL) {
6508171168Smlaier				s->src_node->packets[dirndx]++;
6509171168Smlaier				s->src_node->bytes[dirndx] += pd.tot_len;
6510130613Smlaier			}
6511130613Smlaier			if (s->nat_src_node != NULL) {
6512171168Smlaier				s->nat_src_node->packets[dirndx]++;
6513171168Smlaier				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6514130613Smlaier			}
6515171168Smlaier			dirndx = (dir == s->direction) ? 0 : 1;
6516171168Smlaier			s->packets[dirndx]++;
6517171168Smlaier			s->bytes[dirndx] += pd.tot_len;
6518130613Smlaier		}
6519130613Smlaier		tr = r;
6520130613Smlaier		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6521223637Sbz		if (nr != NULL && r == &V_pf_default_rule)
6522223637Sbz			tr = nr;
6523130613Smlaier		if (tr->src.addr.type == PF_ADDR_TABLE)
6524223637Sbz			pfr_update_stats(tr->src.addr.p.tbl,
6525223637Sbz			    (s == NULL) ? pd.src :
6526223637Sbz			    &s->key[(s->direction == PF_IN)]->addr[0],
6527223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
6528223637Sbz			    r->action == PF_PASS, tr->src.neg);
6529130613Smlaier		if (tr->dst.addr.type == PF_ADDR_TABLE)
6530223637Sbz			pfr_update_stats(tr->dst.addr.p.tbl,
6531223637Sbz			    (s == NULL) ? pd.dst :
6532223637Sbz			    &s->key[(s->direction == PF_IN)]->addr[1],
6533223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
6534223637Sbz			    r->action == PF_PASS, tr->dst.neg);
6535130613Smlaier	}
6536130613Smlaier
6537223637Sbz	switch (action) {
6538223637Sbz	case PF_SYNPROXY_DROP:
6539126258Smlaier		m_freem(*m0);
6540223637Sbz	case PF_DEFER:
6541126258Smlaier		*m0 = NULL;
6542126258Smlaier		action = PF_PASS;
6543223637Sbz		break;
6544271306Sglebius	case PF_DROP:
6545271306Sglebius		m_freem(*m0);
6546271306Sglebius		*m0 = NULL;
6547271306Sglebius		break;
6548223637Sbz	default:
6549240233Sglebius		/* pf_route6() returns unlocked. */
6550240233Sglebius		if (r->rt) {
6551335252Skp			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
6552240233Sglebius			return (action);
6553240233Sglebius		}
6554223637Sbz		break;
6555223637Sbz	}
6556126258Smlaier
6557240233Sglebius	if (s)
6558240233Sglebius		PF_STATE_UNLOCK(s);
6559240233Sglebius
6560284571Skp	/* If reassembled packet passed, create new fragments. */
6561284571Skp	if (action == PF_PASS && *m0 && fwdir == PF_FWD &&
6562284571Skp	    (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL)
6563284571Skp		action = pf_refragment6(ifp, m0, mtag);
6564284571Skp
6565126258Smlaier	return (action);
6566126258Smlaier}
6567126258Smlaier#endif /* INET6 */
6568