pf.c revision 293896
1/*	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#ifdef __FreeBSD__
39#include "opt_inet.h"
40#include "opt_inet6.h"
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: releng/9.3/sys/contrib/pf/net/pf.c 293896 2016-01-14 09:11:26Z glebius $");
44#endif
45
46#ifdef __FreeBSD__
47#include "opt_bpf.h"
48#include "opt_pf.h"
49
50#define	NPFSYNC		1
51
52#ifdef DEV_PFLOW
53#define	NPFLOW		DEV_PFLOW
54#else
55#define	NPFLOW		0
56#endif
57
58#else
59#include "bpfilter.h"
60#include "pflog.h"
61#include "pfsync.h"
62#include "pflow.h"
63#endif
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/mbuf.h>
68#include <sys/filio.h>
69#include <sys/socket.h>
70#include <sys/socketvar.h>
71#include <sys/kernel.h>
72#include <sys/time.h>
73#ifdef __FreeBSD__
74#include <sys/random.h>
75#include <sys/sysctl.h>
76#include <sys/endian.h>
77#define	betoh64		be64toh
78#else
79#include <sys/pool.h>
80#endif
81#include <sys/proc.h>
82#ifdef __FreeBSD__
83#include <sys/kthread.h>
84#include <sys/lock.h>
85#include <sys/sx.h>
86#else
87#include <sys/rwlock.h>
88#endif
89
90#ifdef __FreeBSD__
91#include <sys/md5.h>
92#else
93#include <crypto/md5.h>
94#endif
95
96#include <net/if.h>
97#include <net/if_types.h>
98#include <net/bpf.h>
99#include <net/route.h>
100#ifdef __FreeBSD__
101#ifdef RADIX_MPATH
102#include <net/radix_mpath.h>
103#endif
104#else
105#include <net/radix_mpath.h>
106#endif
107
108#include <netinet/in.h>
109#include <netinet/in_var.h>
110#include <netinet/in_systm.h>
111#include <netinet/ip.h>
112#include <netinet/ip_var.h>
113#include <netinet/tcp.h>
114#include <netinet/tcp_seq.h>
115#include <netinet/udp.h>
116#include <netinet/ip_icmp.h>
117#include <netinet/in_pcb.h>
118#include <netinet/tcp_timer.h>
119#include <netinet/tcp_var.h>
120#include <netinet/udp_var.h>
121#include <netinet/icmp_var.h>
122#include <netinet/if_ether.h>
123#ifdef __FreeBSD__
124#include <netinet/ip_fw.h>
125#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
126#endif
127
128#ifndef __FreeBSD__
129#include <dev/rndvar.h>
130#endif
131#include <net/pfvar.h>
132#include <net/if_pflog.h>
133#include <net/if_pflow.h>
134#include <net/if_pfsync.h>
135
136#ifdef INET6
137#include <netinet/ip6.h>
138#include <netinet/in_pcb.h>
139#include <netinet/icmp6.h>
140#include <netinet6/nd6.h>
141#ifdef __FreeBSD__
142#include <netinet6/ip6_var.h>
143#include <netinet6/in6_pcb.h>
144#endif
145#endif /* INET6 */
146
147#ifdef __FreeBSD__
148#include <machine/in_cksum.h>
149#include <sys/limits.h>
150#include <sys/ucred.h>
151#include <security/mac/mac_framework.h>
152
153extern int ip_optcopy(struct ip *, struct ip *);
154#endif
155
/*
 * DPFPRINTF(n, x): call printf with the parenthesized argument list `x'
 * when the configured pf debug level is at least `n'.
 *
 * The expansion is wrapped in do { } while (0) so that it is a single
 * statement: a bare `if (...) printf x' would capture an `else' that
 * follows the macro invocation.
 */
#ifdef __FreeBSD__
#define	DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
#else
#define	DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
#endif
161
162/*
163 * Global variables
164 */
165
166/* state tables */
167#ifdef __FreeBSD__
168VNET_DEFINE(struct pf_state_tree,	 pf_statetbl);
169
170VNET_DEFINE(struct pf_altqqueue,	 pf_altqs[2]);
171VNET_DEFINE(struct pf_palist,		 pf_pabuf);
172VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
173VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
174VNET_DEFINE(struct pf_status,		 pf_status);
175
176VNET_DEFINE(u_int32_t,			 ticket_altqs_active);
177VNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
178VNET_DEFINE(int,			 altqs_inactive_open);
179VNET_DEFINE(u_int32_t,			 ticket_pabuf);
180
181VNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
182#define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
183VNET_DEFINE(u_char,			 pf_tcp_secret[16]);
184#define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
185VNET_DEFINE(int,			 pf_tcp_secret_init);
186#define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
187VNET_DEFINE(int,			 pf_tcp_iss_off);
188#define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)
189
190struct pf_anchor_stackframe {
191	struct pf_ruleset		*rs;
192	struct pf_rule			*r;
193	struct pf_anchor_node		*parent;
194	struct pf_anchor		*child;
195};
196VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]);
197#define	V_pf_anchor_stack		 VNET(pf_anchor_stack)
198
199VNET_DEFINE(uma_zone_t,	 pf_src_tree_pl);
200VNET_DEFINE(uma_zone_t,	 pf_rule_pl);
201VNET_DEFINE(uma_zone_t,	 pf_pooladdr_pl);
202VNET_DEFINE(uma_zone_t,	 pf_state_pl);
203VNET_DEFINE(uma_zone_t,	 pf_state_key_pl);
204VNET_DEFINE(uma_zone_t,	 pf_state_item_pl);
205VNET_DEFINE(uma_zone_t,	 pf_altq_pl);
206#else
207struct pf_state_tree	 pf_statetbl;
208
209struct pf_altqqueue	 pf_altqs[2];
210struct pf_palist	 pf_pabuf;
211struct pf_altqqueue	*pf_altqs_active;
212struct pf_altqqueue	*pf_altqs_inactive;
213struct pf_status	 pf_status;
214
215u_int32_t		 ticket_altqs_active;
216u_int32_t		 ticket_altqs_inactive;
217int			 altqs_inactive_open;
218u_int32_t		 ticket_pabuf;
219
220MD5_CTX			 pf_tcp_secret_ctx;
221u_char			 pf_tcp_secret[16];
222int			 pf_tcp_secret_init;
223int			 pf_tcp_iss_off;
224
225struct pf_anchor_stackframe {
226	struct pf_ruleset			*rs;
227	struct pf_rule				*r;
228	struct pf_anchor_node			*parent;
229	struct pf_anchor			*child;
230} pf_anchor_stack[64];
231
232struct pool		 pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
233struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
234struct pool		 pf_altq_pl;
235#endif
236
237void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
238			    u_int32_t);
239void			 pf_add_threshold(struct pf_threshold *);
240int			 pf_check_threshold(struct pf_threshold *);
241
242void			 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
243			    u_int16_t *, u_int16_t *, struct pf_addr *,
244			    u_int16_t, u_int8_t, sa_family_t);
245int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
246			    struct tcphdr *, struct pf_state_peer *);
247#ifdef INET6
248void			 pf_change_a6(struct pf_addr *, u_int16_t *,
249			    struct pf_addr *, u_int8_t);
250#endif /* INET6 */
251void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
252			    struct pf_addr *, struct pf_addr *, u_int16_t,
253			    u_int16_t *, u_int16_t *, u_int16_t *,
254			    u_int16_t *, u_int8_t, sa_family_t);
255#ifdef __FreeBSD__
256void			 pf_send_tcp(struct mbuf *,
257			    const struct pf_rule *, sa_family_t,
258#else
259void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
260#endif
261			    const struct pf_addr *, const struct pf_addr *,
262			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
263			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
264			    u_int16_t, struct ether_header *, struct ifnet *);
265static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
266			    sa_family_t, struct pf_rule *);
267void			 pf_detach_state(struct pf_state *);
268void			 pf_state_key_detach(struct pf_state *, int);
269u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
270int			 pf_test_rule(struct pf_rule **, struct pf_state **,
271			    int, struct pfi_kif *, struct mbuf *, int,
272			    void *, struct pf_pdesc *, struct pf_rule **,
273#ifdef __FreeBSD__
274			    struct pf_ruleset **, struct ifqueue *,
275			    struct inpcb *);
276#else
277			    struct pf_ruleset **, struct ifqueue *);
278#endif
279static __inline int	 pf_create_state(struct pf_rule *, struct pf_rule *,
280			    struct pf_rule *, struct pf_pdesc *,
281			    struct pf_src_node *, struct pf_state_key *,
282			    struct pf_state_key *, struct pf_state_key *,
283			    struct pf_state_key *, struct mbuf *, int,
284			    u_int16_t, u_int16_t, int *, struct pfi_kif *,
285			    struct pf_state **, int, u_int16_t, u_int16_t,
286			    int);
287int			 pf_test_fragment(struct pf_rule **, int,
288			    struct pfi_kif *, struct mbuf *, void *,
289			    struct pf_pdesc *, struct pf_rule **,
290			    struct pf_ruleset **);
291int			 pf_tcp_track_full(struct pf_state_peer *,
292			    struct pf_state_peer *, struct pf_state **,
293			    struct pfi_kif *, struct mbuf *, int,
294			    struct pf_pdesc *, u_short *, int *);
295int			pf_tcp_track_sloppy(struct pf_state_peer *,
296			    struct pf_state_peer *, struct pf_state **,
297			    struct pf_pdesc *, u_short *);
298int			 pf_test_state_tcp(struct pf_state **, int,
299			    struct pfi_kif *, struct mbuf *, int,
300			    void *, struct pf_pdesc *, u_short *);
301int			 pf_test_state_udp(struct pf_state **, int,
302			    struct pfi_kif *, struct mbuf *, int,
303			    void *, struct pf_pdesc *);
304int			 pf_test_state_icmp(struct pf_state **, int,
305			    struct pfi_kif *, struct mbuf *, int,
306			    void *, struct pf_pdesc *, u_short *);
307int			 pf_test_state_other(struct pf_state **, int,
308			    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
309void			 pf_route(struct mbuf **, struct pf_rule *, int,
310			    struct ifnet *, struct pf_state *,
311			    struct pf_pdesc *);
312void			 pf_route6(struct mbuf **, struct pf_rule *, int,
313			    struct ifnet *, struct pf_state *,
314			    struct pf_pdesc *);
315#ifndef __FreeBSD__
316int			 pf_socket_lookup(int, struct pf_pdesc *);
317#endif
318u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
319			    sa_family_t);
320u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
321			    sa_family_t);
322u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
323				int, u_int16_t);
324void			 pf_set_rt_ifp(struct pf_state *,
325			    struct pf_addr *);
326int			 pf_check_proto_cksum(struct mbuf *, int, int,
327			    u_int8_t, sa_family_t);
328#ifndef __FreeBSD__
329struct pf_divert	*pf_get_divert(struct mbuf *);
330#endif
331void			 pf_print_state_parts(struct pf_state *,
332			    struct pf_state_key *, struct pf_state_key *);
333int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
334			    struct pf_addr_wrap *);
335int			 pf_compare_state_keys(struct pf_state_key *,
336			    struct pf_state_key *, struct pfi_kif *, u_int);
337#ifdef __FreeBSD__
338struct pf_state		*pf_find_state(struct pfi_kif *,
339			    struct pf_state_key_cmp *, u_int, struct mbuf *,
340			    struct pf_mtag *);
341#else
342struct pf_state		*pf_find_state(struct pfi_kif *,
343			    struct pf_state_key_cmp *, u_int, struct mbuf *);
344#endif
345int			 pf_src_connlimit(struct pf_state **);
346int			 pf_check_congestion(struct ifqueue *);
347
348#ifdef __FreeBSD__
349int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
350
351VNET_DECLARE(int, pf_end_threads);
352
353VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]);
354#else
355extern struct pool pfr_ktable_pl;
356extern struct pool pfr_kentry_pl;
357
358struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
359	{ &pf_state_pl, PFSTATE_HIWAT },
360	{ &pf_src_tree_pl, PFSNODE_HIWAT },
361	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
362	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
363	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
364};
365#endif
366
/*
 * PPACKET_LOOPED() / PACKET_LOOPED() (FreeBSD only): test the
 * PF_PACKET_LOOPED flag of the packet tag.  They rely on a variable named
 * `pd' being in scope at the point of use: a pointer for PPACKET_LOOPED(),
 * an object for PACKET_LOOPED().
 *
 * STATE_LOOKUP(): look up the state for key `k' on interface `i' in
 * direction `d' and store it in `s'.  The macro expands to `return'
 * statements, so it may only be used inside a function that returns a
 * PF_* action code:
 *   - PF_DROP when no state is found or the state is being purged;
 *   - PF_PASS (FreeBSD only) when the packet is flagged as looped;
 *   - PF_PASS for an outbound packet whose state belongs to a route-to
 *     (out) or reply-to (in) rule and is bound to a different, non-NULL
 *     rt_kif than `i'.
 * Otherwise control continues after the macro with `s' set.
 *
 * Every macro argument is parenthesized in the expansion so that
 * arbitrary expressions can be passed safely.  `s' is evaluated several
 * times and must therefore be a side-effect-free lvalue.
 */
#ifdef __FreeBSD__
#define	PPACKET_LOOPED()						\
	(pd->pf_mtag->flags & PF_PACKET_LOOPED)

#define	PACKET_LOOPED()							\
	(pd.pf_mtag->flags & PF_PACKET_LOOPED)

#define	STATE_LOOKUP(i, k, d, s, m, pt)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m), (pt));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if (PPACKET_LOOPED())					\
			return (PF_PASS);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
#else
#define	STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
#endif
406
/*
 * BOUND_IFACE(r, k): evaluates to `k' when rule `r' has PFRULE_IFBOUND
 * set in rule_flag, otherwise to the global "all" interface (V_pfi_all on
 * FreeBSD, pfi_all elsewhere).
 *
 * The whole conditional is parenthesized so the macro can be embedded in
 * a larger expression without the ?: operator absorbing its neighbours.
 */
#ifdef __FreeBSD__
#define	BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all)
#else
#define	BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
#endif
414
/*
 * STATE_INC_COUNTERS(s): account a new state `s' on the rules that refer
 * to it.  states_cur and states_tot are incremented on the matching rule
 * and, when set, on the anchor rule and the NAT rule.
 *
 * `s' is parenthesized at each use so that expressions such as `&st' or
 * `*sp' can be passed; it is evaluated several times.
 */
#define	STATE_INC_COUNTERS(s)				\
	do {						\
		(s)->rule.ptr->states_cur++;		\
		(s)->rule.ptr->states_tot++;		\
		if ((s)->anchor.ptr != NULL) {		\
			(s)->anchor.ptr->states_cur++;	\
			(s)->anchor.ptr->states_tot++;	\
		}					\
		if ((s)->nat_rule.ptr != NULL) {	\
			(s)->nat_rule.ptr->states_cur++;	\
			(s)->nat_rule.ptr->states_tot++;	\
		}					\
	} while (0)
428
/*
 * STATE_DEC_COUNTERS(s): undo the states_cur accounting for state `s' on
 * its NAT rule and anchor rule (when set) and on its matching rule.
 * states_tot is left untouched.
 *
 * `s' is parenthesized at each use so that expressions such as `&st' or
 * `*sp' can be passed; it is evaluated several times.
 */
#define	STATE_DEC_COUNTERS(s)				\
	do {						\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states_cur--;	\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states_cur--;	\
		(s)->rule.ptr->states_cur--;		\
	} while (0)
437
438static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
439static __inline int pf_state_compare_key(struct pf_state_key *,
440	struct pf_state_key *);
441static __inline int pf_state_compare_id(struct pf_state *,
442	struct pf_state *);
443
444#ifdef __FreeBSD__
445VNET_DEFINE(struct pf_src_tree,	 	 tree_src_tracking);
446
447VNET_DEFINE(struct pf_state_tree_id,	 tree_id);
448VNET_DEFINE(struct pf_state_queue,	 state_list);
449#else
450struct pf_src_tree tree_src_tracking;
451
452struct pf_state_tree_id tree_id;
453struct pf_state_queue state_list;
454#endif
455
456RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
457RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
458RB_GENERATE(pf_state_tree_id, pf_state,
459    entry_id, pf_state_compare_id);
460
461static __inline int
462pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
463{
464	int	diff;
465
466	if (a->rule.ptr > b->rule.ptr)
467		return (1);
468	if (a->rule.ptr < b->rule.ptr)
469		return (-1);
470	if ((diff = a->af - b->af) != 0)
471		return (diff);
472	switch (a->af) {
473#ifdef INET
474	case AF_INET:
475		if (a->addr.addr32[0] > b->addr.addr32[0])
476			return (1);
477		if (a->addr.addr32[0] < b->addr.addr32[0])
478			return (-1);
479		break;
480#endif /* INET */
481#ifdef INET6
482	case AF_INET6:
483		if (a->addr.addr32[3] > b->addr.addr32[3])
484			return (1);
485		if (a->addr.addr32[3] < b->addr.addr32[3])
486			return (-1);
487		if (a->addr.addr32[2] > b->addr.addr32[2])
488			return (1);
489		if (a->addr.addr32[2] < b->addr.addr32[2])
490			return (-1);
491		if (a->addr.addr32[1] > b->addr.addr32[1])
492			return (1);
493		if (a->addr.addr32[1] < b->addr.addr32[1])
494			return (-1);
495		if (a->addr.addr32[0] > b->addr.addr32[0])
496			return (1);
497		if (a->addr.addr32[0] < b->addr.addr32[0])
498			return (-1);
499		break;
500#endif /* INET6 */
501	}
502	return (0);
503}
504
#ifdef INET6
/*
 * Copy an address of family `af' from `src' to `dst'.
 *
 * AF_INET (only when INET is compiled in) copies the first 32-bit word,
 * AF_INET6 copies all four words.  Any other family leaves `dst'
 * untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	word;

#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		for (word = 0; word < 4; word++)
			dst->addr32[word] = src->addr32[word];
	}
}
#endif /* INET6 */
524
525void
526pf_init_threshold(struct pf_threshold *threshold,
527    u_int32_t limit, u_int32_t seconds)
528{
529	threshold->limit = limit * PF_THRESHOLD_MULT;
530	threshold->seconds = seconds;
531	threshold->count = 0;
532	threshold->last = time_second;
533}
534
535void
536pf_add_threshold(struct pf_threshold *threshold)
537{
538	u_int32_t t = time_second, diff = t - threshold->last;
539
540	if (diff >= threshold->seconds)
541		threshold->count = 0;
542	else
543		threshold->count -= threshold->count * diff /
544		    threshold->seconds;
545	threshold->count += PF_THRESHOLD_MULT;
546	threshold->last = t;
547}
548
549int
550pf_check_threshold(struct pf_threshold *threshold)
551{
552	return (threshold->count > threshold->limit);
553}
554
555int
556pf_src_connlimit(struct pf_state **state)
557{
558	int bad = 0;
559
560	(*state)->src_node->conn++;
561	(*state)->src.tcp_est = 1;
562	pf_add_threshold(&(*state)->src_node->conn_rate);
563
564	if ((*state)->rule.ptr->max_src_conn &&
565	    (*state)->rule.ptr->max_src_conn <
566	    (*state)->src_node->conn) {
567#ifdef __FreeBSD__
568		V_pf_status.lcounters[LCNT_SRCCONN]++;
569#else
570		pf_status.lcounters[LCNT_SRCCONN]++;
571#endif
572		bad++;
573	}
574
575	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
576	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
577#ifdef __FreeBSD__
578		V_pf_status.lcounters[LCNT_SRCCONNRATE]++;
579#else
580		pf_status.lcounters[LCNT_SRCCONNRATE]++;
581#endif
582		bad++;
583	}
584
585	if (!bad)
586		return (0);
587
588	if ((*state)->rule.ptr->overload_tbl) {
589		struct pfr_addr p;
590		u_int32_t	killed = 0;
591
592#ifdef __FreeBSD__
593		V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
594		if (V_pf_status.debug >= PF_DEBUG_MISC) {
595#else
596		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
597		if (pf_status.debug >= PF_DEBUG_MISC) {
598#endif
599			printf("pf_src_connlimit: blocking address ");
600			pf_print_host(&(*state)->src_node->addr, 0,
601			    (*state)->key[PF_SK_WIRE]->af);
602		}
603
604		bzero(&p, sizeof(p));
605		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
606		switch ((*state)->key[PF_SK_WIRE]->af) {
607#ifdef INET
608		case AF_INET:
609			p.pfra_net = 32;
610			p.pfra_ip4addr = (*state)->src_node->addr.v4;
611			break;
612#endif /* INET */
613#ifdef INET6
614		case AF_INET6:
615			p.pfra_net = 128;
616			p.pfra_ip6addr = (*state)->src_node->addr.v6;
617			break;
618#endif /* INET6 */
619		}
620
621		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
622		    &p, time_second);
623
624		/* kill existing states if that's required. */
625		if ((*state)->rule.ptr->flush) {
626			struct pf_state_key *sk;
627			struct pf_state *st;
628
629#ifdef __FreeBSD__
630			V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
631			RB_FOREACH(st, pf_state_tree_id, &V_tree_id) {
632#else
633			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
634			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
635#endif
636				sk = st->key[PF_SK_WIRE];
637				/*
638				 * Kill states from this source.  (Only those
639				 * from the same rule if PF_FLUSH_GLOBAL is not
640				 * set)
641				 */
642				if (sk->af ==
643				    (*state)->key[PF_SK_WIRE]->af &&
644				    (((*state)->direction == PF_OUT &&
645				    PF_AEQ(&(*state)->src_node->addr,
646					&sk->addr[1], sk->af)) ||
647				    ((*state)->direction == PF_IN &&
648				    PF_AEQ(&(*state)->src_node->addr,
649					&sk->addr[0], sk->af))) &&
650				    ((*state)->rule.ptr->flush &
651				    PF_FLUSH_GLOBAL ||
652				    (*state)->rule.ptr == st->rule.ptr)) {
653					st->timeout = PFTM_PURGE;
654					st->src.state = st->dst.state =
655					    TCPS_CLOSED;
656					killed++;
657				}
658			}
659#ifdef __FreeBSD__
660			if (V_pf_status.debug >= PF_DEBUG_MISC)
661#else
662			if (pf_status.debug >= PF_DEBUG_MISC)
663#endif
664				printf(", %u states killed", killed);
665		}
666#ifdef __FreeBSD__
667		if (V_pf_status.debug >= PF_DEBUG_MISC)
668#else
669		if (pf_status.debug >= PF_DEBUG_MISC)
670#endif
671			printf("\n");
672	}
673
674	/* kill this state */
675	(*state)->timeout = PFTM_PURGE;
676	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
677	return (1);
678}
679
680int
681pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
682    struct pf_addr *src, sa_family_t af)
683{
684	struct pf_src_node	k;
685
686	if (*sn == NULL) {
687		k.af = af;
688		PF_ACPY(&k.addr, src, af);
689		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
690		    rule->rpool.opts & PF_POOL_STICKYADDR)
691			k.rule.ptr = rule;
692		else
693			k.rule.ptr = NULL;
694#ifdef __FreeBSD__
695		V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
696		*sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
697#else
698		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
699		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
700#endif
701	}
702	if (*sn == NULL) {
703		if (!rule->max_src_nodes ||
704		    rule->src_nodes < rule->max_src_nodes)
705#ifdef __FreeBSD__
706			(*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
707#else
708			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
709#endif
710		else
711#ifdef __FreeBSD__
712			V_pf_status.lcounters[LCNT_SRCNODES]++;
713#else
714			pf_status.lcounters[LCNT_SRCNODES]++;
715#endif
716		if ((*sn) == NULL)
717			return (-1);
718
719		pf_init_threshold(&(*sn)->conn_rate,
720		    rule->max_src_conn_rate.limit,
721		    rule->max_src_conn_rate.seconds);
722
723		(*sn)->af = af;
724		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
725		    rule->rpool.opts & PF_POOL_STICKYADDR)
726			(*sn)->rule.ptr = rule;
727		else
728			(*sn)->rule.ptr = NULL;
729		PF_ACPY(&(*sn)->addr, src, af);
730		if (RB_INSERT(pf_src_tree,
731#ifdef __FreeBSD__
732		    &V_tree_src_tracking, *sn) != NULL) {
733			if (V_pf_status.debug >= PF_DEBUG_MISC) {
734#else
735		    &tree_src_tracking, *sn) != NULL) {
736			if (pf_status.debug >= PF_DEBUG_MISC) {
737#endif
738				printf("pf: src_tree insert failed: ");
739				pf_print_host(&(*sn)->addr, 0, af);
740				printf("\n");
741			}
742#ifdef __FreeBSD__
743			pool_put(&V_pf_src_tree_pl, *sn);
744#else
745			pool_put(&pf_src_tree_pl, *sn);
746#endif
747			return (-1);
748		}
749		(*sn)->creation = time_second;
750		(*sn)->ruletype = rule->action;
751		if ((*sn)->rule.ptr != NULL)
752			(*sn)->rule.ptr->src_nodes++;
753#ifdef __FreeBSD__
754		V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
755		V_pf_status.src_nodes++;
756#else
757		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
758		pf_status.src_nodes++;
759#endif
760	} else {
761		if (rule->max_src_states &&
762		    (*sn)->states >= rule->max_src_states) {
763#ifdef __FreeBSD__
764			V_pf_status.lcounters[LCNT_SRCSTATES]++;
765#else
766			pf_status.lcounters[LCNT_SRCSTATES]++;
767#endif
768			return (-1);
769		}
770	}
771	return (0);
772}
773
774/* state table stuff */
775
776static __inline int
777pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
778{
779	int	diff;
780
781	if ((diff = a->proto - b->proto) != 0)
782		return (diff);
783	if ((diff = a->af - b->af) != 0)
784		return (diff);
785	switch (a->af) {
786#ifdef INET
787	case AF_INET:
788		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
789			return (1);
790		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
791			return (-1);
792		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
793			return (1);
794		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
795			return (-1);
796		break;
797#endif /* INET */
798#ifdef INET6
799	case AF_INET6:
800		if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
801			return (1);
802		if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
803			return (-1);
804		if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
805			return (1);
806		if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
807			return (-1);
808		if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
809			return (1);
810		if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
811			return (-1);
812		if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
813			return (1);
814		if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
815			return (-1);
816		if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
817			return (1);
818		if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
819			return (-1);
820		if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
821			return (1);
822		if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
823			return (-1);
824		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
825			return (1);
826		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
827			return (-1);
828		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
829			return (1);
830		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
831			return (-1);
832		break;
833#endif /* INET6 */
834	}
835
836	if ((diff = a->port[0] - b->port[0]) != 0)
837		return (diff);
838	if ((diff = a->port[1] - b->port[1]) != 0)
839		return (diff);
840
841	return (0);
842}
843
844static __inline int
845pf_state_compare_id(struct pf_state *a, struct pf_state *b)
846{
847	if (a->id > b->id)
848		return (1);
849	if (a->id < b->id)
850		return (-1);
851	if (a->creatorid > b->creatorid)
852		return (1);
853	if (a->creatorid < b->creatorid)
854		return (-1);
855
856	return (0);
857}
858
859int
860pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
861{
862	struct pf_state_item	*si;
863	struct pf_state_key	*cur;
864	struct pf_state		*olds = NULL;
865
866#ifdef __FreeBSD__
867	KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__));
868#else
869	KASSERT(s->key[idx] == NULL);	/* XXX handle this? */
870#endif
871
872#ifdef __FreeBSD__
873	if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) {
874#else
875	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
876#endif
877		/* key exists. check for same kif, if none, add to key */
878		TAILQ_FOREACH(si, &cur->states, entry)
879			if (si->s->kif == s->kif &&
880			    si->s->direction == s->direction) {
881				if (sk->proto == IPPROTO_TCP &&
882				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
883				    si->s->dst.state >= TCPS_FIN_WAIT_2) {
884					si->s->src.state = si->s->dst.state =
885					    TCPS_CLOSED;
886					/* unlink late or sks can go away */
887					olds = si->s;
888				} else {
889#ifdef __FreeBSD__
890					if (V_pf_status.debug >= PF_DEBUG_MISC) {
891#else
892					if (pf_status.debug >= PF_DEBUG_MISC) {
893#endif
894						printf("pf: %s key attach "
895						    "failed on %s: ",
896						    (idx == PF_SK_WIRE) ?
897						    "wire" : "stack",
898						    s->kif->pfik_name);
899						pf_print_state_parts(s,
900						    (idx == PF_SK_WIRE) ?
901						    sk : NULL,
902						    (idx == PF_SK_STACK) ?
903						    sk : NULL);
904						printf(", existing: ");
905						pf_print_state_parts(si->s,
906						    (idx == PF_SK_WIRE) ?
907						    sk : NULL,
908						    (idx == PF_SK_STACK) ?
909						    sk : NULL);
910						printf("\n");
911					}
912#ifdef __FreeBSD__
913					pool_put(&V_pf_state_key_pl, sk);
914#else
915					pool_put(&pf_state_key_pl, sk);
916#endif
917					return (-1);	/* collision! */
918				}
919			}
920#ifdef __FreeBSD__
921		pool_put(&V_pf_state_key_pl, sk);
922#else
923		pool_put(&pf_state_key_pl, sk);
924#endif
925		s->key[idx] = cur;
926	} else
927		s->key[idx] = sk;
928
929#ifdef __FreeBSD__
930	if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) {
931#else
932	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
933#endif
934		pf_state_key_detach(s, idx);
935		return (-1);
936	}
937	si->s = s;
938
939	/* list is sorted, if-bound states before floating */
940#ifdef __FreeBSD__
941	if (s->kif == V_pfi_all)
942#else
943	if (s->kif == pfi_all)
944#endif
945		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
946	else
947		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
948
949	if (olds)
950		pf_unlink_state(olds);
951
952	return (0);
953}
954
955void
956pf_detach_state(struct pf_state *s)
957{
958	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
959		s->key[PF_SK_WIRE] = NULL;
960
961	if (s->key[PF_SK_STACK] != NULL)
962		pf_state_key_detach(s, PF_SK_STACK);
963
964	if (s->key[PF_SK_WIRE] != NULL)
965		pf_state_key_detach(s, PF_SK_WIRE);
966}
967
968void
969pf_state_key_detach(struct pf_state *s, int idx)
970{
971	struct pf_state_item	*si;
972
973	si = TAILQ_FIRST(&s->key[idx]->states);
974	while (si && si->s != s)
975	    si = TAILQ_NEXT(si, entry);
976
977	if (si) {
978		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
979#ifdef __FreeBSD__
980		pool_put(&V_pf_state_item_pl, si);
981#else
982		pool_put(&pf_state_item_pl, si);
983#endif
984	}
985
986	if (TAILQ_EMPTY(&s->key[idx]->states)) {
987#ifdef __FreeBSD__
988		RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]);
989#else
990		RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]);
991#endif
992		if (s->key[idx]->reverse)
993			s->key[idx]->reverse->reverse = NULL;
994#ifdef __FreeBSD__
995	/* XXX: implement this */
996#else
997		if (s->key[idx]->inp)
998			s->key[idx]->inp->inp_pf_sk = NULL;
999#endif
1000#ifdef __FreeBSD__
1001		pool_put(&V_pf_state_key_pl, s->key[idx]);
1002#else
1003		pool_put(&pf_state_key_pl, s->key[idx]);
1004#endif
1005	}
1006	s->key[idx] = NULL;
1007}
1008
1009struct pf_state_key *
1010pf_alloc_state_key(int pool_flags)
1011{
1012	struct pf_state_key	*sk;
1013
1014#ifdef __FreeBSD__
1015	if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL)
1016#else
1017	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
1018#endif
1019		return (NULL);
1020	TAILQ_INIT(&sk->states);
1021
1022	return (sk);
1023}
1024
1025int
1026pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
1027	struct pf_state_key **skw, struct pf_state_key **sks,
1028	struct pf_state_key **skp, struct pf_state_key **nkp,
1029	struct pf_addr *saddr, struct pf_addr *daddr,
1030	u_int16_t sport, u_int16_t dport)
1031{
1032#ifdef __FreeBSD__
1033	KASSERT((*skp == NULL && *nkp == NULL),
1034		("%s: skp == NULL && nkp == NULL", __FUNCTION__));
1035#else
1036	KASSERT((*skp == NULL && *nkp == NULL));
1037#endif
1038
1039	if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
1040		return (ENOMEM);
1041
1042	PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
1043	PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
1044	(*skp)->port[pd->sidx] = sport;
1045	(*skp)->port[pd->didx] = dport;
1046	(*skp)->proto = pd->proto;
1047	(*skp)->af = pd->af;
1048
1049	if (nr != NULL) {
1050		if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
1051			return (ENOMEM); /* caller must handle cleanup */
1052
1053		/* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
1054		PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
1055		PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
1056		(*nkp)->port[0] = (*skp)->port[0];
1057		(*nkp)->port[1] = (*skp)->port[1];
1058		(*nkp)->proto = pd->proto;
1059		(*nkp)->af = pd->af;
1060	} else
1061		*nkp = *skp;
1062
1063	if (pd->dir == PF_IN) {
1064		*skw = *skp;
1065		*sks = *nkp;
1066	} else {
1067		*sks = *skp;
1068		*skw = *nkp;
1069	}
1070	return (0);
1071}
1072
1073
/*
 * Attach a state to its key(s) and enter it into the global tables.
 *
 * kif is recorded in s->kif.  skw is attached as the wire key and sks as
 * the stack key; when both are the same key the stack slot just aliases
 * the wire slot.  If the state has neither id nor creatorid yet, they are
 * assigned from pf_status (stateid is post-incremented, hostid copied).
 * The state is then inserted into the id tree and appended to the state
 * list, the insert/state counters are bumped, kif gains a state
 * reference and, when pfsync is compiled in, pfsync is told about it.
 *
 * Returns 0 on success and -1 if a key could not be attached or the id
 * is already present in the id tree.
 */
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
    struct pf_state_key *sks, struct pf_state *s)
{
#ifndef __FreeBSD__
	splassert(IPL_SOFTNET);
#endif

	s->kif = kif;

	if (skw == sks) {
		/* one key serves both directions */
		if (pf_state_key_attach(skw, s, PF_SK_WIRE))
			return (-1);
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
			/*
			 * Only sks is returned to the pool here.
			 * NOTE(review): presumably pf_state_key_attach()
			 * disposes of skw itself on failure -- confirm.
			 */
#ifdef __FreeBSD__
			pool_put(&V_pf_state_key_pl, sks);
#else
			pool_put(&pf_state_key_pl, sks);
#endif
			return (-1);
		}
		if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
			/* undo the wire attachment made just above */
			pf_state_key_detach(s, PF_SK_WIRE);
			return (-1);
		}
	}

	/* states that arrive without an identity get a local one */
	if (s->id == 0 && s->creatorid == 0) {
#ifdef __FreeBSD__
		s->id = htobe64(V_pf_status.stateid++);
		s->creatorid = V_pf_status.hostid;
#else
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
#endif
	}
	/* a non-NULL RB_INSERT result is treated as failure */
#ifdef __FreeBSD__
	if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
#ifdef __FreeBSD__
			    (unsigned long long)betoh64(s->id), ntohl(s->creatorid));
#else
			    betoh64(s->id), ntohl(s->creatorid));
#endif
			printf("\n");
		}
		pf_detach_state(s);
		return (-1);
	}
#ifdef __FreeBSD__
	TAILQ_INSERT_TAIL(&V_state_list, s, entry_list);
	V_pf_status.fcounters[FCNT_STATE_INSERT]++;
	V_pf_status.states++;
#else
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
#endif
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC > 0
#ifdef __FreeBSD__
	/* the hook is only called when a handler has been installed */
	if (pfsync_insert_state_ptr != NULL)
		pfsync_insert_state_ptr(s);
#else
	pfsync_insert_state(s);
#endif
#endif
	return (0);
}
1151
1152struct pf_state *
1153pf_find_state_byid(struct pf_state_cmp *key)
1154{
1155#ifdef __FreeBSD__
1156	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1157
1158	return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key));
1159#else
1160	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1161
1162	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
1163#endif
1164}
1165
1166/* XXX debug function, intended to be removed one day */
1167int
1168pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
1169    struct pfi_kif *kif, u_int dir)
1170{
1171	/* a (from hdr) and b (new) must be exact opposites of each other */
1172	if (a->af == b->af && a->proto == b->proto &&
1173	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
1174	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
1175	    a->port[0] == b->port[1] &&
1176	    a->port[1] == b->port[0])
1177		return (0);
1178	else {
1179		/* mismatch. must not happen. */
1180		printf("pf: state key linking mismatch! dir=%s, "
1181		    "if=%s, stored af=%u, a0: ",
1182		    dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af);
1183		pf_print_host(&a->addr[0], a->port[0], a->af);
1184		printf(", a1: ");
1185		pf_print_host(&a->addr[1], a->port[1], a->af);
1186		printf(", proto=%u", a->proto);
1187		printf(", found af=%u, a0: ", b->af);
1188		pf_print_host(&b->addr[0], b->port[0], b->af);
1189		printf(", a1: ");
1190		pf_print_host(&b->addr[1], b->port[1], b->af);
1191		printf(", proto=%u", b->proto);
1192		printf(".\n");
1193		return (-1);
1194	}
1195}
1196
/*
 * Find the state matching key for a packet seen on kif in direction dir.
 *
 * The per-packet "statekey" slot (pftag->statekey on FreeBSD,
 * m->m_pkthdr.pf.statekey otherwise) is consulted only for PF_OUT: if it
 * holds a key whose reverse pointer is set, that reverse key is used
 * directly and the tree lookup is skipped.  Otherwise the key is looked
 * up in the state table; on a hit for PF_OUT the packet's key and the
 * found key are cross-linked through their reverse pointers, provided
 * pf_compare_state_keys() accepts the pair.  For PF_OUT the packet's
 * statekey slot is cleared afterwards.
 *
 * Returns the first state on the key's list that is bound to kif or to
 * pfi_all and that uses the key as its wire key (PF_IN) or stack key
 * (any other dir); NULL if no key or no such state exists.  The
 * state-search counter is incremented on every call.
 */
struct pf_state *
#ifdef __FreeBSD__
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
    struct mbuf *m, struct pf_mtag *pftag)
#else
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
    struct mbuf *m)
#endif
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si;

#ifdef __FreeBSD__
	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
#else
	pf_status.fcounters[FCNT_STATE_SEARCH]++;
#endif

#ifdef __FreeBSD__
	/* pftag is only dereferenced when dir == PF_OUT (short-circuit) */
	if (dir == PF_OUT && pftag->statekey &&
	    ((struct pf_state_key *)pftag->statekey)->reverse)
		sk = ((struct pf_state_key *)pftag->statekey)->reverse;
	else {
		/* this inner test is always true inside the outer #ifdef */
#ifdef __FreeBSD__
		if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
#else
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
#endif
		    (struct pf_state_key *)key)) == NULL)
			return (NULL);
		/* remember the pairing so the next lookup can skip the tree */
		if (dir == PF_OUT && pftag->statekey &&
		    pf_compare_state_keys(pftag->statekey, sk,
		    kif, dir) == 0) {
			((struct pf_state_key *)
			    pftag->statekey)->reverse = sk;
			sk->reverse = pftag->statekey;
		}
	}
#else
	if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
	    ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
		sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
	else {
		/* this inner test is always false inside the outer #else */
#ifdef __FreeBSD__
		if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
#else
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
#endif
		    (struct pf_state_key *)key)) == NULL)
			return (NULL);
		/* remember the pairing so the next lookup can skip the tree */
		if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
		    pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk,
		    kif, dir) == 0) {
			((struct pf_state_key *)
			    m->m_pkthdr.pf.statekey)->reverse = sk;
			sk->reverse = m->m_pkthdr.pf.statekey;
		}
	}
#endif

	/* the packet's key hint has been consumed */
	if (dir == PF_OUT)
#ifdef __FreeBSD__
		pftag->statekey = NULL;
#else
		m->m_pkthdr.pf.statekey = NULL;
#endif

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
#ifdef __FreeBSD__
		if ((si->s->kif == V_pfi_all || si->s->kif == kif) &&
#else
		if ((si->s->kif == pfi_all || si->s->kif == kif) &&
#endif
		    sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK]))
			return (si->s);

	return (NULL);
}
1277
1278struct pf_state *
1279pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1280{
1281	struct pf_state_key	*sk;
1282	struct pf_state_item	*si, *ret = NULL;
1283
1284#ifdef __FreeBSD__
1285	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1286#else
1287	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1288#endif
1289
1290#ifdef __FreeBSD__
1291	sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key);
1292#else
1293	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
1294#endif
1295	if (sk != NULL) {
1296		TAILQ_FOREACH(si, &sk->states, entry)
1297			if (dir == PF_INOUT ||
1298			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
1299			    si->s->key[PF_SK_STACK]))) {
1300				if (more == NULL)
1301					return (si->s);
1302
1303				if (ret)
1304					(*more)++;
1305				else
1306					ret = si;
1307			}
1308	}
1309	return (ret ? ret->s : NULL);
1310}
1311
1312/* END state table stuff */
1313
1314
/*
 * Body of the purge kernel thread.  v is cast to a struct vnet pointer
 * and handed to CURVNET_SET().
 *
 * The thread loops forever, sleeping with a timeout of 1 * hz between
 * passes.  Each pass purges a fraction of the state table
 * (1 + states / timeout[PFTM_INTERVAL] entries); every
 * timeout[PFTM_INTERVAL] passes it also purges expired fragments and
 * source nodes.
 *
 * On FreeBSD each pass runs under the shared consistency lock plus
 * PF_LOCK.  If the first state purge reports failure (returns 0), the
 * shared lock is swapped for the exclusive one and the purge is
 * repeated.  When V_pf_end_threads is set, everything is purged under
 * the exclusive lock, V_pf_end_threads is incremented, sleepers on
 * pf_purge_thread are woken and the thread exits.
 */
void
pf_purge_thread(void *v)
{
	int nloops = 0, s;
#ifdef __FreeBSD__
	int locked;	/* 1 once this pass holds the exclusive lock */
#endif

	CURVNET_SET((struct vnet *)v);

	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

#ifdef __FreeBSD__
		sx_slock(&V_pf_consistency_lock);
		PF_LOCK();
		locked = 0;

		if (V_pf_end_threads) {
			/* shutdown requested: trade shared for exclusive */
			PF_UNLOCK();
			sx_sunlock(&V_pf_consistency_lock);
			sx_xlock(&V_pf_consistency_lock);
			PF_LOCK();

			/* check every state in one go */
			pf_purge_expired_states(V_pf_status.states, 1);
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes(1);
			V_pf_end_threads++;

			sx_xunlock(&V_pf_consistency_lock);
			PF_UNLOCK();
			wakeup(pf_purge_thread);
			kproc_exit(0);
		}
#endif
		s = splsoftnet();

		/* process a fraction of the state table every second */
#ifdef __FreeBSD__
		if (!pf_purge_expired_states(1 + (V_pf_status.states /
		    V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
			/* purge gave up; retry holding the exclusive lock */
			PF_UNLOCK();
			sx_sunlock(&V_pf_consistency_lock);
			sx_xlock(&V_pf_consistency_lock);
			PF_LOCK();
			locked = 1;

			pf_purge_expired_states(1 + (V_pf_status.states /
			    V_pf_default_rule.timeout[PFTM_INTERVAL]), 1);
		}
#else
		pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]));
#endif

		/* purge other expired types every PFTM_INTERVAL seconds */
#ifdef __FreeBSD__
		if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) {
#else
		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
#endif
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes(0);
			nloops = 0;
		}

		splx(s);
#ifdef __FreeBSD__
		PF_UNLOCK();
		/* release whichever flavour of the lock this pass ended with */
		if (locked)
			sx_xunlock(&V_pf_consistency_lock);
		else
			sx_sunlock(&V_pf_consistency_lock);
#endif
	}
	/* the loop above has no break, so this is not reached */
	CURVNET_RESTORE();
}
1392
1393u_int32_t
1394pf_state_expires(const struct pf_state *state)
1395{
1396	u_int32_t	timeout;
1397	u_int32_t	start;
1398	u_int32_t	end;
1399	u_int32_t	states;
1400
1401	/* handle all PFTM_* > PFTM_MAX here */
1402	if (state->timeout == PFTM_PURGE)
1403		return (time_second);
1404	if (state->timeout == PFTM_UNTIL_PACKET)
1405		return (0);
1406#ifdef __FreeBSD__
1407	KASSERT(state->timeout != PFTM_UNLINKED,
1408	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
1409	KASSERT((state->timeout < PFTM_MAX),
1410	    ("pf_state_expires: timeout > PFTM_MAX"));
1411#else
1412	KASSERT(state->timeout != PFTM_UNLINKED);
1413	KASSERT(state->timeout < PFTM_MAX);
1414#endif
1415	timeout = state->rule.ptr->timeout[state->timeout];
1416	if (!timeout)
1417#ifdef __FreeBSD__
1418		timeout = V_pf_default_rule.timeout[state->timeout];
1419#else
1420		timeout = pf_default_rule.timeout[state->timeout];
1421#endif
1422	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1423	if (start) {
1424		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1425		states = state->rule.ptr->states_cur;
1426	} else {
1427#ifdef __FreeBSD__
1428		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1429		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1430		states = V_pf_status.states;
1431#else
1432		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1433		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1434		states = pf_status.states;
1435#endif
1436	}
1437	if (end && states > start && start < end) {
1438		if (states < end)
1439			return (state->expire + timeout * (end - states) /
1440			    (end - start));
1441		else
1442			return (time_second);
1443	}
1444	return (state->expire + timeout);
1445}
1446
/*
 * Remove every source-tracking node that has no states left
 * (states <= 0) and whose expire time has passed.
 *
 * A non-zero waslocked makes the function skip taking the consistency
 * lock for writing; otherwise it is taken (FreeBSD: upgraded from the
 * shared lock) on the first node that has to be removed and given back
 * (FreeBSD: downgraded) before returning.
 *
 * For each removed node the owning rule's src_nodes count is
 * decremented, and the rule itself is removed via pf_rm_rule() when its
 * states_cur and max_src_nodes are both <= 0.
 *
 * FreeBSD: returns 0 if the lock upgrade failed (the walk is abandoned
 * at that point), 1 otherwise.  Elsewhere the function returns nothing.
 */
#ifdef __FreeBSD__
int
pf_purge_expired_src_nodes(int waslocked)
#else
void
pf_purge_expired_src_nodes(int waslocked)
#endif
{
	struct pf_src_node		*cur, *next;
	int				 locked = waslocked;

	/* next is fetched up front because cur may be removed below */
#ifdef __FreeBSD__
	for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) {
	next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur);
#else
	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
	next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
#endif

		if (cur->states <= 0 && cur->expire <= time_second) {
			if (! locked) {
#ifdef __FreeBSD__
				if (!sx_try_upgrade(&V_pf_consistency_lock))
					return (0);
#else
				rw_enter_write(&pf_consistency_lock);
#endif
				/* recompute the successor now that the write lock is held */
				next = RB_NEXT(pf_src_tree,
#ifdef __FreeBSD__
				    &V_tree_src_tracking, cur);
#else
				    &tree_src_tracking, cur);
#endif
				locked = 1;
			}
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				if (cur->rule.ptr->states_cur <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0)
					pf_rm_rule(NULL, cur->rule.ptr);
			}
#ifdef __FreeBSD__
			RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur);
			V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			V_pf_status.src_nodes--;
			pool_put(&V_pf_src_tree_pl, cur);
#else
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
#endif
		}
	}

	/* only give the write lock back if it was acquired in here */
	if (locked && !waslocked)
#ifdef __FreeBSD__
	{
		sx_downgrade(&V_pf_consistency_lock);
	}
	return (1);
#else
		rw_exit_write(&pf_consistency_lock);
#endif
}
1512
1513void
1514pf_src_tree_remove_state(struct pf_state *s)
1515{
1516	u_int32_t timeout;
1517
1518	if (s->src_node != NULL) {
1519		if (s->src.tcp_est)
1520			--s->src_node->conn;
1521		if (--s->src_node->states <= 0) {
1522			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1523			if (!timeout)
1524				timeout =
1525#ifdef __FreeBSD__
1526				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1527#else
1528				    pf_default_rule.timeout[PFTM_SRC_NODE];
1529#endif
1530			s->src_node->expire = time_second + timeout;
1531		}
1532	}
1533	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1534		if (--s->nat_src_node->states <= 0) {
1535			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1536			if (!timeout)
1537				timeout =
1538#ifdef __FreeBSD__
1539				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1540#else
1541				    pf_default_rule.timeout[PFTM_SRC_NODE];
1542#endif
1543			s->nat_src_node->expire = time_second + timeout;
1544		}
1545	}
1546	s->src_node = s->nat_src_node = NULL;
1547}
1548
/*
 * Unlink a state from the lookup structures without freeing it.
 *
 * If the state's source peer is in PF_TCPS_PROXY_DST, a TCP RST|ACK
 * built from the wire key (addresses and ports swapped) is sent first.
 * The state is then removed from the id tree, exported to pflow and
 * reported to pfsync where those are compiled in, marked PFTM_UNLINKED,
 * released from its source nodes and detached from its keys.
 *
 * On FreeBSD the PFSTATE_EXPIRING local flag makes a second call on the
 * same state a no-op.
 *
 * callers should be at splsoftnet
 */
void
pf_unlink_state(struct pf_state *cur)
{
#ifdef __FreeBSD__
	/* already being torn down: nothing to do */
	if (cur->local_flags & PFSTATE_EXPIRING)
		return;
	cur->local_flags |= PFSTATE_EXPIRING;
#else
	splassert(IPL_SOFTNET);
#endif

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
#ifdef __FreeBSD__
		pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
#else
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
#endif
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}
#ifdef __FreeBSD__
	RB_REMOVE(pf_state_tree_id, &V_tree_id, cur);
#else
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#endif
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
#ifdef __FreeBSD__
		if (export_pflow_ptr != NULL)
			export_pflow_ptr(cur);
#else
		export_pflow(cur);
#endif
#endif
#if NPFSYNC > 0
#ifdef __FreeBSD__
	if (pfsync_delete_state_ptr != NULL)
		pfsync_delete_state_ptr(cur);
#else
	pfsync_delete_state(cur);
#endif
#endif
	/* pf_free_state() asserts on this marker */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}
1601
1602/* callers should be at splsoftnet and hold the
1603 * write_lock on pf_consistency_lock */
1604void
1605pf_free_state(struct pf_state *cur)
1606{
1607#ifndef __FreeBSD__
1608	splassert(IPL_SOFTNET);
1609#endif
1610
1611#if NPFSYNC > 0
1612#ifdef __FreeBSD__
1613	if (pfsync_state_in_use_ptr != NULL &&
1614		pfsync_state_in_use_ptr(cur))
1615#else
1616	if (pfsync_state_in_use(cur))
1617#endif
1618		return;
1619#endif
1620#ifdef __FreeBSD__
1621	KASSERT(cur->timeout == PFTM_UNLINKED,
1622	    ("pf_free_state: cur->timeout != PFTM_UNLINKED"));
1623#else
1624	KASSERT(cur->timeout == PFTM_UNLINKED);
1625#endif
1626	if (--cur->rule.ptr->states_cur <= 0 &&
1627	    cur->rule.ptr->src_nodes <= 0)
1628		pf_rm_rule(NULL, cur->rule.ptr);
1629	if (cur->nat_rule.ptr != NULL)
1630		if (--cur->nat_rule.ptr->states_cur <= 0 &&
1631			cur->nat_rule.ptr->src_nodes <= 0)
1632			pf_rm_rule(NULL, cur->nat_rule.ptr);
1633	if (cur->anchor.ptr != NULL)
1634		if (--cur->anchor.ptr->states_cur <= 0)
1635			pf_rm_rule(NULL, cur->anchor.ptr);
1636	pf_normalize_tcp_cleanup(cur);
1637	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1638#ifdef __FreeBSD__
1639	TAILQ_REMOVE(&V_state_list, cur, entry_list);
1640#else
1641	TAILQ_REMOVE(&state_list, cur, entry_list);
1642#endif
1643	if (cur->tag)
1644		pf_tag_unref(cur->tag);
1645#ifdef __FreeBSD__
1646	pool_put(&V_pf_state_pl, cur);
1647	V_pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1648	V_pf_status.states--;
1649#else
1650	pool_put(&pf_state_pl, cur);
1651	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1652	pf_status.states--;
1653#endif
1654}
1655
/*
 * Examine up to maxcheck entries of the state list and free those that
 * are unlinked or expired.
 *
 * The position in the list is kept in a static cursor, so consecutive
 * calls continue where the previous one stopped and wrap around at the
 * end of the list.  States already marked PFTM_UNLINKED are freed;
 * states whose pf_state_expires() time is <= time_second are unlinked
 * and then freed.
 *
 * Freeing is done with the consistency lock held for writing, which is
 * acquired lazily on the first state that needs it.  FreeBSD: waslocked
 * non-zero skips that acquisition; if the upgrade from the shared lock
 * fails the function returns 0 at once, otherwise it returns 1 after
 * downgrading a lock it upgraded itself.  Elsewhere the function
 * returns nothing and releases the write lock if it took it.
 */
#ifdef __FreeBSD__
int
pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
#else
void
pf_purge_expired_states(u_int32_t maxcheck)
#endif
{
	/* scan position, preserved across calls */
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
#ifdef __FreeBSD__
	int			 locked = waslocked;
#else
	int			 locked = 0;
#endif

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
#ifdef __FreeBSD__
			cur = TAILQ_FIRST(&V_state_list);
#else
			cur = TAILQ_FIRST(&state_list);
#endif
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* free unlinked state */
			if (! locked) {
#ifdef __FreeBSD__
				if (!sx_try_upgrade(&V_pf_consistency_lock))
					return (0);
#else
				rw_enter_write(&pf_consistency_lock);
#endif
				locked = 1;
			}
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= time_second) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			if (! locked) {
#ifdef __FreeBSD__
				/*
				 * On failure the cursor still points at
				 * the state unlinked just above.
				 */
				if (!sx_try_upgrade(&V_pf_consistency_lock))
					return (0);
#else
				rw_enter_write(&pf_consistency_lock);
#endif
				locked = 1;
			}
			pf_free_state(cur);
		}
		cur = next;
	}

#ifdef __FreeBSD__
	/* hand back only a lock that was upgraded in here */
	if (!waslocked && locked)
		sx_downgrade(&V_pf_consistency_lock);

	return (1);
#else
	if (locked)
		rw_exit_write(&pf_consistency_lock);
#endif
}
1726
1727int
1728pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1729{
1730	if (aw->type != PF_ADDR_TABLE)
1731		return (0);
1732	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
1733		return (1);
1734	return (0);
1735}
1736
1737void
1738pf_tbladdr_remove(struct pf_addr_wrap *aw)
1739{
1740	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1741		return;
1742	pfr_detach_table(aw->p.tbl);
1743	aw->p.tbl = NULL;
1744}
1745
1746void
1747pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1748{
1749	struct pfr_ktable *kt = aw->p.tbl;
1750
1751	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1752		return;
1753	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1754		kt = kt->pfrkt_root;
1755	aw->p.tbl = NULL;
1756	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1757		kt->pfrkt_cnt : -1;
1758}
1759
/*
 * Print an address and, if non-zero, a port.  addr and p are converted
 * with ntohl()/ntohs() before printing.
 *
 * AF_INET:  dotted quad, port appended as ":port".
 * AF_INET6: eight hex groups separated by ':'; the longest run of two or
 *           more all-zero groups (the first one on a tie) is collapsed
 *           to "::"; port appended as "[port]".
 * Any other address family prints nothing.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t host = ntohl(addr->addr32[0]);

		printf("%u.%u.%u.%u", (host>>24)&255, (host>>16)&255,
		    (host>>8)&255, host&255);
		if (p != 0) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t grp;
		/* 255 marks "no run"; run_* is the current, best_* the longest */
		u_int8_t idx, run_lo, run_hi, best_lo, best_hi;

		run_lo = run_hi = best_lo = best_hi = 255;
		for (idx = 0; idx < 8; idx++) {
			if (addr->addr16[idx] == 0) {
				if (run_lo == 255)
					run_lo = idx;
				run_hi = idx;
				continue;
			}
			/* a non-zero group closes the current run */
			if ((run_hi - run_lo) >
			    (best_hi - best_lo)) {
				best_lo = run_lo;
				best_hi = run_hi;
			}
			run_lo = run_hi = 255;
		}
		/* account for a run that extends to the last group */
		if ((run_hi - run_lo) >
		    (best_hi - best_lo)) {
			best_lo = run_lo;
			best_hi = run_hi;
		}

		for (idx = 0; idx < 8; idx++) {
			if (idx < best_lo || idx > best_hi) {
				grp = ntohs(addr->addr16[idx]);
				printf("%x", grp);
				if (idx < 7)
					printf(":");
				continue;
			}
			/* inside the collapsed run */
			if (idx == 0)
				printf(":");
			if (idx == best_hi)
				printf(":");
		}
		if (p != 0) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
1822
1823void
1824pf_print_state(struct pf_state *s)
1825{
1826	pf_print_state_parts(s, NULL, NULL);
1827}
1828
1829void
1830pf_print_state_parts(struct pf_state *s,
1831    struct pf_state_key *skwp, struct pf_state_key *sksp)
1832{
1833	struct pf_state_key *skw, *sks;
1834	u_int8_t proto, dir;
1835
1836	/* Do our best to fill these, but they're skipped if NULL */
1837	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1838	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1839	proto = skw ? skw->proto : (sks ? sks->proto : 0);
1840	dir = s ? s->direction : 0;
1841
1842	switch (proto) {
1843	case IPPROTO_IPV4:
1844		printf("IPv4");
1845		break;
1846	case IPPROTO_IPV6:
1847		printf("IPv6");
1848		break;
1849	case IPPROTO_TCP:
1850		printf("TCP");
1851		break;
1852	case IPPROTO_UDP:
1853		printf("UDP");
1854		break;
1855	case IPPROTO_ICMP:
1856		printf("ICMP");
1857		break;
1858	case IPPROTO_ICMPV6:
1859		printf("ICMPv6");
1860		break;
1861	default:
1862		printf("%u", skw->proto);
1863		break;
1864	}
1865	switch (dir) {
1866	case PF_IN:
1867		printf(" in");
1868		break;
1869	case PF_OUT:
1870		printf(" out");
1871		break;
1872	}
1873	if (skw) {
1874		printf(" wire: ");
1875		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1876		printf(" ");
1877		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1878	}
1879	if (sks) {
1880		printf(" stack: ");
1881		if (sks != skw) {
1882			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1883			printf(" ");
1884			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1885		} else
1886			printf("-");
1887	}
1888	if (s) {
1889		if (proto == IPPROTO_TCP) {
1890			printf(" [lo=%u high=%u win=%u modulator=%u",
1891			    s->src.seqlo, s->src.seqhi,
1892			    s->src.max_win, s->src.seqdiff);
1893			if (s->src.wscale && s->dst.wscale)
1894				printf(" wscale=%u",
1895				    s->src.wscale & PF_WSCALE_MASK);
1896			printf("]");
1897			printf(" [lo=%u high=%u win=%u modulator=%u",
1898			    s->dst.seqlo, s->dst.seqhi,
1899			    s->dst.max_win, s->dst.seqdiff);
1900			if (s->src.wscale && s->dst.wscale)
1901				printf(" wscale=%u",
1902				s->dst.wscale & PF_WSCALE_MASK);
1903			printf("]");
1904		}
1905		printf(" %u:%u", s->src.state, s->dst.state);
1906	}
1907}
1908
1909void
1910pf_print_flags(u_int8_t f)
1911{
1912	if (f)
1913		printf(" ");
1914	if (f & TH_FIN)
1915		printf("F");
1916	if (f & TH_SYN)
1917		printf("S");
1918	if (f & TH_RST)
1919		printf("R");
1920	if (f & TH_PUSH)
1921		printf("P");
1922	if (f & TH_ACK)
1923		printf("A");
1924	if (f & TH_URG)
1925		printf("U");
1926	if (f & TH_ECE)
1927		printf("E");
1928	if (f & TH_CWR)
1929		printf("W");
1930}
1931
/*
 * Helper for pf_calc_skip_steps().  For skip criterion i, walk the rules
 * from head[i] up to (but not including) cur, pointing each rule's
 * skip[i] at cur; afterwards head[i] == cur.  Expects variables named
 * head and cur to be in scope where the macro is expanded.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
1939
/*
 * Compute the skip pointers for every rule in a rule queue.
 *
 * For each of the PF_SKIP_COUNT criteria (interface, direction, address
 * family, protocol, source address/port, destination address/port) the
 * rules are walked in order; head[i] tracks the first rule of the
 * current run of rules that agree on criterion i.  Whenever a rule
 * differs from its predecessor in criterion i, all rules of the run that
 * just ended get their skip[i] pointed at this rule.  After the walk the
 * remaining runs are closed with cur == NULL, i.e. their skip pointers
 * are set to NULL.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	/* cur and head are referenced by name from PF_SET_SKIP_STEPS() */
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {

		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* cur is NULL here: terminate every still-open run */
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
1981
1982int
1983pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1984{
1985	if (aw1->type != aw2->type)
1986		return (1);
1987	switch (aw1->type) {
1988	case PF_ADDR_ADDRMASK:
1989	case PF_ADDR_RANGE:
1990		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1991			return (1);
1992		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1993			return (1);
1994		return (0);
1995	case PF_ADDR_DYNIFTL:
1996		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1997	case PF_ADDR_NOROUTE:
1998	case PF_ADDR_URPFFAILED:
1999		return (0);
2000	case PF_ADDR_TABLE:
2001		return (aw1->p.tbl != aw2->p.tbl);
2002	case PF_ADDR_RTLABEL:
2003		return (aw1->v.rtlabel != aw2->v.rtlabel);
2004	default:
2005		printf("invalid address type: %d\n", aw1->type);
2006		return (1);
2007	}
2008}
2009
2010/**
2011 * Checksum updates are a little complicated because the checksum in the TCP/UDP
2012 * header isn't always a full checksum. In some cases (i.e. output) it's a
2013 * pseudo-header checksum, which is a partial checksum over src/dst IP
2014 * addresses, protocol number and length.
2015 *
2016 * That means we have the following cases:
2017 *  * Input or forwarding: we don't have TSO, the checksum fields are full
2018 *  	checksums, we need to update the checksum whenever we change anything.
2019 *  * Output (i.e. the checksum is a pseudo-header checksum):
2020 *  	x The field being updated is src/dst address or affects the length of
2021 *  	the packet. We need to update the pseudo-header checksum (note that this
2022 *  	checksum is not ones' complement).
2023 *  	x Some other field is being modified (e.g. src/dst port numbers): We
2024 *  	don't have to update anything.
2025 **/
/*
 * Incrementally adjust a 16-bit checksum for one 16-bit word that
 * changed from old to new.  The sum cksum + old - new is folded back
 * into 16 bits once.
 *
 * When udp is non-zero, a checksum of 0 is passed through unchanged and
 * a computed result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	/* fold the carry/borrow in the upper half back into the low 16 bits */
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
2040
2041u_int16_t
2042pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
2043        u_int16_t new, u_int8_t udp)
2044{
2045	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2046		return (cksum);
2047
2048	return (pf_cksum_fixup(cksum, old, new, udp));
2049}
2050
2051void
2052pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
2053        u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
2054        sa_family_t af)
2055{
2056	struct pf_addr	ao;
2057	u_int16_t	po = *p;
2058
2059	PF_ACPY(&ao, a, af);
2060	PF_ACPY(a, an, af);
2061
2062	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2063		*pc = ~*pc;
2064
2065	*p = pn;
2066
2067	switch (af) {
2068#ifdef INET
2069	case AF_INET:
2070		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2071		    ao.addr16[0], an->addr16[0], 0),
2072		    ao.addr16[1], an->addr16[1], 0);
2073		*p = pn;
2074
2075		*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
2076		    ao.addr16[0], an->addr16[0], u),
2077		    ao.addr16[1], an->addr16[1], u);
2078
2079		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
2080		break;
2081#endif /* INET */
2082#ifdef INET6
2083	case AF_INET6:
2084		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2085		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2086		    pf_cksum_fixup(pf_cksum_fixup(*pc,
2087		    ao.addr16[0], an->addr16[0], u),
2088		    ao.addr16[1], an->addr16[1], u),
2089		    ao.addr16[2], an->addr16[2], u),
2090		    ao.addr16[3], an->addr16[3], u),
2091		    ao.addr16[4], an->addr16[4], u),
2092		    ao.addr16[5], an->addr16[5], u),
2093		    ao.addr16[6], an->addr16[6], u),
2094		    ao.addr16[7], an->addr16[7], u);
2095
2096		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
2097		break;
2098#endif /* INET6 */
2099	}
2100
2101	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
2102	    CSUM_DELAY_DATA_IPV6)) {
2103		*pc = ~*pc;
2104		if (! *pc)
2105			*pc = 0xffff;
2106	}
2107}
2108
/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions
 *
 * The 32-bit value at a is replaced by an, and the checksum *c is
 * adjusted for the change of the upper and then the lower 16-bit half.
 * u is passed to pf_cksum_fixup() as its udp argument.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
}
2120
/*
 * Like pf_change_a(), but for a protocol checksum: the 32-bit value at a
 * (no alignment required) is replaced by an, and *c is adjusted for both
 * 16-bit halves through pf_proto_cksum_fixup(), which takes the mbuf m
 * into account.
 */
void
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	sum = pf_proto_cksum_fixup(m, *c, prev >> 16, an >> 16, udp);
	*c = pf_proto_cksum_fixup(m, sum, prev & 0xffff, an & 0xffff, udp);
}
2133
#ifdef INET6
/*
 * Replace the IPv6 address at a by *an and adjust the checksum *c for
 * each of the eight 16-bit words of the address.  u is passed to
 * pf_cksum_fixup() as its udp argument.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	prev;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&prev, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, prev.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
2156
/*
 * Rewrite the addresses/port referred to by an ICMP message and fix up
 * every checksum that covers them.
 *
 *   ia   inner IP address, replaced by *na
 *   ip   inner protocol port, replaced by np (skipped if NULL)
 *   oa   outer IP address, replaced by *na (skipped if NULL)
 *   pc   inner protocol checksum (may be NULL)
 *   h2c  inner IP header checksum (used for AF_INET only)
 *   ic   ICMP checksum
 *   hc   outer IP header checksum (used for AF_INET only)
 *   u    passed to pf_cksum_fixup() as its udp argument where the
 *        code below does not pass a literal 0
 *
 * Because the inner headers are covered by the ICMP checksum, every
 * change made to an inner field or inner checksum is applied to *ic as
 * well.
 */
void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	/* copies of the inner and outer address before they are overwritten */
	struct pf_addr	oia, ooa;

	PF_ACPY(&oia, ia, af);
	if (oa)
		PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t	oip = *ip;
		u_int32_t	opc;	/* only set and used when pc != NULL */

		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		/* the ICMP checksum covers the port ... */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		/* ... and the inner protocol checksum field itself */
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t	 oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		/* the inner header checksum changed too; reflect it in *ic */
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
	if (oa) {
		PF_ACPY(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			/* for IPv6 the outer address change is applied to *ic */
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}
2244
2245
2246/*
2247 * Need to modulate the sequence numbers in the TCP SACK option
2248 * (credits to Krzysztof Pfaff for report and patch)
2249 */
2250int
2251pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
2252    struct tcphdr *th, struct pf_state_peer *dst)
2253{
2254	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2255#ifdef __FreeBSD__
2256	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
2257#else
2258	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
2259#endif
2260	int copyback = 0, i, olen;
2261	struct sackblk sack;
2262
2263#define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
2264	if (hlen < TCPOLEN_SACKLEN ||
2265	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
2266		return 0;
2267
2268	while (hlen >= TCPOLEN_SACKLEN) {
2269		olen = opt[1];
2270		switch (*opt) {
2271		case TCPOPT_EOL:	/* FALLTHROUGH */
2272		case TCPOPT_NOP:
2273			opt++;
2274			hlen--;
2275			break;
2276		case TCPOPT_SACK:
2277			if (olen > hlen)
2278				olen = hlen;
2279			if (olen >= TCPOLEN_SACKLEN) {
2280				for (i = 2; i + TCPOLEN_SACK <= olen;
2281				    i += TCPOLEN_SACK) {
2282					memcpy(&sack, &opt[i], sizeof(sack));
2283					pf_change_proto_a(m, &sack.start, &th->th_sum,
2284					    htonl(ntohl(sack.start) - dst->seqdiff), 0);
2285					pf_change_proto_a(m, &sack.end, &th->th_sum,
2286					    htonl(ntohl(sack.end) - dst->seqdiff), 0);
2287					memcpy(&opt[i], &sack, sizeof(sack));
2288				}
2289				copyback = 1;
2290			}
2291			/* FALLTHROUGH */
2292		default:
2293			if (olen < 2)
2294				olen = 2;
2295			hlen -= olen;
2296			opt += olen;
2297		}
2298	}
2299
2300	if (copyback)
2301#ifdef __FreeBSD__
2302		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
2303#else
2304		m_copyback(m, off + sizeof(*th), thoptlen, opts);
2305#endif
2306	return (copyback);
2307}
2308
/*
 * Build a bare TCP segment (no payload, optionally with an MSS option)
 * in a freshly allocated mbuf and hand it to ip_output()/ip6_output().
 *
 *  replyto     (FreeBSD only) not referenced in this function body
 *  r           rule supplying rtableid and, with ALTQ, qid; may be NULL
 *  af          AF_INET or AF_INET6
 *  saddr/daddr addresses copied into the IP header
 *  sport/dport stored into the TCP header as passed (no byte swapping)
 *  seq/ack/win host order; converted with htonl()/htons() here
 *  mss         if non-zero, a 4-byte MAXSEG option is appended
 *  ttl         IPv4 TTL; 0 selects the system default
 *  tag         if non-zero the packet is flagged so it is not filtered again
 *  rtag        tag value stored in the packet's pf tag
 *  eh, ifp     if eh is non-NULL, the IPv4 path outputs through a
 *              hand-built route carrying the swapped ethernet addresses;
 *              ifp must then be non-NULL or the packet is dropped
 *
 * On FreeBSD the pf lock is released around the ip_output()/ip6_output()
 * calls and re-acquired afterwards.
 */
void
#ifdef __FreeBSD__
pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
#else
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
#endif
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
	struct mbuf	*m;
	int		 len, tlen;
#ifdef INET
	struct ip	*h;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6;
#endif /* INET6 */
	struct tcphdr	*th;
	char		*opt;
#ifdef __FreeBSD__
	struct pf_mtag  *pf_mtag;

	/* Only address families compiled into the kernel are acceptable. */
	KASSERT(
#ifdef INET
	    af == AF_INET
#else
	    0
#endif
	    ||
#ifdef INET6
	    af == AF_INET6
#else
	    0
#endif
	    , ("Unsupported AF %d", af));
	len = 0;
	th = NULL;
#ifdef INET
	h = NULL;
#endif
#ifdef INET6
	h6 = NULL;
#endif
#endif /* __FreeBSD__ */

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	/* Total packet length: network header plus TCP header and option. */
	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/* create outgoing mbuf */
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL)
		return;
#ifdef __FreeBSD__
#ifdef MAC
	mac_netinet_firewall_send(m);
#endif
	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
		m_freem(m);
		return;
	}
#endif
	/*
	 * Only the flag assignment is conditional on 'tag'; the tag value
	 * itself (rtag) is stored unconditionally.
	 */
	if (tag)
#ifdef __FreeBSD__
		m->m_flags |= M_SKIP_FIREWALL;
	pf_mtag->tag = rtag;
#else
		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	m->m_pkthdr.pf.tag = rtag;
#endif

	if (r != NULL && r->rtableid >= 0)
#ifdef __FreeBSD__
	{
		M_SETFIB(m, r->rtableid);
		pf_mtag->rtableid = r->rtableid;
#else
		m->m_pkthdr.pf.rtableid = r->rtableid;
#endif
#ifdef __FreeBSD__
	}
#endif

#ifdef ALTQ
	if (r != NULL && r->qid) {
#ifdef __FreeBSD__
		pf_mtag->qid = r->qid;

		/* add hints for ecn */
		/*
		 * NOTE(review): this pointer is taken before m_data is
		 * advanced by max_linkhdr below, so it does not point at the
		 * header written later -- confirm whether that is intended.
		 */
		pf_mtag->hdr = mtod(m, struct ip *);
#else
		m->m_pkthdr.pf.qid = r->qid;
		/* add hints for ecn */
		m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
#endif
	}
#endif /* ALTQ */
	/* Leave room for a link-layer header, then zero the packet area. */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	switch (af) {
#ifdef INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	/* Append the MAXSEG option: kind, length 4, value in network order. */
	if (mss) {
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		HTONS(mss);
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

	switch (af) {
#ifdef INET
	case AF_INET:
		/* TCP checksum */
		/*
		 * Computed over the whole buffer while the IP header holds
		 * only the fields set above (everything else is still zero).
		 */
		th->th_sum = in_cksum(m, len);

		/* Finish the IP header */
		h->ip_v = 4;
		h->ip_hl = sizeof(*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
#ifdef __FreeBSD__
		/* The FreeBSD branch stores ip_off/ip_len without htons(). */
		h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
		h->ip_len = len;
		h->ip_ttl = ttl ? ttl : V_ip_defttl;
#else
		h->ip_len = htons(len);
		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
		h->ip_ttl = ttl ? ttl : ip_defttl;
#endif
		h->ip_sum = 0;
		if (eh == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
		ip_output(m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
		PF_LOCK();
#else /* ! __FreeBSD__ */
			ip_output(m, (void *)NULL, (void *)NULL, 0,
			    (void *)NULL, (void *)NULL);
#endif
		} else {
			/*
			 * Output through a hand-built route whose destination
			 * sockaddr carries an ethernet header with the
			 * source and destination addresses of 'eh' swapped.
			 */
			struct route		 ro;
			struct rtentry		 rt;
			struct ether_header	*e = (void *)ro.ro_dst.sa_data;

			if (ifp == NULL) {
				m_freem(m);
				return;
			}
			rt.rt_ifp = ifp;
			ro.ro_rt = &rt;
			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
			e->ether_type = eh->ether_type;
#ifdef __FreeBSD__
			PF_UNLOCK();
			/* XXX_IMPORT: later */
			ip_output(m, (void *)NULL, &ro, 0,
			    (void *)NULL, (void *)NULL);
			PF_LOCK();
#else /* ! __FreeBSD__ */
			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
			    (void *)NULL, (void *)NULL);
#endif
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof(struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

#ifdef __FreeBSD__
		PF_UNLOCK();
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		PF_LOCK();
#else
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
#endif
		break;
#endif /* INET6 */
	}
}
2551
2552static void
2553pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2554    struct pf_rule *r)
2555{
2556	struct mbuf	*m0;
2557#ifdef __FreeBSD__
2558#ifdef INET
2559	struct ip *ip;
2560#endif
2561	struct pf_mtag *pf_mtag;
2562#endif
2563
2564#ifdef __FreeBSD__
2565	m0 = m_copypacket(m, M_DONTWAIT);
2566	if (m0 == NULL)
2567		return;
2568#else
2569	if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
2570		return;
2571#endif
2572
2573#ifdef __FreeBSD__
2574	if ((pf_mtag = pf_get_mtag(m0)) == NULL)
2575		return;
2576	/* XXX: revisit */
2577	m0->m_flags |= M_SKIP_FIREWALL;
2578#else
2579	m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
2580#endif
2581
2582	if (r->rtableid >= 0)
2583#ifdef __FreeBSD__
2584	{
2585		M_SETFIB(m0, r->rtableid);
2586		pf_mtag->rtableid = r->rtableid;
2587#else
2588		m0->m_pkthdr.pf.rtableid = r->rtableid;
2589#endif
2590#ifdef __FreeBSD__
2591	}
2592#endif
2593
2594#ifdef ALTQ
2595	if (r->qid) {
2596#ifdef __FreeBSD__
2597		pf_mtag->qid = r->qid;
2598		/* add hints for ecn */
2599		pf_mtag->hdr = mtod(m0, struct ip *);
2600#else
2601		m0->m_pkthdr.pf.qid = r->qid;
2602		/* add hints for ecn */
2603		m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
2604#endif
2605	}
2606#endif /* ALTQ */
2607
2608	switch (af) {
2609#ifdef INET
2610	case AF_INET:
2611#ifdef __FreeBSD__
2612		/* icmp_error() expects host byte ordering */
2613		ip = mtod(m0, struct ip *);
2614		NTOHS(ip->ip_len);
2615		NTOHS(ip->ip_off);
2616		PF_UNLOCK();
2617		icmp_error(m0, type, code, 0, 0);
2618		PF_LOCK();
2619#else
2620		icmp_error(m0, type, code, 0, 0);
2621#endif
2622		break;
2623#endif /* INET */
2624#ifdef INET6
2625	case AF_INET6:
2626#ifdef __FreeBSD__
2627		PF_UNLOCK();
2628#endif
2629		icmp6_error(m0, type, code, 0);
2630#ifdef __FreeBSD__
2631		PF_LOCK();
2632#endif
2633		break;
2634#endif /* INET6 */
2635	}
2636}
2637
/*
 * Compare addresses a and b under mask m.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; a non-zero n inverts the result.  An address family that is
 * not handled counts as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* Bits that differ must all lie outside the mask. */
		equal = (((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) == 0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	w;

		equal = 1;
		for (w = 0; w < 4; w++) {
			if ((a->addr32[w] & m->addr32[w]) !=
			    (b->addr32[w] & m->addr32[w])) {
				equal = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
2683
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 *
 * The 32-bit words of a pf_addr hold network byte order values (other
 * code in this file increments them via htonl(ntohl(x) + 1)), so each
 * word is converted with ntohl() before it is compared numerically.
 * Comparing the raw words gives wrong answers on little-endian hosts.
 *
 * An address family that is not handled is reported as in range (1).
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/*
		 * Word-by-word comparison, most significant word first:
		 * the first differing word decides.
		 */
		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
2721
2722int
2723pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2724{
2725	switch (op) {
2726	case PF_OP_IRG:
2727		return ((p > a1) && (p < a2));
2728	case PF_OP_XRG:
2729		return ((p < a1) || (p > a2));
2730	case PF_OP_RRG:
2731		return ((p >= a1) && (p <= a2));
2732	case PF_OP_EQ:
2733		return (p == a1);
2734	case PF_OP_NE:
2735		return (p != a1);
2736	case PF_OP_LT:
2737		return (p < a1);
2738	case PF_OP_LE:
2739		return (p <= a1);
2740	case PF_OP_GT:
2741		return (p > a1);
2742	case PF_OP_GE:
2743		return (p >= a1);
2744	}
2745	return (0); /* never reached */
2746}
2747
/*
 * Port flavour of pf_match(): all three port values are converted from
 * network to host byte order before being compared.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	lo = ntohs(a1);
	u_int16_t	hi = ntohs(a2);
	u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
2756
2757int
2758pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2759{
2760	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2761		return (0);
2762	return (pf_match(op, a1, a2, u));
2763}
2764
2765int
2766pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2767{
2768	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2769		return (0);
2770	return (pf_match(op, a1, a2, g));
2771}
2772
2773int
2774#ifdef __FreeBSD__
2775pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag,
2776    struct pf_mtag *pf_mtag)
2777#else
2778pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
2779#endif
2780{
2781	if (*tag == -1)
2782#ifdef __FreeBSD__
2783		*tag = pf_mtag->tag;
2784#else
2785		*tag = m->m_pkthdr.pf.tag;
2786#endif
2787
2788	return ((!r->match_tag_not && r->match_tag == *tag) ||
2789	    (r->match_tag_not && r->match_tag != *tag));
2790}
2791
2792int
2793#ifdef __FreeBSD__
2794pf_tag_packet(struct mbuf *m, int tag, int rtableid,
2795    struct pf_mtag *pf_mtag)
2796#else
2797pf_tag_packet(struct mbuf *m, int tag, int rtableid)
2798#endif
2799{
2800	if (tag <= 0 && rtableid < 0)
2801		return (0);
2802
2803	if (tag > 0)
2804#ifdef __FreeBSD__
2805		pf_mtag->tag = tag;
2806#else
2807		m->m_pkthdr.pf.tag = tag;
2808#endif
2809	if (rtableid >= 0)
2810#ifdef __FreeBSD__
2811	{
2812		M_SETFIB(m, rtableid);
2813	}
2814#else
2815		m->m_pkthdr.pf.rtableid = rtableid;
2816#endif
2817
2818	return (0);
2819}
2820
2821void
2822pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
2823    struct pf_rule **r, struct pf_rule **a, int *match)
2824{
2825	struct pf_anchor_stackframe	*f;
2826
2827	(*r)->anchor->match = 0;
2828	if (match)
2829		*match = 0;
2830#ifdef __FreeBSD__
2831	if (*depth >= sizeof(V_pf_anchor_stack) /
2832	    sizeof(V_pf_anchor_stack[0])) {
2833#else
2834	if (*depth >= sizeof(pf_anchor_stack) /
2835	    sizeof(pf_anchor_stack[0])) {
2836#endif
2837		printf("pf_step_into_anchor: stack overflow\n");
2838		*r = TAILQ_NEXT(*r, entries);
2839		return;
2840	} else if (*depth == 0 && a != NULL)
2841		*a = *r;
2842#ifdef __FreeBSD__
2843	f = V_pf_anchor_stack + (*depth)++;
2844#else
2845	f = pf_anchor_stack + (*depth)++;
2846#endif
2847	f->rs = *rs;
2848	f->r = *r;
2849	if ((*r)->anchor_wildcard) {
2850		f->parent = &(*r)->anchor->children;
2851		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2852		    NULL) {
2853			*r = NULL;
2854			return;
2855		}
2856		*rs = &f->child->ruleset;
2857	} else {
2858		f->parent = NULL;
2859		f->child = NULL;
2860		*rs = &(*r)->anchor->ruleset;
2861	}
2862	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2863}
2864
2865int
2866pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2867    struct pf_rule **r, struct pf_rule **a, int *match)
2868{
2869	struct pf_anchor_stackframe	*f;
2870	int quick = 0;
2871
2872	do {
2873		if (*depth <= 0)
2874			break;
2875#ifdef __FreeBSD__
2876		f = V_pf_anchor_stack + *depth - 1;
2877#else
2878		f = pf_anchor_stack + *depth - 1;
2879#endif
2880		if (f->parent != NULL && f->child != NULL) {
2881			if (f->child->match ||
2882			    (match != NULL && *match)) {
2883				f->r->anchor->match = 1;
2884				*match = 0;
2885			}
2886			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2887			if (f->child != NULL) {
2888				*rs = &f->child->ruleset;
2889				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2890				if (*r == NULL)
2891					continue;
2892				else
2893					break;
2894			}
2895		}
2896		(*depth)--;
2897		if (*depth == 0 && a != NULL)
2898			*a = NULL;
2899		*rs = f->rs;
2900		if (f->r->anchor->match || (match != NULL && *match))
2901			quick = f->r->quick;
2902		*r = TAILQ_NEXT(f->r, entries);
2903	} while (*r == NULL);
2904
2905	return (quick);
2906}
2907
2908#ifdef INET6
2909void
2910pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2911    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2912{
2913	switch (af) {
2914#ifdef INET
2915	case AF_INET:
2916		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2917		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2918		break;
2919#endif /* INET */
2920	case AF_INET6:
2921		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2922		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2923		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2924		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2925		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2926		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2927		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2928		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2929		break;
2930	}
2931}
2932
2933void
2934pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2935{
2936	switch (af) {
2937#ifdef INET
2938	case AF_INET:
2939		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2940		break;
2941#endif /* INET */
2942	case AF_INET6:
2943		if (addr->addr32[3] == 0xffffffff) {
2944			addr->addr32[3] = 0;
2945			if (addr->addr32[2] == 0xffffffff) {
2946				addr->addr32[2] = 0;
2947				if (addr->addr32[1] == 0xffffffff) {
2948					addr->addr32[1] = 0;
2949					addr->addr32[0] =
2950					    htonl(ntohl(addr->addr32[0]) + 1);
2951				} else
2952					addr->addr32[1] =
2953					    htonl(ntohl(addr->addr32[1]) + 1);
2954			} else
2955				addr->addr32[2] =
2956				    htonl(ntohl(addr->addr32[2]) + 1);
2957		} else
2958			addr->addr32[3] =
2959			    htonl(ntohl(addr->addr32[3]) + 1);
2960		break;
2961	}
2962}
2963#endif /* INET6 */
2964
/*
 * Find the local socket (protocol control block) a TCP or UDP packet
 * belongs to and record its owner's credentials in pd->lookup.
 *
 *  direction  PF_IN uses the packet's source/destination as they are;
 *             any other value swaps addresses and ports before the
 *             lookup
 *  pd         packet descriptor; pd->lookup is always reset first
 *  inp_arg    (FreeBSD only) an already known, locked inpcb; when
 *             non-NULL its credentials are used and no lookup is done
 *
 * Returns 1 when credentials were stored, -1 when pd is NULL, the
 * protocol is neither TCP nor UDP, the protocol header is missing, the
 * address family is not handled, or no socket was found.
 */
int
#ifdef __FreeBSD__
pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
#else
pf_socket_lookup(int direction, struct pf_pdesc *pd)
#endif
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
#ifdef __FreeBSD__
	struct inpcbinfo	*pi;
#else
	struct inpcbtable	*tb;
#endif
	struct inpcb		*inp;

	if (pd == NULL)
		return (-1);
	/* Start from "unknown" so a failed lookup leaves well-defined values. */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

#ifdef __FreeBSD__
	/* Caller already holds the pcb: take the credentials directly. */
	if (inp_arg != NULL) {
		INP_LOCK_ASSERT(inp_arg);
		pd->lookup.uid = inp_arg->inp_cred->cr_uid;
		pd->lookup.gid = inp_arg->inp_cred->cr_groups[0];
		return (1);
	}
#endif

	/* Pick the ports and the pcb table matching the protocol. */
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL)
			return (-1);
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
#ifdef __FreeBSD__
		pi = &V_tcbinfo;
#else
		tb = &tcbtable;
#endif
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL)
			return (-1);
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
#ifdef __FreeBSD__
		pi = &V_udbinfo;
#else
		tb = &udbtable;
#endif
		break;
	default:
		return (-1);
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		/* Not inbound: look the socket up with both ends swapped. */
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	/*
	 * Try an exact lookup first; if that fails, retry with a wildcard
	 * lookup (FreeBSD) or a listening-socket lookup (otherwise).
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET:
#ifdef __FreeBSD__
		/*
		 * XXXRW: would be nice if we had an mbuf here so that we
		 * could use in_pcblookup_mbuf().
		 */
		inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4,
			dport, INPLOOKUP_RLOCKPCB, NULL);
		if (inp == NULL) {
			inp = in_pcblookup(pi, saddr->v4, sport,
			   daddr->v4, dport, INPLOOKUP_WILDCARD |
			   INPLOOKUP_RLOCKPCB, NULL);
			if (inp == NULL)
				return (-1);
		}
#else
		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
		if (inp == NULL) {
			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0,
			    NULL);
			if (inp == NULL)
				return (-1);
		}
#endif
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
#ifdef __FreeBSD__
		/*
		 * XXXRW: would be nice if we had an mbuf here so that we
		 * could use in6_pcblookup_mbuf().
		 */
		inp = in6_pcblookup(pi, &saddr->v6, sport,
			&daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL);
		if (inp == NULL) {
			inp = in6_pcblookup(pi, &saddr->v6, sport,
			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
			    INPLOOKUP_RLOCKPCB, NULL);
			if (inp == NULL)
				return (-1);
		}
#else
		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
		    dport);
		if (inp == NULL) {
			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0,
			    NULL);
			if (inp == NULL)
				return (-1);
		}
#endif
		break;
#endif /* INET6 */

	default:
		return (-1);
	}
#ifdef __FreeBSD__
	/*
	 * The lookup was requested with INPLOOKUP_RLOCKPCB; copy the
	 * credentials and drop the read lock.  The pid is not filled in on
	 * this branch and keeps its NO_PID default.
	 */
	INP_RLOCK_ASSERT(inp);
	pd->lookup.uid = inp->inp_cred->cr_uid;
	pd->lookup.gid = inp->inp_cred->cr_groups[0];
	INP_RUNLOCK(inp);
#else
	pd->lookup.uid = inp->inp_socket->so_euid;
	pd->lookup.gid = inp->inp_socket->so_egid;
	pd->lookup.pid = inp->inp_socket->so_cpid;
#endif
	return (1);
}
3106
3107u_int8_t
3108pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3109{
3110	int		 hlen;
3111	u_int8_t	 hdr[60];
3112	u_int8_t	*opt, optlen;
3113	u_int8_t	 wscale = 0;
3114
3115	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
3116	if (hlen <= sizeof(struct tcphdr))
3117		return (0);
3118	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3119		return (0);
3120	opt = hdr + sizeof(struct tcphdr);
3121	hlen -= sizeof(struct tcphdr);
3122	while (hlen >= 3) {
3123		switch (*opt) {
3124		case TCPOPT_EOL:
3125		case TCPOPT_NOP:
3126			++opt;
3127			--hlen;
3128			break;
3129		case TCPOPT_WINDOW:
3130			wscale = opt[2];
3131			if (wscale > TCP_MAX_WINSHIFT)
3132				wscale = TCP_MAX_WINSHIFT;
3133			wscale |= PF_WSCALE_FLAG;
3134			/* FALLTHROUGH */
3135		default:
3136			optlen = opt[1];
3137			if (optlen < 2)
3138				optlen = 2;
3139			hlen -= optlen;
3140			opt += optlen;
3141			break;
3142		}
3143	}
3144	return (wscale);
3145}
3146
3147u_int16_t
3148pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3149{
3150	int		 hlen;
3151	u_int8_t	 hdr[60];
3152	u_int8_t	*opt, optlen;
3153#ifdef __FreeBSD__
3154	u_int16_t	 mss = V_tcp_mssdflt;
3155#else
3156	u_int16_t	 mss = tcp_mssdflt;
3157#endif
3158
3159	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
3160	if (hlen <= sizeof(struct tcphdr))
3161		return (0);
3162	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3163		return (0);
3164	opt = hdr + sizeof(struct tcphdr);
3165	hlen -= sizeof(struct tcphdr);
3166	while (hlen >= TCPOLEN_MAXSEG) {
3167		switch (*opt) {
3168		case TCPOPT_EOL:
3169		case TCPOPT_NOP:
3170			++opt;
3171			--hlen;
3172			break;
3173		case TCPOPT_MAXSEG:
3174			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3175			NTOHS(mss);
3176			/* FALLTHROUGH */
3177		default:
3178			optlen = opt[1];
3179			if (optlen < 2)
3180				optlen = 2;
3181			hlen -= optlen;
3182			opt += optlen;
3183			break;
3184		}
3185	}
3186	return (mss);
3187}
3188
3189u_int16_t
3190pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
3191{
3192#ifdef INET
3193	struct sockaddr_in	*dst;
3194	struct route		 ro;
3195#endif /* INET */
3196#ifdef INET6
3197	struct sockaddr_in6	*dst6;
3198	struct route_in6	 ro6;
3199#endif /* INET6 */
3200	struct rtentry		*rt = NULL;
3201#ifdef __FreeBSD__
3202	int			 hlen = 0;
3203	u_int16_t		 mss = V_tcp_mssdflt;
3204#else
3205	int			 hlen;
3206	u_int16_t		 mss = tcp_mssdflt;
3207#endif
3208
3209	switch (af) {
3210#ifdef INET
3211	case AF_INET:
3212		hlen = sizeof(struct ip);
3213		bzero(&ro, sizeof(ro));
3214		dst = (struct sockaddr_in *)&ro.ro_dst;
3215		dst->sin_family = AF_INET;
3216		dst->sin_len = sizeof(*dst);
3217		dst->sin_addr = addr->v4;
3218#ifdef __FreeBSD__
3219		in_rtalloc_ign(&ro, 0, rtableid);
3220#else /* ! __FreeBSD__ */
3221		rtalloc_noclone(&ro, NO_CLONING);
3222#endif
3223		rt = ro.ro_rt;
3224		break;
3225#endif /* INET */
3226#ifdef INET6
3227	case AF_INET6:
3228		hlen = sizeof(struct ip6_hdr);
3229		bzero(&ro6, sizeof(ro6));
3230		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
3231		dst6->sin6_family = AF_INET6;
3232		dst6->sin6_len = sizeof(*dst6);
3233		dst6->sin6_addr = addr->v6;
3234#ifdef __FreeBSD__
3235		in6_rtalloc_ign(&ro6, 0, rtableid);
3236#else /* ! __FreeBSD__ */
3237		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
3238#endif
3239		rt = ro6.ro_rt;
3240		break;
3241#endif /* INET6 */
3242	}
3243
3244	if (rt && rt->rt_ifp) {
3245		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
3246#ifdef __FreeBSD__
3247		mss = max(V_tcp_mssdflt, mss);
3248#else
3249		mss = max(tcp_mssdflt, mss);
3250#endif
3251		RTFREE(rt);
3252	}
3253	mss = min(mss, offer);
3254	mss = max(mss, 64);		/* sanity - at least max opt space */
3255	return (mss);
3256}
3257
3258void
3259pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
3260{
3261	struct pf_rule *r = s->rule.ptr;
3262	struct pf_src_node *sn = NULL;
3263
3264	s->rt_kif = NULL;
3265	if (!r->rt || r->rt == PF_FASTROUTE)
3266		return;
3267	switch (s->key[PF_SK_WIRE]->af) {
3268#ifdef INET
3269	case AF_INET:
3270		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn);
3271		s->rt_kif = r->rpool.cur->kif;
3272		break;
3273#endif /* INET */
3274#ifdef INET6
3275	case AF_INET6:
3276		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn);
3277		s->rt_kif = r->rpool.cur->kif;
3278		break;
3279#endif /* INET6 */
3280	}
3281}
3282
3283u_int32_t
3284pf_tcp_iss(struct pf_pdesc *pd)
3285{
3286	MD5_CTX ctx;
3287	u_int32_t digest[4];
3288
3289#ifdef __FreeBSD__
3290	if (V_pf_tcp_secret_init == 0) {
3291		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
3292		MD5Init(&V_pf_tcp_secret_ctx);
3293		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
3294		    sizeof(V_pf_tcp_secret));
3295		V_pf_tcp_secret_init = 1;
3296	}
3297
3298	ctx = V_pf_tcp_secret_ctx;
3299#else
3300	if (pf_tcp_secret_init == 0) {
3301		arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
3302		MD5Init(&pf_tcp_secret_ctx);
3303		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
3304		    sizeof(pf_tcp_secret));
3305		pf_tcp_secret_init = 1;
3306	}
3307
3308	ctx = pf_tcp_secret_ctx;
3309#endif
3310
3311	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
3312	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
3313	if (pd->af == AF_INET6) {
3314		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3315		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3316	} else {
3317		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3318		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
3319	}
3320	MD5Final((u_char *)digest, &ctx);
3321#ifdef __FreeBSD__
3322	V_pf_tcp_iss_off += 4096;
3323#define	ISN_RANDOM_INCREMENT (4096 - 1)
3324	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
3325	    V_pf_tcp_iss_off);
3326#undef	ISN_RANDOM_INCREMENT
3327#else
3328	pf_tcp_iss_off += 4096;
3329	return (digest[0] + tcp_iss + pf_tcp_iss_off);
3330#endif
3331}
3332
3333int
3334pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3335    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3336    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3337#ifdef __FreeBSD__
3338    struct ifqueue *ifq, struct inpcb *inp)
3339#else
3340    struct ifqueue *ifq)
3341#endif
3342{
3343	struct pf_rule		*nr = NULL;
3344	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3345	sa_family_t		 af = pd->af;
3346	struct pf_rule		*r, *a = NULL;
3347	struct pf_ruleset	*ruleset = NULL;
3348	struct pf_src_node	*nsn = NULL;
3349	struct tcphdr		*th = pd->hdr.tcp;
3350	struct pf_state_key	*skw = NULL, *sks = NULL;
3351	struct pf_state_key	*sk = NULL, *nk = NULL;
3352	u_short			 reason;
3353	int			 rewrite = 0, hdrlen = 0;
3354	int			 tag = -1, rtableid = -1;
3355	int			 asd = 0;
3356	int			 match = 0;
3357	int			 state_icmp = 0;
3358#ifdef __FreeBSD__
3359	u_int16_t		 sport = 0, dport = 0;
3360	u_int16_t		 bproto_sum = 0, bip_sum = 0;
3361#else
3362	u_int16_t		 sport, dport;
3363	u_int16_t		 bproto_sum = 0, bip_sum;
3364#endif
3365	u_int8_t		 icmptype = 0, icmpcode = 0;
3366
3367
3368	if (direction == PF_IN && pf_check_congestion(ifq)) {
3369		REASON_SET(&reason, PFRES_CONGEST);
3370		return (PF_DROP);
3371	}
3372
3373#ifdef __FreeBSD__
3374	if (inp != NULL)
3375		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3376	else if (V_debug_pfugidhack) {
3377		PF_UNLOCK();
3378		DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
3379		    pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3380		PF_LOCK();
3381	}
3382#endif
3383
3384	switch (pd->proto) {
3385	case IPPROTO_TCP:
3386		sport = th->th_sport;
3387		dport = th->th_dport;
3388		hdrlen = sizeof(*th);
3389		break;
3390	case IPPROTO_UDP:
3391		sport = pd->hdr.udp->uh_sport;
3392		dport = pd->hdr.udp->uh_dport;
3393		hdrlen = sizeof(*pd->hdr.udp);
3394		break;
3395#ifdef INET
3396	case IPPROTO_ICMP:
3397		if (pd->af != AF_INET)
3398			break;
3399		sport = dport = pd->hdr.icmp->icmp_id;
3400		hdrlen = sizeof(*pd->hdr.icmp);
3401		icmptype = pd->hdr.icmp->icmp_type;
3402		icmpcode = pd->hdr.icmp->icmp_code;
3403
3404		if (icmptype == ICMP_UNREACH ||
3405		    icmptype == ICMP_SOURCEQUENCH ||
3406		    icmptype == ICMP_REDIRECT ||
3407		    icmptype == ICMP_TIMXCEED ||
3408		    icmptype == ICMP_PARAMPROB)
3409			state_icmp++;
3410		break;
3411#endif /* INET */
3412#ifdef INET6
3413	case IPPROTO_ICMPV6:
3414		if (af != AF_INET6)
3415			break;
3416		sport = dport = pd->hdr.icmp6->icmp6_id;
3417		hdrlen = sizeof(*pd->hdr.icmp6);
3418		icmptype = pd->hdr.icmp6->icmp6_type;
3419		icmpcode = pd->hdr.icmp6->icmp6_code;
3420
3421		if (icmptype == ICMP6_DST_UNREACH ||
3422		    icmptype == ICMP6_PACKET_TOO_BIG ||
3423		    icmptype == ICMP6_TIME_EXCEEDED ||
3424		    icmptype == ICMP6_PARAM_PROB)
3425			state_icmp++;
3426		break;
3427#endif /* INET6 */
3428	default:
3429		sport = dport = hdrlen = 0;
3430		break;
3431	}
3432
3433	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3434
3435	/* check packet for BINAT/NAT/RDR */
3436	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
3437	    &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
3438		if (nk == NULL || sk == NULL) {
3439			REASON_SET(&reason, PFRES_MEMORY);
3440			goto cleanup;
3441		}
3442
3443		if (pd->ip_sum)
3444			bip_sum = *pd->ip_sum;
3445
3446		switch (pd->proto) {
3447		case IPPROTO_TCP:
3448			bproto_sum = th->th_sum;
3449			pd->proto_sum = &th->th_sum;
3450
3451			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3452			    nk->port[pd->sidx] != sport) {
3453				pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
3454				    &th->th_sum, &nk->addr[pd->sidx],
3455				    nk->port[pd->sidx], 0, af);
3456				pd->sport = &th->th_sport;
3457				sport = th->th_sport;
3458			}
3459
3460			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3461			    nk->port[pd->didx] != dport) {
3462				pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
3463				    &th->th_sum, &nk->addr[pd->didx],
3464				    nk->port[pd->didx], 0, af);
3465				dport = th->th_dport;
3466				pd->dport = &th->th_dport;
3467			}
3468			rewrite++;
3469			break;
3470		case IPPROTO_UDP:
3471			bproto_sum = pd->hdr.udp->uh_sum;
3472			pd->proto_sum = &pd->hdr.udp->uh_sum;
3473
3474			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3475			    nk->port[pd->sidx] != sport) {
3476				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
3477				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3478				    &nk->addr[pd->sidx],
3479				    nk->port[pd->sidx], 1, af);
3480				sport = pd->hdr.udp->uh_sport;
3481				pd->sport = &pd->hdr.udp->uh_sport;
3482			}
3483
3484			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3485			    nk->port[pd->didx] != dport) {
3486				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
3487				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3488				    &nk->addr[pd->didx],
3489				    nk->port[pd->didx], 1, af);
3490				dport = pd->hdr.udp->uh_dport;
3491				pd->dport = &pd->hdr.udp->uh_dport;
3492			}
3493			rewrite++;
3494			break;
3495#ifdef INET
3496		case IPPROTO_ICMP:
3497			nk->port[0] = nk->port[1];
3498			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
3499				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3500				    nk->addr[pd->sidx].v4.s_addr, 0);
3501
3502			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3503				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3504				    nk->addr[pd->didx].v4.s_addr, 0);
3505
3506			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
3507				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3508				    pd->hdr.icmp->icmp_cksum, sport,
3509				    nk->port[1], 0);
3510				pd->hdr.icmp->icmp_id = nk->port[1];
3511				pd->sport = &pd->hdr.icmp->icmp_id;
3512			}
3513			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3514			break;
3515#endif /* INET */
3516#ifdef INET6
3517		case IPPROTO_ICMPV6:
3518			nk->port[0] = nk->port[1];
3519			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
3520				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3521				    &nk->addr[pd->sidx], 0);
3522
3523			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3524				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3525				    &nk->addr[pd->didx], 0);
3526			rewrite++;
3527			break;
3528#endif /* INET */
3529		default:
3530			switch (af) {
3531#ifdef INET
3532			case AF_INET:
3533				if (PF_ANEQ(saddr,
3534				    &nk->addr[pd->sidx], AF_INET))
3535					pf_change_a(&saddr->v4.s_addr,
3536					    pd->ip_sum,
3537					    nk->addr[pd->sidx].v4.s_addr, 0);
3538
3539				if (PF_ANEQ(daddr,
3540				    &nk->addr[pd->didx], AF_INET))
3541					pf_change_a(&daddr->v4.s_addr,
3542					    pd->ip_sum,
3543					    nk->addr[pd->didx].v4.s_addr, 0);
3544				break;
3545#endif /* INET */
3546#ifdef INET6
3547			case AF_INET6:
3548				if (PF_ANEQ(saddr,
3549				    &nk->addr[pd->sidx], AF_INET6))
3550					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
3551
3552				if (PF_ANEQ(daddr,
3553				    &nk->addr[pd->didx], AF_INET6))
3554					PF_ACPY(saddr, &nk->addr[pd->didx], af);
3555				break;
3556#endif /* INET */
3557			}
3558			break;
3559		}
3560		if (nr->natpass)
3561			r = NULL;
3562		pd->nat_rule = nr;
3563	}
3564
3565	while (r != NULL) {
3566		r->evaluations++;
3567		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3568			r = r->skip[PF_SKIP_IFP].ptr;
3569		else if (r->direction && r->direction != direction)
3570			r = r->skip[PF_SKIP_DIR].ptr;
3571		else if (r->af && r->af != af)
3572			r = r->skip[PF_SKIP_AF].ptr;
3573		else if (r->proto && r->proto != pd->proto)
3574			r = r->skip[PF_SKIP_PROTO].ptr;
3575		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3576		    r->src.neg, kif, M_GETFIB(m)))
3577			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3578		/* tcp/udp only. port_op always 0 in other cases */
3579		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3580		    r->src.port[0], r->src.port[1], sport))
3581			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3582		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3583		    r->dst.neg, NULL, M_GETFIB(m)))
3584			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3585		/* tcp/udp only. port_op always 0 in other cases */
3586		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3587		    r->dst.port[0], r->dst.port[1], dport))
3588			r = r->skip[PF_SKIP_DST_PORT].ptr;
3589		/* icmp only. type always 0 in other cases */
3590		else if (r->type && r->type != icmptype + 1)
3591			r = TAILQ_NEXT(r, entries);
3592		/* icmp only. type always 0 in other cases */
3593		else if (r->code && r->code != icmpcode + 1)
3594			r = TAILQ_NEXT(r, entries);
3595		else if (r->tos && !(r->tos == pd->tos))
3596			r = TAILQ_NEXT(r, entries);
3597		else if (r->rule_flag & PFRULE_FRAGMENT)
3598			r = TAILQ_NEXT(r, entries);
3599		else if (pd->proto == IPPROTO_TCP &&
3600		    (r->flagset & th->th_flags) != r->flags)
3601			r = TAILQ_NEXT(r, entries);
3602		/* tcp/udp only. uid.op always 0 in other cases */
3603		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3604#ifdef __FreeBSD__
3605		    pf_socket_lookup(direction, pd, inp), 1)) &&
3606#else
3607		    pf_socket_lookup(direction, pd), 1)) &&
3608#endif
3609		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3610		    pd->lookup.uid))
3611			r = TAILQ_NEXT(r, entries);
3612		/* tcp/udp only. gid.op always 0 in other cases */
3613		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3614#ifdef __FreeBSD__
3615		    pf_socket_lookup(direction, pd, inp), 1)) &&
3616#else
3617		    pf_socket_lookup(direction, pd), 1)) &&
3618#endif
3619		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3620		    pd->lookup.gid))
3621			r = TAILQ_NEXT(r, entries);
3622		else if (r->prob &&
3623#ifdef __FreeBSD__
3624		    r->prob <= arc4random())
3625#else
3626		    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1)
3627#endif
3628			r = TAILQ_NEXT(r, entries);
3629#ifdef __FreeBSD__
3630		else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
3631#else
3632		else if (r->match_tag && !pf_match_tag(m, r, &tag))
3633#endif
3634			r = TAILQ_NEXT(r, entries);
3635		else if (r->os_fingerprint != PF_OSFP_ANY &&
3636		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3637		    pf_osfp_fingerprint(pd, m, off, th),
3638		    r->os_fingerprint)))
3639			r = TAILQ_NEXT(r, entries);
3640		else {
3641			if (r->tag)
3642				tag = r->tag;
3643			if (r->rtableid >= 0)
3644				rtableid = r->rtableid;
3645			if (r->anchor == NULL) {
3646				match = 1;
3647				*rm = r;
3648				*am = a;
3649				*rsm = ruleset;
3650				if ((*rm)->quick)
3651					break;
3652				r = TAILQ_NEXT(r, entries);
3653			} else
3654				pf_step_into_anchor(&asd, &ruleset,
3655				    PF_RULESET_FILTER, &r, &a, &match);
3656		}
3657		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3658		    PF_RULESET_FILTER, &r, &a, &match))
3659			break;
3660	}
3661	r = *rm;
3662	a = *am;
3663	ruleset = *rsm;
3664
3665	REASON_SET(&reason, PFRES_MATCH);
3666
3667	if (r->log || (nr != NULL && nr->log)) {
3668		if (rewrite)
3669			m_copyback(m, off, hdrlen, pd->hdr.any);
3670		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3671		    a, ruleset, pd);
3672	}
3673
3674	if ((r->action == PF_DROP) &&
3675	    ((r->rule_flag & PFRULE_RETURNRST) ||
3676	    (r->rule_flag & PFRULE_RETURNICMP) ||
3677	    (r->rule_flag & PFRULE_RETURN))) {
3678		/* undo NAT changes, if they have taken place */
3679		if (nr != NULL) {
3680			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3681			PF_ACPY(daddr, &sk->addr[pd->didx], af);
3682			if (pd->sport)
3683				*pd->sport = sk->port[pd->sidx];
3684			if (pd->dport)
3685				*pd->dport = sk->port[pd->didx];
3686			if (pd->proto_sum)
3687				*pd->proto_sum = bproto_sum;
3688			if (pd->ip_sum)
3689				*pd->ip_sum = bip_sum;
3690			m_copyback(m, off, hdrlen, pd->hdr.any);
3691		}
3692		if (pd->proto == IPPROTO_TCP &&
3693		    ((r->rule_flag & PFRULE_RETURNRST) ||
3694		    (r->rule_flag & PFRULE_RETURN)) &&
3695		    !(th->th_flags & TH_RST)) {
3696			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
3697			int		 len = 0;
3698#ifdef INET
3699			struct ip	*h4;
3700#endif
3701#ifdef INET6
3702			struct ip6_hdr	*h6;
3703#endif
3704
3705			switch (af) {
3706#ifdef INET
3707			case AF_INET:
3708				h4 = mtod(m, struct ip *);
3709				len = ntohs(h4->ip_len) - off;
3710				break;
3711#endif
3712#ifdef INET6
3713			case AF_INET6:
3714				h6 = mtod(m, struct ip6_hdr *);
3715				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
3716				break;
3717#endif
3718			}
3719
3720			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
3721				REASON_SET(&reason, PFRES_PROTCKSUM);
3722			else {
3723				if (th->th_flags & TH_SYN)
3724					ack++;
3725				if (th->th_flags & TH_FIN)
3726					ack++;
3727#ifdef __FreeBSD__
3728				pf_send_tcp(m, r, af, pd->dst,
3729#else
3730				pf_send_tcp(r, af, pd->dst,
3731#endif
3732				    pd->src, th->th_dport, th->th_sport,
3733				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3734				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3735			}
3736		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3737		    r->return_icmp)
3738			pf_send_icmp(m, r->return_icmp >> 8,
3739			    r->return_icmp & 255, af, r);
3740		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3741		    r->return_icmp6)
3742			pf_send_icmp(m, r->return_icmp6 >> 8,
3743			    r->return_icmp6 & 255, af, r);
3744	}
3745
3746	if (r->action == PF_DROP)
3747		goto cleanup;
3748
3749#ifdef __FreeBSD__
3750	if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) {
3751#else
3752	if (pf_tag_packet(m, tag, rtableid)) {
3753#endif
3754		REASON_SET(&reason, PFRES_MEMORY);
3755		goto cleanup;
3756	}
3757
3758	if (!state_icmp && (r->keep_state || nr != NULL ||
3759	    (pd->flags & PFDESC_TCP_NORM))) {
3760		int action;
3761		action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
3762		    off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
3763		    bip_sum, hdrlen);
3764		if (action != PF_PASS)
3765			return (action);
3766	} else {
3767#ifdef __FreeBSD__
3768		if (sk != NULL)
3769			pool_put(&V_pf_state_key_pl, sk);
3770		if (nk != NULL)
3771			pool_put(&V_pf_state_key_pl, nk);
3772#else
3773		if (sk != NULL)
3774			pool_put(&pf_state_key_pl, sk);
3775		if (nk != NULL)
3776			pool_put(&pf_state_key_pl, nk);
3777#endif
3778	}
3779
3780	/* copy back packet headers if we performed NAT operations */
3781	if (rewrite)
3782		m_copyback(m, off, hdrlen, pd->hdr.any);
3783
3784#if NPFSYNC > 0
3785	if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
3786#ifdef __FreeBSD__
3787	    direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) {
3788#else
3789	    direction == PF_OUT && pfsync_up()) {
3790#endif
3791		/*
3792		 * We want the state created, but we dont
3793		 * want to send this in case a partner
3794		 * firewall has to know about it to allow
3795		 * replies through it.
3796		 */
3797#ifdef __FreeBSD__
3798		if (pfsync_defer_ptr != NULL &&
3799			pfsync_defer_ptr(*sm, m))
3800#else
3801		if (pfsync_defer(*sm, m))
3802#endif
3803			return (PF_DEFER);
3804	}
3805#endif
3806
3807	return (PF_PASS);
3808
3809cleanup:
3810#ifdef __FreeBSD__
3811	if (sk != NULL)
3812		pool_put(&V_pf_state_key_pl, sk);
3813	if (nk != NULL)
3814		pool_put(&V_pf_state_key_pl, nk);
3815#else
3816	if (sk != NULL)
3817		pool_put(&pf_state_key_pl, sk);
3818	if (nk != NULL)
3819		pool_put(&pf_state_key_pl, nk);
3820#endif
3821	return (PF_DROP);
3822}
3823
3824static __inline int
3825pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
3826    struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
3827    struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
3828    struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
3829    struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
3830    u_int16_t bip_sum, int hdrlen)
3831{
3832	struct pf_state		*s = NULL;
3833	struct pf_src_node	*sn = NULL;
3834	struct tcphdr		*th = pd->hdr.tcp;
3835#ifdef __FreeBSD__
3836	u_int16_t		 mss = V_tcp_mssdflt;
3837#else
3838	u_int16_t		 mss = tcp_mssdflt;
3839#endif
3840	u_short			 reason;
3841
3842	/* check maximums */
3843	if (r->max_states && (r->states_cur >= r->max_states)) {
3844#ifdef __FreeBSD__
3845		V_pf_status.lcounters[LCNT_STATES]++;
3846#else
3847		pf_status.lcounters[LCNT_STATES]++;
3848#endif
3849		REASON_SET(&reason, PFRES_MAXSTATES);
3850		return (PF_DROP);
3851	}
3852	/* src node for filter rule */
3853	if ((r->rule_flag & PFRULE_SRCTRACK ||
3854	    r->rpool.opts & PF_POOL_STICKYADDR) &&
3855	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
3856		REASON_SET(&reason, PFRES_SRCLIMIT);
3857		goto csfailed;
3858	}
3859	/* src node for translation rule */
3860	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3861	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
3862		REASON_SET(&reason, PFRES_SRCLIMIT);
3863		goto csfailed;
3864	}
3865#ifdef __FreeBSD__
3866	s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO);
3867#else
3868	s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
3869#endif
3870	if (s == NULL) {
3871		REASON_SET(&reason, PFRES_MEMORY);
3872		goto csfailed;
3873	}
3874	s->rule.ptr = r;
3875	s->nat_rule.ptr = nr;
3876	s->anchor.ptr = a;
3877	STATE_INC_COUNTERS(s);
3878	if (r->allow_opts)
3879		s->state_flags |= PFSTATE_ALLOWOPTS;
3880	if (r->rule_flag & PFRULE_STATESLOPPY)
3881		s->state_flags |= PFSTATE_SLOPPY;
3882	if (r->rule_flag & PFRULE_PFLOW)
3883		s->state_flags |= PFSTATE_PFLOW;
3884	s->log = r->log & PF_LOG_ALL;
3885	s->sync_state = PFSYNC_S_NONE;
3886	if (nr != NULL)
3887		s->log |= nr->log & PF_LOG_ALL;
3888	switch (pd->proto) {
3889	case IPPROTO_TCP:
3890		s->src.seqlo = ntohl(th->th_seq);
3891		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
3892		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3893		    r->keep_state == PF_STATE_MODULATE) {
3894			/* Generate sequence number modulator */
3895			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
3896			    0)
3897				s->src.seqdiff = 1;
3898			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
3899			    htonl(s->src.seqlo + s->src.seqdiff), 0);
3900			*rewrite = 1;
3901		} else
3902			s->src.seqdiff = 0;
3903		if (th->th_flags & TH_SYN) {
3904			s->src.seqhi++;
3905			s->src.wscale = pf_get_wscale(m, off,
3906			    th->th_off, pd->af);
3907		}
3908		s->src.max_win = MAX(ntohs(th->th_win), 1);
3909		if (s->src.wscale & PF_WSCALE_MASK) {
3910			/* Remove scale factor from initial window */
3911			int win = s->src.max_win;
3912			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3913			s->src.max_win = (win - 1) >>
3914			    (s->src.wscale & PF_WSCALE_MASK);
3915		}
3916		if (th->th_flags & TH_FIN)
3917			s->src.seqhi++;
3918		s->dst.seqhi = 1;
3919		s->dst.max_win = 1;
3920		s->src.state = TCPS_SYN_SENT;
3921		s->dst.state = TCPS_CLOSED;
3922		s->timeout = PFTM_TCP_FIRST_PACKET;
3923		break;
3924	case IPPROTO_UDP:
3925		s->src.state = PFUDPS_SINGLE;
3926		s->dst.state = PFUDPS_NO_TRAFFIC;
3927		s->timeout = PFTM_UDP_FIRST_PACKET;
3928		break;
3929	case IPPROTO_ICMP:
3930#ifdef INET6
3931	case IPPROTO_ICMPV6:
3932#endif
3933		s->timeout = PFTM_ICMP_FIRST_PACKET;
3934		break;
3935	default:
3936		s->src.state = PFOTHERS_SINGLE;
3937		s->dst.state = PFOTHERS_NO_TRAFFIC;
3938		s->timeout = PFTM_OTHER_FIRST_PACKET;
3939	}
3940
3941	s->creation = time_second;
3942	s->expire = time_second;
3943
3944	if (sn != NULL) {
3945		s->src_node = sn;
3946		s->src_node->states++;
3947	}
3948	if (nsn != NULL) {
3949		/* XXX We only modify one side for now. */
3950		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
3951		s->nat_src_node = nsn;
3952		s->nat_src_node->states++;
3953	}
3954	if (pd->proto == IPPROTO_TCP) {
3955		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3956		    off, pd, th, &s->src, &s->dst)) {
3957			REASON_SET(&reason, PFRES_MEMORY);
3958			pf_src_tree_remove_state(s);
3959			STATE_DEC_COUNTERS(s);
3960#ifdef __FreeBSD__
3961			pool_put(&V_pf_state_pl, s);
3962#else
3963			pool_put(&pf_state_pl, s);
3964#endif
3965			return (PF_DROP);
3966		}
3967		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3968		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3969		    &s->src, &s->dst, rewrite)) {
3970			/* This really shouldn't happen!!! */
3971			DPFPRINTF(PF_DEBUG_URGENT,
3972			    ("pf_normalize_tcp_stateful failed on first pkt"));
3973			pf_normalize_tcp_cleanup(s);
3974			pf_src_tree_remove_state(s);
3975			STATE_DEC_COUNTERS(s);
3976#ifdef __FreeBSD__
3977			pool_put(&V_pf_state_pl, s);
3978#else
3979			pool_put(&pf_state_pl, s);
3980#endif
3981			return (PF_DROP);
3982		}
3983	}
3984	s->direction = pd->dir;
3985
3986	if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
3987	    pd->src, pd->dst, sport, dport))
3988		goto csfailed;
3989
3990	if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
3991		if (pd->proto == IPPROTO_TCP)
3992			pf_normalize_tcp_cleanup(s);
3993		REASON_SET(&reason, PFRES_STATEINS);
3994		pf_src_tree_remove_state(s);
3995		STATE_DEC_COUNTERS(s);
3996#ifdef __FreeBSD__
3997		pool_put(&V_pf_state_pl, s);
3998#else
3999		pool_put(&pf_state_pl, s);
4000#endif
4001		return (PF_DROP);
4002	} else
4003		*sm = s;
4004
4005	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
4006	if (tag > 0) {
4007		pf_tag_ref(tag);
4008		s->tag = tag;
4009	}
4010	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
4011	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
4012		s->src.state = PF_TCPS_PROXY_SRC;
4013		/* undo NAT changes, if they have taken place */
4014		if (nr != NULL) {
4015			struct pf_state_key *skt = s->key[PF_SK_WIRE];
4016			if (pd->dir == PF_OUT)
4017				skt = s->key[PF_SK_STACK];
4018			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
4019			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
4020			if (pd->sport)
4021				*pd->sport = skt->port[pd->sidx];
4022			if (pd->dport)
4023				*pd->dport = skt->port[pd->didx];
4024			if (pd->proto_sum)
4025				*pd->proto_sum = bproto_sum;
4026			if (pd->ip_sum)
4027				*pd->ip_sum = bip_sum;
4028			m_copyback(m, off, hdrlen, pd->hdr.any);
4029		}
4030		s->src.seqhi = htonl(arc4random());
4031		/* Find mss option */
4032		int rtid = M_GETFIB(m);
4033		mss = pf_get_mss(m, off, th->th_off, pd->af);
4034		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
4035		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
4036		s->src.mss = mss;
4037#ifdef __FreeBSD__
4038		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
4039#else
4040		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
4041#endif
4042		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
4043		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
4044		REASON_SET(&reason, PFRES_SYNPROXY);
4045		return (PF_SYNPROXY_DROP);
4046	}
4047
4048	return (PF_PASS);
4049
4050csfailed:
4051#ifdef __FreeBSD__
4052	if (sk != NULL)
4053		pool_put(&V_pf_state_key_pl, sk);
4054	if (nk != NULL)
4055		pool_put(&V_pf_state_key_pl, nk);
4056#else
4057	if (sk != NULL)
4058		pool_put(&pf_state_key_pl, sk);
4059	if (nk != NULL)
4060		pool_put(&pf_state_key_pl, nk);
4061#endif
4062
4063	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4064#ifdef __FreeBSD__
4065		RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn);
4066		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4067		V_pf_status.src_nodes--;
4068		pool_put(&V_pf_src_tree_pl, sn);
4069#else
4070		RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4071		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4072		pf_status.src_nodes--;
4073		pool_put(&pf_src_tree_pl, sn);
4074#endif
4075	}
4076	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
4077#ifdef __FreeBSD__
4078		RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn);
4079		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4080		V_pf_status.src_nodes--;
4081		pool_put(&V_pf_src_tree_pl, nsn);
4082#else
4083		RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4084		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4085		pf_status.src_nodes--;
4086		pool_put(&pf_src_tree_pl, nsn);
4087#endif
4088	}
4089	return (PF_DROP);
4090}
4091
4092int
4093pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
4094    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
4095    struct pf_ruleset **rsm)
4096{
4097	struct pf_rule		*r, *a = NULL;
4098	struct pf_ruleset	*ruleset = NULL;
4099	sa_family_t		 af = pd->af;
4100	u_short			 reason;
4101	int			 tag = -1;
4102	int			 asd = 0;
4103	int			 match = 0;
4104
4105	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4106	while (r != NULL) {
4107		r->evaluations++;
4108		if (pfi_kif_match(r->kif, kif) == r->ifnot)
4109			r = r->skip[PF_SKIP_IFP].ptr;
4110		else if (r->direction && r->direction != direction)
4111			r = r->skip[PF_SKIP_DIR].ptr;
4112		else if (r->af && r->af != af)
4113			r = r->skip[PF_SKIP_AF].ptr;
4114		else if (r->proto && r->proto != pd->proto)
4115			r = r->skip[PF_SKIP_PROTO].ptr;
4116		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4117		    r->src.neg, kif, M_GETFIB(m)))
4118			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4119		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4120		    r->dst.neg, NULL, M_GETFIB(m)))
4121			r = r->skip[PF_SKIP_DST_ADDR].ptr;
4122		else if (r->tos && !(r->tos == pd->tos))
4123			r = TAILQ_NEXT(r, entries);
4124		else if (r->os_fingerprint != PF_OSFP_ANY)
4125			r = TAILQ_NEXT(r, entries);
4126		else if (pd->proto == IPPROTO_UDP &&
4127		    (r->src.port_op || r->dst.port_op))
4128			r = TAILQ_NEXT(r, entries);
4129		else if (pd->proto == IPPROTO_TCP &&
4130		    (r->src.port_op || r->dst.port_op || r->flagset))
4131			r = TAILQ_NEXT(r, entries);
4132		else if ((pd->proto == IPPROTO_ICMP ||
4133		    pd->proto == IPPROTO_ICMPV6) &&
4134		    (r->type || r->code))
4135			r = TAILQ_NEXT(r, entries);
4136		else if (r->prob && r->prob <=
4137		    (arc4random() % (UINT_MAX - 1) + 1))
4138			r = TAILQ_NEXT(r, entries);
4139#ifdef __FreeBSD__
4140		else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
4141#else
4142		else if (r->match_tag && !pf_match_tag(m, r, &tag))
4143#endif
4144			r = TAILQ_NEXT(r, entries);
4145		else {
4146			if (r->anchor == NULL) {
4147				match = 1;
4148				*rm = r;
4149				*am = a;
4150				*rsm = ruleset;
4151				if ((*rm)->quick)
4152					break;
4153				r = TAILQ_NEXT(r, entries);
4154			} else
4155				pf_step_into_anchor(&asd, &ruleset,
4156				    PF_RULESET_FILTER, &r, &a, &match);
4157		}
4158		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4159		    PF_RULESET_FILTER, &r, &a, &match))
4160			break;
4161	}
4162	r = *rm;
4163	a = *am;
4164	ruleset = *rsm;
4165
4166	REASON_SET(&reason, PFRES_MATCH);
4167
4168	if (r->log)
4169		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
4170		    pd);
4171
4172	if (r->action != PF_PASS)
4173		return (PF_DROP);
4174
4175#ifdef __FreeBSD__
4176	if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) {
4177#else
4178	if (pf_tag_packet(m, tag, -1)) {
4179#endif
4180		REASON_SET(&reason, PFRES_MEMORY);
4181		return (PF_DROP);
4182	}
4183
4184	return (PF_PASS);
4185}
4186
4187int
4188pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
4189	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
4190	struct pf_pdesc *pd, u_short *reason, int *copyback)
4191{
4192	struct tcphdr		*th = pd->hdr.tcp;
4193	u_int16_t		 win = ntohs(th->th_win);
4194	u_int32_t		 ack, end, seq, orig_seq;
4195	u_int8_t		 sws, dws;
4196	int			 ackskew;
4197
4198	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4199		sws = src->wscale & PF_WSCALE_MASK;
4200		dws = dst->wscale & PF_WSCALE_MASK;
4201	} else
4202		sws = dws = 0;
4203
4204	/*
4205	 * Sequence tracking algorithm from Guido van Rooij's paper:
4206	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4207	 *	tcp_filtering.ps
4208	 */
4209
4210	orig_seq = seq = ntohl(th->th_seq);
4211	if (src->seqlo == 0) {
4212		/* First packet from this end. Set its state */
4213
4214		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4215		    src->scrub == NULL) {
4216			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4217				REASON_SET(reason, PFRES_MEMORY);
4218				return (PF_DROP);
4219			}
4220		}
4221
4222		/* Deferred generation of sequence number modulator */
4223		if (dst->seqdiff && !src->seqdiff) {
4224			/* use random iss for the TCP server */
4225			while ((src->seqdiff = arc4random() - seq) == 0)
4226				;
4227			ack = ntohl(th->th_ack) - dst->seqdiff;
4228			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
4229			    src->seqdiff), 0);
4230			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
4231			*copyback = 1;
4232		} else {
4233			ack = ntohl(th->th_ack);
4234		}
4235
4236		end = seq + pd->p_len;
4237		if (th->th_flags & TH_SYN) {
4238			end++;
4239			if (dst->wscale & PF_WSCALE_FLAG) {
4240				src->wscale = pf_get_wscale(m, off, th->th_off,
4241				    pd->af);
4242				if (src->wscale & PF_WSCALE_FLAG) {
4243					/* Remove scale factor from initial
4244					 * window */
4245					sws = src->wscale & PF_WSCALE_MASK;
4246					win = ((u_int32_t)win + (1 << sws) - 1)
4247					    >> sws;
4248					dws = dst->wscale & PF_WSCALE_MASK;
4249				} else {
4250					/* fixup other window */
4251					dst->max_win <<= dst->wscale &
4252					    PF_WSCALE_MASK;
4253					/* in case of a retrans SYN|ACK */
4254					dst->wscale = 0;
4255				}
4256			}
4257		}
4258		if (th->th_flags & TH_FIN)
4259			end++;
4260
4261		src->seqlo = seq;
4262		if (src->state < TCPS_SYN_SENT)
4263			src->state = TCPS_SYN_SENT;
4264
4265		/*
4266		 * May need to slide the window (seqhi may have been set by
4267		 * the crappy stack check or if we picked up the connection
4268		 * after establishment)
4269		 */
4270		if (src->seqhi == 1 ||
4271		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4272			src->seqhi = end + MAX(1, dst->max_win << dws);
4273		if (win > src->max_win)
4274			src->max_win = win;
4275
4276	} else {
4277		ack = ntohl(th->th_ack) - dst->seqdiff;
4278		if (src->seqdiff) {
4279			/* Modulate sequence numbers */
4280			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
4281			    src->seqdiff), 0);
4282			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
4283			*copyback = 1;
4284		}
4285		end = seq + pd->p_len;
4286		if (th->th_flags & TH_SYN)
4287			end++;
4288		if (th->th_flags & TH_FIN)
4289			end++;
4290	}
4291
4292	if ((th->th_flags & TH_ACK) == 0) {
4293		/* Let it pass through the ack skew check */
4294		ack = dst->seqlo;
4295	} else if ((ack == 0 &&
4296	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4297	    /* broken tcp stacks do not set ack */
4298	    (dst->state < TCPS_SYN_SENT)) {
4299		/*
4300		 * Many stacks (ours included) will set the ACK number in an
4301		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4302		 */
4303		ack = dst->seqlo;
4304	}
4305
4306	if (seq == end) {
4307		/* Ease sequencing restrictions on no data packets */
4308		seq = src->seqlo;
4309		end = seq;
4310	}
4311
4312	ackskew = dst->seqlo - ack;
4313
4314
4315	/*
4316	 * Need to demodulate the sequence numbers in any TCP SACK options
4317	 * (Selective ACK). We could optionally validate the SACK values
4318	 * against the current ACK window, either forwards or backwards, but
4319	 * I'm not confident that SACK has been implemented properly
4320	 * everywhere. It wouldn't surprise me if several stacks accidently
4321	 * SACK too far backwards of previously ACKed data. There really aren't
4322	 * any security implications of bad SACKing unless the target stack
4323	 * doesn't validate the option length correctly. Someone trying to
4324	 * spoof into a TCP connection won't bother blindly sending SACK
4325	 * options anyway.
4326	 */
4327	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4328		if (pf_modulate_sack(m, off, pd, th, dst))
4329			*copyback = 1;
4330	}
4331
4332
4333#define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4334	if (SEQ_GEQ(src->seqhi, end) &&
4335	    /* Last octet inside other's window space */
4336	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4337	    /* Retrans: not more than one window back */
4338	    (ackskew >= -MAXACKWINDOW) &&
4339	    /* Acking not more than one reassembled fragment backwards */
4340	    (ackskew <= (MAXACKWINDOW << sws)) &&
4341	    /* Acking not more than one window forward */
4342	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4343	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
4344	    (pd->flags & PFDESC_IP_REAS) == 0)) {
4345	    /* Require an exact/+1 sequence match on resets when possible */
4346
4347		if (dst->scrub || src->scrub) {
4348			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4349			    *state, src, dst, copyback))
4350				return (PF_DROP);
4351		}
4352
4353		/* update max window */
4354		if (src->max_win < win)
4355			src->max_win = win;
4356		/* synchronize sequencing */
4357		if (SEQ_GT(end, src->seqlo))
4358			src->seqlo = end;
4359		/* slide the window of what the other end can send */
4360		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4361			dst->seqhi = ack + MAX((win << sws), 1);
4362
4363
4364		/* update states */
4365		if (th->th_flags & TH_SYN)
4366			if (src->state < TCPS_SYN_SENT)
4367				src->state = TCPS_SYN_SENT;
4368		if (th->th_flags & TH_FIN)
4369			if (src->state < TCPS_CLOSING)
4370				src->state = TCPS_CLOSING;
4371		if (th->th_flags & TH_ACK) {
4372			if (dst->state == TCPS_SYN_SENT) {
4373				dst->state = TCPS_ESTABLISHED;
4374				if (src->state == TCPS_ESTABLISHED &&
4375				    (*state)->src_node != NULL &&
4376				    pf_src_connlimit(state)) {
4377					REASON_SET(reason, PFRES_SRCLIMIT);
4378					return (PF_DROP);
4379				}
4380			} else if (dst->state == TCPS_CLOSING)
4381				dst->state = TCPS_FIN_WAIT_2;
4382		}
4383		if (th->th_flags & TH_RST)
4384			src->state = dst->state = TCPS_TIME_WAIT;
4385
4386		/* update expire time */
4387		(*state)->expire = time_second;
4388		if (src->state >= TCPS_FIN_WAIT_2 &&
4389		    dst->state >= TCPS_FIN_WAIT_2)
4390			(*state)->timeout = PFTM_TCP_CLOSED;
4391		else if (src->state >= TCPS_CLOSING &&
4392		    dst->state >= TCPS_CLOSING)
4393			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4394		else if (src->state < TCPS_ESTABLISHED ||
4395		    dst->state < TCPS_ESTABLISHED)
4396			(*state)->timeout = PFTM_TCP_OPENING;
4397		else if (src->state >= TCPS_CLOSING ||
4398		    dst->state >= TCPS_CLOSING)
4399			(*state)->timeout = PFTM_TCP_CLOSING;
4400		else
4401			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4402
4403		/* Fall through to PASS packet */
4404
4405	} else if ((dst->state < TCPS_SYN_SENT ||
4406		dst->state >= TCPS_FIN_WAIT_2 ||
4407		src->state >= TCPS_FIN_WAIT_2) &&
4408	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4409	    /* Within a window forward of the originating packet */
4410	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4411	    /* Within a window backward of the originating packet */
4412
4413		/*
4414		 * This currently handles three situations:
4415		 *  1) Stupid stacks will shotgun SYNs before their peer
4416		 *     replies.
4417		 *  2) When PF catches an already established stream (the
4418		 *     firewall rebooted, the state table was flushed, routes
4419		 *     changed...)
4420		 *  3) Packets get funky immediately after the connection
4421		 *     closes (this should catch Solaris spurious ACK|FINs
4422		 *     that web servers like to spew after a close)
4423		 *
4424		 * This must be a little more careful than the above code
4425		 * since packet floods will also be caught here. We don't
4426		 * update the TTL here to mitigate the damage of a packet
4427		 * flood and so the same code can handle awkward establishment
4428		 * and a loosened connection close.
4429		 * In the establishment case, a correct peer response will
4430		 * validate the connection, go through the normal state code
4431		 * and keep updating the state TTL.
4432		 */
4433
4434#ifdef __FreeBSD__
4435		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4436#else
4437		if (pf_status.debug >= PF_DEBUG_MISC) {
4438#endif
4439			printf("pf: loose state match: ");
4440			pf_print_state(*state);
4441			pf_print_flags(th->th_flags);
4442			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4443			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
4444#ifdef __FreeBSD__
4445			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
4446			    (unsigned long long)(*state)->packets[1],
4447#else
4448			    pd->p_len, ackskew, (*state)->packets[0],
4449			    (*state)->packets[1],
4450#endif
4451			    pd->dir == PF_IN ? "in" : "out",
4452			    pd->dir == (*state)->direction ? "fwd" : "rev");
4453		}
4454
4455		if (dst->scrub || src->scrub) {
4456			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4457			    *state, src, dst, copyback))
4458				return (PF_DROP);
4459		}
4460
4461		/* update max window */
4462		if (src->max_win < win)
4463			src->max_win = win;
4464		/* synchronize sequencing */
4465		if (SEQ_GT(end, src->seqlo))
4466			src->seqlo = end;
4467		/* slide the window of what the other end can send */
4468		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4469			dst->seqhi = ack + MAX((win << sws), 1);
4470
4471		/*
4472		 * Cannot set dst->seqhi here since this could be a shotgunned
4473		 * SYN and not an already established connection.
4474		 */
4475
4476		if (th->th_flags & TH_FIN)
4477			if (src->state < TCPS_CLOSING)
4478				src->state = TCPS_CLOSING;
4479		if (th->th_flags & TH_RST)
4480			src->state = dst->state = TCPS_TIME_WAIT;
4481
4482		/* Fall through to PASS packet */
4483
4484	} else {
4485		if ((*state)->dst.state == TCPS_SYN_SENT &&
4486		    (*state)->src.state == TCPS_SYN_SENT) {
4487			/* Send RST for state mismatches during handshake */
4488			if (!(th->th_flags & TH_RST))
4489#ifdef __FreeBSD__
4490				pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4491#else
4492				pf_send_tcp((*state)->rule.ptr, pd->af,
4493#endif
4494				    pd->dst, pd->src, th->th_dport,
4495				    th->th_sport, ntohl(th->th_ack), 0,
4496				    TH_RST, 0, 0,
4497				    (*state)->rule.ptr->return_ttl, 1, 0,
4498				    pd->eh, kif->pfik_ifp);
4499			src->seqlo = 0;
4500			src->seqhi = 1;
4501			src->max_win = 1;
4502#ifdef __FreeBSD__
4503		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
4504#else
4505		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4506#endif
4507			printf("pf: BAD state: ");
4508			pf_print_state(*state);
4509			pf_print_flags(th->th_flags);
4510			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4511			    "pkts=%llu:%llu dir=%s,%s\n",
4512			    seq, orig_seq, ack, pd->p_len, ackskew,
4513#ifdef __FreeBSD__
4514			    (unsigned long long)(*state)->packets[0],
4515			    (unsigned long long)(*state)->packets[1],
4516#else
4517			    (*state)->packets[0], (*state)->packets[1],
4518#endif
4519			    pd->dir == PF_IN ? "in" : "out",
4520			    pd->dir == (*state)->direction ? "fwd" : "rev");
4521			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4522			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4523			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4524			    ' ': '2',
4525			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4526			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4527			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4528			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4529		}
4530		REASON_SET(reason, PFRES_BADSTATE);
4531		return (PF_DROP);
4532	}
4533
4534	return (PF_PASS);
4535}
4536
4537int
4538pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
4539	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
4540{
4541	struct tcphdr		*th = pd->hdr.tcp;
4542
4543	if (th->th_flags & TH_SYN)
4544		if (src->state < TCPS_SYN_SENT)
4545			src->state = TCPS_SYN_SENT;
4546	if (th->th_flags & TH_FIN)
4547		if (src->state < TCPS_CLOSING)
4548			src->state = TCPS_CLOSING;
4549	if (th->th_flags & TH_ACK) {
4550		if (dst->state == TCPS_SYN_SENT) {
4551			dst->state = TCPS_ESTABLISHED;
4552			if (src->state == TCPS_ESTABLISHED &&
4553			    (*state)->src_node != NULL &&
4554			    pf_src_connlimit(state)) {
4555				REASON_SET(reason, PFRES_SRCLIMIT);
4556				return (PF_DROP);
4557			}
4558		} else if (dst->state == TCPS_CLOSING) {
4559			dst->state = TCPS_FIN_WAIT_2;
4560		} else if (src->state == TCPS_SYN_SENT &&
4561		    dst->state < TCPS_SYN_SENT) {
4562			/*
4563			 * Handle a special sloppy case where we only see one
4564			 * half of the connection. If there is a ACK after
4565			 * the initial SYN without ever seeing a packet from
4566			 * the destination, set the connection to established.
4567			 */
4568			dst->state = src->state = TCPS_ESTABLISHED;
4569			if ((*state)->src_node != NULL &&
4570			    pf_src_connlimit(state)) {
4571				REASON_SET(reason, PFRES_SRCLIMIT);
4572				return (PF_DROP);
4573			}
4574		} else if (src->state == TCPS_CLOSING &&
4575		    dst->state == TCPS_ESTABLISHED &&
4576		    dst->seqlo == 0) {
4577			/*
4578			 * Handle the closing of half connections where we
4579			 * don't see the full bidirectional FIN/ACK+ACK
4580			 * handshake.
4581			 */
4582			dst->state = TCPS_CLOSING;
4583		}
4584	}
4585	if (th->th_flags & TH_RST)
4586		src->state = dst->state = TCPS_TIME_WAIT;
4587
4588	/* update expire time */
4589	(*state)->expire = time_second;
4590	if (src->state >= TCPS_FIN_WAIT_2 &&
4591	    dst->state >= TCPS_FIN_WAIT_2)
4592		(*state)->timeout = PFTM_TCP_CLOSED;
4593	else if (src->state >= TCPS_CLOSING &&
4594	    dst->state >= TCPS_CLOSING)
4595		(*state)->timeout = PFTM_TCP_FIN_WAIT;
4596	else if (src->state < TCPS_ESTABLISHED ||
4597	    dst->state < TCPS_ESTABLISHED)
4598		(*state)->timeout = PFTM_TCP_OPENING;
4599	else if (src->state >= TCPS_CLOSING ||
4600	    dst->state >= TCPS_CLOSING)
4601		(*state)->timeout = PFTM_TCP_CLOSING;
4602	else
4603		(*state)->timeout = PFTM_TCP_ESTABLISHED;
4604
4605	return (PF_PASS);
4606}
4607
4608int
4609pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4610    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4611    u_short *reason)
4612{
4613	struct pf_state_key_cmp	 key;
4614	struct tcphdr		*th = pd->hdr.tcp;
4615	int			 copyback = 0;
4616	struct pf_state_peer	*src, *dst;
4617	struct pf_state_key	*sk;
4618
4619	key.af = pd->af;
4620	key.proto = IPPROTO_TCP;
4621	if (direction == PF_IN)	{	/* wire side, straight */
4622		PF_ACPY(&key.addr[0], pd->src, key.af);
4623		PF_ACPY(&key.addr[1], pd->dst, key.af);
4624		key.port[0] = th->th_sport;
4625		key.port[1] = th->th_dport;
4626	} else {			/* stack side, reverse */
4627		PF_ACPY(&key.addr[1], pd->src, key.af);
4628		PF_ACPY(&key.addr[0], pd->dst, key.af);
4629		key.port[1] = th->th_sport;
4630		key.port[0] = th->th_dport;
4631	}
4632
4633#ifdef __FreeBSD__
4634	STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
4635#else
4636	STATE_LOOKUP(kif, &key, direction, *state, m);
4637#endif
4638
4639	if (direction == (*state)->direction) {
4640		src = &(*state)->src;
4641		dst = &(*state)->dst;
4642	} else {
4643		src = &(*state)->dst;
4644		dst = &(*state)->src;
4645	}
4646
4647	sk = (*state)->key[pd->didx];
4648
4649	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4650		if (direction != (*state)->direction) {
4651			REASON_SET(reason, PFRES_SYNPROXY);
4652			return (PF_SYNPROXY_DROP);
4653		}
4654		if (th->th_flags & TH_SYN) {
4655			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4656				REASON_SET(reason, PFRES_SYNPROXY);
4657				return (PF_DROP);
4658			}
4659#ifdef __FreeBSD__
4660			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4661#else
4662			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4663#endif
4664			    pd->src, th->th_dport, th->th_sport,
4665			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4666			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
4667			    0, NULL, NULL);
4668			REASON_SET(reason, PFRES_SYNPROXY);
4669			return (PF_SYNPROXY_DROP);
4670		} else if (!(th->th_flags & TH_ACK) ||
4671		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4672		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4673			REASON_SET(reason, PFRES_SYNPROXY);
4674			return (PF_DROP);
4675		} else if ((*state)->src_node != NULL &&
4676		    pf_src_connlimit(state)) {
4677			REASON_SET(reason, PFRES_SRCLIMIT);
4678			return (PF_DROP);
4679		} else
4680			(*state)->src.state = PF_TCPS_PROXY_DST;
4681	}
4682	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
4683		if (direction == (*state)->direction) {
4684			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4685			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4686			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4687				REASON_SET(reason, PFRES_SYNPROXY);
4688				return (PF_DROP);
4689			}
4690			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
4691			if ((*state)->dst.seqhi == 1)
4692				(*state)->dst.seqhi = htonl(arc4random());
4693#ifdef __FreeBSD__
4694			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4695#else
4696			pf_send_tcp((*state)->rule.ptr, pd->af,
4697#endif
4698			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4699			    sk->port[pd->sidx], sk->port[pd->didx],
4700			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4701			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
4702			REASON_SET(reason, PFRES_SYNPROXY);
4703			return (PF_SYNPROXY_DROP);
4704		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4705		    (TH_SYN|TH_ACK)) ||
4706		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4707			REASON_SET(reason, PFRES_SYNPROXY);
4708			return (PF_DROP);
4709		} else {
4710			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4711			(*state)->dst.seqlo = ntohl(th->th_seq);
4712#ifdef __FreeBSD__
4713			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4714#else
4715			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4716#endif
4717			    pd->src, th->th_dport, th->th_sport,
4718			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4719			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4720			    (*state)->tag, NULL, NULL);
4721#ifdef __FreeBSD__
4722			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4723#else
4724			pf_send_tcp((*state)->rule.ptr, pd->af,
4725#endif
4726			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4727			    sk->port[pd->sidx], sk->port[pd->didx],
4728			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4729			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4730			    0, NULL, NULL);
4731			(*state)->src.seqdiff = (*state)->dst.seqhi -
4732			    (*state)->src.seqlo;
4733			(*state)->dst.seqdiff = (*state)->src.seqhi -
4734			    (*state)->dst.seqlo;
4735			(*state)->src.seqhi = (*state)->src.seqlo +
4736			    (*state)->dst.max_win;
4737			(*state)->dst.seqhi = (*state)->dst.seqlo +
4738			    (*state)->src.max_win;
4739			(*state)->src.wscale = (*state)->dst.wscale = 0;
4740			(*state)->src.state = (*state)->dst.state =
4741			    TCPS_ESTABLISHED;
4742			REASON_SET(reason, PFRES_SYNPROXY);
4743			return (PF_SYNPROXY_DROP);
4744		}
4745	}
4746
4747	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
4748	    dst->state >= TCPS_FIN_WAIT_2 &&
4749	    src->state >= TCPS_FIN_WAIT_2) {
4750#ifdef __FreeBSD__
4751		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4752#else
4753		if (pf_status.debug >= PF_DEBUG_MISC) {
4754#endif
4755			printf("pf: state reuse ");
4756			pf_print_state(*state);
4757			pf_print_flags(th->th_flags);
4758			printf("\n");
4759		}
4760		/* XXX make sure it's the same direction ?? */
4761		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
4762		pf_unlink_state(*state);
4763		*state = NULL;
4764		return (PF_DROP);
4765	}
4766
4767	if ((*state)->state_flags & PFSTATE_SLOPPY) {
4768		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
4769			return (PF_DROP);
4770	} else {
4771		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
4772		    &copyback) == PF_DROP)
4773			return (PF_DROP);
4774	}
4775
4776	/* translate source/destination address, if necessary */
4777	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4778		struct pf_state_key *nk = (*state)->key[pd->didx];
4779
4780		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4781		    nk->port[pd->sidx] != th->th_sport)
4782			pf_change_ap(m, pd->src, &th->th_sport,
4783			    pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
4784			    nk->port[pd->sidx], 0, pd->af);
4785
4786		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4787		    nk->port[pd->didx] != th->th_dport)
4788			pf_change_ap(m, pd->dst, &th->th_dport,
4789			    pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
4790			    nk->port[pd->didx], 0, pd->af);
4791		copyback = 1;
4792	}
4793
4794	/* Copyback sequence modulation or stateful scrub changes if needed */
4795	if (copyback)
4796#ifdef __FreeBSD__
4797		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4798#else
4799		m_copyback(m, off, sizeof(*th), th);
4800#endif
4801
4802	return (PF_PASS);
4803}
4804
4805int
4806pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4807    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4808{
4809	struct pf_state_peer	*src, *dst;
4810	struct pf_state_key_cmp	 key;
4811	struct udphdr		*uh = pd->hdr.udp;
4812
4813	key.af = pd->af;
4814	key.proto = IPPROTO_UDP;
4815	if (direction == PF_IN)	{	/* wire side, straight */
4816		PF_ACPY(&key.addr[0], pd->src, key.af);
4817		PF_ACPY(&key.addr[1], pd->dst, key.af);
4818		key.port[0] = uh->uh_sport;
4819		key.port[1] = uh->uh_dport;
4820	} else {			/* stack side, reverse */
4821		PF_ACPY(&key.addr[1], pd->src, key.af);
4822		PF_ACPY(&key.addr[0], pd->dst, key.af);
4823		key.port[1] = uh->uh_sport;
4824		key.port[0] = uh->uh_dport;
4825	}
4826
4827#ifdef __FreeBSD__
4828	STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
4829#else
4830	STATE_LOOKUP(kif, &key, direction, *state, m);
4831#endif
4832
4833	if (direction == (*state)->direction) {
4834		src = &(*state)->src;
4835		dst = &(*state)->dst;
4836	} else {
4837		src = &(*state)->dst;
4838		dst = &(*state)->src;
4839	}
4840
4841	/* update states */
4842	if (src->state < PFUDPS_SINGLE)
4843		src->state = PFUDPS_SINGLE;
4844	if (dst->state == PFUDPS_SINGLE)
4845		dst->state = PFUDPS_MULTIPLE;
4846
4847	/* update expire time */
4848	(*state)->expire = time_second;
4849	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4850		(*state)->timeout = PFTM_UDP_MULTIPLE;
4851	else
4852		(*state)->timeout = PFTM_UDP_SINGLE;
4853
4854	/* translate source/destination address, if necessary */
4855	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4856		struct pf_state_key *nk = (*state)->key[pd->didx];
4857
4858		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4859		    nk->port[pd->sidx] != uh->uh_sport)
4860			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
4861			    &uh->uh_sum, &nk->addr[pd->sidx],
4862			    nk->port[pd->sidx], 1, pd->af);
4863
4864		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4865		    nk->port[pd->didx] != uh->uh_dport)
4866			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
4867			    &uh->uh_sum, &nk->addr[pd->didx],
4868			    nk->port[pd->didx], 1, pd->af);
4869#ifdef __FreeBSD__
4870		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4871#else
4872		m_copyback(m, off, sizeof(*uh), uh);
4873#endif
4874	}
4875
4876	return (PF_PASS);
4877}
4878
4879int
4880pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4881    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4882{
4883	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
4884#ifdef __FreeBSD__
4885	u_int16_t	 icmpid = 0, *icmpsum;
4886#else
4887	u_int16_t	 icmpid, *icmpsum;
4888#endif
4889	u_int8_t	 icmptype;
4890	int		 state_icmp = 0;
4891	struct pf_state_key_cmp key;
4892
4893	switch (pd->proto) {
4894#ifdef INET
4895	case IPPROTO_ICMP:
4896		icmptype = pd->hdr.icmp->icmp_type;
4897		icmpid = pd->hdr.icmp->icmp_id;
4898		icmpsum = &pd->hdr.icmp->icmp_cksum;
4899
4900		if (icmptype == ICMP_UNREACH ||
4901		    icmptype == ICMP_SOURCEQUENCH ||
4902		    icmptype == ICMP_REDIRECT ||
4903		    icmptype == ICMP_TIMXCEED ||
4904		    icmptype == ICMP_PARAMPROB)
4905			state_icmp++;
4906		break;
4907#endif /* INET */
4908#ifdef INET6
4909	case IPPROTO_ICMPV6:
4910		icmptype = pd->hdr.icmp6->icmp6_type;
4911		icmpid = pd->hdr.icmp6->icmp6_id;
4912		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4913
4914		if (icmptype == ICMP6_DST_UNREACH ||
4915		    icmptype == ICMP6_PACKET_TOO_BIG ||
4916		    icmptype == ICMP6_TIME_EXCEEDED ||
4917		    icmptype == ICMP6_PARAM_PROB)
4918			state_icmp++;
4919		break;
4920#endif /* INET6 */
4921	}
4922
4923	if (!state_icmp) {
4924
4925		/*
4926		 * ICMP query/reply message not related to a TCP/UDP packet.
4927		 * Search for an ICMP state.
4928		 */
4929		key.af = pd->af;
4930		key.proto = pd->proto;
4931		key.port[0] = key.port[1] = icmpid;
4932		if (direction == PF_IN)	{	/* wire side, straight */
4933			PF_ACPY(&key.addr[0], pd->src, key.af);
4934			PF_ACPY(&key.addr[1], pd->dst, key.af);
4935		} else {			/* stack side, reverse */
4936			PF_ACPY(&key.addr[1], pd->src, key.af);
4937			PF_ACPY(&key.addr[0], pd->dst, key.af);
4938		}
4939
4940#ifdef __FreeBSD__
4941		STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
4942#else
4943		STATE_LOOKUP(kif, &key, direction, *state, m);
4944#endif
4945
4946		(*state)->expire = time_second;
4947		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4948
4949		/* translate source/destination address, if necessary */
4950		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4951			struct pf_state_key *nk = (*state)->key[pd->didx];
4952
4953			switch (pd->af) {
4954#ifdef INET
4955			case AF_INET:
4956				if (PF_ANEQ(pd->src,
4957				    &nk->addr[pd->sidx], AF_INET))
4958					pf_change_a(&saddr->v4.s_addr,
4959					    pd->ip_sum,
4960					    nk->addr[pd->sidx].v4.s_addr, 0);
4961
4962				if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
4963				    AF_INET))
4964					pf_change_a(&daddr->v4.s_addr,
4965					    pd->ip_sum,
4966					    nk->addr[pd->didx].v4.s_addr, 0);
4967
4968				if (nk->port[0] !=
4969				    pd->hdr.icmp->icmp_id) {
4970					pd->hdr.icmp->icmp_cksum =
4971					    pf_cksum_fixup(
4972					    pd->hdr.icmp->icmp_cksum, icmpid,
4973					    nk->port[pd->sidx], 0);
4974					pd->hdr.icmp->icmp_id =
4975					    nk->port[pd->sidx];
4976				}
4977
4978				m_copyback(m, off, ICMP_MINLEN,
4979#ifdef __FreeBSD__
4980				    (caddr_t)
4981#endif
4982				    pd->hdr.icmp);
4983				break;
4984#endif /* INET */
4985#ifdef INET6
4986			case AF_INET6:
4987				if (PF_ANEQ(pd->src,
4988				    &nk->addr[pd->sidx], AF_INET6))
4989					pf_change_a6(saddr,
4990					    &pd->hdr.icmp6->icmp6_cksum,
4991					    &nk->addr[pd->sidx], 0);
4992
4993				if (PF_ANEQ(pd->dst,
4994				    &nk->addr[pd->didx], AF_INET6))
4995					pf_change_a6(daddr,
4996					    &pd->hdr.icmp6->icmp6_cksum,
4997					    &nk->addr[pd->didx], 0);
4998
4999				m_copyback(m, off,
5000				    sizeof(struct icmp6_hdr),
5001#ifdef __FreeBSD__
5002				    (caddr_t)
5003#endif
5004				    pd->hdr.icmp6);
5005				break;
5006#endif /* INET6 */
5007			}
5008		}
5009		return (PF_PASS);
5010
5011	} else {
5012		/*
5013		 * ICMP error message in response to a TCP/UDP packet.
5014		 * Extract the inner TCP/UDP header and search for that state.
5015		 */
5016
5017		struct pf_pdesc	pd2;
5018#ifdef __FreeBSD__
5019		bzero(&pd2, sizeof pd2);
5020#endif
5021#ifdef INET
5022		struct ip	h2;
5023#endif /* INET */
5024#ifdef INET6
5025		struct ip6_hdr	h2_6;
5026		int		terminal = 0;
5027#endif /* INET6 */
5028#ifdef __FreeBSD__
5029		int		ipoff2 = 0;
5030		int		off2 = 0;
5031#else
5032		int		ipoff2;
5033		int		off2;
5034#endif
5035
5036		pd2.af = pd->af;
5037		/* Payload packet is from the opposite direction. */
5038		pd2.sidx = (direction == PF_IN) ? 1 : 0;
5039		pd2.didx = (direction == PF_IN) ? 0 : 1;
5040		switch (pd->af) {
5041#ifdef INET
5042		case AF_INET:
5043			/* offset of h2 in mbuf chain */
5044			ipoff2 = off + ICMP_MINLEN;
5045
5046			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
5047			    NULL, reason, pd2.af)) {
5048				DPFPRINTF(PF_DEBUG_MISC,
5049				    ("pf: ICMP error message too short "
5050				    "(ip)\n"));
5051				return (PF_DROP);
5052			}
5053			/*
5054			 * ICMP error messages don't refer to non-first
5055			 * fragments
5056			 */
5057			if (h2.ip_off & htons(IP_OFFMASK)) {
5058				REASON_SET(reason, PFRES_FRAG);
5059				return (PF_DROP);
5060			}
5061
5062			/* offset of protocol header that follows h2 */
5063			off2 = ipoff2 + (h2.ip_hl << 2);
5064
5065			pd2.proto = h2.ip_p;
5066			pd2.src = (struct pf_addr *)&h2.ip_src;
5067			pd2.dst = (struct pf_addr *)&h2.ip_dst;
5068			pd2.ip_sum = &h2.ip_sum;
5069			break;
5070#endif /* INET */
5071#ifdef INET6
5072		case AF_INET6:
5073			ipoff2 = off + sizeof(struct icmp6_hdr);
5074
5075			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
5076			    NULL, reason, pd2.af)) {
5077				DPFPRINTF(PF_DEBUG_MISC,
5078				    ("pf: ICMP error message too short "
5079				    "(ip6)\n"));
5080				return (PF_DROP);
5081			}
5082			pd2.proto = h2_6.ip6_nxt;
5083			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5084			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5085			pd2.ip_sum = NULL;
5086			off2 = ipoff2 + sizeof(h2_6);
5087			do {
5088				switch (pd2.proto) {
5089				case IPPROTO_FRAGMENT:
5090					/*
5091					 * ICMPv6 error messages for
5092					 * non-first fragments
5093					 */
5094					REASON_SET(reason, PFRES_FRAG);
5095					return (PF_DROP);
5096				case IPPROTO_AH:
5097				case IPPROTO_HOPOPTS:
5098				case IPPROTO_ROUTING:
5099				case IPPROTO_DSTOPTS: {
5100					/* get next header and header length */
5101					struct ip6_ext opt6;
5102
5103					if (!pf_pull_hdr(m, off2, &opt6,
5104					    sizeof(opt6), NULL, reason,
5105					    pd2.af)) {
5106						DPFPRINTF(PF_DEBUG_MISC,
5107						    ("pf: ICMPv6 short opt\n"));
5108						return (PF_DROP);
5109					}
5110					if (pd2.proto == IPPROTO_AH)
5111						off2 += (opt6.ip6e_len + 2) * 4;
5112					else
5113						off2 += (opt6.ip6e_len + 1) * 8;
5114					pd2.proto = opt6.ip6e_nxt;
5115					/* goto the next header */
5116					break;
5117				}
5118				default:
5119					terminal++;
5120					break;
5121				}
5122			} while (!terminal);
5123			break;
5124#endif /* INET6 */
5125		}
5126
5127		switch (pd2.proto) {
5128		case IPPROTO_TCP: {
5129			struct tcphdr		 th;
5130			u_int32_t		 seq;
5131			struct pf_state_peer	*src, *dst;
5132			u_int8_t		 dws;
5133			int			 copyback = 0;
5134
5135			/*
5136			 * Only the first 8 bytes of the TCP header can be
5137			 * expected. Don't access any TCP header fields after
5138			 * th_seq, an ackskew test is not possible.
5139			 */
5140			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
5141			    pd2.af)) {
5142				DPFPRINTF(PF_DEBUG_MISC,
5143				    ("pf: ICMP error message too short "
5144				    "(tcp)\n"));
5145				return (PF_DROP);
5146			}
5147
5148			key.af = pd2.af;
5149			key.proto = IPPROTO_TCP;
5150			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5151			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5152			key.port[pd2.sidx] = th.th_sport;
5153			key.port[pd2.didx] = th.th_dport;
5154
5155#ifdef __FreeBSD__
5156			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5157#else
5158			STATE_LOOKUP(kif, &key, direction, *state, m);
5159#endif
5160
5161			if (direction == (*state)->direction) {
5162				src = &(*state)->dst;
5163				dst = &(*state)->src;
5164			} else {
5165				src = &(*state)->src;
5166				dst = &(*state)->dst;
5167			}
5168
5169			if (src->wscale && dst->wscale)
5170				dws = dst->wscale & PF_WSCALE_MASK;
5171			else
5172				dws = 0;
5173
5174			/* Demodulate sequence number */
5175			seq = ntohl(th.th_seq) - src->seqdiff;
5176			if (src->seqdiff) {
5177				pf_change_a(&th.th_seq, icmpsum,
5178				    htonl(seq), 0);
5179				copyback = 1;
5180			}
5181
5182			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
5183			    (!SEQ_GEQ(src->seqhi, seq) ||
5184			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
5185#ifdef __FreeBSD__
5186				if (V_pf_status.debug >= PF_DEBUG_MISC) {
5187#else
5188				if (pf_status.debug >= PF_DEBUG_MISC) {
5189#endif
5190					printf("pf: BAD ICMP %d:%d ",
5191					    icmptype, pd->hdr.icmp->icmp_code);
5192					pf_print_host(pd->src, 0, pd->af);
5193					printf(" -> ");
5194					pf_print_host(pd->dst, 0, pd->af);
5195					printf(" state: ");
5196					pf_print_state(*state);
5197					printf(" seq=%u\n", seq);
5198				}
5199				REASON_SET(reason, PFRES_BADSTATE);
5200				return (PF_DROP);
5201			} else {
5202#ifdef __FreeBSD__
5203				if (V_pf_status.debug >= PF_DEBUG_MISC) {
5204#else
5205				if (pf_status.debug >= PF_DEBUG_MISC) {
5206#endif
5207					printf("pf: OK ICMP %d:%d ",
5208					    icmptype, pd->hdr.icmp->icmp_code);
5209					pf_print_host(pd->src, 0, pd->af);
5210					printf(" -> ");
5211					pf_print_host(pd->dst, 0, pd->af);
5212					printf(" state: ");
5213					pf_print_state(*state);
5214					printf(" seq=%u\n", seq);
5215				}
5216			}
5217
5218			/* translate source/destination address, if necessary */
5219			if ((*state)->key[PF_SK_WIRE] !=
5220			    (*state)->key[PF_SK_STACK]) {
5221				struct pf_state_key *nk =
5222				    (*state)->key[pd->didx];
5223
5224				if (PF_ANEQ(pd2.src,
5225				    &nk->addr[pd2.sidx], pd2.af) ||
5226				    nk->port[pd2.sidx] != th.th_sport)
5227					pf_change_icmp(pd2.src, &th.th_sport,
5228					    daddr, &nk->addr[pd2.sidx],
5229					    nk->port[pd2.sidx], NULL,
5230					    pd2.ip_sum, icmpsum,
5231					    pd->ip_sum, 0, pd2.af);
5232
5233				if (PF_ANEQ(pd2.dst,
5234				    &nk->addr[pd2.didx], pd2.af) ||
5235				    nk->port[pd2.didx] != th.th_dport)
5236					pf_change_icmp(pd2.dst, &th.th_dport,
5237					    NULL, /* XXX Inbound NAT? */
5238					    &nk->addr[pd2.didx],
5239					    nk->port[pd2.didx], NULL,
5240					    pd2.ip_sum, icmpsum,
5241					    pd->ip_sum, 0, pd2.af);
5242				copyback = 1;
5243			}
5244
5245			if (copyback) {
5246				switch (pd2.af) {
5247#ifdef INET
5248				case AF_INET:
5249					m_copyback(m, off, ICMP_MINLEN,
5250#ifdef __FreeBSD__
5251					    (caddr_t)
5252#endif
5253					    pd->hdr.icmp);
5254					m_copyback(m, ipoff2, sizeof(h2),
5255#ifdef __FreeBSD__
5256					    (caddr_t)
5257#endif
5258					    &h2);
5259					break;
5260#endif /* INET */
5261#ifdef INET6
5262				case AF_INET6:
5263					m_copyback(m, off,
5264					    sizeof(struct icmp6_hdr),
5265#ifdef __FreeBSD__
5266					    (caddr_t)
5267#endif
5268					    pd->hdr.icmp6);
5269					m_copyback(m, ipoff2, sizeof(h2_6),
5270#ifdef __FreeBSD__
5271					    (caddr_t)
5272#endif
5273					    &h2_6);
5274					break;
5275#endif /* INET6 */
5276				}
5277#ifdef __FreeBSD__
5278				m_copyback(m, off2, 8, (caddr_t)&th);
5279#else
5280				m_copyback(m, off2, 8, &th);
5281#endif
5282			}
5283
5284			return (PF_PASS);
5285			break;
5286		}
5287		case IPPROTO_UDP: {
5288			struct udphdr		uh;
5289
5290			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5291			    NULL, reason, pd2.af)) {
5292				DPFPRINTF(PF_DEBUG_MISC,
5293				    ("pf: ICMP error message too short "
5294				    "(udp)\n"));
5295				return (PF_DROP);
5296			}
5297
5298			key.af = pd2.af;
5299			key.proto = IPPROTO_UDP;
5300			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5301			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5302			key.port[pd2.sidx] = uh.uh_sport;
5303			key.port[pd2.didx] = uh.uh_dport;
5304
5305#ifdef __FreeBSD__
5306			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5307#else
5308			STATE_LOOKUP(kif, &key, direction, *state, m);
5309#endif
5310
5311			/* translate source/destination address, if necessary */
5312			if ((*state)->key[PF_SK_WIRE] !=
5313			    (*state)->key[PF_SK_STACK]) {
5314				struct pf_state_key *nk =
5315				    (*state)->key[pd->didx];
5316
5317				if (PF_ANEQ(pd2.src,
5318				    &nk->addr[pd2.sidx], pd2.af) ||
5319				    nk->port[pd2.sidx] != uh.uh_sport)
5320					pf_change_icmp(pd2.src, &uh.uh_sport,
5321					    daddr, &nk->addr[pd2.sidx],
5322					    nk->port[pd2.sidx], &uh.uh_sum,
5323					    pd2.ip_sum, icmpsum,
5324					    pd->ip_sum, 1, pd2.af);
5325
5326				if (PF_ANEQ(pd2.dst,
5327				    &nk->addr[pd2.didx], pd2.af) ||
5328				    nk->port[pd2.didx] != uh.uh_dport)
5329					pf_change_icmp(pd2.dst, &uh.uh_dport,
5330					    NULL, /* XXX Inbound NAT? */
5331					    &nk->addr[pd2.didx],
5332					    nk->port[pd2.didx], &uh.uh_sum,
5333					    pd2.ip_sum, icmpsum,
5334					    pd->ip_sum, 1, pd2.af);
5335
5336				switch (pd2.af) {
5337#ifdef INET
5338				case AF_INET:
5339					m_copyback(m, off, ICMP_MINLEN,
5340#ifdef __FreeBSD__
5341					    (caddr_t)
5342#endif
5343					    pd->hdr.icmp);
5344#ifdef __FreeBSD__
5345					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5346#else
5347					m_copyback(m, ipoff2, sizeof(h2), &h2);
5348#endif
5349					break;
5350#endif /* INET */
5351#ifdef INET6
5352				case AF_INET6:
5353					m_copyback(m, off,
5354					    sizeof(struct icmp6_hdr),
5355#ifdef __FreeBSD__
5356					    (caddr_t)
5357#endif
5358					    pd->hdr.icmp6);
5359					m_copyback(m, ipoff2, sizeof(h2_6),
5360#ifdef __FreeBSD__
5361					    (caddr_t)
5362#endif
5363					    &h2_6);
5364					break;
5365#endif /* INET6 */
5366				}
5367#ifdef __FreeBSD__
5368				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
5369#else
5370				m_copyback(m, off2, sizeof(uh), &uh);
5371#endif
5372			}
5373			return (PF_PASS);
5374			break;
5375		}
5376#ifdef INET
5377		case IPPROTO_ICMP: {
5378			struct icmp		iih;
5379
5380			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5381			    NULL, reason, pd2.af)) {
5382				DPFPRINTF(PF_DEBUG_MISC,
5383				    ("pf: ICMP error message too short i"
5384				    "(icmp)\n"));
5385				return (PF_DROP);
5386			}
5387
5388			key.af = pd2.af;
5389			key.proto = IPPROTO_ICMP;
5390			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5391			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5392			key.port[0] = key.port[1] = iih.icmp_id;
5393
5394#ifdef __FreeBSD__
5395			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5396#else
5397			STATE_LOOKUP(kif, &key, direction, *state, m);
5398#endif
5399
5400			/* translate source/destination address, if necessary */
5401			if ((*state)->key[PF_SK_WIRE] !=
5402			    (*state)->key[PF_SK_STACK]) {
5403				struct pf_state_key *nk =
5404				    (*state)->key[pd->didx];
5405
5406				if (PF_ANEQ(pd2.src,
5407				    &nk->addr[pd2.sidx], pd2.af) ||
5408				    nk->port[pd2.sidx] != iih.icmp_id)
5409					pf_change_icmp(pd2.src, &iih.icmp_id,
5410					    daddr, &nk->addr[pd2.sidx],
5411					    nk->port[pd2.sidx], NULL,
5412					    pd2.ip_sum, icmpsum,
5413					    pd->ip_sum, 0, AF_INET);
5414
5415				if (PF_ANEQ(pd2.dst,
5416				    &nk->addr[pd2.didx], pd2.af) ||
5417				    nk->port[pd2.didx] != iih.icmp_id)
5418					pf_change_icmp(pd2.dst, &iih.icmp_id,
5419					    NULL, /* XXX Inbound NAT? */
5420					    &nk->addr[pd2.didx],
5421					    nk->port[pd2.didx], NULL,
5422					    pd2.ip_sum, icmpsum,
5423					    pd->ip_sum, 0, AF_INET);
5424
5425#ifdef __FreeBSD__
5426				m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
5427				m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5428				m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
5429#else
5430				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
5431				m_copyback(m, ipoff2, sizeof(h2), &h2);
5432				m_copyback(m, off2, ICMP_MINLEN, &iih);
5433#endif
5434			}
5435			return (PF_PASS);
5436			break;
5437		}
5438#endif /* INET */
5439#ifdef INET6
5440		case IPPROTO_ICMPV6: {
5441			struct icmp6_hdr	iih;
5442
5443			if (!pf_pull_hdr(m, off2, &iih,
5444			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5445				DPFPRINTF(PF_DEBUG_MISC,
5446				    ("pf: ICMP error message too short "
5447				    "(icmp6)\n"));
5448				return (PF_DROP);
5449			}
5450
5451			key.af = pd2.af;
5452			key.proto = IPPROTO_ICMPV6;
5453			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5454			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5455			key.port[0] = key.port[1] = iih.icmp6_id;
5456
5457#ifdef __FreeBSD__
5458			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5459#else
5460			STATE_LOOKUP(kif, &key, direction, *state, m);
5461#endif
5462
5463			/* translate source/destination address, if necessary */
5464			if ((*state)->key[PF_SK_WIRE] !=
5465			    (*state)->key[PF_SK_STACK]) {
5466				struct pf_state_key *nk =
5467				    (*state)->key[pd->didx];
5468
5469				if (PF_ANEQ(pd2.src,
5470				    &nk->addr[pd2.sidx], pd2.af) ||
5471				    nk->port[pd2.sidx] != iih.icmp6_id)
5472					pf_change_icmp(pd2.src, &iih.icmp6_id,
5473					    daddr, &nk->addr[pd2.sidx],
5474					    nk->port[pd2.sidx], NULL,
5475					    pd2.ip_sum, icmpsum,
5476					    pd->ip_sum, 0, AF_INET6);
5477
5478				if (PF_ANEQ(pd2.dst,
5479				    &nk->addr[pd2.didx], pd2.af) ||
5480				    nk->port[pd2.didx] != iih.icmp6_id)
5481					pf_change_icmp(pd2.dst, &iih.icmp6_id,
5482					    NULL, /* XXX Inbound NAT? */
5483					    &nk->addr[pd2.didx],
5484					    nk->port[pd2.didx], NULL,
5485					    pd2.ip_sum, icmpsum,
5486					    pd->ip_sum, 0, AF_INET6);
5487
5488#ifdef __FreeBSD__
5489				m_copyback(m, off, sizeof(struct icmp6_hdr),
5490				    (caddr_t)pd->hdr.icmp6);
5491				m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
5492				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5493				    (caddr_t)&iih);
5494#else
5495				m_copyback(m, off, sizeof(struct icmp6_hdr),
5496				    pd->hdr.icmp6);
5497				m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
5498				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5499				    &iih);
5500#endif
5501			}
5502			return (PF_PASS);
5503			break;
5504		}
5505#endif /* INET6 */
5506		default: {
5507			key.af = pd2.af;
5508			key.proto = pd2.proto;
5509			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5510			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5511			key.port[0] = key.port[1] = 0;
5512
5513#ifdef __FreeBSD__
5514			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5515#else
5516			STATE_LOOKUP(kif, &key, direction, *state, m);
5517#endif
5518
5519			/* translate source/destination address, if necessary */
5520			if ((*state)->key[PF_SK_WIRE] !=
5521			    (*state)->key[PF_SK_STACK]) {
5522				struct pf_state_key *nk =
5523				    (*state)->key[pd->didx];
5524
5525				if (PF_ANEQ(pd2.src,
5526				    &nk->addr[pd2.sidx], pd2.af))
5527					pf_change_icmp(pd2.src, NULL, daddr,
5528					    &nk->addr[pd2.sidx], 0, NULL,
5529					    pd2.ip_sum, icmpsum,
5530					    pd->ip_sum, 0, pd2.af);
5531
5532				if (PF_ANEQ(pd2.dst,
5533				    &nk->addr[pd2.didx], pd2.af))
5534					pf_change_icmp(pd2.src, NULL,
5535					    NULL, /* XXX Inbound NAT? */
5536					    &nk->addr[pd2.didx], 0, NULL,
5537					    pd2.ip_sum, icmpsum,
5538					    pd->ip_sum, 0, pd2.af);
5539
5540				switch (pd2.af) {
5541#ifdef INET
5542				case AF_INET:
5543#ifdef __FreeBSD__
5544					m_copyback(m, off, ICMP_MINLEN,
5545					    (caddr_t)pd->hdr.icmp);
5546					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5547#else
5548					m_copyback(m, off, ICMP_MINLEN,
5549					    pd->hdr.icmp);
5550					m_copyback(m, ipoff2, sizeof(h2), &h2);
5551#endif
5552					break;
5553#endif /* INET */
5554#ifdef INET6
5555				case AF_INET6:
5556					m_copyback(m, off,
5557					    sizeof(struct icmp6_hdr),
5558#ifdef __FreeBSD__
5559					    (caddr_t)
5560#endif
5561					    pd->hdr.icmp6);
5562					m_copyback(m, ipoff2, sizeof(h2_6),
5563#ifdef __FreeBSD__
5564					    (caddr_t)
5565#endif
5566					    &h2_6);
5567					break;
5568#endif /* INET6 */
5569				}
5570			}
5571			return (PF_PASS);
5572			break;
5573		}
5574		}
5575	}
5576}
5577
5578int
5579pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5580    struct mbuf *m, struct pf_pdesc *pd)
5581{
5582	struct pf_state_peer	*src, *dst;
5583	struct pf_state_key_cmp	 key;
5584
5585	key.af = pd->af;
5586	key.proto = pd->proto;
5587	if (direction == PF_IN)	{
5588		PF_ACPY(&key.addr[0], pd->src, key.af);
5589		PF_ACPY(&key.addr[1], pd->dst, key.af);
5590		key.port[0] = key.port[1] = 0;
5591	} else {
5592		PF_ACPY(&key.addr[1], pd->src, key.af);
5593		PF_ACPY(&key.addr[0], pd->dst, key.af);
5594		key.port[1] = key.port[0] = 0;
5595	}
5596
5597#ifdef __FreeBSD__
5598	STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5599#else
5600	STATE_LOOKUP(kif, &key, direction, *state, m);
5601#endif
5602
5603	if (direction == (*state)->direction) {
5604		src = &(*state)->src;
5605		dst = &(*state)->dst;
5606	} else {
5607		src = &(*state)->dst;
5608		dst = &(*state)->src;
5609	}
5610
5611	/* update states */
5612	if (src->state < PFOTHERS_SINGLE)
5613		src->state = PFOTHERS_SINGLE;
5614	if (dst->state == PFOTHERS_SINGLE)
5615		dst->state = PFOTHERS_MULTIPLE;
5616
5617	/* update expire time */
5618	(*state)->expire = time_second;
5619	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5620		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5621	else
5622		(*state)->timeout = PFTM_OTHER_SINGLE;
5623
5624	/* translate source/destination address, if necessary */
5625	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5626		struct pf_state_key *nk = (*state)->key[pd->didx];
5627
5628#ifdef __FreeBSD__
5629		KASSERT(nk, ("%s: nk is null", __FUNCTION__));
5630		KASSERT(pd, ("%s: pd is null", __FUNCTION__));
5631		KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__));
5632		KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__));
5633#else
5634		KASSERT(nk);
5635		KASSERT(pd);
5636		KASSERT(pd->src);
5637		KASSERT(pd->dst);
5638#endif
5639		switch (pd->af) {
5640#ifdef INET
5641		case AF_INET:
5642			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5643				pf_change_a(&pd->src->v4.s_addr,
5644				    pd->ip_sum,
5645				    nk->addr[pd->sidx].v4.s_addr,
5646				    0);
5647
5648
5649			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5650				pf_change_a(&pd->dst->v4.s_addr,
5651				    pd->ip_sum,
5652				    nk->addr[pd->didx].v4.s_addr,
5653				    0);
5654
5655				break;
5656#endif /* INET */
5657#ifdef INET6
5658		case AF_INET6:
5659			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5660				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
5661
5662			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5663				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
5664#endif /* INET6 */
5665		}
5666	}
5667	return (PF_PASS);
5668}
5669
5670/*
5671 * ipoff and off are measured from the start of the mbuf chain.
5672 * h must be at "ipoff" on the mbuf chain.
5673 */
5674void *
5675pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5676    u_short *actionp, u_short *reasonp, sa_family_t af)
5677{
5678	switch (af) {
5679#ifdef INET
5680	case AF_INET: {
5681		struct ip	*h = mtod(m, struct ip *);
5682		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5683
5684		if (fragoff) {
5685			if (fragoff >= len)
5686				ACTION_SET(actionp, PF_PASS);
5687			else {
5688				ACTION_SET(actionp, PF_DROP);
5689				REASON_SET(reasonp, PFRES_FRAG);
5690			}
5691			return (NULL);
5692		}
5693		if (m->m_pkthdr.len < off + len ||
5694		    ntohs(h->ip_len) < off + len) {
5695			ACTION_SET(actionp, PF_DROP);
5696			REASON_SET(reasonp, PFRES_SHORT);
5697			return (NULL);
5698		}
5699		break;
5700	}
5701#endif /* INET */
5702#ifdef INET6
5703	case AF_INET6: {
5704		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5705
5706		if (m->m_pkthdr.len < off + len ||
5707		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5708		    (unsigned)(off + len)) {
5709			ACTION_SET(actionp, PF_DROP);
5710			REASON_SET(reasonp, PFRES_SHORT);
5711			return (NULL);
5712		}
5713		break;
5714	}
5715#endif /* INET6 */
5716	}
5717	m_copydata(m, off, len, p);
5718	return (p);
5719}
5720
5721int
5722pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
5723    int rtableid)
5724{
5725#ifdef __FreeBSD__
5726#ifdef RADIX_MPATH
5727	struct radix_node_head	*rnh;
5728#endif
5729#endif
5730	struct sockaddr_in	*dst;
5731	int			 ret = 1;
5732	int			 check_mpath;
5733#ifndef __FreeBSD__
5734	extern int		 ipmultipath;
5735#endif
5736#ifdef INET6
5737#ifndef __FreeBSD__
5738	extern int		 ip6_multipath;
5739#endif
5740	struct sockaddr_in6	*dst6;
5741	struct route_in6	 ro;
5742#else
5743	struct route		 ro;
5744#endif
5745	struct radix_node	*rn;
5746	struct rtentry		*rt;
5747	struct ifnet		*ifp;
5748
5749	check_mpath = 0;
5750#ifdef __FreeBSD__
5751#ifdef RADIX_MPATH
5752	/* XXX: stick to table 0 for now */
5753	rnh = rt_tables_get_rnh(0, af);
5754	if (rnh != NULL && rn_mpath_capable(rnh))
5755		check_mpath = 1;
5756#endif
5757#endif
5758	bzero(&ro, sizeof(ro));
5759	switch (af) {
5760	case AF_INET:
5761		dst = satosin(&ro.ro_dst);
5762		dst->sin_family = AF_INET;
5763		dst->sin_len = sizeof(*dst);
5764		dst->sin_addr = addr->v4;
5765#ifndef __FreeBSD__
5766		if (ipmultipath)
5767			check_mpath = 1;
5768#endif
5769		break;
5770#ifdef INET6
5771	case AF_INET6:
5772		/*
5773		 * Skip check for addresses with embedded interface scope,
5774		 * as they would always match anyway.
5775		 */
5776		if (IN6_IS_SCOPE_EMBED(&addr->v6))
5777			goto out;
5778		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5779		dst6->sin6_family = AF_INET6;
5780		dst6->sin6_len = sizeof(*dst6);
5781		dst6->sin6_addr = addr->v6;
5782#ifndef __FreeBSD__
5783		if (ip6_multipath)
5784			check_mpath = 1;
5785#endif
5786		break;
5787#endif /* INET6 */
5788	default:
5789		return (0);
5790	}
5791
5792	/* Skip checks for ipsec interfaces */
5793	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5794		goto out;
5795
5796#ifdef __FreeBSD__
5797	switch (af) {
5798#ifdef INET6
5799	case AF_INET6:
5800		in6_rtalloc_ign(&ro, 0, rtableid);
5801		break;
5802#endif
5803#ifdef INET
5804	case AF_INET:
5805		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
5806		break;
5807#endif
5808	default:
5809		rtalloc_ign((struct route *)&ro, 0);	/* No/default FIB. */
5810		break;
5811	}
5812#else /* ! __FreeBSD__ */
5813	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5814#endif
5815
5816	if (ro.ro_rt != NULL) {
5817		/* No interface given, this is a no-route check */
5818		if (kif == NULL)
5819			goto out;
5820
5821		if (kif->pfik_ifp == NULL) {
5822			ret = 0;
5823			goto out;
5824		}
5825
5826		/* Perform uRPF check if passed input interface */
5827		ret = 0;
5828		rn = (struct radix_node *)ro.ro_rt;
5829		do {
5830			rt = (struct rtentry *)rn;
5831#ifndef __FreeBSD__ /* CARPDEV */
5832			if (rt->rt_ifp->if_type == IFT_CARP)
5833				ifp = rt->rt_ifp->if_carpdev;
5834			else
5835#endif
5836				ifp = rt->rt_ifp;
5837
5838			if (kif->pfik_ifp == ifp)
5839				ret = 1;
5840#ifdef __FreeBSD__
5841#ifdef RADIX_MPATH
5842			rn = rn_mpath_next(rn);
5843#endif
5844#else
5845			rn = rn_mpath_next(rn, 0);
5846#endif
5847		} while (check_mpath == 1 && rn != NULL && ret == 0);
5848	} else
5849		ret = 0;
5850out:
5851	if (ro.ro_rt != NULL)
5852		RTFREE(ro.ro_rt);
5853	return (ret);
5854}
5855
5856int
5857pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
5858    int rtableid)
5859{
5860	struct sockaddr_in	*dst;
5861#ifdef INET6
5862	struct sockaddr_in6	*dst6;
5863	struct route_in6	 ro;
5864#else
5865	struct route		 ro;
5866#endif
5867	int			 ret = 0;
5868
5869	bzero(&ro, sizeof(ro));
5870	switch (af) {
5871	case AF_INET:
5872		dst = satosin(&ro.ro_dst);
5873		dst->sin_family = AF_INET;
5874		dst->sin_len = sizeof(*dst);
5875		dst->sin_addr = addr->v4;
5876		break;
5877#ifdef INET6
5878	case AF_INET6:
5879		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5880		dst6->sin6_family = AF_INET6;
5881		dst6->sin6_len = sizeof(*dst6);
5882		dst6->sin6_addr = addr->v6;
5883		break;
5884#endif /* INET6 */
5885	default:
5886		return (0);
5887	}
5888
5889#ifdef __FreeBSD__
5890	switch (af) {
5891#ifdef INET6
5892	case AF_INET6:
5893		in6_rtalloc_ign(&ro, 0, rtableid);
5894		break;
5895#endif
5896#ifdef INET
5897	case AF_INET:
5898		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
5899		break;
5900#endif
5901	default:
5902		rtalloc_ign((struct route *)&ro, 0);
5903		break;
5904	}
5905#else /* ! __FreeBSD__ */
5906	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5907#endif
5908
5909	if (ro.ro_rt != NULL) {
5910#ifdef __FreeBSD__
5911		/* XXX_IMPORT: later */
5912#else
5913		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
5914			ret = 1;
5915#endif
5916		RTFREE(ro.ro_rt);
5917	}
5918
5919	return (ret);
5920}
5921
5922#ifdef INET
/*
 * Transmit the IPv4 packet in *m through an interface selected by the
 * routing option of rule r (r->rt).
 *
 *  m     in/out packet; set to NULL at "done" unless r->rt is PF_DUPTO
 *  r     rule carrying the routing option and the address pool
 *  dir   PF_IN or PF_OUT
 *  oifp  interface the packet is currently being processed on
 *  s     matching state or NULL; when set, its rt_addr/rt_kif are used
 *        instead of consulting the rule's pool
 *  pd    packet descriptor (the FreeBSD build reads pd->pf_mtag)
 *
 * PF_DUPTO works on a copy of the packet and leaves *m alone.  For the
 * other options the function returns without doing anything when
 * (r->rt == PF_REPLYTO) == (r->direction == dir).
 *
 * In the FreeBSD build PF_UNLOCK()/PF_LOCK() bracket the calls to
 * pf_test(), if_output() and icmp_error(); presumably the function is
 * entered and left with the PF lock held -- confirm against the callers.
 */
5923void
5924pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5925    struct pf_state *s, struct pf_pdesc *pd)
5926{
5927	struct mbuf		*m0, *m1;
5928	struct route		 iproute;
5929	struct route		*ro = NULL;
5930	struct sockaddr_in	*dst;
5931	struct ip		*ip;
5932	struct ifnet		*ifp = NULL;
5933	struct pf_addr		 naddr;
5934	struct pf_src_node	*sn = NULL;
5935	int			 error = 0;
5936#ifdef __FreeBSD__
5937	int sw_csum;
5938#endif
5939#ifdef IPSEC
5940	struct m_tag		*mtag;
5941#endif /* IPSEC */
5942
5943	if (m == NULL || *m == NULL || r == NULL ||
5944	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5945		panic("pf_route: invalid parameters");
5946
	/*
	 * Count the passes through here in the packet's pf tag and drop the
	 * packet once the counter, before this increment, is above 3.
	 */
5947#ifdef __FreeBSD__
5948	if (pd->pf_mtag->routed++ > 3) {
5949#else
5950	if ((*m)->m_pkthdr.pf.routed++ > 3) {
5951#endif
5952		m0 = *m;
5953		*m = NULL;
5954		goto bad;
5955	}
5956
	/* dup-to sends a copy; every other option sends the packet itself. */
5957	if (r->rt == PF_DUPTO) {
5958#ifdef __FreeBSD__
5959		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5960#else
5961		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5962#endif
5963			return;
5964	} else {
5965		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5966			return;
5967		m0 = *m;
5968	}
5969
	/* The IP header is read via mtod(), so it must sit in the first mbuf. */
5970	if (m0->m_len < sizeof(struct ip)) {
5971		DPFPRINTF(PF_DEBUG_URGENT,
5972		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5973		goto bad;
5974	}
5975
5976	ip = mtod(m0, struct ip *);
5977
	/* Default next hop: the packet's own destination address. */
5978	ro = &iproute;
5979	bzero((caddr_t)ro, sizeof(*ro));
5980	dst = satosin(&ro->ro_dst);
5981	dst->sin_family = AF_INET;
5982	dst->sin_len = sizeof(*dst);
5983	dst->sin_addr = ip->ip_dst;
5984
5985	if (r->rt == PF_FASTROUTE) {
		/* fastroute: take interface and next hop from the route lookup. */
5986#ifdef __FreeBSD__
5987		in_rtalloc_ign(ro, 0, M_GETFIB(m0));
5988#else
5989		rtalloc(ro);
5990#endif
5991		if (ro->ro_rt == 0) {
5992#ifdef __FreeBSD__
5993			KMOD_IPSTAT_INC(ips_noroute);
5994#else
5995			ipstat.ips_noroute++;
5996#endif
5997			goto bad;
5998		}
5999
6000		ifp = ro->ro_rt->rt_ifp;
6001		ro->ro_rt->rt_use++;
6002
6003		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
6004			dst = satosin(ro->ro_rt->rt_gateway);
6005	} else {
		/*
		 * Other options: next hop and interface come from the rule's
		 * pool (no state) or from what the state recorded.
		 */
6006		if (TAILQ_EMPTY(&r->rpool.list)) {
6007			DPFPRINTF(PF_DEBUG_URGENT,
6008			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
6009			goto bad;
6010		}
6011		if (s == NULL) {
6012			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
6013			    &naddr, NULL, &sn);
6014			if (!PF_AZERO(&naddr, AF_INET))
6015				dst->sin_addr.s_addr = naddr.v4.s_addr;
6016			ifp = r->rpool.cur->kif ?
6017			    r->rpool.cur->kif->pfik_ifp : NULL;
6018		} else {
6019			if (!PF_AZERO(&s->rt_addr, AF_INET))
6020				dst->sin_addr.s_addr =
6021				    s->rt_addr.v4.s_addr;
6022			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6023		}
6024	}
6025	if (ifp == NULL)
6026		goto bad;
6027
	/*
	 * The packet leaves through a different interface than the one it
	 * is being processed on: run the outbound ruleset for that
	 * interface first.  pf_test() may replace or consume m0, hence the
	 * NULL check and the re-read of the IP header afterwards.
	 */
6028	if (oifp != ifp) {
6029#ifdef __FreeBSD__
6030		PF_UNLOCK();
6031		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
6032			PF_LOCK();
6033			goto bad;
6034		} else if (m0 == NULL) {
6035			PF_LOCK();
6036			goto done;
6037		}
6038		PF_LOCK();
6039#else
6040		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
6041			goto bad;
6042		else if (m0 == NULL)
6043			goto done;
6044#endif
6045		if (m0->m_len < sizeof(struct ip)) {
6046			DPFPRINTF(PF_DEBUG_URGENT,
6047			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
6048			goto bad;
6049		}
6050		ip = mtod(m0, struct ip *);
6051	}
6052
6053#ifdef __FreeBSD__
6054	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	/* sw_csum: checksum work the chosen interface cannot offload. */
6055	m0->m_pkthdr.csum_flags |= CSUM_IP;
6056	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
6057	if (sw_csum & CSUM_DELAY_DATA) {
6058		/*
6059		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
6060		 */
6061		NTOHS(ip->ip_len);
6062		NTOHS(ip->ip_off);	/* XXX: needed? */
6063		in_delayed_cksum(m0);
6064		HTONS(ip->ip_len);
6065		HTONS(ip->ip_off);
6066		sw_csum &= ~CSUM_DELAY_DATA;
6067	}
6068	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
6069
	/*
	 * Send in one piece when the packet fits the MTU, or when the
	 * interface advertises CSUM_FRAGMENT and DF is not set.
	 */
6070	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
6071	    (ifp->if_hwassist & CSUM_FRAGMENT &&
6072	    ((ip->ip_off & htons(IP_DF)) == 0))) {
6073		/*
6074		 * ip->ip_len = htons(ip->ip_len);
6075		 * ip->ip_off = htons(ip->ip_off);
6076		 */
6077		ip->ip_sum = 0;
6078		if (sw_csum & CSUM_DELAY_IP) {
6079			/* From KAME */
6080			if (ip->ip_v == IPVERSION &&
6081			    (ip->ip_hl << 2) == sizeof(*ip)) {
6082				ip->ip_sum = in_cksum_hdr(ip);
6083			} else {
6084				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6085			}
6086		}
6087		PF_UNLOCK();
6088		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro);
6089		PF_LOCK();
6090		goto done;
6091	}
6092#else
6093	/* Copied from ip_output. */
6094#ifdef IPSEC
6095	/*
6096	 * If deferred crypto processing is needed, check that the
6097	 * interface supports it.
6098	 */
6099	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
6100	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
6101		/* Notify IPsec to do its own crypto. */
6102		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
6103		goto bad;
6104	}
6105#endif /* IPSEC */
6106
6107	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
6108	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
6109		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
6110		    ifp->if_bridge != NULL) {
6111			in_delayed_cksum(m0);
6112			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */
6113		}
6114	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
6115		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
6116		    ifp->if_bridge != NULL) {
6117			in_delayed_cksum(m0);
6118			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */
6119		}
6120	}
6121
6122	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
6123		ip->ip_sum = 0;
6124		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
6125		    ifp->if_bridge == NULL) {
6126			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
			/*
			 * NOTE(review): this nested __FreeBSD__ test sits in
			 * the non-FreeBSD branch, so only its #else part can
			 * ever be compiled.
			 */
6127#ifdef __FreeBSD__
6128			KMOD_IPSTAT_INC(ips_outhwcsum);
6129#else
6130			ipstat.ips_outhwcsum++;
6131#endif
6132		} else
6133			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6134		/* Update relevant hardware checksum stats for TCP/UDP */
6135		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
6136			KMOD_TCPSTAT_INC(tcps_outhwcsum);
6137		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
6138			KMOD_UDPSTAT_INC(udps_outhwcsum);
6139		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
6140		goto done;
6141	}
6142#endif
6143
6144	/*
6145	 * Too large for interface; fragment if possible.
6146	 * Must be able to put at least 8 bytes per fragment.
6147	 */
6148	if (ip->ip_off & htons(IP_DF)) {
6149#ifdef __FreeBSD__
6150		KMOD_IPSTAT_INC(ips_cantfrag);
6151#else
6152		ipstat.ips_cantfrag++;
6153#endif
		/*
		 * DF is set: answer with an ICMP "fragmentation needed" error,
		 * except for dup-to copies, which are simply dropped.
		 */
6154		if (r->rt != PF_DUPTO) {
6155#ifdef __FreeBSD__
6156			/* icmp_error() expects host byte ordering */
6157			NTOHS(ip->ip_len);
6158			NTOHS(ip->ip_off);
6159			PF_UNLOCK();
6160			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
6161			    ifp->if_mtu);
6162			PF_LOCK();
6163#else
6164			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
6165			    ifp->if_mtu);
6166#endif
6167			goto done;
6168		} else
6169			goto bad;
6170	}
6171
	/* m1 keeps the head of the packet for the fragment walk below. */
6172	m1 = m0;
6173#ifdef __FreeBSD__
6174	/*
6175	 * XXX: is cheaper + less error prone than own function
6176	 */
6177	NTOHS(ip->ip_len);
6178	NTOHS(ip->ip_off);
6179	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
6180#else
6181	error = ip_fragment(m0, ifp, ifp->if_mtu);
6182#endif
6183	if (error) {
6184#ifndef __FreeBSD__    /* ip_fragment does not do m_freem() on FreeBSD */
6185		m0 = NULL;
6186#endif
6187		goto bad;
6188	}
6189
	/*
	 * Walk the m_nextpkt chain of fragments.  Each one is sent until
	 * the first if_output() error; from then on the rest are freed.
	 * The "else"/m_freem() tail is shared by both preprocessor branches.
	 */
6190	for (m0 = m1; m0; m0 = m1) {
6191		m1 = m0->m_nextpkt;
6192		m0->m_nextpkt = 0;
6193#ifdef __FreeBSD__
6194		if (error == 0) {
6195			PF_UNLOCK();
6196			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
6197			    NULL);
6198			PF_LOCK();
6199		} else
6200#else
6201		if (error == 0)
6202			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
6203			    NULL);
6204		else
6205#endif
6206			m_freem(m0);
6207	}
6208
6209	if (error == 0)
6210#ifdef __FreeBSD__
6211		KMOD_IPSTAT_INC(ips_fragmented);
6212#else
6213		ipstat.ips_fragmented++;
6214#endif
6215
	/*
	 * Common exit: the caller's pointer is cleared unless a dup-to copy
	 * was being sent, and a route obtained by the lookup is released.
	 */
6216done:
6217	if (r->rt != PF_DUPTO)
6218		*m = NULL;
6219	if (ro == &iproute && ro->ro_rt)
6220		RTFREE(ro->ro_rt);
6221	return;
6222
	/* Error exit: free whatever m0 currently refers to, then clean up. */
6223bad:
6224	m_freem(m0);
6225	goto done;
6226}
6227#endif /* INET */
6228
6229#ifdef INET6
6230void
6231pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
6232    struct pf_state *s, struct pf_pdesc *pd)
6233{
6234	struct mbuf		*m0;
6235	struct route_in6	 ip6route;
6236	struct route_in6	*ro;
6237	struct sockaddr_in6	*dst;
6238	struct ip6_hdr		*ip6;
6239	struct ifnet		*ifp = NULL;
6240	struct pf_addr		 naddr;
6241	struct pf_src_node	*sn = NULL;
6242
6243	if (m == NULL || *m == NULL || r == NULL ||
6244	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
6245		panic("pf_route6: invalid parameters");
6246
6247#ifdef __FreeBSD__
6248	if (pd->pf_mtag->routed++ > 3) {
6249#else
6250	if ((*m)->m_pkthdr.pf.routed++ > 3) {
6251#endif
6252		m0 = *m;
6253		*m = NULL;
6254		goto bad;
6255	}
6256
6257	if (r->rt == PF_DUPTO) {
6258#ifdef __FreeBSD__
6259		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
6260#else
6261		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
6262#endif
6263			return;
6264	} else {
6265		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
6266			return;
6267		m0 = *m;
6268	}
6269
6270	if (m0->m_len < sizeof(struct ip6_hdr)) {
6271		DPFPRINTF(PF_DEBUG_URGENT,
6272		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
6273		goto bad;
6274	}
6275	ip6 = mtod(m0, struct ip6_hdr *);
6276
6277	ro = &ip6route;
6278	bzero((caddr_t)ro, sizeof(*ro));
6279	dst = (struct sockaddr_in6 *)&ro->ro_dst;
6280	dst->sin6_family = AF_INET6;
6281	dst->sin6_len = sizeof(*dst);
6282	dst->sin6_addr = ip6->ip6_dst;
6283
6284	/* Cheat. XXX why only in the v6 case??? */
6285	if (r->rt == PF_FASTROUTE) {
6286#ifdef __FreeBSD__
6287		m0->m_flags |= M_SKIP_FIREWALL;
6288		PF_UNLOCK();
6289		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
6290#else
6291		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
6292		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
6293#endif
6294		return;
6295	}
6296
6297	if (TAILQ_EMPTY(&r->rpool.list)) {
6298		DPFPRINTF(PF_DEBUG_URGENT,
6299		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
6300		goto bad;
6301	}
6302	if (s == NULL) {
6303		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
6304		    &naddr, NULL, &sn);
6305		if (!PF_AZERO(&naddr, AF_INET6))
6306			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
6307			    &naddr, AF_INET6);
6308		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
6309	} else {
6310		if (!PF_AZERO(&s->rt_addr, AF_INET6))
6311			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
6312			    &s->rt_addr, AF_INET6);
6313		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6314	}
6315	if (ifp == NULL)
6316		goto bad;
6317
6318	if (oifp != ifp) {
6319#ifdef __FreeBSD__
6320		PF_UNLOCK();
6321		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
6322			PF_LOCK();
6323			goto bad;
6324		} else if (m0 == NULL) {
6325			PF_LOCK();
6326			goto done;
6327		}
6328		PF_LOCK();
6329#else
6330		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
6331			goto bad;
6332		else if (m0 == NULL)
6333			goto done;
6334#endif
6335		if (m0->m_len < sizeof(struct ip6_hdr)) {
6336			DPFPRINTF(PF_DEBUG_URGENT,
6337			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
6338			goto bad;
6339		}
6340		ip6 = mtod(m0, struct ip6_hdr *);
6341	}
6342
6343	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
6344	    ~ifp->if_hwassist) {
6345		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
6346		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
6347		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
6348	}
6349
6350	/*
6351	 * If the packet is too large for the outgoing interface,
6352	 * send back an icmp6 error.
6353	 */
6354	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
6355		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
6356	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
6357#ifdef __FreeBSD__
6358		PF_UNLOCK();
6359#endif
6360		nd6_output(ifp, ifp, m0, dst, NULL);
6361#ifdef __FreeBSD__
6362		PF_LOCK();
6363#endif
6364	} else {
6365		in6_ifstat_inc(ifp, ifs6_in_toobig);
6366#ifdef __FreeBSD__
6367		if (r->rt != PF_DUPTO) {
6368			PF_UNLOCK();
6369			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6370			PF_LOCK();
6371		} else
6372#else
6373		if (r->rt != PF_DUPTO)
6374			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6375		else
6376#endif
6377			goto bad;
6378	}
6379
6380done:
6381	if (r->rt != PF_DUPTO)
6382		*m = NULL;
6383	return;
6384
6385bad:
6386	m_freem(m0);
6387	goto done;
6388}
6389#endif /* INET6 */
6390
6391#ifdef __FreeBSD__
6392/*
6393 * FreeBSD supports cksum offloads for the following drivers.
6394 *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
6395 *   ti(4), txp(4), xl(4)
6396 *
6397 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
6398 *  network driver performed cksum including pseudo header, need to verify
6399 *   csum_data
6400 * CSUM_DATA_VALID :
6401 *  network driver performed cksum, needs to additional pseudo header
6402 *  cksum computation with partial csum_data(i.e. lack of H/W support for
6403 *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
6404 *
6405 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
6406 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
6407 * TCP/UDP layer.
6408 * Also, set csum_data to 0xffff to force cksum validation.
6409 */
6410int
6411pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
6412{
6413	u_int16_t sum = 0;
6414	int hw_assist = 0;
6415	struct ip *ip;
6416
6417	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6418		return (1);
6419	if (m->m_pkthdr.len < off + len)
6420		return (1);
6421
6422	switch (p) {
6423	case IPPROTO_TCP:
6424		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6425			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6426				sum = m->m_pkthdr.csum_data;
6427			} else {
6428				ip = mtod(m, struct ip *);
6429				sum = in_pseudo(ip->ip_src.s_addr,
6430				ip->ip_dst.s_addr, htonl((u_short)len +
6431				m->m_pkthdr.csum_data + IPPROTO_TCP));
6432			}
6433			sum ^= 0xffff;
6434			++hw_assist;
6435		}
6436		break;
6437	case IPPROTO_UDP:
6438		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6439			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6440				sum = m->m_pkthdr.csum_data;
6441			} else {
6442				ip = mtod(m, struct ip *);
6443				sum = in_pseudo(ip->ip_src.s_addr,
6444				ip->ip_dst.s_addr, htonl((u_short)len +
6445				m->m_pkthdr.csum_data + IPPROTO_UDP));
6446			}
6447			sum ^= 0xffff;
6448			++hw_assist;
6449		}
6450		break;
6451	case IPPROTO_ICMP:
6452#ifdef INET6
6453	case IPPROTO_ICMPV6:
6454#endif /* INET6 */
6455		break;
6456	default:
6457		return (1);
6458	}
6459
6460	if (!hw_assist) {
6461		switch (af) {
6462		case AF_INET:
6463			if (p == IPPROTO_ICMP) {
6464				if (m->m_len < off)
6465					return (1);
6466				m->m_data += off;
6467				m->m_len -= off;
6468				sum = in_cksum(m, len);
6469				m->m_data -= off;
6470				m->m_len += off;
6471			} else {
6472				if (m->m_len < sizeof(struct ip))
6473					return (1);
6474				sum = in4_cksum(m, p, off, len);
6475			}
6476			break;
6477#ifdef INET6
6478		case AF_INET6:
6479			if (m->m_len < sizeof(struct ip6_hdr))
6480				return (1);
6481			sum = in6_cksum(m, p, off, len);
6482			break;
6483#endif /* INET6 */
6484		default:
6485			return (1);
6486		}
6487	}
6488	if (sum) {
6489		switch (p) {
6490		case IPPROTO_TCP:
6491		    {
6492			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
6493			break;
6494		    }
6495		case IPPROTO_UDP:
6496		    {
6497			KMOD_UDPSTAT_INC(udps_badsum);
6498			break;
6499		    }
6500#ifdef INET
6501		case IPPROTO_ICMP:
6502		    {
6503			KMOD_ICMPSTAT_INC(icps_checksum);
6504			break;
6505		    }
6506#endif
6507#ifdef INET6
6508		case IPPROTO_ICMPV6:
6509		    {
6510			KMOD_ICMP6STAT_INC(icp6s_checksum);
6511			break;
6512		    }
6513#endif /* INET6 */
6514		}
6515		return (1);
6516	} else {
6517		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
6518			m->m_pkthdr.csum_flags |=
6519			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
6520			m->m_pkthdr.csum_data = 0xffff;
6521		}
6522	}
6523	return (0);
6524}
6525#else /* !__FreeBSD__ */
6526
6527/*
6528 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
6529 *   off is the offset where the protocol header starts
6530 *   len is the total length of protocol header plus payload
6531 * returns 0 when the checksum is valid, otherwise returns 1.
6532 */
6533int
6534pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
6535    sa_family_t af)
6536{
6537	u_int16_t flag_ok, flag_bad;
6538	u_int16_t sum;
6539
6540	switch (p) {
6541	case IPPROTO_TCP:
6542		flag_ok = M_TCP_CSUM_IN_OK;
6543		flag_bad = M_TCP_CSUM_IN_BAD;
6544		break;
6545	case IPPROTO_UDP:
6546		flag_ok = M_UDP_CSUM_IN_OK;
6547		flag_bad = M_UDP_CSUM_IN_BAD;
6548		break;
6549	case IPPROTO_ICMP:
6550#ifdef INET6
6551	case IPPROTO_ICMPV6:
6552#endif /* INET6 */
6553		flag_ok = flag_bad = 0;
6554		break;
6555	default:
6556		return (1);
6557	}
6558	if (m->m_pkthdr.csum_flags & flag_ok)
6559		return (0);
6560	if (m->m_pkthdr.csum_flags & flag_bad)
6561		return (1);
6562	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6563		return (1);
6564	if (m->m_pkthdr.len < off + len)
6565		return (1);
6566	switch (af) {
6567#ifdef INET
6568	case AF_INET:
6569		if (p == IPPROTO_ICMP) {
6570			if (m->m_len < off)
6571				return (1);
6572			m->m_data += off;
6573			m->m_len -= off;
6574			sum = in_cksum(m, len);
6575			m->m_data -= off;
6576			m->m_len += off;
6577		} else {
6578			if (m->m_len < sizeof(struct ip))
6579				return (1);
6580			sum = in4_cksum(m, p, off, len);
6581		}
6582		break;
6583#endif /* INET */
6584#ifdef INET6
6585	case AF_INET6:
6586		if (m->m_len < sizeof(struct ip6_hdr))
6587			return (1);
6588		sum = in6_cksum(m, p, off, len);
6589		break;
6590#endif /* INET6 */
6591	default:
6592		return (1);
6593	}
6594	if (sum) {
6595		m->m_pkthdr.csum_flags |= flag_bad;
6596		switch (p) {
6597		case IPPROTO_TCP:
6598			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
6599			break;
6600		case IPPROTO_UDP:
6601			KMOD_UDPSTAT_INC(udps_badsum);
6602			break;
6603#ifdef INET
6604		case IPPROTO_ICMP:
6605			KMOD_ICMPSTAT_INC(icps_checksum);
6606			break;
6607#endif
6608#ifdef INET6
6609		case IPPROTO_ICMPV6:
6610			KMOD_ICMP6STAT_INC(icp6s_checksum);
6611			break;
6612#endif /* INET6 */
6613		}
6614		return (1);
6615	}
6616	m->m_pkthdr.csum_flags |= flag_ok;
6617	return (0);
6618}
6619#endif
6620
6621#ifndef __FreeBSD__
6622struct pf_divert *
6623pf_find_divert(struct mbuf *m)
6624{
6625	struct m_tag    *mtag;
6626
6627	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
6628		return (NULL);
6629
6630	return ((struct pf_divert *)(mtag + 1));
6631}
6632
6633struct pf_divert *
6634pf_get_divert(struct mbuf *m)
6635{
6636	struct m_tag    *mtag;
6637
6638	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
6639		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
6640		    M_NOWAIT);
6641		if (mtag == NULL)
6642			return (NULL);
6643		bzero(mtag + 1, sizeof(struct pf_divert));
6644		m_tag_prepend(m, mtag);
6645	}
6646
6647	return ((struct pf_divert *)(mtag + 1));
6648}
6649#endif
6650
6651#ifdef INET
/*
 * pf_test: run the packet filter on one IPv4 packet.
 *
 *   dir  is the packet direction; it is compared with PF_IN / PF_OUT
 *   ifp  is the interface; its if_pf_kif supplies the pf interface state
 *   m0   points at the packet's mbuf pointer; *m0 is reloaded after
 *        pf_normalize_ip() and is set to NULL when the packet is
 *        consumed here (divert, PF_SYNPROXY_DROP, PF_DEFER)
 *   eh   is stored in the packet descriptor (pd.eh)
 *   inp  (FreeBSD only) is passed through to pf_test_rule()
 *
 * Returns the resulting action.  PF_SYNPROXY_DROP and PF_DEFER are
 * reported to the caller as PF_PASS with *m0 == NULL.
 * On FreeBSD PF_LOCK() is taken on entry and released on every return
 * path of this function.
 */
int
#ifdef __FreeBSD__
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct inpcb *inp)
#else
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh)
#endif
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0;
#ifdef __FreeBSD__
	struct ip		*h = NULL;
	struct m_tag		*ipfwtag;
	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
#else
	struct ip		*h;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
#endif
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx, pqid = 0;

	/* Nothing to do while pf is not running: let the packet through. */
#ifdef __FreeBSD__
	PF_LOCK();
	if (!V_pf_status.running)
	{
		PF_UNLOCK();
		return (PF_PASS);
	}
#else
	if (!pf_status.running)
		return (PF_PASS);
#endif

	/* Start from an all-zero packet descriptor. */
	memset(&pd, 0, sizeof(pd));
#ifdef __FreeBSD__
	/* The pf mbuf tag is required below; drop when it is unavailable. */
	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
		PF_UNLOCK();
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: pf_get_mtag returned NULL\n"));
		return (PF_DROP);
	}
#endif
	/* Non-FreeBSD: a CARP interface uses the kif of its carpdev. */
#ifndef __FreeBSD__
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
#endif
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	/* Interfaces flagged PFI_IFLAG_SKIP are not filtered at all. */
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
#ifdef __FreeBSD__
	{
		PF_UNLOCK();
#endif
		return (PF_PASS);
#ifdef __FreeBSD__
	}
#endif

#ifdef __FreeBSD__
	M_ASSERTPKTHDR(m);
#else
#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */
#endif

	/*
	 * The packet must hold at least a basic IP header.
	 * NOTE(review): h has not been assigned yet on this path (NULL on
	 * FreeBSD, uninitialized otherwise).  The code at "done" only
	 * dereferences h when action == PF_PASS, but it does hand h to
	 * PFLOG_PACKET because log is set -- confirm that is tolerated.
	 */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Packets marked to bypass the firewall are passed unfiltered. */
#ifdef __FreeBSD__
	if (m->m_flags & M_SKIP_FIREWALL) {
		PF_UNLOCK();
		return (PF_PASS);
	}
#else
	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
		return (PF_PASS);
#endif

	/*
	 * FreeBSD: if the packet carries an ipfw rule tag (and divert is
	 * loaded), handle the tag here.  A divert tag with rulenum 0 marks
	 * the packet as looped and is removed.  Note that the "else" below
	 * binds to the pf_normalize_ip() test, so normalization is skipped
	 * whenever such a tag was found.
	 */
#ifdef __FreeBSD__
	if (ip_divert_ptr != NULL &&
	    ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
		struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
		if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
			m_tag_delete(m, ipfwtag);
		}
		/* Restore M_FASTFWD_OURS that was stashed before diverting. */
		if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
			m->m_flags |= M_FASTFWD_OURS;
			pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
		}
	} else
#endif
	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip *);

	/* off = IP header length in bytes; it is also the payload offset. */
	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Fill in the packet descriptor from the IP header. */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	pd.sport = pd.dport = NULL;
	pd.ip_sum = &h->ip_sum;
	pd.proto_sum = NULL;
	pd.proto = h->ip_p;
	pd.dir = dir;
	pd.sidx = (dir == PF_IN) ? 0 : 1;
	pd.didx = (dir == PF_IN) ? 1 : 0;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	/*
	 * Per-protocol handling.  Each case first tries to match an
	 * existing state.  On PF_PASS the rule, anchor and log setting are
	 * taken from the state (and pfsync is notified when compiled in).
	 * If no state was found (s == NULL) the ruleset is evaluated with
	 * pf_test_rule().  If a state was found but the result is not
	 * PF_PASS, that result is kept as is.
	 */
	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* Payload length: total minus IP header minus TCP header. */
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* ACKs without payload select the rule's pqid for ALTQ below. */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/*
		 * Reject a zero destination port and a UDP length that is
		 * larger than what is left of the packet or smaller than
		 * the UDP header itself.
		 */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp	ih;

		pd.hdr.icmp = &ih;
		/* Only the first ICMP_MINLEN bytes of the header are pulled. */
		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

#ifdef INET6
	/* ICMPv6 carried inside an IPv4 packet is always dropped. */
	case IPPROTO_ICMPV6: {
		action = PF_DROP;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
		goto done;
	}
#endif

	default:
		action = pf_test_state_other(&s, dir, kif, m, &pd);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

done:
	/*
	 * A passed packet whose IP header carries options (ip_hl > 5) is
	 * dropped unless the state or the matching rule allows options.
	 */
	if (action == PF_PASS && h->ip_hl > 5 &&
	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with ip options\n"));
	}

	/* Apply the state's tag and/or the rule's routing table id. */
	if ((s && s->tag) || r->rtableid >= 0)
#ifdef __FreeBSD__
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
#else
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
#endif

	/* Inbound: record the stack-side state key on the packet. */
	if (dir == PF_IN && s && s->key[PF_SK_STACK])
#ifdef __FreeBSD__
		pd.pf_mtag->statekey = s->key[PF_SK_STACK];
#else
		m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
#endif

#ifdef ALTQ
	/*
	 * Queue assignment: empty TCP ACKs (pqid) and low-delay TOS packets
	 * get the rule's pqid, everything else the rule's qid.
	 */
	if (action == PF_PASS && r->qid) {
#ifdef __FreeBSD__
		if (pqid || (pd.tos & IPTOS_LOWDELAY))
			pd.pf_mtag->qid = r->pqid;
		else
			pd.pf_mtag->qid = r->qid;
		/* add hints for ecn */
		pd.pf_mtag->hdr = h;

#else
		if (pqid || (pd.tos & IPTOS_LOWDELAY))
			m->m_pkthdr.pf.qid = r->pqid;
		else
			m->m_pkthdr.pf.qid = r->qid;
		/* add hints for ecn */
		m->m_pkthdr.pf.hdr = h;
#endif
	}
#endif /* ALTQ */

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
#ifdef __FreeBSD__
		m->m_flags |= M_SKIP_FIREWALL;
#else
		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
#endif

#ifdef __FreeBSD__
	/*
	 * Divert: attach an ipfw rule tag carrying the divert port and the
	 * direction, release the pf lock and hand the packet to
	 * ip_divert_ptr().  This path returns directly, so the logging and
	 * accounting further down are not performed for diverted packets.
	 */
	if (action == PF_PASS && r->divert.port &&
	    ip_divert_ptr != NULL && !PACKET_LOOPED()) {

		ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
				sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
		if (ipfwtag != NULL) {
			((struct ipfw_rule_ref *)(ipfwtag+1))->info =
			    ntohs(r->divert.port);
			((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;

			m_tag_prepend(m, ipfwtag);

			PF_UNLOCK();

			/*
			 * Stash M_FASTFWD_OURS in the pf tag; it is restored
			 * when the packet re-enters pf_test() with the tag.
			 */
			if (m->m_flags & M_FASTFWD_OURS) {
				pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT;
				m->m_flags &= ~M_FASTFWD_OURS;
			}

			ip_divert_ptr(*m0,
				dir ==  PF_IN ? DIR_IN : DIR_OUT);
			/* The packet now belongs to divert. */
			*m0 = NULL;
			return (action);
		} else {
			/* XXX: ipfw has the same behaviour! */
			action = PF_DROP;
			REASON_SET(&reason, PFRES_MEMORY);
			log = 1;
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: failed to allocate divert tag\n"));
		}
	}
#else
	/* Inbound divert: record port and address in the divert tag. */
	if (dir == PF_IN && action == PF_PASS && r->divert.port) {
		struct pf_divert *divert;

		if ((divert = pf_get_divert(m))) {
			m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->port = r->divert.port;
			divert->addr.ipv4 = r->divert.addr.v4;
		}
	}
#endif

	if (log) {
		struct pf_rule *lr;

		/* Log against the NAT rule when it has PF_LOG_ALL set. */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* Per-interface counters, indexed by direction and pass/block. */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	/*
	 * Rule, anchor, state, source-node and table counters.  They are
	 * updated when the packet passed or the matching rule is a drop
	 * rule.
	 */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* State counters are relative to the state's direction. */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		/*
		 * Table statistics use the matching rule, or the NAT rule
		 * when only the default rule matched.
		 */
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
#ifdef __FreeBSD__
		if (nr != NULL && r == &V_pf_default_rule)
#else
		if (nr != NULL && r == &pf_default_rule)
#endif
			tr = nr;
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl,
			    (s == NULL) ? pd.src :
			    &s->key[(s->direction == PF_IN)]->
				addr[(s->direction == PF_OUT)],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl,
			    (s == NULL) ? pd.dst :
			    &s->key[(s->direction == PF_IN)]->
				addr[(s->direction == PF_IN)],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->dst.neg);
	}

	/*
	 * Final disposition.  PF_SYNPROXY_DROP frees the packet and then
	 * falls through to PF_DEFER; both clear *m0 and report PF_PASS to
	 * the caller.
	 */
	switch (action) {
	case PF_SYNPROXY_DROP:
		m_freem(*m0);
	case PF_DEFER:
		*m0 = NULL;
		action = PF_PASS;
		break;
	default:
		/* pf_route can free the mbuf causing *m0 to become NULL */
		if (r->rt)
			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
		break;
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	return (action);
}
7138#endif /* INET */
7139
7140#ifdef INET6
/*
 * pf_test6: run the packet filter on one IPv6 packet.
 *
 *   dir  is the packet direction; it is compared with PF_IN / PF_OUT
 *   ifp  is the interface; its if_pf_kif supplies the pf interface state
 *   m0   points at the packet's mbuf pointer; *m0 is reloaded after
 *        pf_normalize_ip6() and is set to NULL when the packet is
 *        consumed here (PF_SYNPROXY_DROP, PF_DEFER)
 *   eh   is stored in the packet descriptor (pd.eh)
 *   inp  (FreeBSD only) is passed through to pf_test_rule()
 *
 * Returns the resulting action.  PF_SYNPROXY_DROP and PF_DEFER are
 * reported to the caller as PF_PASS with *m0 == NULL.
 * On FreeBSD PF_LOCK() is taken on entry and released on every return
 * path of this function.
 */
int
#ifdef __FreeBSD__
pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct inpcb *inp)
#else
pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh)
#endif
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0, *n = NULL;
#ifdef __FreeBSD__
	struct ip6_hdr		*h = NULL;
	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
#else
	struct ip6_hdr		*h;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
#endif
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, terminal = 0, dirndx, rh_cnt = 0;

	/* Nothing to do while pf is not running: let the packet through. */
#ifdef __FreeBSD__
	PF_LOCK();
	if (!V_pf_status.running) {
		PF_UNLOCK();
		return (PF_PASS);
	}
#else
	if (!pf_status.running)
		return (PF_PASS);
#endif

	/* Start from an all-zero packet descriptor. */
	memset(&pd, 0, sizeof(pd));
#ifdef __FreeBSD__
	/* The pf mbuf tag is required below; drop when it is unavailable. */
	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
		PF_UNLOCK();
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: pf_get_mtag returned NULL\n"));
		return (PF_DROP);
	}
#endif
	/* Non-FreeBSD: a CARP interface uses the kif of its carpdev. */
#ifndef __FreeBSD__
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
#endif
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	/* Interfaces flagged PFI_IFLAG_SKIP are not filtered at all. */
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
#ifdef __FreeBSD__
	{
		PF_UNLOCK();
#endif
		return (PF_PASS);
#ifdef __FreeBSD__
	}
#endif

#ifdef __FreeBSD__
	M_ASSERTPKTHDR(m);
#else
#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test6");
#endif /* DIAGNOSTIC */
#endif

	/*
	 * The packet must hold at least the fixed IPv6 header.
	 * NOTE(review): h has not been assigned yet on this path (NULL on
	 * FreeBSD, uninitialized otherwise) and it is handed to
	 * PFLOG_PACKET at "done" because log is set -- confirm that is
	 * tolerated.
	 */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Packets tagged PF_TAG_GENERATED are passed unfiltered. */
#ifdef __FreeBSD__
	if (pd.pf_mtag->flags & PF_TAG_GENERATED) {
		PF_UNLOCK();
#else
	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
#endif
		return (PF_PASS);
#ifdef __FreeBSD__
	}
#endif

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip6_hdr *);

#if 1
	/*
	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	/* htons() is used where ntohs() is meant; a test for 0 is unaffected. */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
		goto done;
	}
#endif

	/* Fill in the packet descriptor from the IPv6 header. */
	pd.src = (struct pf_addr *)&h->ip6_src;
	pd.dst = (struct pf_addr *)&h->ip6_dst;
	pd.sport = pd.dport = NULL;
	pd.ip_sum = NULL;
	pd.proto_sum = NULL;
	pd.dir = dir;
	pd.sidx = (dir == PF_IN) ? 0 : 1;
	pd.didx = (dir == PF_IN) ? 1 : 0;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	/*
	 * Walk the chain of extension headers.  off advances past each
	 * header and pd.proto follows the "next header" field until a
	 * protocol that is not handled here is reached (terminal).
	 */
	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	do {
		switch (pd.proto) {
		case IPPROTO_FRAGMENT:
			/* Fragments are decided by pf_test_fragment() alone. */
			action = pf_test_fragment(&r, dir, kif, m, h,
			    &pd, &a, &ruleset);
			if (action == PF_DROP)
				REASON_SET(&reason, PFRES_FRAG);
			goto done;
		case IPPROTO_ROUTING: {
			struct ip6_rthdr rthdr;

			/* A second routing header is never accepted. */
			if (rh_cnt++) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 more than one rthdr\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
				log = 1;
				goto done;
			}
			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short rthdr\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			/* Type 0 routing headers are always dropped. */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 rthdr0\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
				log = 1;
				goto done;
			}
			/* FALLTHROUGH */
		}
		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext	opt6;

			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/*
			 * AH: header size is (ip6e_len + 2) * 4 bytes; the
			 * other headers use (ip6e_len + 1) * 8 bytes.
			 */
			if (pd.proto == IPPROTO_AH)
				off += (opt6.ip6e_len + 2) * 4;
			else
				off += (opt6.ip6e_len + 1) * 8;
			pd.proto = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);

	/* if there's no routing header, use unmodified mbuf for checksumming */
	/*
	 * NOTE(review): nothing in this function assigns n a value other
	 * than NULL or m, so this assignment is always taken when reached.
	 */
	if (!n)
		n = m;

	/*
	 * Per-protocol handling.  Each case first tries to match an
	 * existing state.  On PF_PASS the rule, anchor and log setting are
	 * taken from the state (and pfsync is notified when compiled in).
	 * If no state was found (s == NULL) the ruleset is evaluated with
	 * pf_test_rule().  If a state was found but the result is not
	 * PF_PASS, that result is kept as is.
	 */
	switch (pd.proto) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		/* Payload length: total minus headers up to and incl. TCP. */
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
#endif
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		/*
		 * Reject a zero destination port and a UDP length that is
		 * larger than what is left of the packet or smaller than
		 * the UDP header itself.
		 */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
#endif
		break;
	}

	/* ICMPv4 carried inside an IPv6 packet is always dropped. */
	case IPPROTO_ICMP: {
		action = PF_DROP;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
		goto done;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr	ih;

		pd.hdr.icmp6 = &ih;
		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif,
		    m, off, h, &pd, &reason);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
#endif
		break;
	}

	default:
		action = pf_test_state_other(&s, dir, kif, m, &pd);
		if (action == PF_PASS) {
#if NPFSYNC > 0
#ifdef __FreeBSD__
			if (pfsync_update_state_ptr != NULL)
				pfsync_update_state_ptr(s);
#else
			pfsync_update_state(s);
#endif
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, &ip6intrq);
#endif
		break;
	}

done:
	/*
	 * NOTE(review): n is either m (nothing to free) or, when "done" was
	 * reached through one of the early gotos above, still NULL.  In the
	 * latter case this calls m_freem(NULL) -- confirm m_freem() accepts
	 * a NULL argument.
	 */
	if (n != m) {
		m_freem(n);
		n = NULL;
	}

	/* handle dangerous IPv6 extension headers. */
	/*
	 * A passed packet that carried a routing header (rh_cnt != 0) is
	 * dropped unless the state or the matching rule allows options.
	 */
	if (action == PF_PASS && rh_cnt &&
	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with dangerous v6 headers\n"));
	}

	/* Apply the state's tag and/or the rule's routing table id. */
	if ((s && s->tag) || r->rtableid >= 0)
#ifdef __FreeBSD__
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
#else
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
#endif

	/* Inbound: record the stack-side state key on the packet. */
	if (dir == PF_IN && s && s->key[PF_SK_STACK])
#ifdef __FreeBSD__
		pd.pf_mtag->statekey = s->key[PF_SK_STACK];
#else
		m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
#endif

#ifdef ALTQ
	/*
	 * Queue assignment.  pd.tos is set to 0 above and not changed in
	 * this function, so unless a callee that receives &pd modifies it
	 * the IPTOS_LOWDELAY test is false and r->qid is used.
	 */
	if (action == PF_PASS && r->qid) {
#ifdef __FreeBSD__
		if (pd.tos & IPTOS_LOWDELAY)
			pd.pf_mtag->qid = r->pqid;
		else
			pd.pf_mtag->qid = r->qid;
		/* add hints for ecn */
		pd.pf_mtag->hdr = h;
#else
		if (pd.tos & IPTOS_LOWDELAY)
			m->m_pkthdr.pf.qid = r->pqid;
		else
			m->m_pkthdr.pf.qid = r->qid;
		/* add hints for ecn */
		m->m_pkthdr.pf.hdr = h;
#endif
	}
#endif /* ALTQ */

	/*
	 * Inbound TCP/UDP that was redirected (rdr/binat) to the IPv6
	 * loopback address gets flagged, as in the IPv4 path.
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
#ifdef __FreeBSD__
		m->m_flags |= M_SKIP_FIREWALL;
#else
		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
#endif

#ifdef __FreeBSD__
	/* XXX: Anybody working on it?! */
	/* Note: this prints for every packet matching a divert rule. */
	if (r->divert.port)
		printf("pf: divert(9) is not supported for IPv6\n");
#else
	/* Inbound divert: record port and address in the divert tag. */
	if (dir == PF_IN && action == PF_PASS && r->divert.port) {
		struct pf_divert *divert;

		if ((divert = pf_get_divert(m))) {
			m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->port = r->divert.port;
			divert->addr.ipv6 = r->divert.addr.v6;
		}
	}
#endif

	if (log) {
		struct pf_rule *lr;

		/* Log against the NAT rule when it has PF_LOG_ALL set. */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* Per-interface counters, indexed by direction and pass/block. */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	/*
	 * Rule, anchor, state, source-node and table counters.  They are
	 * updated when the packet passed or the matching rule is a drop
	 * rule.
	 */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* State counters are relative to the state's direction. */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		/*
		 * Table statistics use the matching rule, or the NAT rule
		 * when only the default rule matched.
		 */
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
#ifdef __FreeBSD__
		if (nr != NULL && r == &V_pf_default_rule)
#else
		if (nr != NULL && r == &pf_default_rule)
#endif
			tr = nr;
		/*
		 * NOTE(review): the address indices here are the constants
		 * 0 and 1, whereas pf_test() derives them from
		 * s->direction -- confirm the difference is intended.
		 */
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl,
			    (s == NULL) ? pd.src :
			    &s->key[(s->direction == PF_IN)]->addr[0],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl,
			    (s == NULL) ? pd.dst :
			    &s->key[(s->direction == PF_IN)]->addr[1],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->dst.neg);
	}

	/*
	 * Final disposition.  PF_SYNPROXY_DROP frees the packet and then
	 * falls through to PF_DEFER; both clear *m0 and report PF_PASS to
	 * the caller.
	 */
	switch (action) {
	case PF_SYNPROXY_DROP:
		m_freem(*m0);
	case PF_DEFER:
		*m0 = NULL;
		action = PF_PASS;
		break;
	default:
		/* pf_route6 can free the mbuf causing *m0 to become NULL */
		if (r->rt)
			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
		break;
	}

#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	return (action);
}
7646#endif /* INET6 */
7647
7648int
7649pf_check_congestion(struct ifqueue *ifq)
7650{
7651#ifdef __FreeBSD__
7652	/* XXX_IMPORT: later */
7653	return (0);
7654#else
7655	if (ifq->ifq_congestion)
7656		return (1);
7657	else
7658		return (0);
7659#endif
7660}
7661
7662/*
7663 * must be called whenever any addressing information such as
7664 * address, port, protocol has changed
7665 */
7666void
7667pf_pkt_addr_changed(struct mbuf *m)
7668{
7669#ifdef __FreeBSD__
7670	struct pf_mtag	*pf_tag;
7671
7672	if ((pf_tag = pf_find_mtag(m)) != NULL)
7673		pf_tag->statekey = NULL;
7674#else
7675	m->m_pkthdr.pf.statekey = NULL;
7676#endif
7677}
7678