1/*	$NetBSD: pf.c,v 1.67 2011/11/19 22:51:24 tls Exp $	*/
2/*	$OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * Copyright (c) 2002,2003 Henning Brauer
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 */
38
39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: pf.c,v 1.67 2011/11/19 22:51:24 tls Exp $");
41
42#include "pflog.h"
43
44#include "pfsync.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/mbuf.h>
49#include <sys/filio.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/kernel.h>
53#include <sys/time.h>
54#include <sys/pool.h>
55#include <sys/proc.h>
56#include <sys/rwlock.h>
57#ifdef __NetBSD__
58#include <sys/kthread.h>
59#include <sys/kauth.h>
60#endif /* __NetBSD__ */
61
62#include <net/if.h>
63#include <net/if_types.h>
64#include <net/bpf.h>
65#include <net/route.h>
66#ifndef __NetBSD__
67#include <net/radix_mpath.h>
68#endif /* !__NetBSD__ */
69
70#include <netinet/in.h>
71#ifdef __NetBSD__
72#include <netinet/in_offload.h>
73#endif /* __NetBSD__ */
74#include <netinet/in_var.h>
75#include <netinet/in_systm.h>
76#include <netinet/ip.h>
77#include <netinet/ip_var.h>
78#include <netinet/tcp.h>
79#include <netinet/tcp_seq.h>
80#include <netinet/udp.h>
81#include <netinet/ip_icmp.h>
82#include <netinet/in_pcb.h>
83#include <netinet/tcp_timer.h>
84#include <netinet/tcp_var.h>
85#include <netinet/udp_var.h>
86#include <netinet/icmp_var.h>
87#ifndef __NetBSD__
88#include <netinet/if_ether.h>
89#else
90#include <net/if_ether.h>
91#endif /* __NetBSD__ */
92
93#ifndef __NetBSD__
94#include <dev/rndvar.h>
95#else
96#include <sys/cprng.h>
97#endif /* __NetBSD__ */
98
99#include <net/pfvar.h>
100#include <net/if_pflog.h>
101
102#if NPFSYNC > 0
103#include <net/if_pfsync.h>
104#endif /* NPFSYNC > 0 */
105
106#ifdef INET6
107#include <netinet/ip6.h>
108#include <netinet6/ip6_var.h>
109#ifdef __NetBSD__
110#include <netinet6/in6_pcb.h>
111#endif /* __NetBSD__ */
112#include <netinet/icmp6.h>
113#include <netinet6/nd6.h>
114#endif /* INET6 */
115
116#ifdef __NetBSD__
117#include <netinet/tcp_rndiss.h>
118#endif /* __NetBSD__ */
119
120
121#define DPFPRINTF(n, x)	if (pf_status.debug >= (n)) printf x
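/*
 * Usage sketch (illustrative): the second argument carries its own
 * parentheses because the macro expands to a bare "printf x", e.g.
 *
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping malformed packet\n"));
 *
 * The expansion is an unbraced "if", so wrap calls in braces when they
 * form the body of another conditional.
 */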
122
123/*
124 * Global variables
125 */
126
127/* state tables */
128struct pf_state_tree_lan_ext	 pf_statetbl_lan_ext;
129struct pf_state_tree_ext_gwy	 pf_statetbl_ext_gwy;
130
131struct pf_altqqueue	 pf_altqs[2];
132struct pf_palist	 pf_pabuf;
133struct pf_altqqueue	*pf_altqs_active;
134struct pf_altqqueue	*pf_altqs_inactive;
135struct pf_status	 pf_status;
136
137u_int32_t		 ticket_altqs_active;
138u_int32_t		 ticket_altqs_inactive;
139int			 altqs_inactive_open;
140u_int32_t		 ticket_pabuf;
141
142struct pf_anchor_stackframe {
143	struct pf_ruleset			*rs;
144	struct pf_rule				*r;
145	struct pf_anchor_node			*parent;
146	struct pf_anchor			*child;
147} pf_anchor_stack[64];
148
149struct pool		 pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
150struct pool		 pf_state_pl, pf_state_key_pl;
151struct pool		 pf_altq_pl;
152
153void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
154
155void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
156			    u_int32_t);
157void			 pf_add_threshold(struct pf_threshold *);
158int			 pf_check_threshold(struct pf_threshold *);
159
160void			 pf_change_ap(struct pf_addr *, u_int16_t *,
161			    u_int16_t *, u_int16_t *, struct pf_addr *,
162			    u_int16_t, u_int8_t, sa_family_t);
163int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
164			    struct tcphdr *, struct pf_state_peer *);
165#ifdef INET6
166void			 pf_change_a6(struct pf_addr *, u_int16_t *,
167			    struct pf_addr *, u_int8_t);
168#endif /* INET6 */
169void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
170			    struct pf_addr *, struct pf_addr *, u_int16_t,
171			    u_int16_t *, u_int16_t *, u_int16_t *,
172			    u_int16_t *, u_int8_t, sa_family_t);
173void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
174			    const struct pf_addr *, const struct pf_addr *,
175			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
176			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
177			    u_int16_t, struct ether_header *, struct ifnet *);
178void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
179			    sa_family_t, struct pf_rule *);
180struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
181			    int, int, struct pfi_kif *,
182			    struct pf_addr *, u_int16_t, struct pf_addr *,
183			    u_int16_t, int);
184struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
185			    int, int, struct pfi_kif *, struct pf_src_node **,
186			    struct pf_addr *, u_int16_t,
187			    struct pf_addr *, u_int16_t,
188			    struct pf_addr *, u_int16_t *);
189void			 pf_attach_state(struct pf_state_key *,
190			    struct pf_state *, int);
191void			 pf_detach_state(struct pf_state *, int);
192int			 pf_test_rule(struct pf_rule **, struct pf_state **,
193			    int, struct pfi_kif *, struct mbuf *, int,
194			    void *, struct pf_pdesc *, struct pf_rule **,
195			    struct pf_ruleset **, struct ifqueue *);
196int			 pf_test_fragment(struct pf_rule **, int,
197			    struct pfi_kif *, struct mbuf *, void *,
198			    struct pf_pdesc *, struct pf_rule **,
199			    struct pf_ruleset **);
200int			 pf_test_state_tcp(struct pf_state **, int,
201			    struct pfi_kif *, struct mbuf *, int,
202			    void *, struct pf_pdesc *, u_short *);
203int			 pf_test_state_udp(struct pf_state **, int,
204			    struct pfi_kif *, struct mbuf *, int,
205			    void *, struct pf_pdesc *);
206int			 pf_test_state_icmp(struct pf_state **, int,
207			    struct pfi_kif *, struct mbuf *, int,
208			    void *, struct pf_pdesc *, u_short *);
209int			 pf_test_state_other(struct pf_state **, int,
210			    struct pfi_kif *, struct pf_pdesc *);
211int			 pf_match_tag(struct mbuf *, struct pf_rule *, int *);
212void			 pf_step_into_anchor(int *, struct pf_ruleset **, int,
213			    struct pf_rule **, struct pf_rule **,  int *);
214int			 pf_step_out_of_anchor(int *, struct pf_ruleset **,
215			     int, struct pf_rule **, struct pf_rule **,
216			     int *);
217void			 pf_hash(const struct pf_addr *, struct pf_addr *,
218			    struct pf_poolhashkey *, sa_family_t);
219int			 pf_map_addr(u_int8_t, struct pf_rule *,
220			    const struct pf_addr *, struct pf_addr *,
221			    struct pf_addr *, struct pf_src_node **);
222int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
223			    struct pf_addr *, struct pf_addr *, u_int16_t,
224			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
225			    struct pf_src_node **);
226void			 pf_route(struct mbuf **, struct pf_rule *, int,
227			    struct ifnet *, struct pf_state *,
228			    struct pf_pdesc *);
229void			 pf_route6(struct mbuf **, struct pf_rule *, int,
230			    struct ifnet *, struct pf_state *,
231			    struct pf_pdesc *);
232int			 pf_socket_lookup(int, struct pf_pdesc *);
233u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
234			    sa_family_t);
235u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
236			    sa_family_t);
237u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
238				u_int16_t);
239void			 pf_set_rt_ifp(struct pf_state *,
240			    struct pf_addr *);
241#ifdef __NetBSD__
242int			 pf_check_proto_cksum(struct mbuf *, int, int, int,
243			    u_int8_t, sa_family_t);
244#else
245int			 pf_check_proto_cksum(struct mbuf *, int, int,
246			    u_int8_t, sa_family_t);
247#endif /* !__NetBSD__ */
248int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
249			    struct pf_addr_wrap *);
250struct pf_state		*pf_find_state(struct pfi_kif *,
251			    struct pf_state_key_cmp *, u_int8_t);
252int			 pf_src_connlimit(struct pf_state **);
253void			 pf_stateins_err(const char *, struct pf_state *,
254			    struct pfi_kif *);
255int			 pf_check_congestion(struct ifqueue *);
256
257extern struct pool pfr_ktable_pl;
258extern struct pool pfr_kentry_pl;
259
260extern int pf_state_lock;
261
262struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
263	{ &pf_state_pl, PFSTATE_HIWAT },
264	{ &pf_src_tree_pl, PFSNODE_HIWAT },
265	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
266	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
267	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
268};
269
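/*
 * The STATE_LOOKUP() macro below relies on the caller providing
 * "direction", "kif", "key" and "state" in the enclosing scope and may
 * return PF_DROP or PF_PASS on the caller's behalf.  The leading
 * pf_state_lock test is a NetBSD addition: while that flag is raised,
 * lookups fail and the packet is dropped.
 */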
270#define STATE_LOOKUP()							\
271	do {								\
272		if (pf_state_lock) {		    \
273			*state = NULL;				\
274			return (PF_DROP);			\
275		}								\
276		if (direction == PF_IN)					\
277			*state = pf_find_state(kif, &key, PF_EXT_GWY);	\
278		else							\
279			*state = pf_find_state(kif, &key, PF_LAN_EXT);	\
280		if (*state == NULL || (*state)->timeout == PFTM_PURGE)	\
281			return (PF_DROP);				\
282		if (direction == PF_OUT &&				\
283		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
284		      (*state)->rule.ptr->direction == PF_OUT) ||	\
285		     ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
286		      (*state)->rule.ptr->direction == PF_IN)) &&	\
287		    (*state)->rt_kif != NULL &&				\
288		    (*state)->rt_kif != kif)				\
289			return (PF_PASS);				\
290	} while (0)
291
292#define	STATE_TRANSLATE(sk) \
293	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
294	((sk)->af == AF_INET6 && \
295	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
296	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
297	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) || \
298	(sk)->lan.port != (sk)->gwy.port
299
300#define BOUND_IFACE(r, k) \
301	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
302
303#define STATE_INC_COUNTERS(s)				\
304	do {						\
305		s->rule.ptr->states++;			\
306		if (s->anchor.ptr != NULL)		\
307			s->anchor.ptr->states++;	\
308		if (s->nat_rule.ptr != NULL)		\
309			s->nat_rule.ptr->states++;	\
310	} while (0)
311
312#define STATE_DEC_COUNTERS(s)				\
313	do {						\
314		if (s->nat_rule.ptr != NULL)		\
315			s->nat_rule.ptr->states--;	\
316		if (s->anchor.ptr != NULL)		\
317			s->anchor.ptr->states--;	\
318		s->rule.ptr->states--;			\
319	} while (0)
320
321static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
322static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
323	struct pf_state_key *);
324static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
325	struct pf_state_key *);
326static __inline int pf_state_compare_id(struct pf_state *,
327	struct pf_state *);
328
329struct pf_src_tree tree_src_tracking;
330
331struct pf_state_tree_id tree_id;
332struct pf_state_queue state_list;
333
334RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
335RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
336    entry_lan_ext, pf_state_compare_lan_ext);
337RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
338    entry_ext_gwy, pf_state_compare_ext_gwy);
339RB_GENERATE(pf_state_tree_id, pf_state,
340    entry_id, pf_state_compare_id);
341
342#define	PF_DT_SKIP_LANEXT	0x01
343#define	PF_DT_SKIP_EXTGWY	0x02
344
345#ifdef __NetBSD__
346static __inline struct pfi_kif *
347bound_iface(const struct pf_rule *r, const struct pf_rule *nr,
348    struct pfi_kif *k)
349{
350	uint32_t rule_flag;
351
352	rule_flag = r->rule_flag;
353	if (nr != NULL)
354		rule_flag |= nr->rule_flag;
355
356	return ((rule_flag & PFRULE_IFBOUND) != 0) ? k : pfi_all;
357}
358#endif /* __NetBSD__ */
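/*
 * bound_iface() is the NetBSD counterpart of the BOUND_IFACE() macro
 * above: it also folds in the NAT rule's flags, so a state becomes
 * interface-bound if either the filter rule or the translation rule
 * carries PFRULE_IFBOUND.
 */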
359
360static __inline int
361pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
362{
363	int	diff;
364
365	if (a->rule.ptr > b->rule.ptr)
366		return (1);
367	if (a->rule.ptr < b->rule.ptr)
368		return (-1);
369	if ((diff = a->af - b->af) != 0)
370		return (diff);
371	switch (a->af) {
372#ifdef INET
373	case AF_INET:
374		if (a->addr.addr32[0] > b->addr.addr32[0])
375			return (1);
376		if (a->addr.addr32[0] < b->addr.addr32[0])
377			return (-1);
378		break;
379#endif /* INET */
380#ifdef INET6
381	case AF_INET6:
382		if (a->addr.addr32[3] > b->addr.addr32[3])
383			return (1);
384		if (a->addr.addr32[3] < b->addr.addr32[3])
385			return (-1);
386		if (a->addr.addr32[2] > b->addr.addr32[2])
387			return (1);
388		if (a->addr.addr32[2] < b->addr.addr32[2])
389			return (-1);
390		if (a->addr.addr32[1] > b->addr.addr32[1])
391			return (1);
392		if (a->addr.addr32[1] < b->addr.addr32[1])
393			return (-1);
394		if (a->addr.addr32[0] > b->addr.addr32[0])
395			return (1);
396		if (a->addr.addr32[0] < b->addr.addr32[0])
397			return (-1);
398		break;
399#endif /* INET6 */
400	}
401	return (0);
402}
403
404static __inline int
405pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
406{
407	int	diff;
408
409	if ((diff = a->proto - b->proto) != 0)
410		return (diff);
411	if ((diff = a->af - b->af) != 0)
412		return (diff);
413	switch (a->af) {
414#ifdef INET
415	case AF_INET:
416		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
417			return (1);
418		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
419			return (-1);
420		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
421			return (1);
422		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
423			return (-1);
424		break;
425#endif /* INET */
426#ifdef INET6
427	case AF_INET6:
428		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
429			return (1);
430		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
431			return (-1);
432		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
433			return (1);
434		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
435			return (-1);
436		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
437			return (1);
438		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
439			return (-1);
440		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
441			return (1);
442		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
443			return (-1);
444		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
445			return (1);
446		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
447			return (-1);
448		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
449			return (1);
450		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
451			return (-1);
452		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
453			return (1);
454		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
455			return (-1);
456		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
457			return (1);
458		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
459			return (-1);
460		break;
461#endif /* INET6 */
462	}
463
464	if ((diff = a->lan.port - b->lan.port) != 0)
465		return (diff);
466	if ((diff = a->ext.port - b->ext.port) != 0)
467		return (diff);
468
469	return (0);
470}
471
472static __inline int
473pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
474{
475	int	diff;
476
477	if ((diff = a->proto - b->proto) != 0)
478		return (diff);
479	if ((diff = a->af - b->af) != 0)
480		return (diff);
481	switch (a->af) {
482#ifdef INET
483	case AF_INET:
484		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
485			return (1);
486		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
487			return (-1);
488		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
489			return (1);
490		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
491			return (-1);
492		break;
493#endif /* INET */
494#ifdef INET6
495	case AF_INET6:
496		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
497			return (1);
498		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
499			return (-1);
500		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
501			return (1);
502		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
503			return (-1);
504		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
505			return (1);
506		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
507			return (-1);
508		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
509			return (1);
510		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
511			return (-1);
512		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
513			return (1);
514		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
515			return (-1);
516		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
517			return (1);
518		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
519			return (-1);
520		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
521			return (1);
522		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
523			return (-1);
524		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
525			return (1);
526		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
527			return (-1);
528		break;
529#endif /* INET6 */
530	}
531
532	if ((diff = a->ext.port - b->ext.port) != 0)
533		return (diff);
534	if ((diff = a->gwy.port - b->gwy.port) != 0)
535		return (diff);
536
537	return (0);
538}
539
540static __inline int
541pf_state_compare_id(struct pf_state *a, struct pf_state *b)
542{
543	if (a->id > b->id)
544		return (1);
545	if (a->id < b->id)
546		return (-1);
547	if (a->creatorid > b->creatorid)
548		return (1);
549	if (a->creatorid < b->creatorid)
550		return (-1);
551
552	return (0);
553}
554
555#ifdef INET6
556void
557pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af)
558{
559	switch (af) {
560#ifdef INET
561	case AF_INET:
562		dst->addr32[0] = src->addr32[0];
563		break;
564#endif /* INET */
565	case AF_INET6:
566		dst->addr32[0] = src->addr32[0];
567		dst->addr32[1] = src->addr32[1];
568		dst->addr32[2] = src->addr32[2];
569		dst->addr32[3] = src->addr32[3];
570		break;
571	}
572}
573#endif /* INET6 */
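/*
 * pf_addrcpy() is normally reached through the PF_ACPY() wrapper from
 * pfvar.h rather than called directly, and is only compiled when INET6
 * is configured; note that the AF_INET case copies just the first
 * 32-bit word of the address union.
 */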
574
575struct pf_state *
576pf_find_state_byid(struct pf_state_cmp *key)
577{
578	pf_status.fcounters[FCNT_STATE_SEARCH]++;
579
580	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
581}
582
583struct pf_state *
584pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int8_t tree)
585{
586	struct pf_state_key	*sk;
587	struct pf_state		*s;
588
589	pf_status.fcounters[FCNT_STATE_SEARCH]++;
590
591	switch (tree) {
592	case PF_LAN_EXT:
593		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
594		    (struct pf_state_key *)key);
595		break;
596	case PF_EXT_GWY:
597		sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
598		    (struct pf_state_key *)key);
599		break;
600	default:
601		panic("pf_find_state");
602	}
603
604	/* list is sorted, if-bound states before floating ones */
605	if (sk != NULL)
606		TAILQ_FOREACH(s, &sk->states, next)
607			if (s->kif == pfi_all || s->kif == kif)
608				return (s);
609
610	return (NULL);
611}
612
613struct pf_state *
614pf_find_state_all(struct pf_state_key_cmp *key, u_int8_t tree, int *more)
615{
616	struct pf_state_key	*sk;
617	struct pf_state		*s, *ret = NULL;
618
619	pf_status.fcounters[FCNT_STATE_SEARCH]++;
620
621	switch (tree) {
622	case PF_LAN_EXT:
623		sk = RB_FIND(pf_state_tree_lan_ext,
624		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
625		break;
626	case PF_EXT_GWY:
627		sk = RB_FIND(pf_state_tree_ext_gwy,
628		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
629		break;
630	default:
631		panic("pf_find_state_all");
632	}
633
634	if (sk != NULL) {
635		ret = TAILQ_FIRST(&sk->states);
636		if (more == NULL)
637			return (ret);
638
639		TAILQ_FOREACH(s, &sk->states, next)
640			(*more)++;
641	}
642
643	return (ret);
644}
645
646void
647pf_init_threshold(struct pf_threshold *threshold,
648    u_int32_t limit, u_int32_t seconds)
649{
650	threshold->limit = limit * PF_THRESHOLD_MULT;
651	threshold->seconds = seconds;
652	threshold->count = 0;
653	threshold->last = time_second;
654}
655
656void
657pf_add_threshold(struct pf_threshold *threshold)
658{
659	u_int32_t t = time_second, diff = t - threshold->last;
660
661	if (diff >= threshold->seconds)
662		threshold->count = 0;
663	else
664		threshold->count -= threshold->count * diff /
665		    threshold->seconds;
666	threshold->count += PF_THRESHOLD_MULT;
667	threshold->last = t;
668}
669
670int
671pf_check_threshold(struct pf_threshold *threshold)
672{
673	return (threshold->count > threshold->limit);
674}
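/*
 * The three helpers above implement the fixed-point rate limiter behind
 * "max-src-conn-rate".  The count is scaled by PF_THRESHOLD_MULT and
 * decays linearly across the configured window.  Worked example
 * (illustrative values): with limit 100 and seconds 10, the stored
 * limit is 100 * PF_THRESHOLD_MULT; each new connection adds
 * PF_THRESHOLD_MULT, and an update 5 seconds after the previous one
 * first halves the accumulated count (half the window has elapsed)
 * before adding.  pf_check_threshold() trips once the decayed count
 * exceeds the scaled limit.
 */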
675
676int
677pf_src_connlimit(struct pf_state **state)
678{
679	int bad = 0;
680
681	(*state)->src_node->conn++;
682	(*state)->src.tcp_est = 1;
683	pf_add_threshold(&(*state)->src_node->conn_rate);
684
685	if ((*state)->rule.ptr->max_src_conn &&
686	    (*state)->rule.ptr->max_src_conn <
687	    (*state)->src_node->conn) {
688		pf_status.lcounters[LCNT_SRCCONN]++;
689		bad++;
690	}
691
692	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
693	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
694		pf_status.lcounters[LCNT_SRCCONNRATE]++;
695		bad++;
696	}
697
698	if (!bad)
699		return (0);
700
701	if ((*state)->rule.ptr->overload_tbl) {
702		struct pfr_addr p;
703		u_int32_t	killed = 0;
704
705		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
706		if (pf_status.debug >= PF_DEBUG_MISC) {
707			printf("pf_src_connlimit: blocking address ");
708			pf_print_host(&(*state)->src_node->addr, 0,
709			    (*state)->state_key->af);
710		}
711
712		bzero(&p, sizeof(p));
713		p.pfra_af = (*state)->state_key->af;
714		switch ((*state)->state_key->af) {
715#ifdef INET
716		case AF_INET:
717			p.pfra_net = 32;
718			p.pfra_ip4addr = (*state)->src_node->addr.v4;
719			break;
720#endif /* INET */
721#ifdef INET6
722		case AF_INET6:
723			p.pfra_net = 128;
724			p.pfra_ip6addr = (*state)->src_node->addr.v6;
725			break;
726#endif /* INET6 */
727		}
728
729		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
730		    &p, time_second);
731
		/* Kill existing states if the rule requests a flush. */
733		if ((*state)->rule.ptr->flush) {
734			struct pf_state_key *sk;
735			struct pf_state *st;
736
737			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
738			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
739				sk = st->state_key;
740				/*
741				 * Kill states from this source.  (Only those
742				 * from the same rule if PF_FLUSH_GLOBAL is not
743				 * set)
744				 */
745				if (sk->af ==
746				    (*state)->state_key->af &&
747				    (((*state)->state_key->direction ==
748				        PF_OUT &&
749				    PF_AEQ(&(*state)->src_node->addr,
750				        &sk->lan.addr, sk->af)) ||
751				    ((*state)->state_key->direction == PF_IN &&
752				    PF_AEQ(&(*state)->src_node->addr,
753				        &sk->ext.addr, sk->af))) &&
754				    ((*state)->rule.ptr->flush &
755				    PF_FLUSH_GLOBAL ||
756				    (*state)->rule.ptr == st->rule.ptr)) {
757					st->timeout = PFTM_PURGE;
758					st->src.state = st->dst.state =
759					    TCPS_CLOSED;
760					killed++;
761				}
762			}
763			if (pf_status.debug >= PF_DEBUG_MISC)
764				printf(", %u states killed", killed);
765		}
766		if (pf_status.debug >= PF_DEBUG_MISC)
767			printf("\n");
768	}
769
770	/* kill this state */
771	(*state)->timeout = PFTM_PURGE;
772	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
773	return (1);
774}
775
776int
777pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
778    struct pf_addr *src, sa_family_t af)
779{
780	struct pf_src_node	k;
781
782	if (*sn == NULL) {
783		k.af = af;
784		PF_ACPY(&k.addr, src, af);
785		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
786		    rule->rpool.opts & PF_POOL_STICKYADDR)
787			k.rule.ptr = rule;
788		else
789			k.rule.ptr = NULL;
790		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
791		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
792	}
793	if (*sn == NULL) {
794		if (!rule->max_src_nodes ||
795		    rule->src_nodes < rule->max_src_nodes)
796			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
797		else
798			pf_status.lcounters[LCNT_SRCNODES]++;
799		if ((*sn) == NULL)
800			return (-1);
801		bzero(*sn, sizeof(struct pf_src_node));
802
803		pf_init_threshold(&(*sn)->conn_rate,
804		    rule->max_src_conn_rate.limit,
805		    rule->max_src_conn_rate.seconds);
806
807		(*sn)->af = af;
808		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
809		    rule->rpool.opts & PF_POOL_STICKYADDR)
810			(*sn)->rule.ptr = rule;
811		else
812			(*sn)->rule.ptr = NULL;
813		PF_ACPY(&(*sn)->addr, src, af);
814		if (RB_INSERT(pf_src_tree,
815		    &tree_src_tracking, *sn) != NULL) {
816			if (pf_status.debug >= PF_DEBUG_MISC) {
817				printf("pf: src_tree insert failed: ");
818				pf_print_host(&(*sn)->addr, 0, af);
819				printf("\n");
820			}
821			pool_put(&pf_src_tree_pl, *sn);
822			return (-1);
823		}
824		(*sn)->creation = time_second;
825		(*sn)->ruletype = rule->action;
826		if ((*sn)->rule.ptr != NULL)
827			(*sn)->rule.ptr->src_nodes++;
828		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
829		pf_status.src_nodes++;
830	} else {
831		if (rule->max_src_states &&
832		    (*sn)->states >= rule->max_src_states) {
833			pf_status.lcounters[LCNT_SRCSTATES]++;
834			return (-1);
835		}
836	}
837	return (0);
838}
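/*
 * Source nodes are keyed on (af, addr) and, only when the rule does its
 * own tracking (PFRULE_RULESRCTRACK or a sticky-address pool), on the
 * rule pointer as well, so rules without their own tracking share one
 * global node per address.  -1 is returned when the node pool, the
 * max-src-nodes limit or the max-src-states limit prevents tracking
 * another state.
 */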
839
840void
841pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
842{
843	struct pf_state_key	*sk = s->state_key;
844
845	if (pf_status.debug >= PF_DEBUG_MISC) {
846		printf("pf: state insert failed: %s %s", tree, kif->pfik_name);
847		printf(" lan: ");
848		pf_print_host(&sk->lan.addr, sk->lan.port,
849		    sk->af);
850		printf(" gwy: ");
851		pf_print_host(&sk->gwy.addr, sk->gwy.port,
852		    sk->af);
853		printf(" ext: ");
854		pf_print_host(&sk->ext.addr, sk->ext.port,
855		    sk->af);
856		if (s->sync_flags & PFSTATE_FROMSYNC)
857			printf(" (from sync)");
858		printf("\n");
859	}
860}
861
862int
863pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
864{
865	struct pf_state_key	*cur;
866	struct pf_state		*sp;
867
868	KASSERT(s->state_key != NULL);
869	s->kif = kif;
870
871	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
872	    s->state_key)) != NULL) {
		/* Key exists: fail on a kif collision, otherwise attach to the existing key. */
874		TAILQ_FOREACH(sp, &cur->states, next)
875			if (sp->kif == kif) {	/* collision! */
876				pf_stateins_err("tree_lan_ext", s, kif);
877				pf_detach_state(s,
878				    PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
879				return (-1);
880			}
881		pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
882		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
883	}
884
885	/* if cur != NULL, we already found a state key and attached to it */
886	if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
887	    &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
888		/* must not happen. we must have found the sk above! */
889		pf_stateins_err("tree_ext_gwy", s, kif);
890		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
891		return (-1);
892	}
893
894	if (s->id == 0 && s->creatorid == 0) {
895		s->id = htobe64(pf_status.stateid++);
896		s->creatorid = pf_status.hostid;
897	}
898	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
899		if (pf_status.debug >= PF_DEBUG_MISC) {
900#ifdef __NetBSD__
901			printf("pf: state insert failed: "
902			    "id: %016" PRIx64 " creatorid: %08x",
903			    be64toh(s->id), ntohl(s->creatorid));
904#else
905			printf("pf: state insert failed: "
906			    "id: %016llx creatorid: %08x",
907			    betoh64(s->id), ntohl(s->creatorid));
908#endif /* !__NetBSD__ */
909			if (s->sync_flags & PFSTATE_FROMSYNC)
910				printf(" (from sync)");
911			printf("\n");
912		}
913		pf_detach_state(s, 0);
914		return (-1);
915	}
916	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
917	pf_status.fcounters[FCNT_STATE_INSERT]++;
918	pf_status.states++;
919	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
920#if NPFSYNC
921	pfsync_insert_state(s);
922#endif
923	return (0);
924}
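/*
 * A state is reachable through up to three structures: the lan_ext and
 * ext_gwy trees index the shared pf_state_key (one key may carry
 * several states bound to different interfaces, if-bound ones first),
 * and the id tree is keyed on (id, creatorid), which is how pfsync
 * peers identify states.  state_list is the insertion-ordered queue
 * that the purge thread walks.
 */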
925
926#ifdef _LKM
927volatile int pf_purge_thread_stop;
928volatile int pf_purge_thread_running;
929#endif
930
931void
932pf_purge_thread(void *v)
933{
934	int nloops = 0, s;
935
936#ifdef _LKM
937	pf_purge_thread_running = 1;
938	pf_purge_thread_stop = 0;
939
940	while (!pf_purge_thread_stop) {
941#else
942	for (;;) {
943#endif /* !_LKM */
944		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
945
946		s = splsoftnet();
947
948		/* process a fraction of the state table every second */
949		if (! pf_state_lock)
			pf_purge_expired_states(1 + (pf_status.states /
			    pf_default_rule.timeout[PFTM_INTERVAL]));
952
953		/* purge other expired types every PFTM_INTERVAL seconds */
954		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
955			pf_purge_expired_fragments();
956			pf_purge_expired_src_nodes(0);
957			nloops = 0;
958		}
959
960		splx(s);
961	}
962
963#ifdef _LKM
964	pf_purge_thread_running = 0;
965	wakeup(&pf_purge_thread_running);
966	kthread_exit(0);
967#endif /* _LKM */
968}
969
970u_int32_t
971pf_state_expires(const struct pf_state *state)
972{
973	u_int32_t	timeout;
974	u_int32_t	start;
975	u_int32_t	end;
976	u_int32_t	states;
977
978	/* handle all PFTM_* > PFTM_MAX here */
979	if (state->timeout == PFTM_PURGE)
980		return (time_second);
981	if (state->timeout == PFTM_UNTIL_PACKET)
982		return (0);
983	KASSERT(state->timeout != PFTM_UNLINKED);
984	KASSERT(state->timeout < PFTM_MAX);
985	timeout = state->rule.ptr->timeout[state->timeout];
986	if (!timeout)
987		timeout = pf_default_rule.timeout[state->timeout];
988	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
989	if (start) {
990		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
991		states = state->rule.ptr->states;
992	} else {
993		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
994		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
995		states = pf_status.states;
996	}
997	if (end && states > start && start < end) {
998		if (states < end)
999			return (state->expire + timeout * (end - states) /
1000			    (end - start));
1001		else
1002			return (time_second);
1003	}
1004	return (state->expire + timeout);
1005}
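/*
 * Adaptive timeout scaling, with made-up numbers: given adaptive.start
 * 6000, adaptive.end 12000, 9000 installed states and a base timeout of
 * 86400 seconds, the code above yields
 * expire + 86400 * (12000 - 9000) / (12000 - 6000) = expire + 43200,
 * i.e. timeouts shrink linearly as the state count climbs from
 * adaptive.start towards adaptive.end, and states at or beyond
 * adaptive.end expire immediately.
 */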
1006
1007void
1008pf_purge_expired_src_nodes(int waslocked)
1009{
1010	 struct pf_src_node		*cur, *next;
1011	 int				 locked = waslocked;
1012
1013	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1014		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1015
1016		 if (cur->states <= 0 && cur->expire <= time_second) {
1017			 if (! locked) {
1018				 rw_enter_write(&pf_consistency_lock);
1019			 	 next = RB_NEXT(pf_src_tree,
1020				     &tree_src_tracking, cur);
1021				 locked = 1;
1022			 }
1023			 if (cur->rule.ptr != NULL) {
1024				 cur->rule.ptr->src_nodes--;
1025				 if (cur->rule.ptr->states <= 0 &&
1026				     cur->rule.ptr->max_src_nodes <= 0)
1027					 pf_rm_rule(NULL, cur->rule.ptr);
1028			 }
1029			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1030			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1031			 pf_status.src_nodes--;
1032			 pool_put(&pf_src_tree_pl, cur);
1033		 }
1034	 }
1035
1036	 if (locked && !waslocked)
1037		rw_exit_write(&pf_consistency_lock);
1038}
1039
1040void
1041pf_src_tree_remove_state(struct pf_state *s)
1042{
1043	u_int32_t timeout;
1044
1045	if (s->src_node != NULL) {
1046		if (s->src.tcp_est)
1047			--s->src_node->conn;
1048		if (--s->src_node->states <= 0) {
1049			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1050			if (!timeout)
1051				timeout =
1052				    pf_default_rule.timeout[PFTM_SRC_NODE];
1053			s->src_node->expire = time_second + timeout;
1054		}
1055	}
1056	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1057		if (--s->nat_src_node->states <= 0) {
1058			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1059			if (!timeout)
1060				timeout =
1061				    pf_default_rule.timeout[PFTM_SRC_NODE];
1062			s->nat_src_node->expire = time_second + timeout;
1063		}
1064	}
1065	s->src_node = s->nat_src_node = NULL;
1066}
1067
1068/* callers should be at splsoftnet */
1069void
1070pf_unlink_state(struct pf_state *cur)
1071{
1072	if (cur->src.state == PF_TCPS_PROXY_DST) {
1073		pf_send_tcp(cur->rule.ptr, cur->state_key->af,
1074		    &cur->state_key->ext.addr, &cur->state_key->lan.addr,
1075		    cur->state_key->ext.port, cur->state_key->lan.port,
1076		    cur->src.seqhi, cur->src.seqlo + 1,
1077		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1078	}
1079	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1080#if NPFSYNC
1081	if (cur->creatorid == pf_status.hostid)
1082		pfsync_delete_state(cur);
1083#endif
1084	cur->timeout = PFTM_UNLINKED;
1085	pf_src_tree_remove_state(cur);
1086	pf_detach_state(cur, 0);
1087}
1088
1089/* callers should be at splsoftnet and hold the
1090 * write_lock on pf_consistency_lock */
1091void
1092pf_free_state(struct pf_state *cur)
1093{
1094#if NPFSYNC
1095	if (pfsyncif != NULL &&
1096	    (pfsyncif->sc_bulk_send_next == cur ||
1097	    pfsyncif->sc_bulk_terminator == cur))
1098		return;
1099#endif
1100	KASSERT(cur->timeout == PFTM_UNLINKED);
1101	if (--cur->rule.ptr->states <= 0 &&
1102	    cur->rule.ptr->src_nodes <= 0)
1103		pf_rm_rule(NULL, cur->rule.ptr);
1104	if (cur->nat_rule.ptr != NULL)
1105		if (--cur->nat_rule.ptr->states <= 0 &&
1106			cur->nat_rule.ptr->src_nodes <= 0)
1107			pf_rm_rule(NULL, cur->nat_rule.ptr);
1108	if (cur->anchor.ptr != NULL)
1109		if (--cur->anchor.ptr->states <= 0)
1110			pf_rm_rule(NULL, cur->anchor.ptr);
1111	pf_normalize_tcp_cleanup(cur);
1112	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1113	TAILQ_REMOVE(&state_list, cur, entry_list);
1114	if (cur->tag)
1115		pf_tag_unref(cur->tag);
1116	pool_put(&pf_state_pl, cur);
1117	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1118	pf_status.states--;
1119}
1120
1121void
1122pf_purge_expired_states(u_int32_t maxcheck)
1123{
1124	static struct pf_state	*cur = NULL;
1125	struct pf_state		*next;
1126	int 			 locked = 0;
1127
1128	while (maxcheck--) {
1129		/* wrap to start of list when we hit the end */
1130		if (cur == NULL) {
1131			cur = TAILQ_FIRST(&state_list);
1132			if (cur == NULL)
1133				break;	/* list empty */
1134		}
1135
1136		/* get next state, as cur may get deleted */
1137		next = TAILQ_NEXT(cur, entry_list);
1138
1139		if (cur->timeout == PFTM_UNLINKED) {
1140			/* free unlinked state */
1141			if (! locked) {
1142				rw_enter_write(&pf_consistency_lock);
1143				locked = 1;
1144			}
1145			pf_free_state(cur);
1146		} else if (pf_state_expires(cur) <= time_second) {
1147			/* unlink and free expired state */
1148			pf_unlink_state(cur);
1149			if (! locked) {
1150				rw_enter_write(&pf_consistency_lock);
1151				locked = 1;
1152			}
1153			pf_free_state(cur);
1154		}
1155		cur = next;
1156	}
1157
1158	if (locked)
1159		rw_exit_write(&pf_consistency_lock);
1160}
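/*
 * Purging is two-phased: pf_unlink_state() detaches a state from the
 * lookup trees at splsoftnet, and pf_free_state() releases it later
 * while holding the write lock on pf_consistency_lock.  The static
 * "cur" cursor above lets successive calls resume the walk where the
 * previous one stopped, so each tick only scans a fraction of the
 * state list.
 */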
1161
1162int
1163pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1164{
1165	if (aw->type != PF_ADDR_TABLE)
1166		return (0);
1167	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1168		return (1);
1169	return (0);
1170}
1171
1172void
1173pf_tbladdr_remove(struct pf_addr_wrap *aw)
1174{
1175	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1176		return;
1177	pfr_detach_table(aw->p.tbl);
1178	aw->p.tbl = NULL;
1179}
1180
1181void
1182pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1183{
1184	struct pfr_ktable *kt = aw->p.tbl;
1185
1186	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1187		return;
1188	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1189		kt = kt->pfrkt_root;
1190	aw->p.tbl = NULL;
1191	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1192		kt->pfrkt_cnt : -1;
1193}
1194
1195void
1196pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1197{
1198	switch (af) {
1199#ifdef INET
1200	case AF_INET: {
1201		u_int32_t a = ntohl(addr->addr32[0]);
1202		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1203		    (a>>8)&255, a&255);
1204		if (p) {
1205			p = ntohs(p);
1206			printf(":%u", p);
1207		}
1208		break;
1209	}
1210#endif /* INET */
1211#ifdef INET6
1212	case AF_INET6: {
1213		u_int16_t b;
1214		u_int8_t i, curstart = 255, curend = 0,
1215		    maxstart = 0, maxend = 0;
1216		for (i = 0; i < 8; i++) {
1217			if (!addr->addr16[i]) {
1218				if (curstart == 255)
1219					curstart = i;
1220				else
1221					curend = i;
1222			} else {
1223				if (curstart) {
1224					if ((curend - curstart) >
1225					    (maxend - maxstart)) {
1226						maxstart = curstart;
1227						maxend = curend;
1228						curstart = 255;
1229					}
1230				}
1231			}
1232		}
1233		for (i = 0; i < 8; i++) {
1234			if (i >= maxstart && i <= maxend) {
1235				if (maxend != 7) {
1236					if (i == maxstart)
1237						printf(":");
1238				} else {
1239					if (i == maxend)
1240						printf(":");
1241				}
1242			} else {
1243				b = ntohs(addr->addr16[i]);
1244				printf("%x", b);
1245				if (i < 7)
1246					printf(":");
1247			}
1248		}
1249		if (p) {
1250			p = ntohs(p);
1251			printf("[%u]", p);
1252		}
1253		break;
1254	}
1255#endif /* INET6 */
1256	}
1257}
1258
1259void
1260pf_print_state(struct pf_state *s)
1261{
1262	struct pf_state_key *sk = s->state_key;
1263	switch (sk->proto) {
1264	case IPPROTO_TCP:
1265		printf("TCP ");
1266		break;
1267	case IPPROTO_UDP:
1268		printf("UDP ");
1269		break;
1270	case IPPROTO_ICMP:
1271		printf("ICMP ");
1272		break;
1273	case IPPROTO_ICMPV6:
1274		printf("ICMPV6 ");
1275		break;
1276	default:
1277		printf("%u ", sk->proto);
1278		break;
1279	}
1280	pf_print_host(&sk->lan.addr, sk->lan.port, sk->af);
1281	printf(" ");
1282	pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af);
1283	printf(" ");
1284	pf_print_host(&sk->ext.addr, sk->ext.port, sk->af);
1285	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1286	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1287	if (s->src.wscale && s->dst.wscale)
1288		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1289	printf("]");
1290	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1291	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1292	if (s->src.wscale && s->dst.wscale)
1293		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1294	printf("]");
1295	printf(" %u:%u", s->src.state, s->dst.state);
1296}
1297
1298void
1299pf_print_flags(u_int8_t f)
1300{
1301	if (f)
1302		printf(" ");
1303	if (f & TH_FIN)
1304		printf("F");
1305	if (f & TH_SYN)
1306		printf("S");
1307	if (f & TH_RST)
1308		printf("R");
1309	if (f & TH_PUSH)
1310		printf("P");
1311	if (f & TH_ACK)
1312		printf("A");
1313	if (f & TH_URG)
1314		printf("U");
1315	if (f & TH_ECE)
1316		printf("E");
1317	if (f & TH_CWR)
1318		printf("W");
1319}
1320
1321#define	PF_SET_SKIP_STEPS(i)					\
1322	do {							\
1323		while (head[i] != cur) {			\
1324			head[i]->skip[i].ptr = cur;		\
1325			head[i] = TAILQ_NEXT(head[i], entries);	\
1326		}						\
1327	} while (0)
1328
1329void
1330pf_calc_skip_steps(struct pf_rulequeue *rules)
1331{
1332	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1333	int i;
1334
1335	cur = TAILQ_FIRST(rules);
1336	prev = cur;
1337	for (i = 0; i < PF_SKIP_COUNT; ++i)
1338		head[i] = cur;
1339	while (cur != NULL) {
1340
1341		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1342			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1343		if (cur->direction != prev->direction)
1344			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1345		if (cur->af != prev->af)
1346			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1347		if (cur->proto != prev->proto)
1348			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1349		if (cur->src.neg != prev->src.neg ||
1350		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1351			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1352		if (cur->src.port[0] != prev->src.port[0] ||
1353		    cur->src.port[1] != prev->src.port[1] ||
1354		    cur->src.port_op != prev->src.port_op)
1355			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1356		if (cur->dst.neg != prev->dst.neg ||
1357		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1358			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1359		if (cur->dst.port[0] != prev->dst.port[0] ||
1360		    cur->dst.port[1] != prev->dst.port[1] ||
1361		    cur->dst.port_op != prev->dst.port_op)
1362			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1363
1364		prev = cur;
1365		cur = TAILQ_NEXT(cur, entries);
1366	}
1367	for (i = 0; i < PF_SKIP_COUNT; ++i)
1368		PF_SET_SKIP_STEPS(i);
1369}
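/*
 * Skip steps are an evaluation shortcut: for each criterion (interface,
 * direction, address family, protocol, source/destination address and
 * port) every rule points at the next rule with a different value, so
 * the ruleset walker can jump over whole runs of rules that cannot
 * match.  Illustrative example: if rules 1-50 all say "on fxp0" and a
 * packet arrives on another interface, the PF_SKIP_IFP pointer of rule
 * 1 sends the evaluation straight to rule 51.
 */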
1370
1371int
1372pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1373{
1374	if (aw1->type != aw2->type)
1375		return (1);
1376	switch (aw1->type) {
1377	case PF_ADDR_ADDRMASK:
1378		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1379			return (1);
1380		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1381			return (1);
1382		return (0);
1383	case PF_ADDR_DYNIFTL:
1384		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1385	case PF_ADDR_NOROUTE:
1386	case PF_ADDR_URPFFAILED:
1387		return (0);
1388	case PF_ADDR_TABLE:
1389		return (aw1->p.tbl != aw2->p.tbl);
1390	case PF_ADDR_RTLABEL:
1391		return (aw1->v.rtlabel != aw2->v.rtlabel);
1392	default:
1393		printf("invalid address type: %d\n", aw1->type);
1394		return (1);
1395	}
1396}
1397
1398u_int16_t
1399pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
1400{
1401	u_int32_t	l;
1402
1403	if (udp && !cksum)
1404		return (0x0000);
1405	l = cksum + old - new;
1406	l = (l >> 16) + (l & 65535);
1407	l = l & 65535;
1408	if (udp && !l)
1409		return (0xFFFF);
1410	return (l);
1411}
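/*
 * pf_cksum_fixup() patches an Internet checksum incrementally when a
 * single 16-bit word changes from "old" to "new" (in the spirit of
 * RFC 1141/1624), avoiding a full recomputation.  The "udp" flag
 * preserves UDP's special encoding: a checksum of 0 means "no checksum"
 * and is left untouched, and a result that folds to 0 is returned as
 * 0xFFFF instead.
 */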
1412
1413void
1414pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1415    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1416{
1417	struct pf_addr	ao;
1418	u_int16_t	po = *p;
1419
1420	PF_ACPY(&ao, a, af);
1421	PF_ACPY(a, an, af);
1422
1423	*p = pn;
1424
1425	switch (af) {
1426#ifdef INET
1427	case AF_INET:
1428		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1429		    ao.addr16[0], an->addr16[0], 0),
1430		    ao.addr16[1], an->addr16[1], 0);
1431		*p = pn;
1432		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1433		    ao.addr16[0], an->addr16[0], u),
1434		    ao.addr16[1], an->addr16[1], u),
1435		    po, pn, u);
1436		break;
1437#endif /* INET */
1438#ifdef INET6
1439	case AF_INET6:
1440		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1441		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1442		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1443		    ao.addr16[0], an->addr16[0], u),
1444		    ao.addr16[1], an->addr16[1], u),
1445		    ao.addr16[2], an->addr16[2], u),
1446		    ao.addr16[3], an->addr16[3], u),
1447		    ao.addr16[4], an->addr16[4], u),
1448		    ao.addr16[5], an->addr16[5], u),
1449		    ao.addr16[6], an->addr16[6], u),
1450		    ao.addr16[7], an->addr16[7], u),
1451		    po, pn, u);
1452		break;
1453#endif /* INET6 */
1454	}
1455}
1456
1457
/* Change a u_int32_t.  Takes a void * so there are no alignment restrictions. */
1459void
1460pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
1461{
1462	u_int32_t	ao;
1463
1464	memcpy(&ao, a, sizeof(ao));
1465	memcpy(a, &an, sizeof(u_int32_t));
1466	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
1467	    ao % 65536, an % 65536, u);
1468}
1469
1470#ifdef INET6
1471void
1472pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1473{
1474	struct pf_addr	ao;
1475
1476	PF_ACPY(&ao, a, AF_INET6);
1477	PF_ACPY(a, an, AF_INET6);
1478
1479	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1480	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1481	    pf_cksum_fixup(pf_cksum_fixup(*c,
1482	    ao.addr16[0], an->addr16[0], u),
1483	    ao.addr16[1], an->addr16[1], u),
1484	    ao.addr16[2], an->addr16[2], u),
1485	    ao.addr16[3], an->addr16[3], u),
1486	    ao.addr16[4], an->addr16[4], u),
1487	    ao.addr16[5], an->addr16[5], u),
1488	    ao.addr16[6], an->addr16[6], u),
1489	    ao.addr16[7], an->addr16[7], u);
1490}
1491#endif /* INET6 */
1492
1493void
1494pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1495    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1496    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1497{
1498	struct pf_addr	oia, ooa;
1499
1500	PF_ACPY(&oia, ia, af);
1501	PF_ACPY(&ooa, oa, af);
1502
1503	/* Change inner protocol port, fix inner protocol checksum. */
1504	if (ip != NULL) {
1505		u_int16_t	oip = *ip;
1506		u_int32_t	opc = 0;
1507
1508		if (pc != NULL)
1509			opc = *pc;
1510		*ip = np;
1511		if (pc != NULL)
1512			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1513		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1514		if (pc != NULL)
1515			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1516	}
1517	/* Change inner ip address, fix inner ip and icmp checksums. */
1518	PF_ACPY(ia, na, af);
1519	switch (af) {
1520#ifdef INET
1521	case AF_INET: {
1522		u_int32_t	 oh2c = *h2c;
1523
1524		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1525		    oia.addr16[0], ia->addr16[0], 0),
1526		    oia.addr16[1], ia->addr16[1], 0);
1527		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1528		    oia.addr16[0], ia->addr16[0], 0),
1529		    oia.addr16[1], ia->addr16[1], 0);
1530		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1531		break;
1532	}
1533#endif /* INET */
1534#ifdef INET6
1535	case AF_INET6:
1536		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1537		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1538		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1539		    oia.addr16[0], ia->addr16[0], u),
1540		    oia.addr16[1], ia->addr16[1], u),
1541		    oia.addr16[2], ia->addr16[2], u),
1542		    oia.addr16[3], ia->addr16[3], u),
1543		    oia.addr16[4], ia->addr16[4], u),
1544		    oia.addr16[5], ia->addr16[5], u),
1545		    oia.addr16[6], ia->addr16[6], u),
1546		    oia.addr16[7], ia->addr16[7], u);
1547		break;
1548#endif /* INET6 */
1549	}
1550	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1551	PF_ACPY(oa, na, af);
1552	switch (af) {
1553#ifdef INET
1554	case AF_INET:
1555		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1556		    ooa.addr16[0], oa->addr16[0], 0),
1557		    ooa.addr16[1], oa->addr16[1], 0);
1558		break;
1559#endif /* INET */
1560#ifdef INET6
1561	case AF_INET6:
1562		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1563		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1564		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1565		    ooa.addr16[0], oa->addr16[0], u),
1566		    ooa.addr16[1], oa->addr16[1], u),
1567		    ooa.addr16[2], oa->addr16[2], u),
1568		    ooa.addr16[3], oa->addr16[3], u),
1569		    ooa.addr16[4], oa->addr16[4], u),
1570		    ooa.addr16[5], oa->addr16[5], u),
1571		    ooa.addr16[6], oa->addr16[6], u),
1572		    ooa.addr16[7], oa->addr16[7], u);
1573		break;
1574#endif /* INET6 */
1575	}
1576}
1577
1578
1579/*
1580 * Need to modulate the sequence numbers in the TCP SACK option
1581 * (credits to Krzysztof Pfaff for report and patch)
1582 */
1583int
1584pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
1585    struct tcphdr *th, struct pf_state_peer *dst)
1586{
1587	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
1588	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
1589	int copyback = 0, i, olen;
1590	struct sackblk sack;
1591
1592#ifdef __NetBSD__
1593#define	TCPOLEN_SACK (2 * sizeof(uint32_t))
1594#endif
1595
1596#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
1597	if (hlen < TCPOLEN_SACKLEN ||
1598	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return (0);
1600
1601	while (hlen >= TCPOLEN_SACKLEN) {
1602		olen = opt[1];
1603		switch (*opt) {
1604		case TCPOPT_EOL:	/* FALLTHROUGH */
1605		case TCPOPT_NOP:
1606			opt++;
1607			hlen--;
1608			break;
1609		case TCPOPT_SACK:
1610			if (olen > hlen)
1611				olen = hlen;
1612			if (olen >= TCPOLEN_SACKLEN) {
1613				for (i = 2; i + TCPOLEN_SACK <= olen;
1614				    i += TCPOLEN_SACK) {
1615					memcpy(&sack, &opt[i], sizeof(sack));
1616#ifdef __NetBSD__
1617#define	SACK_START	sack.left
1618#define	SACK_END	sack.right
1619#else
1620#define	SACK_START	sack.start
1621#define	SACK_END	sack.end
1622#endif
1623					pf_change_a(&SACK_START, &th->th_sum,
1624					    htonl(ntohl(SACK_START) -
1625					    dst->seqdiff), 0);
1626					pf_change_a(&SACK_END, &th->th_sum,
1627					    htonl(ntohl(SACK_END) -
1628					    dst->seqdiff), 0);
1629#undef SACK_START
1630#undef SACK_END
1631					memcpy(&opt[i], &sack, sizeof(sack));
1632				}
1633				copyback = 1;
1634			}
1635			/* FALLTHROUGH */
1636		default:
1637			if (olen < 2)
1638				olen = 2;
1639			hlen -= olen;
1640			opt += olen;
1641		}
1642	}
1643
1644	if (copyback)
1645		m_copyback(m, off + sizeof(*th), thoptlen, opts);
1646	return (copyback);
1647}
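/*
 * Without this rewrite, a peer whose sequence numbers are being
 * modulated would receive SACK blocks that refer to the original
 * sequence space and ignore them.  The block edges are shifted by the
 * same seqdiff used for the regular sequence number modulation, and
 * th_sum is patched incrementally through pf_change_a().
 */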
1648
1649void
1650pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1651    const struct pf_addr *saddr, const struct pf_addr *daddr,
1652    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1653    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1654    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
1655{
1656	struct mbuf	*m;
1657	int		 len, tlen;
1658#ifdef INET
1659	struct ip	*h = NULL;
1660#endif /* INET */
1661#ifdef INET6
1662	struct ip6_hdr	*h6 = NULL;
1663#endif /* INET6 */
1664	struct tcphdr	*th;
1665	char		*opt;
1666#ifdef __NetBSD__
1667	struct pf_mtag	*pf_mtag;
1668#endif /* __NetBSD__ */
1669
1670	/* maximum segment size tcp option */
1671	tlen = sizeof(struct tcphdr);
1672	if (mss)
1673		tlen += 4;
1674
1675	switch (af) {
1676#ifdef INET
1677	case AF_INET:
1678		len = sizeof(struct ip) + tlen;
1679		break;
1680#endif /* INET */
1681#ifdef INET6
1682	case AF_INET6:
1683		len = sizeof(struct ip6_hdr) + tlen;
1684		break;
1685#endif /* INET6 */
1686	default:
1687		return;
1688	}
1689
1690	/* create outgoing mbuf */
1691	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1692	if (m == NULL)
1693		return;
1694#ifdef __NetBSD__
1695	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
1696		m_freem(m);
1697		return;
1698	}
1699	if (tag)
1700		pf_mtag->flags |= PF_TAG_GENERATED;
1701	pf_mtag->tag = rtag;
1702
1703	if (r != NULL && r->rtableid >= 0)
1704		pf_mtag->rtableid = r->rtableid;
1705#else
1706	if (tag)
1707		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1708	m->m_pkthdr.pf.tag = rtag;
1709
1710	if (r != NULL && r->rtableid >= 0)
		m->m_pkthdr.pf.rtableid = r->rtableid;
1712#endif /* !__NetBSD__ */
1713
1714#ifdef ALTQ
1715	if (r != NULL && r->qid) {
1716#ifdef __NetBSD__
1717		struct m_tag	*mtag;
1718		struct altq_tag	*atag;
1719
1720		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
1721		if (mtag != NULL) {
1722			atag = (struct altq_tag *)(mtag + 1);
1723			atag->qid = r->qid;
1724			/* add hints for ecn */
1725			atag->af = af;
1726			atag->hdr = mtod(m, struct ip *);
1727			m_tag_prepend(m, mtag);
1728		}
1729#else
1730		m->m_pkthdr.pf.qid = r->qid;
1731		/* add hints for ecn */
1732		m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
1733#endif /* !__NetBSD__ */
1734	}
1735#endif /* ALTQ */
1736	m->m_data += max_linkhdr;
1737	m->m_pkthdr.len = m->m_len = len;
1738	m->m_pkthdr.rcvif = NULL;
1739	bzero(m->m_data, len);
1740	switch (af) {
1741#ifdef INET
1742	case AF_INET:
1743		h = mtod(m, struct ip *);
1744
1745		/* IP header fields included in the TCP checksum */
1746		h->ip_p = IPPROTO_TCP;
1747		h->ip_len = htons(tlen);
1748		h->ip_src.s_addr = saddr->v4.s_addr;
1749		h->ip_dst.s_addr = daddr->v4.s_addr;
1750
1751		th = (struct tcphdr *)((char *)h + sizeof(struct ip));
1752		break;
1753#endif /* INET */
1754#ifdef INET6
1755	case AF_INET6:
1756		h6 = mtod(m, struct ip6_hdr *);
1757
1758		/* IP header fields included in the TCP checksum */
1759		h6->ip6_nxt = IPPROTO_TCP;
1760		h6->ip6_plen = htons(tlen);
1761		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1762		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1763
1764		th = (struct tcphdr *)((char *)h6 + sizeof(struct ip6_hdr));
1765		break;
1766#endif /* INET6 */
1767	default:
1768		m_freem(m);
1769		return;
1770	}
1771
1772	/* TCP header */
1773	th->th_sport = sport;
1774	th->th_dport = dport;
1775	th->th_seq = htonl(seq);
1776	th->th_ack = htonl(ack);
1777	th->th_off = tlen >> 2;
1778	th->th_flags = flags;
1779	th->th_win = htons(win);
1780
1781	if (mss) {
1782		opt = (char *)(th + 1);
1783		opt[0] = TCPOPT_MAXSEG;
1784		opt[1] = 4;
1785		HTONS(mss);
1786		bcopy((void *)&mss, (void *)(opt + 2), 2);
1787	}
1788
1789	switch (af) {
1790#ifdef INET
1791	case AF_INET:
1792		/* TCP checksum */
1793		th->th_sum = in_cksum(m, len);
1794
1795		/* Finish the IP header */
1796		h->ip_v = 4;
1797		h->ip_hl = sizeof(*h) >> 2;
1798		h->ip_tos = IPTOS_LOWDELAY;
1799		h->ip_len = htons(len);
1800		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1801		h->ip_ttl = ttl ? ttl : ip_defttl;
1802		h->ip_sum = 0;
1803		if (eh == NULL) {
1804			ip_output(m, (void *)NULL, (void *)NULL, 0,
1805			    (void *)NULL, (void *)NULL);
1806		} else {
1807#ifdef __NetBSD__
1808			/*
			 * On NetBSD, pf_test and pf_test6 are always called
1810			 * with eh == NULL.
1811			 */
1812			panic("pf_send_tcp: eh != NULL");
1813#else
1814			struct route		 ro;
1815			struct rtentry		 rt;
1816			struct ether_header	*e = (void *)ro.ro_dst.sa_data;
1817
1818			if (ifp == NULL) {
1819				m_freem(m);
1820				return;
1821			}
1822			rt.rt_ifp = ifp;
1823			ro.ro_rt = &rt;
1824			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1825			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1826			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1827			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1828			e->ether_type = eh->ether_type;
1829			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
1830			    (void *)NULL, (void *)NULL);
1831#endif /* !__NetBSD__ */
1832		}
1833		break;
1834#endif /* INET */
1835#ifdef INET6
1836	case AF_INET6:
1837		/* TCP checksum */
1838		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1839		    sizeof(struct ip6_hdr), tlen);
1840
1841		h6->ip6_vfc |= IPV6_VERSION;
1842		h6->ip6_hlim = IPV6_DEFHLIM;
1843
1844		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1845		break;
1846#endif /* INET6 */
1847	}
1848}
1849
1850void
1851pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1852    struct pf_rule *r)
1853{
1854	struct mbuf	*m0;
1855#ifdef __NetBSD__
1856	struct pf_mtag	*pf_mtag;
1857#endif /* __NetBSD__ */
1858
	if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
		return;

#ifdef __NetBSD__
	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
		m_freem(m0);
		return;
	}
	pf_mtag->flags |= PF_TAG_GENERATED;
1865
1866	if (r->rtableid >= 0)
1867		pf_mtag->rtableid = r->rtableid;
1868#else
1869	m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1870
1871	if (r->rtableid >= 0)
1872		m0->m_pkthdr.pf.rtableid = r->rtableid;
1873#endif /* !__NetBSD__ */
1874
1875#ifdef ALTQ
1876	if (r->qid) {
1877#ifdef __NetBSD__
1878		struct m_tag	*mtag;
1879		struct altq_tag	*atag;
1880
1881		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
1882		if (mtag != NULL) {
1883			atag = (struct altq_tag *)(mtag + 1);
1884			atag->qid = r->qid;
1885			/* add hints for ecn */
1886			atag->af = af;
1887			atag->hdr = mtod(m0, struct ip *);
1888			m_tag_prepend(m0, mtag);
1889		}
1890#else
1891		m0->m_pkthdr.pf.qid = r->qid;
1892		/* add hints for ecn */
1893		m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
1894#endif /* !__NetBSD__ */
1895	}
1896#endif /* ALTQ */
1897
1898	switch (af) {
1899#ifdef INET
1900	case AF_INET:
1901		icmp_error(m0, type, code, 0, 0);
1902		break;
1903#endif /* INET */
1904#ifdef INET6
1905	case AF_INET6:
1906		icmp6_error(m0, type, code, 0);
1907		break;
1908#endif /* INET6 */
1909	}
1910}
1911
1912/*
1913 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1914 * If n is 0, they match if they are equal. If n is != 0, they match if they
1915 * are different.
1916 */
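/*
 * Example (illustrative values): with n == 0, af == AF_INET, a mask of
 * 255.255.255.0 and addresses 192.0.2.10 and 192.0.2.99, both sides
 * mask to 192.0.2.0, so the function returns 1; the same inputs with
 * n != 0 return 0.
 */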
1917int
1918pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1919    struct pf_addr *b, sa_family_t af)
1920{
1921	int	match = 0;
1922
1923	switch (af) {
1924#ifdef INET
1925	case AF_INET:
1926		if ((a->addr32[0] & m->addr32[0]) ==
1927		    (b->addr32[0] & m->addr32[0]))
1928			match++;
1929		break;
1930#endif /* INET */
1931#ifdef INET6
1932	case AF_INET6:
1933		if (((a->addr32[0] & m->addr32[0]) ==
1934		     (b->addr32[0] & m->addr32[0])) &&
1935		    ((a->addr32[1] & m->addr32[1]) ==
1936		     (b->addr32[1] & m->addr32[1])) &&
1937		    ((a->addr32[2] & m->addr32[2]) ==
1938		     (b->addr32[2] & m->addr32[2])) &&
1939		    ((a->addr32[3] & m->addr32[3]) ==
1940		     (b->addr32[3] & m->addr32[3])))
1941			match++;
1942		break;
1943#endif /* INET6 */
1944	}
1945	if (match) {
1946		if (n)
1947			return (0);
1948		else
1949			return (1);
1950	} else {
1951		if (n)
1952			return (1);
1953		else
1954			return (0);
1955	}
1956}
1957
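/*
 * Evaluate a numeric comparison against a rule operator.  a1 and a2 are the
 * rule operands, p is the value taken from the packet.  PF_OP_RRG matches
 * the inclusive range [a1, a2], PF_OP_IRG the exclusive range (a1, a2),
 * PF_OP_XRG everything outside [a1, a2]; the remaining operators are plain
 * relational tests against a1.
 */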
1958int
1959pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1960{
1961	switch (op) {
1962	case PF_OP_IRG:
1963		return ((p > a1) && (p < a2));
1964	case PF_OP_XRG:
1965		return ((p < a1) || (p > a2));
1966	case PF_OP_RRG:
1967		return ((p >= a1) && (p <= a2));
1968	case PF_OP_EQ:
1969		return (p == a1);
1970	case PF_OP_NE:
1971		return (p != a1);
1972	case PF_OP_LT:
1973		return (p < a1);
1974	case PF_OP_LE:
1975		return (p <= a1);
1976	case PF_OP_GT:
1977		return (p > a1);
1978	case PF_OP_GE:
1979		return (p >= a1);
1980	}
1981	return (0); /* never reached */
1982}
1983
1984int
1985pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
1986{
1987	NTOHS(a1);
1988	NTOHS(a2);
1989	NTOHS(p);
1990	return (pf_match(op, a1, a2, p));
1991}
1992
1993int
1994pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1995{
1996	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1997		return (0);
1998	return (pf_match(op, a1, a2, u));
1999}
2000
2001int
2002pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2003{
2004	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2005		return (0);
2006	return (pf_match(op, a1, a2, g));
2007}
2008
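/*
 * Match the packet's tag against a rule's "tagged" criterion.  The tag is
 * fetched from the pf mbuf tag on the first call and cached in *tag for
 * subsequent rules; r->match_tag_not inverts the result.
 */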
2009int
2010pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
2011{
2012#ifdef __NetBSD__
2013	if (*tag == -1) {
2014		struct pf_mtag *pf_mtag = pf_get_mtag(m);
2015		if (pf_mtag == NULL)
2016			return (0);
2017
2018		*tag = pf_mtag->tag;
2019	}
2020#else
2021	if (*tag == -1)
2022		*tag = m->m_pkthdr.pf.tag;
2023#endif /* !__NetBSD__ */
2024
2025	return ((!r->match_tag_not && r->match_tag == *tag) ||
2026	    (r->match_tag_not && r->match_tag != *tag));
2027}
2028
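/*
 * Store a tag and/or routing table id in the packet's pf mbuf tag.
 * Returns 0 on success, nonzero if the mbuf tag could not be allocated.
 */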
2029int
2030pf_tag_packet(struct mbuf *m, int tag, int rtableid)
2031{
2032	if (tag <= 0 && rtableid < 0)
2033		return (0);
2034
2035#ifdef __NetBSD__
2036	if (tag > 0 || rtableid > 0) {
2037		struct pf_mtag *pf_mtag = pf_get_mtag(m);
2038		if (pf_mtag == NULL)
2039			return (1);
2040
2041		if (tag > 0)
2042			pf_mtag->tag = tag;
2043		if (rtableid > 0)
2044			pf_mtag->rtableid = rtableid;
2045	}
2046#else
2047	if (tag > 0)
2048		m->m_pkthdr.pf.tag = tag;
2049	if (rtableid >= 0)
2050		m->m_pkthdr.pf.rtableid = rtableid;
2051#endif /* !__NetBSD__ */
2052
2053	return (0);
2054}
2055
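/*
 * Descend into the anchor attached to rule *r: push the current ruleset and
 * rule onto pf_anchor_stack and continue evaluation with the first rule of
 * the anchor's ruleset (or, for wildcard anchors, of its first child).
 */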
2056void
2057pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
2059{
2060	struct pf_anchor_stackframe	*f;
2061
2062	(*r)->anchor->match = 0;
2063	if (match)
2064		*match = 0;
2065	if (*depth >= sizeof(pf_anchor_stack) /
2066	    sizeof(pf_anchor_stack[0])) {
2067		printf("pf_step_into_anchor: stack overflow\n");
2068		*r = TAILQ_NEXT(*r, entries);
2069		return;
2070	} else if (*depth == 0 && a != NULL)
2071		*a = *r;
2072	f = pf_anchor_stack + (*depth)++;
2073	f->rs = *rs;
2074	f->r = *r;
2075	if ((*r)->anchor_wildcard) {
2076		f->parent = &(*r)->anchor->children;
2077		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2078		    NULL) {
2079			*r = NULL;
2080			return;
2081		}
2082		*rs = &f->child->ruleset;
2083	} else {
2084		f->parent = NULL;
2085		f->child = NULL;
2086		*rs = &(*r)->anchor->ruleset;
2087	}
2088	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2089}
2090
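/*
 * Counterpart of pf_step_into_anchor(): at the end of an anchor's ruleset,
 * either advance to the anchor's next child (wildcard anchors) or pop the
 * stack frame and resume after the anchor rule.  Returns nonzero when a
 * matching "quick" anchor rule should terminate the rule walk.
 */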
2091int
2092pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2093    struct pf_rule **r, struct pf_rule **a, int *match)
2094{
2095	struct pf_anchor_stackframe	*f;
2096	int quick = 0;
2097
2098	do {
2099		if (*depth <= 0)
2100			break;
2101		f = pf_anchor_stack + *depth - 1;
2102		if (f->parent != NULL && f->child != NULL) {
2103			if (f->child->match ||
2104			    (match != NULL && *match)) {
2105				f->r->anchor->match = 1;
2106				*match = 0;
2107			}
2108			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2109			if (f->child != NULL) {
2110				*rs = &f->child->ruleset;
2111				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2112				if (*r == NULL)
2113					continue;
2114				else
2115					break;
2116			}
2117		}
2118		(*depth)--;
2119		if (*depth == 0 && a != NULL)
2120			*a = NULL;
2121		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match))
2123			quick = f->r->quick;
2124		*r = TAILQ_NEXT(f->r, entries);
2125	} while (*r == NULL);
2126
2127	return (quick);
2128}
2129
2130#ifdef INET6
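/*
 * Combine a pool address with the packet's source address: bits covered by
 * the pool mask come from the pool address, the remaining bits from saddr.
 */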
2131void
2132pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2133    struct pf_addr *rmask, const struct pf_addr *saddr, sa_family_t af)
2134{
2135	switch (af) {
2136#ifdef INET
2137	case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		break;
#endif /* INET */
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
2151		break;
2152	}
2153}
2154
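/*
 * Increment an address (in network byte order) by one, carrying across the
 * four 32-bit words in the IPv6 case.
 */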
2155void
2156pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2157{
2158	switch (af) {
2159#ifdef INET
2160	case AF_INET:
2161		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2162		break;
2163#endif /* INET */
2164	case AF_INET6:
2165		if (addr->addr32[3] == 0xffffffff) {
2166			addr->addr32[3] = 0;
2167			if (addr->addr32[2] == 0xffffffff) {
2168				addr->addr32[2] = 0;
2169				if (addr->addr32[1] == 0xffffffff) {
2170					addr->addr32[1] = 0;
2171					addr->addr32[0] =
2172					    htonl(ntohl(addr->addr32[0]) + 1);
2173				} else
2174					addr->addr32[1] =
2175					    htonl(ntohl(addr->addr32[1]) + 1);
2176			} else
2177				addr->addr32[2] =
2178				    htonl(ntohl(addr->addr32[2]) + 1);
2179		} else
2180			addr->addr32[3] =
2181			    htonl(ntohl(addr->addr32[3]) + 1);
2182		break;
2183	}
2184}
2185#endif /* INET6 */
2186
2187#define mix(a,b,c) \
2188	do {					\
2189		a -= b; a -= c; a ^= (c >> 13);	\
2190		b -= c; b -= a; b ^= (a << 8);	\
2191		c -= a; c -= b; c ^= (b >> 13);	\
2192		a -= b; a -= c; a ^= (c >> 12);	\
2193		b -= c; b -= a; b ^= (a << 16);	\
2194		c -= a; c -= b; c ^= (b >> 5);	\
2195		a -= b; a -= c; a ^= (c >> 3);	\
2196		b -= c; b -= a; b ^= (a << 10);	\
2197		c -= a; c -= b; c ^= (b >> 15);	\
2198	} while (0)
2199
2200/*
2201 * hash function based on bridge_hash in if_bridge.c
2202 */
2203void
2204pf_hash(const struct pf_addr *inaddr, struct pf_addr *hash,
2205    struct pf_poolhashkey *key, sa_family_t af)
2206{
2207	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2208
2209	switch (af) {
2210#ifdef INET
2211	case AF_INET:
2212		a += inaddr->addr32[0];
2213		b += key->key32[1];
2214		mix(a, b, c);
2215		hash->addr32[0] = c + key->key32[2];
2216		break;
2217#endif /* INET */
2218#ifdef INET6
2219	case AF_INET6:
2220		a += inaddr->addr32[0];
2221		b += inaddr->addr32[2];
2222		mix(a, b, c);
2223		hash->addr32[0] = c;
2224		a += inaddr->addr32[1];
2225		b += inaddr->addr32[3];
2226		c += key->key32[1];
2227		mix(a, b, c);
2228		hash->addr32[1] = c;
2229		a += inaddr->addr32[2];
2230		b += inaddr->addr32[1];
2231		c += key->key32[2];
2232		mix(a, b, c);
2233		hash->addr32[2] = c;
2234		a += inaddr->addr32[3];
2235		b += inaddr->addr32[0];
2236		c += key->key32[3];
2237		mix(a, b, c);
2238		hash->addr32[3] = c;
2239		break;
2240#endif /* INET6 */
2241	}
2242}
2243
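/*
 * Select the translation (or route-to) address for a packet from the rule's
 * address pool according to the pool type: none, bitmask, random,
 * source-hash or round-robin.  With sticky-address, a matching source node
 * is consulted first so that the same source keeps mapping to the same pool
 * address.  Returns 0 on success, 1 if no usable address was found.
 */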
2244int
2245pf_map_addr(sa_family_t af, struct pf_rule *r, const struct pf_addr *saddr,
2246    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2247{
2248	unsigned char		 hash[16];
2249	struct pf_pool		*rpool = &r->rpool;
2250	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
2251	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
2252	struct pf_pooladdr	*acur = rpool->cur;
2253	struct pf_src_node	 k;
2254
2255	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2256	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2257		k.af = af;
2258		PF_ACPY(&k.addr, saddr, af);
2259		if (r->rule_flag & PFRULE_RULESRCTRACK ||
2260		    r->rpool.opts & PF_POOL_STICKYADDR)
2261			k.rule.ptr = r;
2262		else
2263			k.rule.ptr = NULL;
2264		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2265		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2266		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2267			PF_ACPY(naddr, &(*sn)->raddr, af);
2268			if (pf_status.debug >= PF_DEBUG_MISC) {
2269				printf("pf_map_addr: src tracking maps ");
2270				pf_print_host(&k.addr, 0, af);
2271				printf(" to ");
2272				pf_print_host(naddr, 0, af);
2273				printf("\n");
2274			}
2275			return (0);
2276		}
2277	}
2278
2279	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2280		return (1);
2281	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2282		switch (af) {
2283#ifdef INET
2284		case AF_INET:
2285			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2286			    (rpool->opts & PF_POOL_TYPEMASK) !=
2287			    PF_POOL_ROUNDROBIN)
2288				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
2291			break;
2292#endif /* INET */
2293#ifdef INET6
2294		case AF_INET6:
2295			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2296			    (rpool->opts & PF_POOL_TYPEMASK) !=
2297			    PF_POOL_ROUNDROBIN)
2298				return (1);
2299			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2300			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
2301			break;
2302#endif /* INET6 */
2303		}
2304	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2305		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2306			return (1); /* unsupported */
2307	} else {
2308		raddr = &rpool->cur->addr.v.a.addr;
2309		rmask = &rpool->cur->addr.v.a.mask;
2310	}
2311
2312	switch (rpool->opts & PF_POOL_TYPEMASK) {
2313	case PF_POOL_NONE:
2314		PF_ACPY(naddr, raddr, af);
2315		break;
2316	case PF_POOL_BITMASK:
2317		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2318		break;
2319	case PF_POOL_RANDOM:
2320		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2321			switch (af) {
2322#ifdef INET
2323			case AF_INET:
2324				rpool->counter.addr32[0] =
2325				    htonl(cprng_fast32());
2326				break;
2327#endif /* INET */
2328#ifdef INET6
2329			case AF_INET6:
2330				if (rmask->addr32[3] != 0xffffffff)
2331					rpool->counter.addr32[3] =
2332					    htonl(cprng_fast32());
2333				else
2334					break;
2335				if (rmask->addr32[2] != 0xffffffff)
2336					rpool->counter.addr32[2] =
2337					    htonl(cprng_fast32());
2338				else
2339					break;
2340				if (rmask->addr32[1] != 0xffffffff)
2341					rpool->counter.addr32[1] =
2342					    htonl(cprng_fast32());
2343				else
2344					break;
2345				if (rmask->addr32[0] != 0xffffffff)
2346					rpool->counter.addr32[0] =
2347					    htonl(cprng_fast32());
2348				break;
2349#endif /* INET6 */
2350			}
2351			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2352			PF_ACPY(init_addr, naddr, af);
2353
2354		} else {
2355			PF_AINC(&rpool->counter, af);
2356			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2357		}
2358		break;
2359	case PF_POOL_SRCHASH:
2360		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2361		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2362		break;
2363	case PF_POOL_ROUNDROBIN:
2364		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2365			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2366			    &rpool->tblidx, &rpool->counter,
2367			    &raddr, &rmask, af))
2368				goto get_addr;
2369		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2370			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2371			    &rpool->tblidx, &rpool->counter,
2372			    &raddr, &rmask, af))
2373				goto get_addr;
2374		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2375			goto get_addr;
2376
2377	try_next:
2378		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2379			rpool->cur = TAILQ_FIRST(&rpool->list);
2380		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2381			rpool->tblidx = -1;
2382			if (pfr_pool_get(rpool->cur->addr.p.tbl,
2383			    &rpool->tblidx, &rpool->counter,
2384			    &raddr, &rmask, af)) {
2385				/* table contains no address of type 'af' */
2386				if (rpool->cur != acur)
2387					goto try_next;
2388				return (1);
2389			}
2390		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2391			rpool->tblidx = -1;
2392			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2393			    &rpool->tblidx, &rpool->counter,
2394			    &raddr, &rmask, af)) {
2395				/* table contains no address of type 'af' */
2396				if (rpool->cur != acur)
2397					goto try_next;
2398				return (1);
2399			}
2400		} else {
2401			raddr = &rpool->cur->addr.v.a.addr;
2402			rmask = &rpool->cur->addr.v.a.mask;
2403			PF_ACPY(&rpool->counter, raddr, af);
2404		}
2405
2406	get_addr:
2407		PF_ACPY(naddr, &rpool->counter, af);
2408		if (init_addr != NULL && PF_AZERO(init_addr, af))
2409			PF_ACPY(init_addr, naddr, af);
2410		PF_AINC(&rpool->counter, af);
2411		break;
2412	}
2413	if (*sn != NULL)
2414		PF_ACPY(&(*sn)->raddr, naddr, af);
2415
2416	if (pf_status.debug >= PF_DEBUG_MISC &&
2417	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2418		printf("pf_map_addr: selected address ");
2419		pf_print_host(naddr, 0, af);
2420		printf("\n");
2421	}
2422
2423	return (0);
2424}
2425
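/*
 * Pick a translation address and source port for NAT.  For TCP, UDP and
 * ICMP a port is searched in [low, high], starting at a random point, that
 * does not collide with an existing state; if the range is exhausted,
 * another pool address is tried for random/round-robin pools.  Returns 0 on
 * success, 1 if no usable address/port combination is available.
 */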
2426int
2427pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2428    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2429    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2430    struct pf_src_node **sn)
2431{
2432	struct pf_state_key_cmp	key;
2433	struct pf_addr		init_addr;
2434	u_int16_t		cut;
2435
2436	bzero(&init_addr, sizeof(init_addr));
2437	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2438		return (1);
2439
2440	if (proto == IPPROTO_ICMP) {
2441		low = 1;
2442		high = 65535;
2443	}
2444
2445	do {
2446		key.af = af;
2447		key.proto = proto;
2448		PF_ACPY(&key.ext.addr, daddr, key.af);
2449		PF_ACPY(&key.gwy.addr, naddr, key.af);
2450		key.ext.port = dport;
2451
		/*
		 * Port search: start at a random port and step through the
		 * range, similar to the port loop in in_pcbbind().
		 */
2456		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2457		    proto == IPPROTO_ICMP)) {
2458			key.gwy.port = dport;
2459			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2460				return (0);
2461		} else if (low == 0 && high == 0) {
2462			key.gwy.port = *nport;
2463			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2464				return (0);
2465		} else if (low == high) {
2466			key.gwy.port = htons(low);
2467			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2468				*nport = htons(low);
2469				return (0);
2470			}
2471		} else {
2472			u_int16_t tmp;
2473
2474			if (low > high) {
2475				tmp = low;
2476				low = high;
2477				high = tmp;
2478			}
2479			/* low < high */
2480			cut = htonl(cprng_fast32()) % (1 + high - low) + low;
2481			/* low <= cut <= high */
2482			for (tmp = cut; tmp <= high; ++(tmp)) {
2483				key.gwy.port = htons(tmp);
2484				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2485				    NULL) {
2486					*nport = htons(tmp);
2487					return (0);
2488				}
2489			}
2490			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2491				key.gwy.port = htons(tmp);
2492				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2493				    NULL) {
2494					*nport = htons(tmp);
2495					return (0);
2496				}
2497			}
2498		}
2499
2500		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2501		case PF_POOL_RANDOM:
2502		case PF_POOL_ROUNDROBIN:
2503			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2504				return (1);
2505			break;
2506		case PF_POOL_NONE:
2507		case PF_POOL_SRCHASH:
2508		case PF_POOL_BITMASK:
2509		default:
2510			return (1);
2511		}
	} while (!PF_AEQ(&init_addr, naddr, af));
2513
2514	return (1);					/* none available */
2515}
2516
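/*
 * Walk one translation ruleset (nat, rdr or binat) and return the first
 * matching rule, following skip steps and anchors.  Returns NULL if nothing
 * matches, or if the match is a "no nat"/"no binat"/"no rdr" rule.
 */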
2517struct pf_rule *
2518pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2519    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2520    struct pf_addr *daddr, u_int16_t dport, int rs_num)
2521{
2522	struct pf_rule		*r, *rm = NULL;
2523	struct pf_ruleset	*ruleset = NULL;
2524	int			 tag = -1;
2525	int			 rtableid = -1;
2526	int			 asd = 0;
2527
2528	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2529	while (r && rm == NULL) {
2530		struct pf_rule_addr	*src = NULL, *dst = NULL;
2531		struct pf_addr_wrap	*xdst = NULL;
2532
2533		if (r->action == PF_BINAT && direction == PF_IN) {
2534			src = &r->dst;
2535			if (r->rpool.cur != NULL)
2536				xdst = &r->rpool.cur->addr;
2537		} else {
2538			src = &r->src;
2539			dst = &r->dst;
2540		}
2541
2542		r->evaluations++;
2543		if (pfi_kif_match(r->kif, kif) == r->ifnot)
2544			r = r->skip[PF_SKIP_IFP].ptr;
2545		else if (r->direction && r->direction != direction)
2546			r = r->skip[PF_SKIP_DIR].ptr;
2547		else if (r->af && r->af != pd->af)
2548			r = r->skip[PF_SKIP_AF].ptr;
2549		else if (r->proto && r->proto != pd->proto)
2550			r = r->skip[PF_SKIP_PROTO].ptr;
2551		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2552		    src->neg, kif))
2553			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2554			    PF_SKIP_DST_ADDR].ptr;
2555		else if (src->port_op && !pf_match_port(src->port_op,
2556		    src->port[0], src->port[1], sport))
2557			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2558			    PF_SKIP_DST_PORT].ptr;
2559		else if (dst != NULL &&
2560		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
2561			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2562		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2563		    0, NULL))
2564			r = TAILQ_NEXT(r, entries);
2565		else if (dst != NULL && dst->port_op &&
2566		    !pf_match_port(dst->port_op, dst->port[0],
2567		    dst->port[1], dport))
2568			r = r->skip[PF_SKIP_DST_PORT].ptr;
2569		else if (r->match_tag && !pf_match_tag(m, r, &tag))
2570			r = TAILQ_NEXT(r, entries);
2571		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2572		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2573		    off, pd->hdr.tcp), r->os_fingerprint)))
2574			r = TAILQ_NEXT(r, entries);
2575		else {
2576			if (r->tag)
2577				tag = r->tag;
2578			if (r->rtableid >= 0)
2579				rtableid = r->rtableid;
2580			if (r->anchor == NULL) {
2581				rm = r;
2582			} else
2583				pf_step_into_anchor(&asd, &ruleset, rs_num,
2584				    &r, NULL, NULL);
2585		}
2586		if (r == NULL)
2587			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2588			    NULL, NULL);
2589	}
2590	if (pf_tag_packet(m, tag, rtableid))
2591		return (NULL);
2592	if (rm != NULL && (rm->action == PF_NONAT ||
2593	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2594		return (NULL);
2595	return (rm);
2596}
2597
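/*
 * Find the translation rule that applies to the packet (outbound: binat
 * before nat; inbound: rdr before binat) and compute the translated
 * address and port into naddr/nport.  Returns NULL if no translation
 * applies or if an address/port could not be allocated.
 */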
2598struct pf_rule *
2599pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2600    struct pfi_kif *kif, struct pf_src_node **sn,
2601    struct pf_addr *saddr, u_int16_t sport,
2602    struct pf_addr *daddr, u_int16_t dport,
2603    struct pf_addr *naddr, u_int16_t *nport)
2604{
2605	struct pf_rule	*r = NULL;
2606
2607	if (direction == PF_OUT) {
2608		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2609		    sport, daddr, dport, PF_RULESET_BINAT);
2610		if (r == NULL)
2611			r = pf_match_translation(pd, m, off, direction, kif,
2612			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2613	} else {
2614		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2615		    sport, daddr, dport, PF_RULESET_RDR);
2616		if (r == NULL)
2617			r = pf_match_translation(pd, m, off, direction, kif,
2618			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2619	}
2620
2621	if (r != NULL) {
2622		switch (r->action) {
2623		case PF_NONAT:
2624		case PF_NOBINAT:
2625		case PF_NORDR:
2626			return (NULL);
2627		case PF_NAT:
2628			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2629			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2630			    r->rpool.proxy_port[1], sn)) {
2631				DPFPRINTF(PF_DEBUG_MISC,
2632				    ("pf: NAT proxy port allocation "
2633				    "(%u-%u) failed\n",
2634				    r->rpool.proxy_port[0],
2635				    r->rpool.proxy_port[1]));
2636				return (NULL);
2637			}
2638			break;
2639		case PF_BINAT:
2640			switch (direction) {
2641			case PF_OUT:
2642				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2643					switch (pd->af) {
2644#ifdef INET
2645					case AF_INET:
2646						if (r->rpool.cur->addr.p.dyn->
2647						    pfid_acnt4 < 1)
2648							return (NULL);
2649						PF_POOLMASK(naddr,
2650						    &r->rpool.cur->addr.p.dyn->
2651						    pfid_addr4,
2652						    &r->rpool.cur->addr.p.dyn->
2653						    pfid_mask4,
2654						    saddr, AF_INET);
2655						break;
2656#endif /* INET */
2657#ifdef INET6
2658					case AF_INET6:
2659						if (r->rpool.cur->addr.p.dyn->
2660						    pfid_acnt6 < 1)
2661							return (NULL);
2662						PF_POOLMASK(naddr,
2663						    &r->rpool.cur->addr.p.dyn->
2664						    pfid_addr6,
2665						    &r->rpool.cur->addr.p.dyn->
2666						    pfid_mask6,
2667						    saddr, AF_INET6);
2668						break;
2669#endif /* INET6 */
2670					}
2671				} else
2672					PF_POOLMASK(naddr,
2673					    &r->rpool.cur->addr.v.a.addr,
2674					    &r->rpool.cur->addr.v.a.mask,
2675					    saddr, pd->af);
2676				break;
2677			case PF_IN:
2678				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2679					switch (pd->af) {
2680#ifdef INET
2681					case AF_INET:
2682						if (r->src.addr.p.dyn->
2683						    pfid_acnt4 < 1)
2684							return (NULL);
2685						PF_POOLMASK(naddr,
2686						    &r->src.addr.p.dyn->
2687						    pfid_addr4,
2688						    &r->src.addr.p.dyn->
2689						    pfid_mask4,
2690						    daddr, AF_INET);
2691						break;
2692#endif /* INET */
2693#ifdef INET6
2694					case AF_INET6:
2695						if (r->src.addr.p.dyn->
2696						    pfid_acnt6 < 1)
2697							return (NULL);
2698						PF_POOLMASK(naddr,
2699						    &r->src.addr.p.dyn->
2700						    pfid_addr6,
2701						    &r->src.addr.p.dyn->
2702						    pfid_mask6,
2703						    daddr, AF_INET6);
2704						break;
2705#endif /* INET6 */
2706					}
2707				} else
2708					PF_POOLMASK(naddr,
2709					    &r->src.addr.v.a.addr,
2710					    &r->src.addr.v.a.mask, daddr,
2711					    pd->af);
2712				break;
2713			}
2714			break;
2715		case PF_RDR: {
2716			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2717				return (NULL);
2718			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2719			    PF_POOL_BITMASK)
2720				PF_POOLMASK(naddr, naddr,
2721				    &r->rpool.cur->addr.v.a.mask, daddr,
2722				    pd->af);
2723
2724			if (r->rpool.proxy_port[1]) {
2725				u_int32_t	tmp_nport;
2726
2727				tmp_nport = ((ntohs(dport) -
2728				    ntohs(r->dst.port[0])) %
2729				    (r->rpool.proxy_port[1] -
2730				    r->rpool.proxy_port[0] + 1)) +
2731				    r->rpool.proxy_port[0];
2732
2733				/* wrap around if necessary */
2734				if (tmp_nport > 65535)
2735					tmp_nport -= 65535;
2736				*nport = htons((u_int16_t)tmp_nport);
2737			} else if (r->rpool.proxy_port[0])
2738				*nport = htons(r->rpool.proxy_port[0]);
2739			break;
2740		}
2741		default:
2742			return (NULL);
2743		}
2744	}
2745
2746	return (r);
2747}
2748
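/*
 * Look up the local socket owning the TCP or UDP connection the packet
 * belongs to and record its effective uid/gid (and pid) in pd->lookup for
 * "user"/"group" rule matching.  Returns 1 on success, -1 if no socket was
 * found or the protocol is not supported.
 */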
2749int
2750pf_socket_lookup(int direction, struct pf_pdesc *pd)
2751{
2752	struct pf_addr		*saddr, *daddr;
2753	u_int16_t		 sport, dport;
2754	struct inpcbtable	*tb;
2755	struct inpcb		*inp = NULL;
2756	struct socket		*so = NULL;
2757#if defined(__NetBSD__) && defined(INET6)
2758	struct in6pcb		*in6p = NULL;
2759#else
2760#define in6p inp
2761#endif /* __NetBSD__ && INET6 */
2762
2763	if (pd == NULL)
2764		return (-1);
2765	pd->lookup.uid = UID_MAX;
2766	pd->lookup.gid = GID_MAX;
2767	pd->lookup.pid = NO_PID;
2768	switch (pd->proto) {
2769	case IPPROTO_TCP:
2770		if (pd->hdr.tcp == NULL)
2771			return (-1);
2772		sport = pd->hdr.tcp->th_sport;
2773		dport = pd->hdr.tcp->th_dport;
2774		tb = &tcbtable;
2775		break;
2776	case IPPROTO_UDP:
2777		if (pd->hdr.udp == NULL)
2778			return (-1);
2779		sport = pd->hdr.udp->uh_sport;
2780		dport = pd->hdr.udp->uh_dport;
2781		tb = &udbtable;
2782		break;
2783	default:
2784		return (-1);
2785	}
2786	if (direction == PF_IN) {
2787		saddr = pd->src;
2788		daddr = pd->dst;
2789	} else {
2790		u_int16_t	p;
2791
2792		p = sport;
2793		sport = dport;
2794		dport = p;
2795		saddr = pd->dst;
2796		daddr = pd->src;
2797	}
2798	switch (pd->af) {
2799
2800#ifdef __NetBSD__
2801#define in_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
2802    in_pcblookup_connect(tbl, saddr, sport, daddr, dport, NULL)
2803#define in6_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
2804    in6_pcblookup_connect(tbl, saddr, sport, daddr, dport, 0, NULL)
2805#define in_pcblookup_listen(tbl, addr, port, zero) \
2806    in_pcblookup_bind(tbl, addr, port)
2807#define in6_pcblookup_listen(tbl, addr, port, zero) \
2808    in6_pcblookup_bind(tbl, addr, port, zero)
2809#endif
2810
2811#ifdef INET
2812	case AF_INET:
2813		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2814		if (inp == NULL) {
2815			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2816			if (inp == NULL)
2817				return (-1);
2818		}
2819		break;
2820#endif /* INET */
2821#ifdef INET6
2822	case AF_INET6:
2824		in6p = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2825		    dport);
		if (in6p == NULL) {
			in6p = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
			if (in6p == NULL)
2829				return (-1);
2830		}
2831		break;
2832#endif /* INET6 */
2833
2834	default:
2835		return (-1);
2836	}
2837
2838#ifdef __NetBSD__
2839	switch (pd->af) {
2840#ifdef INET
2841	case AF_INET:
2842		so = inp->inp_socket;
2843		break;
2844#endif
2845#ifdef INET6
2846	case AF_INET6:
2848		so = in6p->in6p_socket;
2849		break;
2850#endif /* INET6 */
2851	}
2852	pd->lookup.uid = kauth_cred_geteuid(so->so_cred);
2853	pd->lookup.gid = kauth_cred_getegid(so->so_cred);
2854#else
2855	so = inp->inp_socket;
2856	pd->lookup.uid = so->so_euid;
2857	pd->lookup.gid = so->so_egid;
2858#endif /* !__NetBSD__ */
2859	pd->lookup.pid = so->so_cpid;
2860	return (1);
2861}
2862
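/*
 * Parse the TCP options of the segment at the given offset and return its
 * window scale factor, with PF_WSCALE_FLAG set when the option is present.
 */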
2863u_int8_t
2864pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2865{
2866	int		 hlen;
2867	u_int8_t	 hdr[60];
2868	u_int8_t	*opt, optlen;
2869	u_int8_t	 wscale = 0;
2870
2871	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2872	if (hlen <= sizeof(struct tcphdr))
2873		return (0);
2874	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2875		return (0);
2876	opt = hdr + sizeof(struct tcphdr);
2877	hlen -= sizeof(struct tcphdr);
2878	while (hlen >= 3) {
2879		switch (*opt) {
2880		case TCPOPT_EOL:
2881		case TCPOPT_NOP:
2882			++opt;
2883			--hlen;
2884			break;
2885		case TCPOPT_WINDOW:
2886			wscale = opt[2];
2887			if (wscale > TCP_MAX_WINSHIFT)
2888				wscale = TCP_MAX_WINSHIFT;
2889			wscale |= PF_WSCALE_FLAG;
2890			/* FALLTHROUGH */
2891		default:
2892			optlen = opt[1];
2893			if (optlen < 2)
2894				optlen = 2;
2895			hlen -= optlen;
2896			opt += optlen;
2897			break;
2898		}
2899	}
2900	return (wscale);
2901}
2902
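/*
 * Parse the TCP options and return the advertised maximum segment size,
 * or tcp_mssdflt if no MSS option is present.
 */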
2903u_int16_t
2904pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2905{
2906	int		 hlen;
2907	u_int8_t	 hdr[60];
2908	u_int8_t	*opt, optlen;
2909	u_int16_t	 mss = tcp_mssdflt;
2910
2911	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2912	if (hlen <= sizeof(struct tcphdr))
2913		return (0);
2914	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2915		return (0);
2916	opt = hdr + sizeof(struct tcphdr);
2917	hlen -= sizeof(struct tcphdr);
2918	while (hlen >= TCPOLEN_MAXSEG) {
2919		switch (*opt) {
2920		case TCPOPT_EOL:
2921		case TCPOPT_NOP:
2922			++opt;
2923			--hlen;
2924			break;
2925		case TCPOPT_MAXSEG:
2926			bcopy((void *)(opt + 2), (void *)&mss, 2);
2927			NTOHS(mss);
2928			/* FALLTHROUGH */
2929		default:
2930			optlen = opt[1];
2931			if (optlen < 2)
2932				optlen = 2;
2933			hlen -= optlen;
2934			opt += optlen;
2935			break;
2936		}
2937	}
2938	return (mss);
2939}
2940
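/*
 * Compute the MSS to advertise towards the given address: the MTU of the
 * route to it minus IP and TCP header overhead, clamped to the peer's
 * offer and to a minimum of 64.
 */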
2941u_int16_t
2942pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2943{
2944	union {
2945		struct sockaddr		dst;
2946		struct sockaddr_in	dst4;
2947		struct sockaddr_in6	dst6;
2948	} u;
2949	struct route		 ro;
2950	struct route		*rop = &ro;
2951	struct rtentry		*rt;
2952	int			 hlen;
2953	u_int16_t		 mss = tcp_mssdflt;
2954
2955	hlen = 0;	/* XXXGCC -Wuninitialized m68k */
2956
2957	memset(&ro, 0, sizeof(ro));
2958	switch (af) {
2959#ifdef INET
2960	case AF_INET:
2961		hlen = sizeof(struct ip);
2962		sockaddr_in_init(&u.dst4, &addr->v4, 0);
2963		rtcache_setdst(rop, &u.dst);
2964		break;
2965#endif /* INET */
2966#ifdef INET6
2967	case AF_INET6:
2968		hlen = sizeof(struct ip6_hdr);
2969		sockaddr_in6_init(&u.dst6, &addr->v6, 0, 0, 0);
2970		rtcache_setdst(rop, &u.dst);
2971		break;
2972#endif /* INET6 */
2973	}
2974
2975#ifndef __NetBSD__
2976	rtalloc_noclone(rop, NO_CLONING);
	if ((rt = rop->ro_rt) != NULL) {
2978		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2979		mss = max(tcp_mssdflt, mss);
2980	}
2981#else
2982	if ((rt = rtcache_init_noclone(rop)) != NULL) {
2983		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2984		mss = max(tcp_mssdflt, mss);
2985	}
2986	rtcache_free(rop);
2987#endif
2988	mss = min(mss, offer);
2989	mss = max(mss, 64);		/* sanity - at least max opt space */
2990	return (mss);
2991}
2992
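/*
 * For route-to/reply-to/dup-to rules, resolve the outgoing interface (and,
 * for address pools, the routing address) of a newly created state.
 */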
2993void
2994pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2995{
2996	struct pf_rule *r = s->rule.ptr;
2997
2998	s->rt_kif = NULL;
2999	if (!r->rt || r->rt == PF_FASTROUTE)
3000		return;
3001	switch (s->state_key->af) {
3002#ifdef INET
3003	case AF_INET:
3004		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
3005		    &s->nat_src_node);
3006		s->rt_kif = r->rpool.cur->kif;
3007		break;
3008#endif /* INET */
3009#ifdef INET6
3010	case AF_INET6:
3011		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
3012		    &s->nat_src_node);
3013		s->rt_kif = r->rpool.cur->kif;
3014		break;
3015#endif /* INET6 */
3016	}
3017}
3018
3019void
3020pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
3021{
3022	s->state_key = sk;
3023	sk->refcnt++;
3024
3025	/* list is sorted, if-bound states before floating */
3026	if (tail)
3027		TAILQ_INSERT_TAIL(&sk->states, s, next);
3028	else
3029		TAILQ_INSERT_HEAD(&sk->states, s, next);
3030}
3031
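/*
 * Unlink a state from its state key; once the last reference is gone the
 * key is removed from the lookup trees (unless the caller asks to skip
 * one of them) and freed.
 */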
3032void
3033pf_detach_state(struct pf_state *s, int flags)
3034{
3035	struct pf_state_key	*sk = s->state_key;
3036
3037	if (sk == NULL)
3038		return;
3039
3040	s->state_key = NULL;
3041	TAILQ_REMOVE(&sk->states, s, next);
3042	if (--sk->refcnt == 0) {
3043		if (!(flags & PF_DT_SKIP_EXTGWY))
3044			RB_REMOVE(pf_state_tree_ext_gwy,
3045			    &pf_statetbl_ext_gwy, sk);
3046		if (!(flags & PF_DT_SKIP_LANEXT))
3047			RB_REMOVE(pf_state_tree_lan_ext,
3048			    &pf_statetbl_lan_ext, sk);
3049		pool_put(&pf_state_key_pl, sk);
3050	}
3051}
3052
3053struct pf_state_key *
3054pf_alloc_state_key(struct pf_state *s)
3055{
3056	struct pf_state_key	*sk;
3057
3058	if ((sk = pool_get(&pf_state_key_pl, PR_NOWAIT)) == NULL)
3059		return (NULL);
3060	bzero(sk, sizeof(*sk));
3061	TAILQ_INIT(&sk->states);
3062	pf_attach_state(sk, s, 0);
3063
3064	return (sk);
3065}
3066
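/*
 * Main rule evaluation for the first packet of a connection: apply any
 * matching translation (NAT/BINAT/RDR), walk the active filter ruleset,
 * send RST/ICMP replies for blocked packets where the rule requests it,
 * and create state (including SYN proxy handling) for rules that keep
 * state.
 */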
3067int
3068pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3069    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3070    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3071    struct ifqueue *ifq)
3072{
3073	struct pf_rule		*nr = NULL;
3074	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3075	u_int16_t		 bport, nport = 0;
3076	sa_family_t		 af = pd->af;
3077	struct pf_rule		*r, *a = NULL;
3078	struct pf_ruleset	*ruleset = NULL;
3079	struct pf_src_node	*nsn = NULL;
3080	struct tcphdr		*th = pd->hdr.tcp;
3081	u_short			 reason;
3082	int			 rewrite = 0, hdrlen = 0;
3083	int			 tag = -1, rtableid = -1;
3084	int			 asd = 0;
3085	int			 match = 0;
3086	int			 state_icmp = 0;
3087	u_int16_t		 mss = tcp_mssdflt;
3088	u_int16_t		 sport, dport;
3089	u_int8_t		 icmptype = 0, icmpcode = 0;
3090
3091	if (direction == PF_IN && pf_check_congestion(ifq)) {
3092		REASON_SET(&reason, PFRES_CONGEST);
3093		return (PF_DROP);
3094	}
3095
3096	sport = dport = hdrlen = 0;
3097
3098	switch (pd->proto) {
3099	case IPPROTO_TCP:
3100		sport = th->th_sport;
3101		dport = th->th_dport;
3102		hdrlen = sizeof(*th);
3103		break;
3104	case IPPROTO_UDP:
3105		sport = pd->hdr.udp->uh_sport;
3106		dport = pd->hdr.udp->uh_dport;
3107		hdrlen = sizeof(*pd->hdr.udp);
3108		break;
3109#ifdef INET
3110	case IPPROTO_ICMP:
3111		if (pd->af != AF_INET)
3112			break;
		sport = dport = pd->hdr.icmp->icmp_id;
		hdrlen = sizeof(*pd->hdr.icmp);
3114		icmptype = pd->hdr.icmp->icmp_type;
3115		icmpcode = pd->hdr.icmp->icmp_code;
3116
3117		if (icmptype == ICMP_UNREACH ||
3118		    icmptype == ICMP_SOURCEQUENCH ||
3119		    icmptype == ICMP_REDIRECT ||
3120		    icmptype == ICMP_TIMXCEED ||
3121		    icmptype == ICMP_PARAMPROB)
3122			state_icmp++;
3123		break;
3124#endif /* INET */
3125
3126#ifdef INET6
3127	case IPPROTO_ICMPV6:
3128		if (pd->af != AF_INET6)
3129			break;
3130		sport = dport = pd->hdr.icmp6->icmp6_id;
3131		hdrlen = sizeof(*pd->hdr.icmp6);
3132		icmptype = pd->hdr.icmp6->icmp6_type;
3133		icmpcode = pd->hdr.icmp6->icmp6_code;
3134
3135		if (icmptype == ICMP6_DST_UNREACH ||
3136		    icmptype == ICMP6_PACKET_TOO_BIG ||
3137		    icmptype == ICMP6_TIME_EXCEEDED ||
3138		    icmptype == ICMP6_PARAM_PROB)
3139			state_icmp++;
3140		break;
3141#endif /* INET6 */
3142	}
3143
3144	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3145
3146	if (direction == PF_OUT) {
3147		bport = nport = sport;
3148		/* check outgoing packet for BINAT/NAT */
3149		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3150		    saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
3151			PF_ACPY(&pd->baddr, saddr, af);
3152			switch (pd->proto) {
3153			case IPPROTO_TCP:
3154				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3155				    &th->th_sum, &pd->naddr, nport, 0, af);
3156				sport = th->th_sport;
3157				rewrite++;
3158				break;
3159			case IPPROTO_UDP:
3160				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3161				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3162				    &pd->naddr, nport, 1, af);
3163				sport = pd->hdr.udp->uh_sport;
3164				rewrite++;
3165				break;
3166#ifdef INET
3167			case IPPROTO_ICMP:
3168				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3169				    pd->naddr.v4.s_addr, 0);
3170				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3171				    pd->hdr.icmp->icmp_cksum, sport, nport, 0);
3172				pd->hdr.icmp->icmp_id = nport;
3173				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
3174				break;
3175#endif /* INET */
3176#ifdef INET6
3177			case IPPROTO_ICMPV6:
3178				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3179				    &pd->naddr, 0);
3180				rewrite++;
3181				break;
#endif /* INET6 */
3183			default:
3184				switch (af) {
3185#ifdef INET
3186				case AF_INET:
3187					pf_change_a(&saddr->v4.s_addr,
3188					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3189					break;
3190#endif /* INET */
3191#ifdef INET6
3192				case AF_INET6:
3193					PF_ACPY(saddr, &pd->naddr, af);
3194					break;
#endif /* INET6 */
3196				}
3197				break;
3198			}
3199
3200			if (nr->natpass)
3201				r = NULL;
3202			pd->nat_rule = nr;
3203		}
3204	} else {
3205		bport = nport = dport;
3206		/* check incoming packet for BINAT/RDR */
3207		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3208		    saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
3209			PF_ACPY(&pd->baddr, daddr, af);
3210			switch (pd->proto) {
3211			case IPPROTO_TCP:
3212				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3213				    &th->th_sum, &pd->naddr, nport, 0, af);
3214				dport = th->th_dport;
3215				rewrite++;
3216				break;
3217			case IPPROTO_UDP:
3218				pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3219				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3220				    &pd->naddr, nport, 1, af);
3221				dport = pd->hdr.udp->uh_dport;
3222				rewrite++;
3223				break;
3224#ifdef INET
3225			case IPPROTO_ICMP:
3226				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3227				    pd->naddr.v4.s_addr, 0);
3228				break;
3229#endif /* INET */
3230#ifdef INET6
3231			case IPPROTO_ICMPV6:
3232				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3233				    &pd->naddr, 0);
3234				rewrite++;
3235				break;
3236#endif /* INET6 */
3237			default:
3238				switch (af) {
3239#ifdef INET
3240				case AF_INET:
3241					pf_change_a(&daddr->v4.s_addr,
3242					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3243					break;
3244#endif /* INET */
3245#ifdef INET6
3246				case AF_INET6:
3247					PF_ACPY(daddr, &pd->naddr, af);
3248					break;
#endif /* INET6 */
3250				}
3251				break;
3252			}
3253
3254			if (nr->natpass)
3255				r = NULL;
3256			pd->nat_rule = nr;
3257		}
3258	}
3259
3260	while (r != NULL) {
3261		r->evaluations++;
3262		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3263			r = r->skip[PF_SKIP_IFP].ptr;
3264		else if (r->direction && r->direction != direction)
3265			r = r->skip[PF_SKIP_DIR].ptr;
3266		else if (r->af && r->af != af)
3267			r = r->skip[PF_SKIP_AF].ptr;
3268		else if (r->proto && r->proto != pd->proto)
3269			r = r->skip[PF_SKIP_PROTO].ptr;
3270		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3271		    r->src.neg, kif))
3272			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3273		/* tcp/udp only. port_op always 0 in other cases */
3274		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3275		    r->src.port[0], r->src.port[1], sport))
3276			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3277		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3278		    r->dst.neg, NULL))
3279			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3280		/* tcp/udp only. port_op always 0 in other cases */
3281		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3282		    r->dst.port[0], r->dst.port[1], dport))
3283			r = r->skip[PF_SKIP_DST_PORT].ptr;
3284		/* icmp only. type always 0 in other cases */
3285		else if (r->type && r->type != icmptype + 1)
3286			r = TAILQ_NEXT(r, entries);
3287		/* icmp only. type always 0 in other cases */
3288		else if (r->code && r->code != icmpcode + 1)
3289			r = TAILQ_NEXT(r, entries);
3290		else if (r->tos && !(r->tos == pd->tos))
3291			r = TAILQ_NEXT(r, entries);
3292		else if (r->rule_flag & PFRULE_FRAGMENT)
3293			r = TAILQ_NEXT(r, entries);
3294		else if (pd->proto == IPPROTO_TCP &&
3295		    (r->flagset & th->th_flags) != r->flags)
3296			r = TAILQ_NEXT(r, entries);
3297		/* tcp/udp only. uid.op always 0 in other cases */
3298		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3299		    pf_socket_lookup(direction, pd), 1)) &&
3300		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3301		    pd->lookup.uid))
3302			r = TAILQ_NEXT(r, entries);
3303		/* tcp/udp only. gid.op always 0 in other cases */
3304		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3305		    pf_socket_lookup(direction, pd), 1)) &&
3306		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3307		    pd->lookup.gid))
3308			r = TAILQ_NEXT(r, entries);
3309		else if (r->prob && r->prob <= cprng_fast32())
3310			r = TAILQ_NEXT(r, entries);
3311		else if (r->match_tag && !pf_match_tag(m, r, &tag))
3312			r = TAILQ_NEXT(r, entries);
3313		else if (r->os_fingerprint != PF_OSFP_ANY &&
3314		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3315		    pf_osfp_fingerprint(pd, m, off, th),
3316		    r->os_fingerprint)))
3317			r = TAILQ_NEXT(r, entries);
3318		else {
3319			if (r->tag)
3320				tag = r->tag;
3321			if (r->rtableid >= 0)
3322				rtableid = r->rtableid;
3323			if (r->anchor == NULL) {
3324				match = 1;
3325				*rm = r;
3326				*am = a;
3327				*rsm = ruleset;
3328				if ((*rm)->quick)
3329					break;
3330				r = TAILQ_NEXT(r, entries);
3331			} else
3332				pf_step_into_anchor(&asd, &ruleset,
3333				    PF_RULESET_FILTER, &r, &a, &match);
3334		}
3335		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3336		    PF_RULESET_FILTER, &r, &a, &match))
3337			break;
3338	}
3339	r = *rm;
3340	a = *am;
3341	ruleset = *rsm;
3342
3343	REASON_SET(&reason, PFRES_MATCH);
3344
3345	if (r->log || (nr != NULL && nr->log)) {
3346		if (rewrite)
3347			m_copyback(m, off, hdrlen, pd->hdr.any);
3348		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3349		    a, ruleset, pd);
3350	}
3351
3352	if (r->keep_state && pf_state_lock) {
3353		REASON_SET(&reason, PFRES_STATELOCKED);
3354		return PF_DROP;
3355	}
3356
3357	if ((r->action == PF_DROP) &&
3358	    ((r->rule_flag & PFRULE_RETURNRST) ||
3359	    (r->rule_flag & PFRULE_RETURNICMP) ||
3360	    (r->rule_flag & PFRULE_RETURN))) {
3361		/* undo NAT changes, if they have taken place */
3362		if (nr != NULL) {
3363			if (direction == PF_OUT) {
3364				switch (pd->proto) {
3365				case IPPROTO_TCP:
3366					pf_change_ap(saddr, &th->th_sport,
3367					    pd->ip_sum, &th->th_sum,
3368					    &pd->baddr, bport, 0, af);
3369					sport = th->th_sport;
3370					rewrite++;
3371					break;
3372				case IPPROTO_UDP:
3373					pf_change_ap(saddr,
3374					    &pd->hdr.udp->uh_sport, pd->ip_sum,
3375					    &pd->hdr.udp->uh_sum, &pd->baddr,
3376					    bport, 1, af);
3377					sport = pd->hdr.udp->uh_sport;
3378					rewrite++;
3379					break;
3380				case IPPROTO_ICMP:
3381#ifdef INET6
3382				case IPPROTO_ICMPV6:
3383#endif
3384					/* nothing! */
3385					break;
3386				default:
3387					switch (af) {
3388					case AF_INET:
3389						pf_change_a(&saddr->v4.s_addr,
3390						    pd->ip_sum,
3391						    pd->baddr.v4.s_addr, 0);
3392						break;
3393					case AF_INET6:
3394						PF_ACPY(saddr, &pd->baddr, af);
3395						break;
3396					}
3397				}
3398			} else {
3399				switch (pd->proto) {
3400				case IPPROTO_TCP:
3401					pf_change_ap(daddr, &th->th_dport,
3402					    pd->ip_sum, &th->th_sum,
3403					    &pd->baddr, bport, 0, af);
3404					dport = th->th_dport;
3405					rewrite++;
3406					break;
3407				case IPPROTO_UDP:
3408					pf_change_ap(daddr,
3409					    &pd->hdr.udp->uh_dport, pd->ip_sum,
3410					    &pd->hdr.udp->uh_sum, &pd->baddr,
3411					    bport, 1, af);
3412					dport = pd->hdr.udp->uh_dport;
3413					rewrite++;
3414					break;
3415				case IPPROTO_ICMP:
3416#ifdef INET6
3417				case IPPROTO_ICMPV6:
3418#endif
3419					/* nothing! */
3420					break;
3421				default:
3422					switch (af) {
3423					case AF_INET:
3424						pf_change_a(&daddr->v4.s_addr,
3425						    pd->ip_sum,
3426						    pd->baddr.v4.s_addr, 0);
3427						break;
3428					case AF_INET6:
3429						PF_ACPY(daddr, &pd->baddr, af);
3430						break;
3431					}
3432				}
3433			}
3434		}
3435		if (pd->proto == IPPROTO_TCP &&
3436		    ((r->rule_flag & PFRULE_RETURNRST) ||
3437		    (r->rule_flag & PFRULE_RETURN)) &&
3438		    !(th->th_flags & TH_RST)) {
3439			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
3440			struct ip	*hip = mtod(m, struct ip *);
3441
3442#ifdef __NetBSD__
3443			if (pf_check_proto_cksum(m, direction, off,
3444			    ntohs(hip->ip_len) - off, IPPROTO_TCP, AF_INET))
3445#else
3446			if (pf_check_proto_cksum(m, off,
3447			    ntohs(hip->ip_len) - off, IPPROTO_TCP, AF_INET))
3448#endif /* !__NetBSD__ */
3449				REASON_SET(&reason, PFRES_PROTCKSUM);
3450			else {
3451				if (th->th_flags & TH_SYN)
3452					ack++;
3453				if (th->th_flags & TH_FIN)
3454					ack++;
3455				pf_send_tcp(r, af, pd->dst,
3456				    pd->src, th->th_dport, th->th_sport,
3457				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3458				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3459			}
3460		} else if ((af == AF_INET) && r->return_icmp)
3461			pf_send_icmp(m, r->return_icmp >> 8,
3462			    r->return_icmp & 255, af, r);
3463		else if ((af == AF_INET6) && r->return_icmp6)
3464			pf_send_icmp(m, r->return_icmp6 >> 8,
3465			    r->return_icmp6 & 255, af, r);
3466	}
3467
3468	if (r->action == PF_DROP)
3469		return (PF_DROP);
3470
3471	if (pf_tag_packet(m, tag, rtableid)) {
3472		REASON_SET(&reason, PFRES_MEMORY);
3473		return (PF_DROP);
3474	}
3475
3476	if (!state_icmp && (r->keep_state || nr != NULL ||
3477	    (pd->flags & PFDESC_TCP_NORM))) {
3478		/* create new state */
3479		u_int16_t	 len;
3480		struct pf_state	*s = NULL;
3481		struct pf_state_key *sk = NULL;
3482		struct pf_src_node *sn = NULL;
3483
3484		/* check maximums */
3485		if (r->max_states && (r->states >= r->max_states)) {
3486			pf_status.lcounters[LCNT_STATES]++;
3487			REASON_SET(&reason, PFRES_MAXSTATES);
3488			goto cleanup;
3489		}
3490		/* src node for filter rule */
3491		if ((r->rule_flag & PFRULE_SRCTRACK ||
3492		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3493		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
3494			REASON_SET(&reason, PFRES_SRCLIMIT);
3495			goto cleanup;
3496		}
3497		/* src node for translation rule */
3498		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3499		    ((direction == PF_OUT &&
3500		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3501		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3502			REASON_SET(&reason, PFRES_SRCLIMIT);
3503			goto cleanup;
3504		}
3505		s = pool_get(&pf_state_pl, PR_NOWAIT);
3506		if (s == NULL) {
3507			REASON_SET(&reason, PFRES_MEMORY);
3508cleanup:
3509			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3510				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3511				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3512				pf_status.src_nodes--;
3513				pool_put(&pf_src_tree_pl, sn);
3514			}
3515			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3516			    nsn->expire == 0) {
3517				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3518				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3519				pf_status.src_nodes--;
3520				pool_put(&pf_src_tree_pl, nsn);
3521			}
3522			if (sk != NULL) {
3523				pool_put(&pf_state_key_pl, sk);
3524			}
3525			return (PF_DROP);
3526		}
3527		bzero(s, sizeof(*s));
3528		s->rule.ptr = r;
3529		s->nat_rule.ptr = nr;
3530		s->anchor.ptr = a;
3531		STATE_INC_COUNTERS(s);
3532		s->allow_opts = r->allow_opts;
3533		s->log = r->log & PF_LOG_ALL;
3534		if (nr != NULL)
3535			s->log |= nr->log & PF_LOG_ALL;
3536		switch (pd->proto) {
3537		case IPPROTO_TCP:
3538			len = pd->tot_len - off - (th->th_off << 2);
3539			s->src.seqlo = ntohl(th->th_seq);
3540			s->src.seqhi = s->src.seqlo + len + 1;
			if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
			    r->keep_state == PF_STATE_MODULATE) {
3543				/* Generate sequence number modulator */
3544				while ((s->src.seqdiff =
3545				    tcp_rndiss_next() - s->src.seqlo) == 0)
3546					;
3547				pf_change_a(&th->th_seq, &th->th_sum,
3548				    htonl(s->src.seqlo + s->src.seqdiff), 0);
3549				rewrite = 1;
3550			} else
3551				s->src.seqdiff = 0;
3552			if (th->th_flags & TH_SYN) {
3553				s->src.seqhi++;
3554				s->src.wscale = pf_get_wscale(m, off,
3555				    th->th_off, af);
3556			}
3557			s->src.max_win = MAX(ntohs(th->th_win), 1);
3558			if (s->src.wscale & PF_WSCALE_MASK) {
3559				/* Remove scale factor from initial window */
3560				int win = s->src.max_win;
3561				win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3562				s->src.max_win = (win - 1) >>
3563				    (s->src.wscale & PF_WSCALE_MASK);
3564			}
3565			if (th->th_flags & TH_FIN)
3566				s->src.seqhi++;
3567			s->dst.seqhi = 1;
3568			s->dst.max_win = 1;
3569			s->src.state = TCPS_SYN_SENT;
3570			s->dst.state = TCPS_CLOSED;
3571			s->timeout = PFTM_TCP_FIRST_PACKET;
3572			break;
3573		case IPPROTO_UDP:
3574			s->src.state = PFUDPS_SINGLE;
3575			s->dst.state = PFUDPS_NO_TRAFFIC;
3576			s->timeout = PFTM_UDP_FIRST_PACKET;
3577			break;
3578		case IPPROTO_ICMP:
3579#ifdef INET6
3580		case IPPROTO_ICMPV6:
3581#endif
3582			s->timeout = PFTM_ICMP_FIRST_PACKET;
3583			break;
3584		default:
3585			s->src.state = PFOTHERS_SINGLE;
3586			s->dst.state = PFOTHERS_NO_TRAFFIC;
3587			s->timeout = PFTM_OTHER_FIRST_PACKET;
3588		}
3589
3590		s->creation = time_second;
3591		s->expire = time_second;
3592
3593		if (sn != NULL) {
3594			s->src_node = sn;
3595			s->src_node->states++;
3596		}
3597		if (nsn != NULL) {
3598			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3599			s->nat_src_node = nsn;
3600			s->nat_src_node->states++;
3601		}
3602		if (pd->proto == IPPROTO_TCP) {
3603			if ((pd->flags & PFDESC_TCP_NORM) &&
3604			    pf_normalize_tcp_init(m, off, pd, th, &s->src,
3605			    &s->dst)) {
3606				REASON_SET(&reason, PFRES_MEMORY);
3607				pf_src_tree_remove_state(s);
3608				STATE_DEC_COUNTERS(s);
3609				pool_put(&pf_state_pl, s);
3610				return (PF_DROP);
3611			}
3612			if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3613			    pf_normalize_tcp_stateful(m, off, pd, &reason,
3614			    th, s, &s->src, &s->dst, &rewrite)) {
3615				/* This really shouldn't happen!!! */
3616				DPFPRINTF(PF_DEBUG_URGENT,
3617				    ("pf_normalize_tcp_stateful failed on "
3618				    "first pkt"));
3619				pf_normalize_tcp_cleanup(s);
3620				pf_src_tree_remove_state(s);
3621				STATE_DEC_COUNTERS(s);
3622				pool_put(&pf_state_pl, s);
3623				return (PF_DROP);
3624			}
3625		}
3626
3627		if ((sk = pf_alloc_state_key(s)) == NULL) {
3628			REASON_SET(&reason, PFRES_MEMORY);
3629			goto cleanup;
3630		}
3631
3632		sk->proto = pd->proto;
3633		sk->direction = direction;
3634		sk->af = af;
3635		if (direction == PF_OUT) {
3636			PF_ACPY(&sk->gwy.addr, saddr, af);
3637			PF_ACPY(&sk->ext.addr, daddr, af);
3638			switch (pd->proto) {
3639			case IPPROTO_ICMP:
3640#ifdef INET6
3641			case IPPROTO_ICMPV6:
3642#endif
3643				sk->gwy.port = nport;
3644				sk->ext.port = 0;
3645				break;
3646			default:
3647				sk->gwy.port = sport;
3648				sk->ext.port = dport;
3649			}
3650			if (nr != NULL) {
3651				PF_ACPY(&sk->lan.addr, &pd->baddr, af);
3652				sk->lan.port = bport;
3653			} else {
3654				PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af);
3655				sk->lan.port = sk->gwy.port;
3656			}
3657		} else {
3658			PF_ACPY(&sk->lan.addr, daddr, af);
3659			PF_ACPY(&sk->ext.addr, saddr, af);
3660			switch (pd->proto) {
3661			case IPPROTO_ICMP:
3662#ifdef INET6
3663			case IPPROTO_ICMPV6:
3664#endif
3665				sk->lan.port = nport;
3666				sk->ext.port = 0;
3667				break;
3668			default:
3669				sk->lan.port = dport;
3670				sk->ext.port = sport;
3671			}
3672			if (nr != NULL) {
3673				PF_ACPY(&sk->gwy.addr, &pd->baddr, af);
3674				sk->gwy.port = bport;
3675			} else {
3676				PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af);
3677				sk->gwy.port = sk->lan.port;
3678			}
3679		}
3680
3681		pf_set_rt_ifp(s, saddr);	/* needs s->state_key set */
3682
3683		if (pf_insert_state(bound_iface(r, nr, kif), s)) {
3684			if (pd->proto == IPPROTO_TCP)
3685				pf_normalize_tcp_cleanup(s);
3686			REASON_SET(&reason, PFRES_STATEINS);
3687			pf_src_tree_remove_state(s);
3688			STATE_DEC_COUNTERS(s);
3689			pool_put(&pf_state_pl, s);
3690			return (PF_DROP);
3691		} else
3692			*sm = s;
3693		if (tag > 0) {
3694			pf_tag_ref(tag);
3695			s->tag = tag;
3696		}
3697		if (pd->proto == IPPROTO_TCP &&
3698		    (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3699		    r->keep_state == PF_STATE_SYNPROXY) {
3700			s->src.state = PF_TCPS_PROXY_SRC;
3701			if (nr != NULL) {
3702				if (direction == PF_OUT) {
3703					pf_change_ap(saddr, &th->th_sport,
3704					    pd->ip_sum, &th->th_sum, &pd->baddr,
3705					    bport, 0, af);
3706					sport = th->th_sport;
3707				} else {
3708					pf_change_ap(daddr, &th->th_dport,
3709					    pd->ip_sum, &th->th_sum, &pd->baddr,
3710					    bport, 0, af);
3711					sport = th->th_dport;
3712				}
3713			}
3714			s->src.seqhi = htonl(cprng_fast32());
3715			/* Find mss option */
3716			mss = pf_get_mss(m, off, th->th_off, af);
3717			mss = pf_calc_mss(saddr, af, mss);
3718			mss = pf_calc_mss(daddr, af, mss);
3719			s->src.mss = mss;
3720			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3721			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3722			    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3723			REASON_SET(&reason, PFRES_SYNPROXY);
3724			return (PF_SYNPROXY_DROP);
3725		}
3726	}
3727
3728	/* copy back packet headers if we performed NAT operations */
3729	if (rewrite)
3730		m_copyback(m, off, hdrlen, pd->hdr.any);
3731
3732	return (PF_PASS);
3733}
3734
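/*
 * Rule evaluation for fragments that could not be reassembled: only rules
 * without port, flag, OS fingerprint or ICMP type/code criteria can match,
 * and no state is created.
 */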
3735int
3736pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3737    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3738    struct pf_ruleset **rsm)
3739{
3740	struct pf_rule		*r, *a = NULL;
3741	struct pf_ruleset	*ruleset = NULL;
3742	sa_family_t		 af = pd->af;
3743	u_short			 reason;
3744	int			 tag = -1;
3745	int			 asd = 0;
3746	int			 match = 0;
3747
3748	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3749	while (r != NULL) {
3750		r->evaluations++;
3751		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3752			r = r->skip[PF_SKIP_IFP].ptr;
3753		else if (r->direction && r->direction != direction)
3754			r = r->skip[PF_SKIP_DIR].ptr;
3755		else if (r->af && r->af != af)
3756			r = r->skip[PF_SKIP_AF].ptr;
3757		else if (r->proto && r->proto != pd->proto)
3758			r = r->skip[PF_SKIP_PROTO].ptr;
3759		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3760		    r->src.neg, kif))
3761			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3762		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3763		    r->dst.neg, NULL))
3764			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3765		else if (r->tos && !(r->tos == pd->tos))
3766			r = TAILQ_NEXT(r, entries);
3767		else if (r->src.port_op || r->dst.port_op ||
3768		    r->flagset || r->type || r->code ||
3769		    r->os_fingerprint != PF_OSFP_ANY)
3770			r = TAILQ_NEXT(r, entries);
3771		else if (r->prob && r->prob <= cprng_fast32())
3772			r = TAILQ_NEXT(r, entries);
3773		else if (r->match_tag && !pf_match_tag(m, r, &tag))
3774			r = TAILQ_NEXT(r, entries);
3775		else {
3776			if (r->anchor == NULL) {
3777				match = 1;
3778				*rm = r;
3779				*am = a;
3780				*rsm = ruleset;
3781				if ((*rm)->quick)
3782					break;
3783				r = TAILQ_NEXT(r, entries);
3784			} else
3785				pf_step_into_anchor(&asd, &ruleset,
3786				    PF_RULESET_FILTER, &r, &a, &match);
3787		}
3788		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3789		    PF_RULESET_FILTER, &r, &a, &match))
3790			break;
3791	}
3792	r = *rm;
3793	a = *am;
3794	ruleset = *rsm;
3795
3796	REASON_SET(&reason, PFRES_MATCH);
3797
3798	if (r->log)
3799		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
3800		    pd);
3801
3802	if (r->action != PF_PASS)
3803		return (PF_DROP);
3804
3805	if (pf_tag_packet(m, tag, -1)) {
3806		REASON_SET(&reason, PFRES_MEMORY);
3807		return (PF_DROP);
3808	}
3809
3810	return (PF_PASS);
3811}
3812
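/*
 * Match a TCP packet against an existing state entry, handle the SYN proxy
 * handshake phases and update the sequence number tracking windows.
 */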
3813int
3814pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3815    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3816    u_short *reason)
3817{
3818	struct pf_state_key_cmp	 key;
3819	struct tcphdr		*th = pd->hdr.tcp;
3820	u_int16_t		 win = ntohs(th->th_win);
3821	u_int32_t		 ack, end, seq, orig_seq;
3822	u_int8_t		 sws, dws;
3823	int			 ackskew;
3824	int			 copyback = 0;
3825	struct pf_state_peer	*src, *dst;
3826
3827	key.af = pd->af;
3828	key.proto = IPPROTO_TCP;
3829	if (direction == PF_IN)	{
3830		PF_ACPY(&key.ext.addr, pd->src, key.af);
3831		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3832		key.ext.port = th->th_sport;
3833		key.gwy.port = th->th_dport;
3834	} else {
3835		PF_ACPY(&key.lan.addr, pd->src, key.af);
3836		PF_ACPY(&key.ext.addr, pd->dst, key.af);
3837		key.lan.port = th->th_sport;
3838		key.ext.port = th->th_dport;
3839	}
3840
3841	STATE_LOOKUP();
3842
3843	if (direction == (*state)->state_key->direction) {
3844		src = &(*state)->src;
3845		dst = &(*state)->dst;
3846	} else {
3847		src = &(*state)->dst;
3848		dst = &(*state)->src;
3849	}
3850
3851	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3852		if (direction != (*state)->state_key->direction) {
3853			REASON_SET(reason, PFRES_SYNPROXY);
3854			return (PF_SYNPROXY_DROP);
3855		}
3856		if (th->th_flags & TH_SYN) {
3857			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
3858				REASON_SET(reason, PFRES_SYNPROXY);
3859				return (PF_DROP);
3860			}
3861			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3862			    pd->src, th->th_dport, th->th_sport,
3863			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3864			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
3865			    0, NULL, NULL);
3866			REASON_SET(reason, PFRES_SYNPROXY);
3867			return (PF_SYNPROXY_DROP);
3868		} else if (!(th->th_flags & TH_ACK) ||
3869		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3870		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3871			REASON_SET(reason, PFRES_SYNPROXY);
3872			return (PF_DROP);
3873		} else if ((*state)->src_node != NULL &&
3874		    pf_src_connlimit(state)) {
3875			REASON_SET(reason, PFRES_SRCLIMIT);
3876			return (PF_DROP);
3877		} else
3878			(*state)->src.state = PF_TCPS_PROXY_DST;
3879	}
3880	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3881		struct pf_state_host *psrc, *pdst;
3882
3883		if (direction == PF_OUT) {
3884			psrc = &(*state)->state_key->gwy;
3885			pdst = &(*state)->state_key->ext;
3886		} else {
3887			psrc = &(*state)->state_key->ext;
3888			pdst = &(*state)->state_key->lan;
3889		}
3890		if (direction == (*state)->state_key->direction) {
3891			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3892			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3893			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3894				REASON_SET(reason, PFRES_SYNPROXY);
3895				return (PF_DROP);
3896			}
3897			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3898			if ((*state)->dst.seqhi == 1)
3899				(*state)->dst.seqhi = htonl(cprng_fast32());
3900			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
3901			    &pdst->addr, psrc->port, pdst->port,
3902			    (*state)->dst.seqhi, 0, TH_SYN, 0,
3903			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
3904			REASON_SET(reason, PFRES_SYNPROXY);
3905			return (PF_SYNPROXY_DROP);
3906		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3907		    (TH_SYN|TH_ACK)) ||
3908		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
3909			REASON_SET(reason, PFRES_SYNPROXY);
3910			return (PF_DROP);
3911		} else {
3912			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3913			(*state)->dst.seqlo = ntohl(th->th_seq);
3914			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3915			    pd->src, th->th_dport, th->th_sport,
3916			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3917			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
3918			    (*state)->tag, NULL, NULL);
3919			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
3920			    &pdst->addr, psrc->port, pdst->port,
3921			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3922			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
3923			    0, NULL, NULL);
3924			(*state)->src.seqdiff = (*state)->dst.seqhi -
3925			    (*state)->src.seqlo;
3926			(*state)->dst.seqdiff = (*state)->src.seqhi -
3927			    (*state)->dst.seqlo;
3928			(*state)->src.seqhi = (*state)->src.seqlo +
3929			    (*state)->dst.max_win;
3930			(*state)->dst.seqhi = (*state)->dst.seqlo +
3931			    (*state)->src.max_win;
3932			(*state)->src.wscale = (*state)->dst.wscale = 0;
3933			(*state)->src.state = (*state)->dst.state =
3934			    TCPS_ESTABLISHED;
3935			REASON_SET(reason, PFRES_SYNPROXY);
3936			return (PF_SYNPROXY_DROP);
3937		}
3938	}
3939
3940	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3941		sws = src->wscale & PF_WSCALE_MASK;
3942		dws = dst->wscale & PF_WSCALE_MASK;
3943	} else
3944		sws = dws = 0;
3945
3946	/*
3947	 * Sequence tracking algorithm from Guido van Rooij's paper:
3948	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
3949	 *	tcp_filtering.ps
3950	 */
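	/*
	 * Informally, the checks further down accept a segment when its
	 * data lies within the window the peer has advertised, i.e.
	 * roughly
	 *   src->seqlo - (dst->max_win << dws) <= seq && end <= src->seqhi
	 * and when its ACK is within MAXACKWINDOW of dst->seqlo.
	 */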
3951
3952	orig_seq = seq = ntohl(th->th_seq);
3953	if (src->seqlo == 0) {
3954		/* First packet from this end. Set its state */
3955
3956		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3957		    src->scrub == NULL) {
3958			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3959				REASON_SET(reason, PFRES_MEMORY);
3960				return (PF_DROP);
3961			}
3962		}
3963
3964		/* Deferred generation of sequence number modulator */
3965		if (dst->seqdiff && !src->seqdiff) {
3966			while ((src->seqdiff = tcp_rndiss_next() - seq) == 0)
3967				;
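			/*
			 * A seqdiff of 0 means "no modulation", so keep
			 * drawing until we get a nonzero offset.
			 */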
3968			ack = ntohl(th->th_ack) - dst->seqdiff;
3969			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3970			    src->seqdiff), 0);
3971			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3972			copyback = 1;
3973		} else {
3974			ack = ntohl(th->th_ack);
3975		}
3976
3977		end = seq + pd->p_len;
3978		if (th->th_flags & TH_SYN) {
3979			end++;
3980			if (dst->wscale & PF_WSCALE_FLAG) {
3981				src->wscale = pf_get_wscale(m, off, th->th_off,
3982				    pd->af);
3983				if (src->wscale & PF_WSCALE_FLAG) {
3984					/* Remove scale factor from initial
3985					 * window */
3986					sws = src->wscale & PF_WSCALE_MASK;
3987					win = ((u_int32_t)win + (1 << sws) - 1)
3988					    >> sws;
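					/*
					 * This rounds up, e.g. win 65535
					 * with sws 7 becomes 512 rather
					 * than 511.
					 */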
3989					dws = dst->wscale & PF_WSCALE_MASK;
3990				} else {
3991					/* fixup other window */
3992					dst->max_win <<= dst->wscale &
3993					    PF_WSCALE_MASK;
3994					/* in case of a retrans SYN|ACK */
3995					dst->wscale = 0;
3996				}
3997			}
3998		}
3999		if (th->th_flags & TH_FIN)
4000			end++;
4001
4002		src->seqlo = seq;
4003		if (src->state < TCPS_SYN_SENT)
4004			src->state = TCPS_SYN_SENT;
4005
4006		/*
4007		 * May need to slide the window (seqhi may have been set by
4008		 * the crappy stack check or if we picked up the connection
4009		 * after establishment)
4010		 */
4011		if (src->seqhi == 1 ||
4012		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4013			src->seqhi = end + MAX(1, dst->max_win << dws);
4014		if (win > src->max_win)
4015			src->max_win = win;
4016
4017	} else {
4018		ack = ntohl(th->th_ack) - dst->seqdiff;
4019		if (src->seqdiff) {
4020			/* Modulate sequence numbers */
4021			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4022			    src->seqdiff), 0);
4023			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4024			copyback = 1;
4025		}
4026		end = seq + pd->p_len;
4027		if (th->th_flags & TH_SYN)
4028			end++;
4029		if (th->th_flags & TH_FIN)
4030			end++;
4031	}
4032
4033	if ((th->th_flags & TH_ACK) == 0) {
4034		/* Let it pass through the ack skew check */
4035		ack = dst->seqlo;
4036	} else if ((ack == 0 &&
4037	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4038	    /* broken tcp stacks do not set ack */
4039	    (dst->state < TCPS_SYN_SENT)) {
4040		/*
4041		 * Many stacks (ours included) will set the ACK number in a
4042		 * FIN|ACK if the SYN times out -- there is no sequence to ACK.
4043		 */
4044		ack = dst->seqlo;
4045	}
4046
4047	if (seq == end) {
4048		/* Ease sequencing restrictions for packets with no data */
4049		seq = src->seqlo;
4050		end = seq;
4051	}
4052
4053	ackskew = dst->seqlo - ack;
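	/*
	 * ackskew > 0 means the ACK lags behind the data we have already
	 * seen from dst; ackskew < 0 means it acknowledges data beyond
	 * dst->seqlo.
	 */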
4054
4055
4056	/*
4057	 * Need to demodulate the sequence numbers in any TCP SACK options
4058	 * (Selective ACK). We could optionally validate the SACK values
4059	 * against the current ACK window, either forwards or backwards, but
4060	 * I'm not confident that SACK has been implemented properly
4061	 * everywhere. It wouldn't surprise me if several stacks accidentally
4062	 * SACK too far backwards of previously ACKed data. There really aren't
4063	 * any security implications of bad SACKing unless the target stack
4064	 * doesn't validate the option length correctly. Someone trying to
4065	 * spoof into a TCP connection won't bother blindly sending SACK
4066	 * options anyway.
4067	 */
4068	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4069		if (pf_modulate_sack(m, off, pd, th, dst))
4070			copyback = 1;
4071	}
4072
4073
4074#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4075	if (SEQ_GEQ(src->seqhi, end) &&
4076	    /* Last octet inside other's window space */
4077	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4078	    /* Retrans: not more than one window back */
4079	    (ackskew >= -MAXACKWINDOW) &&
4080	    /* Acking not more than one reassembled fragment backwards */
4081	    (ackskew <= (MAXACKWINDOW << sws)) &&
4082	    /* Acking not more than one window forward */
4083	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4084	    (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
4085	    /* Require an exact/+1 sequence match on resets when possible */
4086
4087		if (dst->scrub || src->scrub) {
4088			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4089			    *state, src, dst, &copyback))
4090				return (PF_DROP);
4091		}
4092
4093		/* update max window */
4094		if (src->max_win < win)
4095			src->max_win = win;
4096		/* synchronize sequencing */
4097		if (SEQ_GT(end, src->seqlo))
4098			src->seqlo = end;
4099		/* slide the window of what the other end can send */
4100		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4101			dst->seqhi = ack + MAX((win << sws), 1);
4102
4103
4104		/* update states */
4105		if (th->th_flags & TH_SYN)
4106			if (src->state < TCPS_SYN_SENT)
4107				src->state = TCPS_SYN_SENT;
4108		if (th->th_flags & TH_FIN)
4109			if (src->state < TCPS_CLOSING)
4110				src->state = TCPS_CLOSING;
4111		if (th->th_flags & TH_ACK) {
4112			if (dst->state == TCPS_SYN_SENT) {
4113				dst->state = TCPS_ESTABLISHED;
4114				if (src->state == TCPS_ESTABLISHED &&
4115				    (*state)->src_node != NULL &&
4116				    pf_src_connlimit(state)) {
4117					REASON_SET(reason, PFRES_SRCLIMIT);
4118					return (PF_DROP);
4119				}
4120			} else if (dst->state == TCPS_CLOSING)
4121				dst->state = TCPS_FIN_WAIT_2;
4122		}
4123		if (th->th_flags & TH_RST)
4124			src->state = dst->state = TCPS_TIME_WAIT;
4125
4126		/* update expire time */
4127		(*state)->expire = time_second;
4128		if (src->state >= TCPS_FIN_WAIT_2 &&
4129		    dst->state >= TCPS_FIN_WAIT_2)
4130			(*state)->timeout = PFTM_TCP_CLOSED;
4131		else if (src->state >= TCPS_CLOSING &&
4132		    dst->state >= TCPS_CLOSING)
4133			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4134		else if (src->state < TCPS_ESTABLISHED ||
4135		    dst->state < TCPS_ESTABLISHED)
4136			(*state)->timeout = PFTM_TCP_OPENING;
4137		else if (src->state >= TCPS_CLOSING ||
4138		    dst->state >= TCPS_CLOSING)
4139			(*state)->timeout = PFTM_TCP_CLOSING;
4140		else
4141			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4142
4143		/* Fall through to PASS packet */
4144
4145	} else if ((dst->state < TCPS_SYN_SENT ||
4146		dst->state >= TCPS_FIN_WAIT_2 ||
4147		src->state >= TCPS_FIN_WAIT_2) &&
4148	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4149	    /* Within a window forward of the originating packet */
4150	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4151	    /* Within a window backward of the originating packet */
4152
4153		/*
4154		 * This currently handles three situations:
4155		 *  1) Stupid stacks will shotgun SYNs before their peer
4156		 *     replies.
4157		 *  2) When PF catches an already established stream (the
4158		 *     firewall rebooted, the state table was flushed, routes
4159		 *     changed...)
4160		 *  3) Packets get funky immediately after the connection
4161		 *     closes (this should catch Solaris spurious ACK|FINs
4162		 *     that web servers like to spew after a close)
4163		 *
4164		 * This must be a little more careful than the above code
4165		 * since packet floods will also be caught here. We don't
4166		 * update the state TTL here; that limits the damage a packet
4167		 * flood can do, and it lets the same code handle both awkward
4168		 * establishment and a loosened connection close.
4169		 * In the establishment case, a correct peer response will
4170		 * validate the connection, go through the normal state code
4171		 * and keep updating the state TTL.
4172		 */
4173
4174		if (pf_status.debug >= PF_DEBUG_MISC) {
4175			printf("pf: loose state match: ");
4176			pf_print_state(*state);
4177			pf_print_flags(th->th_flags);
4178			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4179			    "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
4180			    ackskew,
4181			    (unsigned long long int)(*state)->packets[0],
4182			    (unsigned long long int)(*state)->packets[1]);
4183		}
4184
4185		if (dst->scrub || src->scrub) {
4186			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4187			    *state, src, dst, &copyback))
4188				return (PF_DROP);
4189		}
4190
4191		/* update max window */
4192		if (src->max_win < win)
4193			src->max_win = win;
4194		/* synchronize sequencing */
4195		if (SEQ_GT(end, src->seqlo))
4196			src->seqlo = end;
4197		/* slide the window of what the other end can send */
4198		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4199			dst->seqhi = ack + MAX((win << sws), 1);
4200
4201		/*
4202		 * Cannot set dst->seqhi here since this could be a shotgunned
4203		 * SYN and not an already established connection.
4204		 */
4205
4206		if (th->th_flags & TH_FIN)
4207			if (src->state < TCPS_CLOSING)
4208				src->state = TCPS_CLOSING;
4209		if (th->th_flags & TH_RST)
4210			src->state = dst->state = TCPS_TIME_WAIT;
4211
4212		/* Fall through to PASS packet */
4213
4214	} else {
4215		if ((*state)->dst.state == TCPS_SYN_SENT &&
4216		    (*state)->src.state == TCPS_SYN_SENT) {
4217			/* Send RST for state mismatches during handshake */
4218			if (!(th->th_flags & TH_RST))
4219				pf_send_tcp((*state)->rule.ptr, pd->af,
4220				    pd->dst, pd->src, th->th_dport,
4221				    th->th_sport, ntohl(th->th_ack), 0,
4222				    TH_RST, 0, 0,
4223				    (*state)->rule.ptr->return_ttl, 1, 0,
4224				    pd->eh, kif->pfik_ifp);
4225			src->seqlo = 0;
4226			src->seqhi = 1;
4227			src->max_win = 1;
4228		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4229			printf("pf: BAD state: ");
4230			pf_print_state(*state);
4231			pf_print_flags(th->th_flags);
4232			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4233			    "pkts=%llu:%llu dir=%s,%s\n",
4234			    seq, orig_seq, ack, pd->p_len, ackskew,
4235			    (unsigned long long int)(*state)->packets[0],
4236			    (unsigned long long int)(*state)->packets[1],
4237			    direction == PF_IN ? "in" : "out",
4238			    direction == (*state)->state_key->direction ?
4239				"fwd" : "rev");
4240			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4241			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4242			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4243			    ' ': '2',
4244			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4245			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4246			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4247			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4248		}
4249		REASON_SET(reason, PFRES_BADSTATE);
4250		return (PF_DROP);
4251	}
4252
4253	/* Any packets that have gotten this far are to be passed */
4254
4255	/* translate source/destination address, if necessary */
4256	if (STATE_TRANSLATE((*state)->state_key)) {
4257		if (direction == PF_OUT)
4258			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4259			    &th->th_sum, &(*state)->state_key->gwy.addr,
4260			    (*state)->state_key->gwy.port, 0, pd->af);
4261		else
4262			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4263			    &th->th_sum, &(*state)->state_key->lan.addr,
4264			    (*state)->state_key->lan.port, 0, pd->af);
4265		m_copyback(m, off, sizeof(*th), th);
4266	} else if (copyback) {
4267		/* Copyback sequence modulation or stateful scrub changes */
4268		m_copyback(m, off, sizeof(*th), th);
4269	}
4270
4271	return (PF_PASS);
4272}
4273
4274int
4275pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4276    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4277{
4278	struct pf_state_peer	*src, *dst;
4279	struct pf_state_key_cmp	 key;
4280	struct udphdr		*uh = pd->hdr.udp;
4281
4282	key.af = pd->af;
4283	key.proto = IPPROTO_UDP;
4284	if (direction == PF_IN)	{
4285		PF_ACPY(&key.ext.addr, pd->src, key.af);
4286		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4287		key.ext.port = uh->uh_sport;
4288		key.gwy.port = uh->uh_dport;
4289	} else {
4290		PF_ACPY(&key.lan.addr, pd->src, key.af);
4291		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4292		key.lan.port = uh->uh_sport;
4293		key.ext.port = uh->uh_dport;
4294	}
4295
4296	STATE_LOOKUP();
4297
4298	if (direction == (*state)->state_key->direction) {
4299		src = &(*state)->src;
4300		dst = &(*state)->dst;
4301	} else {
4302		src = &(*state)->dst;
4303		dst = &(*state)->src;
4304	}
4305
4306	/* update states */
4307	if (src->state < PFUDPS_SINGLE)
4308		src->state = PFUDPS_SINGLE;
4309	if (dst->state == PFUDPS_SINGLE)
4310		dst->state = PFUDPS_MULTIPLE;
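	/*
	 * Once both endpoints have sent at least one packet the
	 * pseudo-connection is considered bidirectional and gets the
	 * longer PFTM_UDP_MULTIPLE timeout below.
	 */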
4311
4312	/* update expire time */
4313	(*state)->expire = time_second;
4314	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4315		(*state)->timeout = PFTM_UDP_MULTIPLE;
4316	else
4317		(*state)->timeout = PFTM_UDP_SINGLE;
4318
4319	/* translate source/destination address, if necessary */
4320	if (STATE_TRANSLATE((*state)->state_key)) {
4321		if (direction == PF_OUT)
4322			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4323			    &uh->uh_sum, &(*state)->state_key->gwy.addr,
4324			    (*state)->state_key->gwy.port, 1, pd->af);
4325		else
4326			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4327			    &uh->uh_sum, &(*state)->state_key->lan.addr,
4328			    (*state)->state_key->lan.port, 1, pd->af);
4329		m_copyback(m, off, sizeof(*uh), uh);
4330	}
4331
4332	return (PF_PASS);
4333}
4334
4335int
4336pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4337    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4338    u_short *reason)
4339{
4340	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4341	u_int16_t	 icmpid = 0, *icmpsum;
4342	u_int8_t	 icmptype;
4343	int		 state_icmp = 0;
4344	struct pf_state_key_cmp key;
4345
4346	icmpsum = NULL;	/* XXXGCC -Wuninitialized m68k */
4347	icmptype = 0;	/* XXXGCC -Wuninitialized m68k */
4348
4349	switch (pd->proto) {
4350#ifdef INET
4351	case IPPROTO_ICMP:
4352		icmptype = pd->hdr.icmp->icmp_type;
4353		icmpid = pd->hdr.icmp->icmp_id;
4354		icmpsum = &pd->hdr.icmp->icmp_cksum;
4355
4356		if (icmptype == ICMP_UNREACH ||
4357		    icmptype == ICMP_SOURCEQUENCH ||
4358		    icmptype == ICMP_REDIRECT ||
4359		    icmptype == ICMP_TIMXCEED ||
4360		    icmptype == ICMP_PARAMPROB)
4361			state_icmp++;
4362		break;
4363#endif /* INET */
4364#ifdef INET6
4365	case IPPROTO_ICMPV6:
4366		icmptype = pd->hdr.icmp6->icmp6_type;
4367		icmpid = pd->hdr.icmp6->icmp6_id;
4368		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4369
4370		if (icmptype == ICMP6_DST_UNREACH ||
4371		    icmptype == ICMP6_PACKET_TOO_BIG ||
4372		    icmptype == ICMP6_TIME_EXCEEDED ||
4373		    icmptype == ICMP6_PARAM_PROB)
4374			state_icmp++;
4375		break;
4376#endif /* INET6 */
4377	}
4378
4379	if (!state_icmp) {
4380
4381		/*
4382		 * ICMP query/reply message not related to a TCP/UDP packet.
4383		 * Search for an ICMP state.
4384		 */
4385		key.af = pd->af;
4386		key.proto = pd->proto;
4387		if (direction == PF_IN)	{
4388			PF_ACPY(&key.ext.addr, pd->src, key.af);
4389			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4390			key.ext.port = 0;
4391			key.gwy.port = icmpid;
4392		} else {
4393			PF_ACPY(&key.lan.addr, pd->src, key.af);
4394			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4395			key.lan.port = icmpid;
4396			key.ext.port = 0;
4397		}
4398
4399		STATE_LOOKUP();
4400
4401		(*state)->expire = time_second;
4402		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4403
4404		/* translate source/destination address, if necessary */
4405		if (STATE_TRANSLATE((*state)->state_key)) {
4406			if (direction == PF_OUT) {
4407				switch (pd->af) {
4408#ifdef INET
4409				case AF_INET:
4410					pf_change_a(&saddr->v4.s_addr,
4411					    pd->ip_sum,
4412					    (*state)->state_key->gwy.addr.v4.s_addr, 0);
4413					pd->hdr.icmp->icmp_cksum =
4414					    pf_cksum_fixup(
4415					    pd->hdr.icmp->icmp_cksum, icmpid,
4416					    (*state)->state_key->gwy.port, 0);
4417					pd->hdr.icmp->icmp_id =
4418					    (*state)->state_key->gwy.port;
4419					m_copyback(m, off, ICMP_MINLEN,
4420					    pd->hdr.icmp);
4421					break;
4422#endif /* INET */
4423#ifdef INET6
4424				case AF_INET6:
4425					pf_change_a6(saddr,
4426					    &pd->hdr.icmp6->icmp6_cksum,
4427					    &(*state)->state_key->gwy.addr, 0);
4428					m_copyback(m, off,
4429					    sizeof(struct icmp6_hdr),
4430					    pd->hdr.icmp6);
4431					break;
4432#endif /* INET6 */
4433				}
4434			} else {
4435				switch (pd->af) {
4436#ifdef INET
4437				case AF_INET:
4438					pf_change_a(&daddr->v4.s_addr,
4439					    pd->ip_sum,
4440					    (*state)->state_key->lan.addr.v4.s_addr, 0);
4441					pd->hdr.icmp->icmp_cksum =
4442					    pf_cksum_fixup(
4443					    pd->hdr.icmp->icmp_cksum, icmpid,
4444					    (*state)->state_key->lan.port, 0);
4445					pd->hdr.icmp->icmp_id =
4446					    (*state)->state_key->lan.port;
4447					m_copyback(m, off, ICMP_MINLEN,
4448					    pd->hdr.icmp);
4449					break;
4450#endif /* INET */
4451#ifdef INET6
4452				case AF_INET6:
4453					pf_change_a6(daddr,
4454					    &pd->hdr.icmp6->icmp6_cksum,
4455					    &(*state)->state_key->lan.addr, 0);
4456					m_copyback(m, off,
4457					    sizeof(struct icmp6_hdr),
4458					    pd->hdr.icmp6);
4459					break;
4460#endif /* INET6 */
4461				}
4462			}
4463		}
4464
4465		return (PF_PASS);
4466
4467	} else {
4468		/*
4469		 * ICMP error message in response to a TCP/UDP packet.
4470		 * Extract the inner TCP/UDP header and search for that state.
4471		 */
4472
4473		struct pf_pdesc	pd2;
4474#ifdef INET
4475		struct ip	h2;
4476#endif /* INET */
4477#ifdef INET6
4478		struct ip6_hdr	h2_6;
4479		int		terminal = 0;
4480#endif /* INET6 */
4481		int		ipoff2 = 0;
4482		int		off2 = 0;
4483
4484		memset(&pd2, 0, sizeof pd2);	/* XXX gcc */
4485
4486		pd2.af = pd->af;
4487		switch (pd->af) {
4488#ifdef INET
4489		case AF_INET:
4490			/* offset of h2 in mbuf chain */
4491			ipoff2 = off + ICMP_MINLEN;
4492
4493			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4494			    NULL, reason, pd2.af)) {
4495				DPFPRINTF(PF_DEBUG_MISC,
4496				    ("pf: ICMP error message too short "
4497				    "(ip)\n"));
4498				return (PF_DROP);
4499			}
4500			/*
4501			 * ICMP error messages don't refer to non-first
4502			 * fragments
4503			 */
4504			if (h2.ip_off & htons(IP_OFFMASK)) {
4505				REASON_SET(reason, PFRES_FRAG);
4506				return (PF_DROP);
4507			}
4508
4509			/* offset of protocol header that follows h2 */
4510			off2 = ipoff2 + (h2.ip_hl << 2);
4511
4512			pd2.proto = h2.ip_p;
4513			pd2.src = (struct pf_addr *)&h2.ip_src;
4514			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4515			pd2.ip_sum = &h2.ip_sum;
4516			break;
4517#endif /* INET */
4518#ifdef INET6
4519		case AF_INET6:
4520			ipoff2 = off + sizeof(struct icmp6_hdr);
4521
4522			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4523			    NULL, reason, pd2.af)) {
4524				DPFPRINTF(PF_DEBUG_MISC,
4525				    ("pf: ICMP error message too short "
4526				    "(ip6)\n"));
4527				return (PF_DROP);
4528			}
4529			pd2.proto = h2_6.ip6_nxt;
4530			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4531			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4532			pd2.ip_sum = NULL;
4533			off2 = ipoff2 + sizeof(h2_6);
4534			do {
4535				switch (pd2.proto) {
4536				case IPPROTO_FRAGMENT:
4537					/*
4538					 * ICMPv6 error messages for
4539					 * non-first fragments
4540					 */
4541					REASON_SET(reason, PFRES_FRAG);
4542					return (PF_DROP);
4543				case IPPROTO_AH:
4544				case IPPROTO_HOPOPTS:
4545				case IPPROTO_ROUTING:
4546				case IPPROTO_DSTOPTS: {
4547					/* get next header and header length */
4548					struct ip6_ext opt6;
4549
4550					if (!pf_pull_hdr(m, off2, &opt6,
4551					    sizeof(opt6), NULL, reason,
4552					    pd2.af)) {
4553						DPFPRINTF(PF_DEBUG_MISC,
4554						    ("pf: ICMPv6 short opt\n"));
4555						return (PF_DROP);
4556					}
4557					if (pd2.proto == IPPROTO_AH)
4558						off2 += (opt6.ip6e_len + 2) * 4;
4559					else
4560						off2 += (opt6.ip6e_len + 1) * 8;
4561					pd2.proto = opt6.ip6e_nxt;
4562					/* advance to the next header */
4563					break;
4564				}
4565				default:
4566					terminal++;
4567					break;
4568				}
4569			} while (!terminal);
4570			break;
4571#endif /* INET6 */
4572		}
4573
4574		switch (pd2.proto) {
4575		case IPPROTO_TCP: {
4576			struct tcphdr		 th;
4577			u_int32_t		 seq;
4578			struct pf_state_peer	*src, *dst;
4579			u_int8_t		 dws;
4580			int			 copyback = 0;
4581
4582			/*
4583			 * Only the first 8 bytes of the TCP header can be
4584			 * expected. Don't access any TCP header fields after
4585			 * th_seq; an ackskew test is not possible.
4586			 */
4587			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
4588			    pd2.af)) {
4589				DPFPRINTF(PF_DEBUG_MISC,
4590				    ("pf: ICMP error message too short "
4591				    "(tcp)\n"));
4592				return (PF_DROP);
4593			}
4594
4595			key.af = pd2.af;
4596			key.proto = IPPROTO_TCP;
4597			if (direction == PF_IN)	{
4598				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4599				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4600				key.ext.port = th.th_dport;
4601				key.gwy.port = th.th_sport;
4602			} else {
4603				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4604				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4605				key.lan.port = th.th_dport;
4606				key.ext.port = th.th_sport;
4607			}
4608
4609			STATE_LOOKUP();
4610
4611			if (direction == (*state)->state_key->direction) {
4612				src = &(*state)->dst;
4613				dst = &(*state)->src;
4614			} else {
4615				src = &(*state)->src;
4616				dst = &(*state)->dst;
4617			}
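			/*
			 * Note: src/dst are swapped relative to the other
			 * state tests because the header quoted inside the
			 * ICMP error describes a packet travelling in the
			 * opposite direction.
			 */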
4618
4619			if (src->wscale && dst->wscale)
4620				dws = dst->wscale & PF_WSCALE_MASK;
4621			else
4622				dws = 0;
4623
4624			/* Demodulate sequence number */
4625			seq = ntohl(th.th_seq) - src->seqdiff;
4626			if (src->seqdiff) {
4627				pf_change_a(&th.th_seq, icmpsum,
4628				    htonl(seq), 0);
4629				copyback = 1;
4630			}
4631
4632			if (!SEQ_GEQ(src->seqhi, seq) ||
4633			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4634				if (pf_status.debug >= PF_DEBUG_MISC) {
4635					printf("pf: BAD ICMP %d:%d ",
4636					    icmptype, pd->hdr.icmp->icmp_code);
4637					pf_print_host(pd->src, 0, pd->af);
4638					printf(" -> ");
4639					pf_print_host(pd->dst, 0, pd->af);
4640					printf(" state: ");
4641					pf_print_state(*state);
4642					printf(" seq=%u\n", seq);
4643				}
4644				REASON_SET(reason, PFRES_BADSTATE);
4645				return (PF_DROP);
4646			}
4647
4648			if (STATE_TRANSLATE((*state)->state_key)) {
4649				if (direction == PF_IN) {
4650					pf_change_icmp(pd2.src, &th.th_sport,
4651					    daddr, &(*state)->state_key->lan.addr,
4652					    (*state)->state_key->lan.port, NULL,
4653					    pd2.ip_sum, icmpsum,
4654					    pd->ip_sum, 0, pd2.af);
4655				} else {
4656					pf_change_icmp(pd2.dst, &th.th_dport,
4657					    saddr, &(*state)->state_key->gwy.addr,
4658					    (*state)->state_key->gwy.port, NULL,
4659					    pd2.ip_sum, icmpsum,
4660					    pd->ip_sum, 0, pd2.af);
4661				}
4662				copyback = 1;
4663			}
4664
4665			if (copyback) {
4666				switch (pd2.af) {
4667#ifdef INET
4668				case AF_INET:
4669					m_copyback(m, off, ICMP_MINLEN,
4670					    pd->hdr.icmp);
4671					m_copyback(m, ipoff2, sizeof(h2),
4672					    &h2);
4673					break;
4674#endif /* INET */
4675#ifdef INET6
4676				case AF_INET6:
4677					m_copyback(m, off,
4678					    sizeof(struct icmp6_hdr),
4679					    pd->hdr.icmp6);
4680					m_copyback(m, ipoff2, sizeof(h2_6),
4681					    &h2_6);
4682					break;
4683#endif /* INET6 */
4684				}
4685				m_copyback(m, off2, 8, &th);
4686			}
4687
4688			return (PF_PASS);
4689			break;
4690		}
4691		case IPPROTO_UDP: {
4692			struct udphdr		uh;
4693
4694			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4695			    NULL, reason, pd2.af)) {
4696				DPFPRINTF(PF_DEBUG_MISC,
4697				    ("pf: ICMP error message too short "
4698				    "(udp)\n"));
4699				return (PF_DROP);
4700			}
4701
4702			key.af = pd2.af;
4703			key.proto = IPPROTO_UDP;
4704			if (direction == PF_IN)	{
4705				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4706				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4707				key.ext.port = uh.uh_dport;
4708				key.gwy.port = uh.uh_sport;
4709			} else {
4710				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4711				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4712				key.lan.port = uh.uh_dport;
4713				key.ext.port = uh.uh_sport;
4714			}
4715
4716			STATE_LOOKUP();
4717
4718			if (STATE_TRANSLATE((*state)->state_key)) {
4719				if (direction == PF_IN) {
4720					pf_change_icmp(pd2.src, &uh.uh_sport,
4721					    daddr,
4722					    &(*state)->state_key->lan.addr,
4723					    (*state)->state_key->lan.port,
4724					    &uh.uh_sum,
4725					    pd2.ip_sum, icmpsum,
4726					    pd->ip_sum, 1, pd2.af);
4727				} else {
4728					pf_change_icmp(pd2.dst, &uh.uh_dport,
4729					    saddr,
4730					    &(*state)->state_key->gwy.addr,
4731					    (*state)->state_key->gwy.port, &uh.uh_sum,
4732					    pd2.ip_sum, icmpsum,
4733					    pd->ip_sum, 1, pd2.af);
4734				}
4735				switch (pd2.af) {
4736#ifdef INET
4737				case AF_INET:
4738					m_copyback(m, off, ICMP_MINLEN,
4739					    pd->hdr.icmp);
4740					m_copyback(m, ipoff2, sizeof(h2), &h2);
4741					break;
4742#endif /* INET */
4743#ifdef INET6
4744				case AF_INET6:
4745					m_copyback(m, off,
4746					    sizeof(struct icmp6_hdr),
4747					    pd->hdr.icmp6);
4748					m_copyback(m, ipoff2, sizeof(h2_6),
4749					    &h2_6);
4750					break;
4751#endif /* INET6 */
4752				}
4753				m_copyback(m, off2, sizeof(uh), &uh);
4754			}
4755
4756			return (PF_PASS);
4757			break;
4758		}
4759#ifdef INET
4760		case IPPROTO_ICMP: {
4761			struct icmp		iih;
4762
4763			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4764			    NULL, reason, pd2.af)) {
4765				DPFPRINTF(PF_DEBUG_MISC,
4766				    ("pf: ICMP error message too short "
4767				    "(icmp)\n"));
4768				return (PF_DROP);
4769			}
4770
4771			key.af = pd2.af;
4772			key.proto = IPPROTO_ICMP;
4773			if (direction == PF_IN)	{
4774				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4775				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4776				key.ext.port = 0;
4777				key.gwy.port = iih.icmp_id;
4778			} else {
4779				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4780				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4781				key.lan.port = iih.icmp_id;
4782				key.ext.port = 0;
4783			}
4784
4785			STATE_LOOKUP();
4786
4787			if (STATE_TRANSLATE((*state)->state_key)) {
4788				if (direction == PF_IN) {
4789					pf_change_icmp(pd2.src, &iih.icmp_id,
4790					    daddr,
4791					    &(*state)->state_key->lan.addr,
4792					    (*state)->state_key->lan.port, NULL,
4793					    pd2.ip_sum, icmpsum,
4794					    pd->ip_sum, 0, AF_INET);
4795				} else {
4796					pf_change_icmp(pd2.dst, &iih.icmp_id,
4797					    saddr,
4798					    &(*state)->state_key->gwy.addr,
4799					    (*state)->state_key->gwy.port, NULL,
4800					    pd2.ip_sum, icmpsum,
4801					    pd->ip_sum, 0, AF_INET);
4802				}
4803				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
4804				m_copyback(m, ipoff2, sizeof(h2), &h2);
4805				m_copyback(m, off2, ICMP_MINLEN, &iih);
4806			}
4807
4808			return (PF_PASS);
4809			break;
4810		}
4811#endif /* INET */
4812#ifdef INET6
4813		case IPPROTO_ICMPV6: {
4814			struct icmp6_hdr	iih;
4815
4816			if (!pf_pull_hdr(m, off2, &iih,
4817			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
4818				DPFPRINTF(PF_DEBUG_MISC,
4819				    ("pf: ICMP error message too short "
4820				    "(icmp6)\n"));
4821				return (PF_DROP);
4822			}
4823
4824			key.af = pd2.af;
4825			key.proto = IPPROTO_ICMPV6;
4826			if (direction == PF_IN)	{
4827				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4828				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4829				key.ext.port = 0;
4830				key.gwy.port = iih.icmp6_id;
4831			} else {
4832				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4833				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4834				key.lan.port = iih.icmp6_id;
4835				key.ext.port = 0;
4836			}
4837
4838			STATE_LOOKUP();
4839
4840			if (STATE_TRANSLATE((*state)->state_key)) {
4841				if (direction == PF_IN) {
4842					pf_change_icmp(pd2.src, &iih.icmp6_id,
4843					    daddr,
4844					    &(*state)->state_key->lan.addr,
4845					    (*state)->state_key->lan.port, NULL,
4846					    pd2.ip_sum, icmpsum,
4847					    pd->ip_sum, 0, AF_INET6);
4848				} else {
4849					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4850					    saddr, &(*state)->state_key->gwy.addr,
4851					    (*state)->state_key->gwy.port, NULL,
4852					    pd2.ip_sum, icmpsum,
4853					    pd->ip_sum, 0, AF_INET6);
4854				}
4855				m_copyback(m, off, sizeof(struct icmp6_hdr),
4856				    pd->hdr.icmp6);
4857				m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
4858				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4859				    &iih);
4860			}
4861
4862			return (PF_PASS);
4863			break;
4864		}
4865#endif /* INET6 */
4866		default: {
4867			key.af = pd2.af;
4868			key.proto = pd2.proto;
4869			if (direction == PF_IN)	{
4870				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4871				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4872				key.ext.port = 0;
4873				key.gwy.port = 0;
4874			} else {
4875				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4876				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4877				key.lan.port = 0;
4878				key.ext.port = 0;
4879			}
4880
4881			STATE_LOOKUP();
4882
4883			if (STATE_TRANSLATE((*state)->state_key)) {
4884				if (direction == PF_IN) {
4885					pf_change_icmp(pd2.src, NULL,
4886					    daddr,
4887					    &(*state)->state_key->lan.addr,
4888					    0, NULL,
4889					    pd2.ip_sum, icmpsum,
4890					    pd->ip_sum, 0, pd2.af);
4891				} else {
4892					pf_change_icmp(pd2.dst, NULL,
4893					    saddr,
4894					    &(*state)->state_key->gwy.addr,
4895					    0, NULL,
4896					    pd2.ip_sum, icmpsum,
4897					    pd->ip_sum, 0, pd2.af);
4898				}
4899				switch (pd2.af) {
4900#ifdef INET
4901				case AF_INET:
4902					m_copyback(m, off, ICMP_MINLEN,
4903					    pd->hdr.icmp);
4904					m_copyback(m, ipoff2, sizeof(h2), &h2);
4905					break;
4906#endif /* INET */
4907#ifdef INET6
4908				case AF_INET6:
4909					m_copyback(m, off,
4910					    sizeof(struct icmp6_hdr),
4911					    pd->hdr.icmp6);
4912					m_copyback(m, ipoff2, sizeof(h2_6),
4913					    &h2_6);
4914					break;
4915#endif /* INET6 */
4916				}
4917			}
4918
4919			return (PF_PASS);
4920			break;
4921		}
4922		}
4923	}
4924}
4925
4926int
4927pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4928    struct pf_pdesc *pd)
4929{
4930	struct pf_state_peer	*src, *dst;
4931	struct pf_state_key_cmp	 key;
4932
4933	key.af = pd->af;
4934	key.proto = pd->proto;
4935	if (direction == PF_IN)	{
4936		PF_ACPY(&key.ext.addr, pd->src, key.af);
4937		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4938		key.ext.port = 0;
4939		key.gwy.port = 0;
4940	} else {
4941		PF_ACPY(&key.lan.addr, pd->src, key.af);
4942		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4943		key.lan.port = 0;
4944		key.ext.port = 0;
4945	}
4946
4947	STATE_LOOKUP();
4948
4949	if (direction == (*state)->state_key->direction) {
4950		src = &(*state)->src;
4951		dst = &(*state)->dst;
4952	} else {
4953		src = &(*state)->dst;
4954		dst = &(*state)->src;
4955	}
4956
4957	/* update states */
4958	if (src->state < PFOTHERS_SINGLE)
4959		src->state = PFOTHERS_SINGLE;
4960	if (dst->state == PFOTHERS_SINGLE)
4961		dst->state = PFOTHERS_MULTIPLE;
4962
4963	/* update expire time */
4964	(*state)->expire = time_second;
4965	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4966		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4967	else
4968		(*state)->timeout = PFTM_OTHER_SINGLE;
4969
4970	/* translate source/destination address, if necessary */
4971	if (STATE_TRANSLATE((*state)->state_key)) {
4972		if (direction == PF_OUT)
4973			switch (pd->af) {
4974#ifdef INET
4975			case AF_INET:
4976				pf_change_a(&pd->src->v4.s_addr,
4977				    pd->ip_sum,
4978				    (*state)->state_key->gwy.addr.v4.s_addr,
4979				    0);
4980				break;
4981#endif /* INET */
4982#ifdef INET6
4983			case AF_INET6:
4984				PF_ACPY(pd->src,
4985				    &(*state)->state_key->gwy.addr, pd->af);
4986				break;
4987#endif /* INET6 */
4988			}
4989		else
4990			switch (pd->af) {
4991#ifdef INET
4992			case AF_INET:
4993				pf_change_a(&pd->dst->v4.s_addr,
4994				    pd->ip_sum,
4995				    (*state)->state_key->lan.addr.v4.s_addr,
4996				    0);
4997				break;
4998#endif /* INET */
4999#ifdef INET6
5000			case AF_INET6:
5001				PF_ACPY(pd->dst,
5002				    &(*state)->state_key->lan.addr, pd->af);
5003				break;
5004#endif /* INET6 */
5005			}
5006	}
5007
5008	return (PF_PASS);
5009}
5010
5011/*
5012 * ipoff and off are measured from the start of the mbuf chain.
5013 * h must be at "ipoff" on the mbuf chain.
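 * Returns p with len bytes copied out on success, or NULL with
 * *actionp/*reasonp set when the header lies in a non-first fragment
 * or the packet is too short.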
5014 */
5015void *
5016pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5017    u_short *actionp, u_short *reasonp, sa_family_t af)
5018{
5019	switch (af) {
5020#ifdef INET
5021	case AF_INET: {
5022		struct ip	*h = mtod(m, struct ip *);
5023		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5024
5025		if (fragoff) {
5026			if (fragoff >= len)
5027				ACTION_SET(actionp, PF_PASS);
5028			else {
5029				ACTION_SET(actionp, PF_DROP);
5030				REASON_SET(reasonp, PFRES_FRAG);
5031			}
5032			return (NULL);
5033		}
5034		if (m->m_pkthdr.len < off + len ||
5035		    ntohs(h->ip_len) < off + len) {
5036			ACTION_SET(actionp, PF_DROP);
5037			REASON_SET(reasonp, PFRES_SHORT);
5038			return (NULL);
5039		}
5040		break;
5041	}
5042#endif /* INET */
5043#ifdef INET6
5044	case AF_INET6: {
5045		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5046
5047		if (m->m_pkthdr.len < off + len ||
5048		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5049		    (unsigned)(off + len)) {
5050			ACTION_SET(actionp, PF_DROP);
5051			REASON_SET(reasonp, PFRES_SHORT);
5052			return (NULL);
5053		}
5054		break;
5055	}
5056#endif /* INET6 */
5057	}
5058	m_copydata(m, off, len, p);
5059	return (p);
5060}
5061
5062int
5063pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5064{
5065#ifdef __NetBSD__
5066	union {
5067		struct sockaddr		dst;
5068		struct sockaddr_in	dst4;
5069		struct sockaddr_in6	dst6;
5070	} u;
5071	struct route		 ro;
5072	int			 ret = 1;
5073
5074	bzero(&ro, sizeof(ro));
5075	switch (af) {
5076	case AF_INET:
5077		sockaddr_in_init(&u.dst4, &addr->v4, 0);
5078		break;
5079#ifdef INET6
5080	case AF_INET6:
5081		sockaddr_in6_init(&u.dst6, &addr->v6, 0, 0, 0);
5082		break;
5083#endif /* INET6 */
5084	default:
5085		return (0);
5086	}
5087	rtcache_setdst(&ro, &u.dst);
5088
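	/*
	 * On NetBSD a destination is considered routable when a route
	 * lookup simply succeeds; the uRPF/multipath interface check is
	 * only performed in the non-NetBSD code below.
	 */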
5089	ret = rtcache_init(&ro) != NULL ? 1 : 0;
5090	rtcache_free(&ro);
5091
5092	return (ret);
5093#else /* !__NetBSD__ */
5094	struct sockaddr_in	*dst;
5095	int			 ret = 1;
5096	int			 check_mpath;
5097	extern int		 ipmultipath;
5098#ifdef INET6
5099	extern int		 ip6_multipath;
5100	struct sockaddr_in6	*dst6;
5101	struct route_in6	 ro;
5102#else
5103	struct route		 ro;
5104#endif
5105	struct radix_node	*rn;
5106	struct rtentry		*rt;
5107	struct ifnet		*ifp;
5108
5109	check_mpath = 0;
5110	bzero(&ro, sizeof(ro));
5111	switch (af) {
5112	case AF_INET:
5113		dst = satosin(&ro.ro_dst);
5114		dst->sin_family = AF_INET;
5115		dst->sin_len = sizeof(*dst);
5116		dst->sin_addr = addr->v4;
5117		if (ipmultipath)
5118			check_mpath = 1;
5119		break;
5120#ifdef INET6
5121	case AF_INET6:
5122		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5123		dst6->sin6_family = AF_INET6;
5124		dst6->sin6_len = sizeof(*dst6);
5125		dst6->sin6_addr = addr->v6;
5126		if (ip6_multipath)
5127			check_mpath = 1;
5128		break;
5129#endif /* INET6 */
5130	default:
5131		return (0);
5132	}
5133
5134	/* Skip checks for ipsec interfaces */
5135	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5136		goto out;
5137
5138	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5139
5140	if (ro.ro_rt != NULL) {
5141		/* No interface given, this is a no-route check */
5142		if (kif == NULL)
5143			goto out;
5144
5145		if (kif->pfik_ifp == NULL) {
5146			ret = 0;
5147			goto out;
5148		}
5149
5150		/* Perform uRPF check if passed input interface */
5151		ret = 0;
5152		rn = (struct radix_node *)ro.ro_rt;
5153		do {
5154			rt = (struct rtentry *)rn;
5155			if (rt->rt_ifp->if_type == IFT_CARP)
5156				ifp = rt->rt_ifp->if_carpdev;
5157			else
5158				ifp = rt->rt_ifp;
5159
5160			if (kif->pfik_ifp == ifp)
5161				ret = 1;
5162			rn = rn_mpath_next(rn);
5163		} while (check_mpath == 1 && rn != NULL && ret == 0);
5164	} else
5165		ret = 0;
5166out:
5167	if (ro.ro_rt != NULL)
5168		RTFREE(ro.ro_rt);
5169	return (ret);
5170#endif /* !__NetBSD__ */
5171}
5172
5173int
5174pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5175{
5176#ifdef __NetBSD__
5177	/* NetBSD doesn't have route labels. */
5178
5179	return (0);
5180#else
5181	struct sockaddr_in	*dst;
5182#ifdef INET6
5183	struct sockaddr_in6	*dst6;
5184	struct route_in6	 ro;
5185#else
5186	struct route		 ro;
5187#endif
5188	int			 ret = 0;
5189
5190	bzero(&ro, sizeof(ro));
5191	switch (af) {
5192	case AF_INET:
5193		dst = satosin(&ro.ro_dst);
5194		dst->sin_family = AF_INET;
5195		dst->sin_len = sizeof(*dst);
5196		dst->sin_addr = addr->v4;
5197		break;
5198#ifdef INET6
5199	case AF_INET6:
5200		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5201		dst6->sin6_family = AF_INET6;
5202		dst6->sin6_len = sizeof(*dst6);
5203		dst6->sin6_addr = addr->v6;
5204		break;
5205#endif /* INET6 */
5206	default:
5207		return (0);
5208	}
5209
5210	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5211
5212	if (ro.ro_rt != NULL) {
5213		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
5214			ret = 1;
5215		RTFREE(ro.ro_rt);
5216	}
5217
5218	return (ret);
5219#endif /* !__NetBSD__ */
5220}
5221
5222#ifdef INET
5223void
5224pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5225    struct pf_state *s, struct pf_pdesc *pd)
5226{
5227	struct mbuf		*m0, *m1;
5228	struct route		 iproute;
5229	struct route		*ro = NULL;
5230	const struct sockaddr	*dst;
5231	union {
5232		struct sockaddr		dst;
5233		struct sockaddr_in	dst4;
5234	} u;
5235	struct ip		*ip;
5236	struct ifnet		*ifp = NULL;
5237	struct pf_addr		 naddr;
5238	struct pf_src_node	*sn = NULL;
5239	int			 error = 0;
5240#ifdef KAME_IPSEC
5241	struct m_tag		*mtag;
5242#endif /* KAME_IPSEC */
5243#ifdef __NetBSD__
5244	struct pf_mtag		*pf_mtag;
5245#endif /* __NetBSD__ */
5246
5247	if (m == NULL || *m == NULL || r == NULL ||
5248	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5249		panic("pf_route: invalid parameters");
5250
5251#ifdef __NetBSD__
5252	if ((pf_mtag = pf_get_mtag(*m)) == NULL) {
5253		m0 = *m;
5254		*m = NULL;
5255		goto bad;
5256	}
5257	if (pf_mtag->routed++ > 3) {
5258		m0 = *m;
5259		*m = NULL;
5260		goto bad;
5261	}
5262#else
5263	if ((*m)->m_pkthdr.pf.routed++ > 3) {
5264		m0 = *m;
5265		*m = NULL;
5266		goto bad;
5267	}
5268#endif /* !__NetBSD__ */
5269
5270	if (r->rt == PF_DUPTO) {
5271		if ((m0 = m_dup(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5272			return;
5273	} else {
5274		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5275			return;
5276		m0 = *m;
5277	}
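	/*
	 * The test above means route-to is applied only in the rule's
	 * own direction and reply-to only on the opposite (reply) leg;
	 * dup-to always duplicates the mbuf and leaves the original to
	 * continue on its normal path.
	 */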
5278
5279	if (m0->m_len < sizeof(struct ip)) {
5280		DPFPRINTF(PF_DEBUG_URGENT,
5281		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5282		goto bad;
5283	}
5284
5285	ip = mtod(m0, struct ip *);
5286
5287	ro = &iproute;
5288	memset(ro, 0, sizeof(*ro));
5289	sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
5290	dst = &u.dst;
5291	rtcache_setdst(ro, dst);
5292
5293	if (r->rt == PF_FASTROUTE) {
5294		struct rtentry *rt;
5295
5296		rt = rtcache_init(ro);
5297
5298		if (rt == NULL) {
5299			ip_statinc(IP_STAT_NOROUTE);
5300			goto bad;
5301		}
5302
5303		ifp = rt->rt_ifp;
5304		rt->rt_use++;
5305
5306		if (rt->rt_flags & RTF_GATEWAY)
5307			dst = rt->rt_gateway;
5308	} else {
5309		if (TAILQ_EMPTY(&r->rpool.list)) {
5310			DPFPRINTF(PF_DEBUG_URGENT,
5311			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
5312			goto bad;
5313		}
5314		if (s == NULL) {
5315			pf_map_addr(AF_INET, r,
5316			    (const struct pf_addr *)&ip->ip_src,
5317			    &naddr, NULL, &sn);
5318			if (!PF_AZERO(&naddr, AF_INET))
5319				u.dst4.sin_addr.s_addr = naddr.v4.s_addr;
5320			ifp = r->rpool.cur->kif ?
5321			    r->rpool.cur->kif->pfik_ifp : NULL;
5322		} else {
5323			if (!PF_AZERO(&s->rt_addr, AF_INET))
5324				u.dst4.sin_addr.s_addr = s->rt_addr.v4.s_addr;
5325			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5326		}
5327	}
5328	if (ifp == NULL)
5329		goto bad;
5330
5331	if (oifp != ifp) {
5332		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5333			goto bad;
5334		else if (m0 == NULL)
5335			goto done;
5336		if (m0->m_len < sizeof(struct ip)) {
5337			DPFPRINTF(PF_DEBUG_URGENT,
5338			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5339			goto bad;
5340		}
5341		ip = mtod(m0, struct ip *);
5342	}
5343
5344	/* Copied from ip_output. */
5345#ifdef KAME_IPSEC
5346	/*
5347	 * If deferred crypto processing is needed, check that the
5348	 * interface supports it.
5349	 */
5350	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
5351	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
5352		/* Notify IPsec to do its own crypto. */
5353		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
5354		goto bad;
5355	}
5356#endif /* KAME_IPSEC */
5357
5358	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5359#ifdef __NetBSD__
5360	if (m0->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
5361		in_delayed_cksum(m0);
5362		m0->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
5363	}
5364#else
5365	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
5366		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5367		    ifp->if_bridge != NULL) {
5368			in_delayed_cksum(m0);
5369			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */
5370		}
5371	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
5372		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5373		    ifp->if_bridge != NULL) {
5374			in_delayed_cksum(m0);
5375			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */
5376		}
5377	}
5378#endif /* !__NetBSD__ */
5379
5380	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5381#ifdef __NetBSD__
5382		ip->ip_sum = 0;
5383		ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5384
5385		m0->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
5386#else
5387		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5388		    ifp->if_bridge == NULL) {
5389			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
5390			ipstat.ips_outhwcsum++;
5391		} else {
5392			ip->ip_sum = 0;
5393			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5394		}
5395		/* Update relevant hardware checksum stats for TCP/UDP */
5396		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
5397			tcpstat.tcps_outhwcsum++;
5398		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
5399			udpstat.udps_outhwcsum++;
5400#endif /* !__NetBSD__ */
5401		error = (*ifp->if_output)(ifp, m0, dst, NULL);
5402		goto done;
5403	}
5404
5405	/*
5406	 * Too large for interface; fragment if possible.
5407	 * Must be able to put at least 8 bytes per fragment.
5408	 */
5409	if (ip->ip_off & htons(IP_DF)) {
5410		ip_statinc(IP_STAT_CANTFRAG);
5411		if (r->rt != PF_DUPTO) {
5412			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5413			    ifp->if_mtu);
5414			goto done;
5415		} else
5416			goto bad;
5417	}
5418
5419#ifdef __NetBSD__
5420	/* Make ip_fragment re-compute checksums. */
5421	if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
5422		m0->m_pkthdr.csum_flags |= M_CSUM_IPv4;
5423	}
5424#endif /* __NetBSD__ */
5425	m1 = m0;
5426	error = ip_fragment(m0, ifp, ifp->if_mtu);
5427	if (error) {
5428		m0 = NULL;
5429		goto bad;
5430	}
5431
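	/*
	 * ip_fragment leaves the fragments chained through m_nextpkt
	 * starting at the original mbuf; unhook and transmit them one
	 * by one.
	 */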
5432	for (m0 = m1; m0; m0 = m1) {
5433		m1 = m0->m_nextpkt;
5434		m0->m_nextpkt = 0;
5435		if (error == 0)
5436			error = (*ifp->if_output)(ifp, m0, dst, NULL);
5437		else
5438			m_freem(m0);
5439	}
5440
5441	if (error == 0)
5442		ip_statinc(IP_STAT_FRAGMENTED);
5443
5444done:
5445	if (r->rt != PF_DUPTO)
5446		*m = NULL;
5447	if (ro == &iproute)
5448		rtcache_free(ro);
5449	return;
5450
5451bad:
5452	m_freem(m0);
5453	goto done;
5454}
5455#endif /* INET */
5456
5457#ifdef INET6
5458void
5459pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5460    struct pf_state *s, struct pf_pdesc *pd)
5461{
5462	struct mbuf		*m0;
5463	struct sockaddr_in6	 dst;
5464	struct ip6_hdr		*ip6;
5465	struct ifnet		*ifp = NULL;
5466	struct pf_addr		 naddr;
5467	struct pf_src_node	*sn = NULL;
5468	int			 error = 0;
5469#ifdef __NetBSD__
5470	struct pf_mtag		*pf_mtag;
5471#endif /* __NetBSD__ */
5472
5473	if (m == NULL || *m == NULL || r == NULL ||
5474	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5475		panic("pf_route6: invalid parameters");
5476
5477#ifdef __NetBSD__
5478	if ((pf_mtag = pf_get_mtag(*m)) == NULL) {
5479		m0 = *m;
5480		*m = NULL;
5481		goto bad;
5482	}
5483	if (pf_mtag->routed++ > 3) {
5484		m0 = *m;
5485		*m = NULL;
5486		goto bad;
5487	}
5488#else
5489	if ((*m)->m_pkthdr.pf.routed++ > 3) {
5490		m0 = *m;
5491		*m = NULL;
5492		goto bad;
5493	}
5494#endif /* !__NetBSD__ */
5495
5496	if (r->rt == PF_DUPTO) {
5497		if ((m0 = m_dup(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5498			return;
5499	} else {
5500		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5501			return;
5502		m0 = *m;
5503	}
5504
5505	if (m0->m_len < sizeof(struct ip6_hdr)) {
5506		DPFPRINTF(PF_DEBUG_URGENT,
5507		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5508		goto bad;
5509	}
5510	ip6 = mtod(m0, struct ip6_hdr *);
5511
5512	dst.sin6_family = AF_INET6;
5513	dst.sin6_len = sizeof(dst);
5514	dst.sin6_addr = ip6->ip6_dst;
5515
5516	/* Cheat. XXX why only in the v6 case??? */
5517	if (r->rt == PF_FASTROUTE) {
5518#ifdef __NetBSD__
5519		pf_mtag->flags |= PF_TAG_GENERATED;
5520#else
5521		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
5522#endif /* !__NetBSD__ */
5523		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5524		return;
5525	}
5526
5527	if (TAILQ_EMPTY(&r->rpool.list)) {
5528		DPFPRINTF(PF_DEBUG_URGENT,
5529		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
5530		goto bad;
5531	}
5532	if (s == NULL) {
5533		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5534		    &naddr, NULL, &sn);
5535		if (!PF_AZERO(&naddr, AF_INET6))
5536			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5537			    &naddr, AF_INET6);
5538		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5539	} else {
5540		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5541			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5542			    &s->rt_addr, AF_INET6);
5543		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5544	}
5545	if (ifp == NULL)
5546		goto bad;
5547
5548	if (oifp != ifp) {
5549		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5550			goto bad;
5551		else if (m0 == NULL)
5552			goto done;
5553		if (m0->m_len < sizeof(struct ip6_hdr)) {
5554			DPFPRINTF(PF_DEBUG_URGENT,
5555			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5556			goto bad;
5557		}
5558		ip6 = mtod(m0, struct ip6_hdr *);
5559	}
5560
5561	/*
5562	 * If the packet is too large for the outgoing interface,
5563	 * send back an icmp6 error.
5564	 */
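	/*
	 * For scoped destinations (e.g. link-local) the KAME-derived
	 * stack expects the outgoing interface index to be embedded in
	 * the second 16-bit word of the address.
	 */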
5565	if (IN6_IS_SCOPE_EMBEDDABLE(&dst.sin6_addr))
5566		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5567	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5568		error = nd6_output(ifp, ifp, m0, &dst, NULL);
5569	} else {
5570		in6_ifstat_inc(ifp, ifs6_in_toobig);
5571		if (r->rt != PF_DUPTO)
5572			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5573		else
5574			goto bad;
5575	}
5576
5577done:
5578	if (r->rt != PF_DUPTO)
5579		*m = NULL;
5580	return;
5581
5582bad:
5583	m_freem(m0);
5584	goto done;
5585}
5586#endif /* INET6 */
5587
5588
5589/*
5590 * Check the protocol (TCP/UDP/ICMP/ICMPv6) checksum and set the mbuf flag.
5591 *   off is the offset where the protocol header starts
5592 *   len is the total length of the protocol header plus payload
5593 * Returns 0 when the checksum is valid, otherwise returns 1.
5594 */
5595#ifdef __NetBSD__
5596int
5597pf_check_proto_cksum(struct mbuf *m, int direction, int off, int len,
5598    u_int8_t p, sa_family_t af)
5599#else
5600int
5601pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5602    sa_family_t af)
5603#endif /* !__NetBSD__ */
5604{
5605#ifndef __NetBSD__
5606	u_int16_t flag_ok, flag_bad;
5607#endif /* !__NetBSD__ */
5608	u_int16_t sum;
5609
5610#ifndef __NetBSD__
5611	switch (p) {
5612	case IPPROTO_TCP:
5613		flag_ok = M_TCP_CSUM_IN_OK;
5614		flag_bad = M_TCP_CSUM_IN_BAD;
5615		break;
5616	case IPPROTO_UDP:
5617		flag_ok = M_UDP_CSUM_IN_OK;
5618		flag_bad = M_UDP_CSUM_IN_BAD;
5619		break;
5620	case IPPROTO_ICMP:
5621#ifdef INET6
5622	case IPPROTO_ICMPV6:
5623#endif /* INET6 */
5624		flag_ok = flag_bad = 0;
5625		break;
5626	default:
5627		return (1);
5628	}
5629	if (m->m_pkthdr.csum_flags & flag_ok)
5630		return (0);
5631	if (m->m_pkthdr.csum_flags & flag_bad)
5632		return (1);
5633#endif /* !__NetBSD__ */
5634	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5635		return (1);
5636	if (m->m_pkthdr.len < off + len)
5637		return (1);
5638#ifdef __NetBSD__
5639	if (direction == PF_IN) {
5640		switch (p) {
5641		case IPPROTO_TCP: {
5642			struct tcphdr th; /* XXX */
5643			int thlen;
5644
5645			m_copydata(m, off, sizeof(th), &th); /* XXX */
5646			thlen = th.th_off << 2;
5647			return tcp_input_checksum(af, m, &th, off,
5648			    thlen, len - thlen) != 0;
5649		}
5650
5651		case IPPROTO_UDP: {
5652			struct udphdr uh; /* XXX */
5653
5654			m_copydata(m, off, sizeof(uh), &uh); /* XXX */
5655			return udp_input_checksum(af, m, &uh, off, len) != 0;
5656		}
5657		}
5658	}
5659#endif /* __NetBSD__ */
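	/*
	 * Anything not handled above (ICMP/ICMPv6, and every protocol in
	 * the output direction on NetBSD) falls back to computing the
	 * checksum in software below.
	 */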
5660	switch (af) {
5661#ifdef INET
5662	case AF_INET:
5663		if (p == IPPROTO_ICMP) {
5664			if (m->m_len < off)
5665				return (1);
5666			m->m_data += off;
5667			m->m_len -= off;
5668			sum = in_cksum(m, len);
5669			m->m_data -= off;
5670			m->m_len += off;
5671		} else {
5672			if (m->m_len < sizeof(struct ip))
5673				return (1);
5674			sum = in4_cksum(m, p, off, len);
5675		}
5676		break;
5677#endif /* INET */
5678#ifdef INET6
5679	case AF_INET6:
5680		if (m->m_len < sizeof(struct ip6_hdr))
5681			return (1);
5682		sum = in6_cksum(m, p, off, len);
5683		break;
5684#endif /* INET6 */
5685	default:
5686		return (1);
5687	}
5688	if (sum) {
5689#ifndef __NetBSD__
5690		m->m_pkthdr.csum_flags |= flag_bad;
5691#endif /* !__NetBSD__ */
5692		switch (p) {
5693		case IPPROTO_TCP:
5694			tcp_statinc(TCP_STAT_RCVBADSUM);
5695			break;
5696		case IPPROTO_UDP:
5697			udp_statinc(UDP_STAT_BADSUM);
5698			break;
5699		case IPPROTO_ICMP:
5700			icmp_statinc(ICMP_STAT_CHECKSUM);
5701			break;
5702#ifdef INET6
5703		case IPPROTO_ICMPV6:
5704			icmp6_statinc(ICMP6_STAT_CHECKSUM);
5705			break;
5706#endif /* INET6 */
5707		}
5708		return (1);
5709	}
5710#ifndef __NetBSD__
5711	m->m_pkthdr.csum_flags |= flag_ok;
5712#endif /* !__NetBSD__ */
5713	return (0);
5714}
5715
5716#ifdef INET
5717int
5718pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
5719    struct ether_header *eh)
5720{
5721	struct pfi_kif		*kif;
5722	u_short			 action, reason = 0, log = 0;
5723	struct mbuf		*m = *m0;
5724	struct ip		*h = NULL;
5725	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
5726	struct pf_state		*s = NULL;
5727	struct pf_state_key	*sk = NULL;
5728	struct pf_ruleset	*ruleset = NULL;
5729	struct pf_pdesc		 pd;
5730	int			 off, dirndx, pqid = 0;
5731#ifdef __NetBSD__
5732	struct pf_mtag		*pf_mtag = NULL; /* XXX gcc */
5733#endif /* __NetBSD__ */
5734
5735	if (!pf_status.running)
5736		return (PF_PASS);
5737
5738	memset(&pd, 0, sizeof(pd));
5739	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
5740		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
5741	else
5742		kif = (struct pfi_kif *)ifp->if_pf_kif;
5743
5744	if (kif == NULL) {
5745		DPFPRINTF(PF_DEBUG_URGENT,
5746		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
5747		return (PF_DROP);
5748	}
5749	if (kif->pfik_flags & PFI_IFLAG_SKIP)
5750		return (PF_PASS);
5751
5752#ifdef DIAGNOSTIC
5753	if ((m->m_flags & M_PKTHDR) == 0)
5754		panic("non-M_PKTHDR is passed to pf_test");
5755#endif /* DIAGNOSTIC */
5756
5757	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5758		action = PF_DROP;
5759		REASON_SET(&reason, PFRES_SHORT);
5760		log = 1;
5761		goto done;
5762	}
5763
5764#ifdef __NetBSD__
5765	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
5766		DPFPRINTF(PF_DEBUG_URGENT,
5767		    ("pf_test: pf_get_mtag returned NULL\n"));
5768		return (PF_DROP);
5769	}
5770	if (pf_mtag->flags & PF_TAG_GENERATED)
5771		return (PF_PASS);
5772#else
5773	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
5774		return (PF_PASS);
5775#endif /* !__NetBSD__ */
5776
5777	/* We do IP header normalization and packet reassembly here */
5778	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
5779		action = PF_DROP;
5780		goto done;
5781	}
5782	m = *m0;	/* pf_normalize messes with m0 */
5783	h = mtod(m, struct ip *);
5784
5785	off = h->ip_hl << 2;
5786	if (off < (int)sizeof(*h)) {
5787		action = PF_DROP;
5788		REASON_SET(&reason, PFRES_SHORT);
5789		log = 1;
5790		goto done;
5791	}
5792
5793	pd.src = (struct pf_addr *)&h->ip_src;
5794	pd.dst = (struct pf_addr *)&h->ip_dst;
5795	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
5796	pd.ip_sum = &h->ip_sum;
5797	pd.proto = h->ip_p;
5798	pd.af = AF_INET;
5799	pd.tos = h->ip_tos;
5800	pd.tot_len = ntohs(h->ip_len);
5801	pd.eh = eh;
5802
5803	/* handle fragments that didn't get reassembled by normalization */
5804	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5805		action = pf_test_fragment(&r, dir, kif, m, h,
5806		    &pd, &a, &ruleset);
5807		goto done;
5808	}
5809
5810	switch (h->ip_p) {
5811
5812	case IPPROTO_TCP: {
5813		struct tcphdr	th;
5814
5815		pd.hdr.tcp = &th;
5816		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5817		    &action, &reason, AF_INET)) {
5818			log = action != PF_PASS;
5819			goto done;
5820		}
5821		pd.p_len = pd.tot_len - off - (th.th_off << 2);
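		/*
		 * Pure ACKs (no payload) may be assigned to the separate
		 * priority queue (r->pqid) by the ALTQ code below.
		 */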
5822		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5823			pqid = 1;
5824		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5825		if (action == PF_DROP)
5826			goto done;
5827		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5828		    &reason);
5829		if (action == PF_PASS) {
5830#if NPFSYNC
5831			pfsync_update_state(s);
5832#endif /* NPFSYNC */
5833			r = s->rule.ptr;
5834			a = s->anchor.ptr;
5835			log = s->log;
5836		} else if (s == NULL)
5837			action = pf_test_rule(&r, &s, dir, kif,
5838			    m, off, h, &pd, &a, &ruleset, &ipintrq);
5839		break;
5840	}
5841
5842	case IPPROTO_UDP: {
5843		struct udphdr	uh;
5844
5845		pd.hdr.udp = &uh;
5846		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5847		    &action, &reason, AF_INET)) {
5848			log = action != PF_PASS;
5849			goto done;
5850		}
5851		if (uh.uh_dport == 0 ||
5852		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5853		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5854			action = PF_DROP;
5855			REASON_SET(&reason, PFRES_SHORT);
5856			goto done;
5857		}
5858		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5859		if (action == PF_PASS) {
5860#if NPFSYNC
5861			pfsync_update_state(s);
5862#endif /* NPFSYNC */
5863			r = s->rule.ptr;
5864			a = s->anchor.ptr;
5865			log = s->log;
5866		} else if (s == NULL)
5867			action = pf_test_rule(&r, &s, dir, kif,
5868			    m, off, h, &pd, &a, &ruleset, &ipintrq);
5869		break;
5870	}
5871
5872	case IPPROTO_ICMP: {
5873		struct icmp	ih;
5874
5875		pd.hdr.icmp = &ih;
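		/* only the fixed 8-byte ICMP header (ICMP_MINLEN) is pulled here */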
5876		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5877		    &action, &reason, AF_INET)) {
5878			log = action != PF_PASS;
5879			goto done;
5880		}
5881		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
5882		    &reason);
5883		if (action == PF_PASS) {
5884#if NPFSYNC
5885			pfsync_update_state(s);
5886#endif /* NPFSYNC */
5887			r = s->rule.ptr;
5888			a = s->anchor.ptr;
5889			log = s->log;
5890		} else if (s == NULL)
5891			action = pf_test_rule(&r, &s, dir, kif,
5892			    m, off, h, &pd, &a, &ruleset, &ipintrq);
5893		break;
5894	}
5895
5896#ifdef INET6
5897	case IPPROTO_ICMPV6: {
5898		action = PF_DROP;
5899		DPFPRINTF(PF_DEBUG_MISC,
5900		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
5901		goto done;
5902	}
5903#endif
5904
5905	default:
5906		action = pf_test_state_other(&s, dir, kif, &pd);
5907		if (action == PF_PASS) {
5908#if NPFSYNC
5909			pfsync_update_state(s);
5910#endif /* NPFSYNC */
5911			r = s->rule.ptr;
5912			a = s->anchor.ptr;
5913			log = s->log;
5914		} else if (s == NULL)
5915			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
5916			    &pd, &a, &ruleset, &ipintrq);
5917		break;
5918	}
5919
5920done:
5921	if (action == PF_PASS && h->ip_hl > 5 &&
5922	    !((s && s->allow_opts) || r->allow_opts)) {
5923		action = PF_DROP;
5924		REASON_SET(&reason, PFRES_IPOPTIONS);
5925		log = 1;
5926		DPFPRINTF(PF_DEBUG_MISC,
5927		    ("pf: dropping packet with ip options\n"));
5928	}
5929
5930	if ((s && s->tag) || r->rtableid)
5931		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
5932
5933#ifdef ALTQ
5934	if (action == PF_PASS && r->qid) {
5935#ifdef __NetBSD__
5936		struct m_tag	*mtag;
5937		struct altq_tag	*atag;
5938
5939		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
5940		if (mtag != NULL) {
5941			atag = (struct altq_tag *)(mtag + 1);
5942			if (pqid || (pd.tos & IPTOS_LOWDELAY))
5943				atag->qid = r->pqid;
5944			else
5945				atag->qid = r->qid;
5946			/* add hints for ecn */
5947			atag->af = AF_INET;
5948			atag->hdr = h;
5949			m_tag_prepend(m, mtag);
5950		}
5951#else
5952		if (pqid || (pd.tos & IPTOS_LOWDELAY))
5953			m->m_pkthdr.pf.qid = r->pqid;
5954		else
5955			m->m_pkthdr.pf.qid = r->qid;
5956		/* add hints for ecn */
5957		m->m_pkthdr.pf.hdr = h;
5958#endif /* !__NetBSD__ */
5959	}
5960#endif /* ALTQ */
5961
5962	/*
5963	 * connections redirected to loopback should not match sockets
5964	 * bound specifically to loopback due to security implications,
5965	 * see tcp_input() and in_pcblookup_listen().
5966	 */
5967	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
5968	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
5969	    (s->nat_rule.ptr->action == PF_RDR ||
5970	    s->nat_rule.ptr->action == PF_BINAT) &&
5971	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
5972#ifdef __NetBSD__
5973		pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
5974#else
5975		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
5976#endif /* !__NetBSD__ */
5977
5978	if (log) {
5979		struct pf_rule *lr;
5980
5981		if (s != NULL && s->nat_rule.ptr != NULL &&
5982		    s->nat_rule.ptr->log & PF_LOG_ALL)
5983			lr = s->nat_rule.ptr;
5984		else
5985			lr = r;
5986		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
5987		    &pd);
5988	}
5989
5990	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
5991	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
5992
5993	if (action == PF_PASS || r->action == PF_DROP) {
5994		dirndx = (dir == PF_OUT);
5995		r->packets[dirndx]++;
5996		r->bytes[dirndx] += pd.tot_len;
5997		if (a != NULL) {
5998			a->packets[dirndx]++;
5999			a->bytes[dirndx] += pd.tot_len;
6000		}
6001		if (s != NULL) {
6002			sk = s->state_key;
6003			if (s->nat_rule.ptr != NULL) {
6004				s->nat_rule.ptr->packets[dirndx]++;
6005				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6006			}
6007			if (s->src_node != NULL) {
6008				s->src_node->packets[dirndx]++;
6009				s->src_node->bytes[dirndx] += pd.tot_len;
6010			}
6011			if (s->nat_src_node != NULL) {
6012				s->nat_src_node->packets[dirndx]++;
6013				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6014			}
6015			dirndx = (dir == sk->direction) ? 0 : 1;
6016			s->packets[dirndx]++;
6017			s->bytes[dirndx] += pd.tot_len;
6018		}
6019		tr = r;
6020		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6021		if (nr != NULL) {
6022			struct pf_addr *x;
6023			/*
6024			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same as
6026			 * the addresses used during matching (pfr_match)
6027			 */
6028			if (r == &pf_default_rule) {
6029				tr = nr;
6030				x = (sk == NULL || sk->direction == dir) ?
6031				    &pd.baddr : &pd.naddr;
6032			} else
6033				x = (sk == NULL || sk->direction == dir) ?
6034				    &pd.naddr : &pd.baddr;
6035			if (x == &pd.baddr || s == NULL) {
6036				/* we need to change the address */
6037				if (dir == PF_OUT)
6038					pd.src = x;
6039				else
6040					pd.dst = x;
6041			}
6042		}
6043		if (tr->src.addr.type == PF_ADDR_TABLE)
6044			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
6045			    sk->direction == dir) ?
6046			    pd.src : pd.dst, pd.af,
6047			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6048			    tr->src.neg);
6049		if (tr->dst.addr.type == PF_ADDR_TABLE)
6050			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
6051			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
6052			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6053			    tr->dst.neg);
6054	}
6055
6056
6057	if (action == PF_SYNPROXY_DROP) {
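		/*
		 * The synproxy answered this segment itself; consume the
		 * mbuf but report PF_PASS so callers do not treat it as
		 * an error.
		 */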
6058		m_freem(*m0);
6059		*m0 = NULL;
6060		action = PF_PASS;
6061	} else if (r->rt)
6062		/* pf_route can free the mbuf causing *m0 to become NULL */
6063		pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
6064
6065	return (action);
6066}
6067#endif /* INET */
6068
6069#ifdef INET6
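/*
 * pf_test6() is the IPv6 counterpart of pf_test().  Besides the usual
 * rule and state tests it walks the extension header chain to locate
 * the upper-layer protocol and rejects packets carrying dangerous
 * headers (type 0 routing headers, multiple routing headers).
 */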
6070int
6071pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
6072    struct ether_header *eh)
6073{
6074	struct pfi_kif		*kif;
6075	u_short			 action, reason = 0, log = 0;
6076	struct mbuf		*m = *m0, *n = NULL;
6077	struct ip6_hdr		*h = NULL; /* XXX gcc */
6078	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6079	struct pf_state		*s = NULL;
6080	struct pf_state_key	*sk = NULL;
6081	struct pf_ruleset	*ruleset = NULL;
6082	struct pf_pdesc		 pd;
6083	int			 off, terminal = 0, dirndx, rh_cnt = 0;
6084#ifdef __NetBSD__
6085	struct pf_mtag		*pf_mtag = NULL; /* XXX gcc */
6086#endif /* __NetBSD__ */
6087
6088	if (!pf_status.running)
6089		return (PF_PASS);
6090
6091	memset(&pd, 0, sizeof(pd));
6092	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6093		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
6094	else
6095		kif = (struct pfi_kif *)ifp->if_pf_kif;
6096
6097	if (kif == NULL) {
6098		DPFPRINTF(PF_DEBUG_URGENT,
6099		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
6100		return (PF_DROP);
6101	}
6102	if (kif->pfik_flags & PFI_IFLAG_SKIP)
6103		return (PF_PASS);
6104
6105#ifdef DIAGNOSTIC
6106	if ((m->m_flags & M_PKTHDR) == 0)
6107		panic("non-M_PKTHDR is passed to pf_test6");
6108#endif /* DIAGNOSTIC */
6109
6110	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6111		action = PF_DROP;
6112		REASON_SET(&reason, PFRES_SHORT);
6113		log = 1;
6114		goto done;
6115	}
6116
6117#ifdef __NetBSD__
6118	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
6119		DPFPRINTF(PF_DEBUG_URGENT,
6120		    ("pf_test6: pf_get_mtag returned NULL\n"));
6121		return (PF_DROP);
6122	}
6123	if (pf_mtag->flags & PF_TAG_GENERATED)
6124		return (PF_PASS);
6125#else
6126	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
6127		return (PF_PASS);
6128#endif /* !__NetBSD__ */
6129
6130	/* We do IP header normalization and packet reassembly here */
6131	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
6132		action = PF_DROP;
6133		goto done;
6134	}
6135	m = *m0;	/* pf_normalize messes with m0 */
6136	h = mtod(m, struct ip6_hdr *);
6137
6138#if 1
6139	/*
	 * we do not support jumbograms yet.  a jumbogram carries a zero
	 * ip6_plen, which would make pd.tot_len wrong further down, so
	 * drop the packet for now.
6142	 */
6143	if (htons(h->ip6_plen) == 0) {
6144		action = PF_DROP;
6145		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
6146		goto done;
6147	}
6148#endif
6149
6150	pd.src = (struct pf_addr *)&h->ip6_src;
6151	pd.dst = (struct pf_addr *)&h->ip6_dst;
6152	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6153	pd.ip_sum = NULL;
6154	pd.af = AF_INET6;
6155	pd.tos = 0;
6156	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6157	pd.eh = eh;
6158
6159	off = ((char *)h - m->m_data) + sizeof(struct ip6_hdr);
6160	pd.proto = h->ip6_nxt;
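	/*
	 * Walk the extension header chain, advancing off and pd.proto,
	 * until an upper-layer (terminal) protocol is reached.
	 */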
6161	do {
6162		switch (pd.proto) {
6163		case IPPROTO_FRAGMENT:
6164			action = pf_test_fragment(&r, dir, kif, m, h,
6165			    &pd, &a, &ruleset);
6166			if (action == PF_DROP)
6167				REASON_SET(&reason, PFRES_FRAG);
6168			goto done;
6169		case IPPROTO_ROUTING: {
6170			struct ip6_rthdr rthdr;
6171
6172			if (rh_cnt++) {
6173				DPFPRINTF(PF_DEBUG_MISC,
6174				    ("pf: IPv6 more than one rthdr\n"));
6175				action = PF_DROP;
6176				REASON_SET(&reason, PFRES_IPOPTIONS);
6177				log = 1;
6178				goto done;
6179			}
6180			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
6181			    &reason, pd.af)) {
6182				DPFPRINTF(PF_DEBUG_MISC,
6183				    ("pf: IPv6 short rthdr\n"));
6184				action = PF_DROP;
6185				REASON_SET(&reason, PFRES_SHORT);
6186				log = 1;
6187				goto done;
6188			}
6189			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6190				DPFPRINTF(PF_DEBUG_MISC,
6191				    ("pf: IPv6 rthdr0\n"));
6192				action = PF_DROP;
6193				REASON_SET(&reason, PFRES_IPOPTIONS);
6194				log = 1;
6195				goto done;
6196			}
6197			/* FALLTHROUGH */
6198		}
6199		case IPPROTO_AH:
6200		case IPPROTO_HOPOPTS:
6201		case IPPROTO_DSTOPTS: {
6202			/* get next header and header length */
6203			struct ip6_ext	opt6;
6204
6205			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6206			    NULL, &reason, pd.af)) {
6207				DPFPRINTF(PF_DEBUG_MISC,
6208				    ("pf: IPv6 short opt\n"));
6209				action = PF_DROP;
6210				log = 1;
6211				goto done;
6212			}
6213			if (pd.proto == IPPROTO_AH)
6214				off += (opt6.ip6e_len + 2) * 4;
6215			else
6216				off += (opt6.ip6e_len + 1) * 8;
6217			pd.proto = opt6.ip6e_nxt;
			/* go to the next header */
6219			break;
6220		}
6221		default:
6222			terminal++;
6223			break;
6224		}
6225	} while (!terminal);
6226
6227	/* if there's no routing header, use unmodified mbuf for checksumming */
6228	if (!n)
6229		n = m;
6230
6231	switch (pd.proto) {
6232
6233	case IPPROTO_TCP: {
6234		struct tcphdr	th;
6235
6236		pd.hdr.tcp = &th;
6237		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6238		    &action, &reason, AF_INET6)) {
6239			log = action != PF_PASS;
6240			goto done;
6241		}
6242		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6243		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6244		if (action == PF_DROP)
6245			goto done;
6246		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6247		    &reason);
6248		if (action == PF_PASS) {
6249#if NPFSYNC
6250			pfsync_update_state(s);
6251#endif /* NPFSYNC */
6252			r = s->rule.ptr;
6253			a = s->anchor.ptr;
6254			log = s->log;
6255		} else if (s == NULL)
6256			action = pf_test_rule(&r, &s, dir, kif,
6257			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
6258		break;
6259	}
6260
6261	case IPPROTO_UDP: {
6262		struct udphdr	uh;
6263
6264		pd.hdr.udp = &uh;
6265		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6266		    &action, &reason, AF_INET6)) {
6267			log = action != PF_PASS;
6268			goto done;
6269		}
6270		if (uh.uh_dport == 0 ||
6271		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6272		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6273			action = PF_DROP;
6274			REASON_SET(&reason, PFRES_SHORT);
6275			goto done;
6276		}
6277		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6278		if (action == PF_PASS) {
6279#if NPFSYNC
6280			pfsync_update_state(s);
6281#endif /* NPFSYNC */
6282			r = s->rule.ptr;
6283			a = s->anchor.ptr;
6284			log = s->log;
6285		} else if (s == NULL)
6286			action = pf_test_rule(&r, &s, dir, kif,
6287			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
6288		break;
6289	}
6290
6291#ifdef INET
6292	case IPPROTO_ICMP: {
6293		action = PF_DROP;
6294		DPFPRINTF(PF_DEBUG_MISC,
6295		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
6296		goto done;
6297	}
6298#endif
6299
6300	case IPPROTO_ICMPV6: {
6301		struct icmp6_hdr	ih;
6302
6303		pd.hdr.icmp6 = &ih;
6304		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6305		    &action, &reason, AF_INET6)) {
6306			log = action != PF_PASS;
6307			goto done;
6308		}
6309		action = pf_test_state_icmp(&s, dir, kif,
6310		    m, off, h, &pd, &reason);
6311		if (action == PF_PASS) {
6312#if NPFSYNC
6313			pfsync_update_state(s);
6314#endif /* NPFSYNC */
6315			r = s->rule.ptr;
6316			a = s->anchor.ptr;
6317			log = s->log;
6318		} else if (s == NULL)
6319			action = pf_test_rule(&r, &s, dir, kif,
6320			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
6321		break;
6322	}
6323
6324	default:
6325		action = pf_test_state_other(&s, dir, kif, &pd);
6326		if (action == PF_PASS) {
6327#if NPFSYNC
6328			pfsync_update_state(s);
6329#endif /* NPFSYNC */
6330			r = s->rule.ptr;
6331			a = s->anchor.ptr;
6332			log = s->log;
6333		} else if (s == NULL)
6334			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
6335			    &pd, &a, &ruleset, &ip6intrq);
6336		break;
6337	}
6338
6339done:
6340	if (n != m) {
6341		m_freem(n);
6342		n = NULL;
6343	}
6344
6345	/* handle dangerous IPv6 extension headers. */
6346	if (action == PF_PASS && rh_cnt &&
6347	    !((s && s->allow_opts) || r->allow_opts)) {
6348		action = PF_DROP;
6349		REASON_SET(&reason, PFRES_IPOPTIONS);
6350		log = 1;
6351		DPFPRINTF(PF_DEBUG_MISC,
6352		    ("pf: dropping packet with dangerous v6 headers\n"));
6353	}
6354
6355	if ((s && s->tag) || r->rtableid)
6356		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
6357
6358#ifdef ALTQ
6359	if (action == PF_PASS && r->qid) {
6360#ifdef __NetBSD__
6361		struct m_tag	*mtag;
6362		struct altq_tag	*atag;
6363
6364		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
6365		if (mtag != NULL) {
6366			atag = (struct altq_tag *)(mtag + 1);
6367			if (pd.tos & IPTOS_LOWDELAY)
6368				atag->qid = r->pqid;
6369			else
6370				atag->qid = r->qid;
6371			/* add hints for ecn */
6372			atag->af = AF_INET6;
6373			atag->hdr = h;
6374			m_tag_prepend(m, mtag);
6375		}
6376#else
6377		if (pd.tos & IPTOS_LOWDELAY)
6378			m->m_pkthdr.pf.qid = r->pqid;
6379		else
6380			m->m_pkthdr.pf.qid = r->qid;
6381		/* add hints for ecn */
6382		m->m_pkthdr.pf.hdr = h;
6383#endif /* !__NetBSD__ */
6384	}
6385#endif /* ALTQ */
6386
6387	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6388	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6389	    (s->nat_rule.ptr->action == PF_RDR ||
6390	    s->nat_rule.ptr->action == PF_BINAT) &&
6391	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
6392#ifdef __NetBSD__
6393		pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
6394#else
6395		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
6396#endif /* !__NetBSD__ */
6397
6398	if (log) {
6399		struct pf_rule *lr;
6400
6401		if (s != NULL && s->nat_rule.ptr != NULL &&
6402		    s->nat_rule.ptr->log & PF_LOG_ALL)
6403			lr = s->nat_rule.ptr;
6404		else
6405			lr = r;
6406		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
6407		    &pd);
6408	}
6409
6410	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6411	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6412
6413	if (action == PF_PASS || r->action == PF_DROP) {
6414		dirndx = (dir == PF_OUT);
6415		r->packets[dirndx]++;
6416		r->bytes[dirndx] += pd.tot_len;
6417		if (a != NULL) {
6418			a->packets[dirndx]++;
6419			a->bytes[dirndx] += pd.tot_len;
6420		}
6421		if (s != NULL) {
6422			sk = s->state_key;
6423			if (s->nat_rule.ptr != NULL) {
6424				s->nat_rule.ptr->packets[dirndx]++;
6425				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6426			}
6427			if (s->src_node != NULL) {
6428				s->src_node->packets[dirndx]++;
6429				s->src_node->bytes[dirndx] += pd.tot_len;
6430			}
6431			if (s->nat_src_node != NULL) {
6432				s->nat_src_node->packets[dirndx]++;
6433				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6434			}
6435			dirndx = (dir == sk->direction) ? 0 : 1;
6436			s->packets[dirndx]++;
6437			s->bytes[dirndx] += pd.tot_len;
6438		}
6439		tr = r;
6440		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6441		if (nr != NULL) {
6442			struct pf_addr *x;
6443			/*
6444			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same as
6446			 * the addresses used during matching (pfr_match)
6447			 */
6448			if (r == &pf_default_rule) {
6449				tr = nr;
6450				x = (s == NULL || sk->direction == dir) ?
6451				    &pd.baddr : &pd.naddr;
6452			} else {
6453				x = (s == NULL || sk->direction == dir) ?
6454				    &pd.naddr : &pd.baddr;
6455			}
6456			if (x == &pd.baddr || s == NULL) {
6457				if (dir == PF_OUT)
6458					pd.src = x;
6459				else
6460					pd.dst = x;
6461			}
6462		}
6463		if (tr->src.addr.type == PF_ADDR_TABLE)
6464			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
6465			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
6466			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6467			    tr->src.neg);
6468		if (tr->dst.addr.type == PF_ADDR_TABLE)
6469			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
6470			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
6471			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6472			    tr->dst.neg);
6473	}
6474
6475
6476	if (action == PF_SYNPROXY_DROP) {
6477		m_freem(*m0);
6478		*m0 = NULL;
6479		action = PF_PASS;
6480	} else if (r->rt)
6481		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6482		pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
6483
6484	return (action);
6485}
6486#endif /* INET6 */
6487
6488int
6489pf_check_congestion(struct ifqueue *ifq)
6490{
6491#ifdef __NetBSD__
6492	return (0);
6493#else
6494	if (ifq->ifq_congestion)
6495		return (1);
6496	else
6497		return (0);
6498#endif /* !__NetBSD__ */
6499}
6500