1/*
2 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*	$apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30/*	$OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31
32/*
33 * Copyright (c) 2001 Daniel Hartmeier
34 * Copyright (c) 2002,2003 Henning Brauer
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 *
41 *    - Redistributions of source code must retain the above copyright
42 *      notice, this list of conditions and the following disclaimer.
43 *    - Redistributions in binary form must reproduce the above
44 *      copyright notice, this list of conditions and the following
45 *      disclaimer in the documentation and/or other materials provided
46 *      with the distribution.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
49 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
50 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
51 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
52 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
53 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
54 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
55 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
58 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 *
61 * Effort sponsored in part by the Defense Advanced Research Projects
62 * Agency (DARPA) and Air Force Research Laboratory, Air Force
63 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
64 *
65 */
66
67#include <machine/endian.h>
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/mbuf.h>
71#include <sys/filio.h>
72#include <sys/socket.h>
73#include <sys/socketvar.h>
74#include <sys/kernel.h>
75#include <sys/time.h>
76#include <sys/proc.h>
77#include <sys/random.h>
78#include <sys/mcache.h>
79
80#include <libkern/crypto/md5.h>
81#include <libkern/libkern.h>
82
83#include <mach/thread_act.h>
84
85#include <net/if.h>
86#include <net/if_types.h>
87#include <net/bpf.h>
88#include <net/route.h>
89
90#include <netinet/in.h>
91#include <netinet/in_var.h>
92#include <netinet/in_systm.h>
93#include <netinet/ip.h>
94#include <netinet/ip_var.h>
95#include <netinet/tcp.h>
96#include <netinet/tcp_seq.h>
97#include <netinet/udp.h>
98#include <netinet/ip_icmp.h>
99#include <netinet/in_pcb.h>
100#include <netinet/tcp_timer.h>
101#include <netinet/tcp_var.h>
102#include <netinet/tcp_fsm.h>
103#include <netinet/udp_var.h>
104#include <netinet/icmp_var.h>
105#include <net/if_ether.h>
106#include <net/ethernet.h>
107#include <net/flowhash.h>
108#include <net/pfvar.h>
109#include <net/if_pflog.h>
110
111#if NPFSYNC
112#include <net/if_pfsync.h>
113#endif /* NPFSYNC */
114
115#if INET6
116#include <netinet/ip6.h>
117#include <netinet6/in6_pcb.h>
118#include <netinet6/ip6_var.h>
119#include <netinet/icmp6.h>
120#include <netinet6/nd6.h>
121#endif /* INET6 */
122
123#if DUMMYNET
124#include <netinet/ip_dummynet.h>
125#endif /* DUMMYNET */
126
/*
 * Conditional debug printf: expands to printf x only when the global pf
 * debug level is at least (n); otherwise it is a no-op.
 */
#define DPFPRINTF(n, x)	(pf_status.debug >= (n) ? printf x : ((void)0))

/*
 * On Mac OS X, the rtableid value is treated as the interface scope
 * value that is equivalent to the interface index used for scoped
 * routing.  A valid scope value is anything but IFSCOPE_NONE (0),
 * as per definition of ifindex which is a positive, non-zero number.
 * The other BSDs treat a negative rtableid value as invalid, hence
 * the test against INT_MAX to handle userland apps which initialize
 * the field with a negative number.
 */
#define	PF_RTABLEID_IS_VALID(r) \
	((r) > IFSCOPE_NONE && (r) <= INT_MAX)

/*
 * Global variables
 */
/* Main pf mutex and outer perimeter read/write lock. */
decl_lck_mtx_data(,pf_lock_data);
decl_lck_rw_data(,pf_perim_lock_data);
lck_mtx_t *pf_lock = &pf_lock_data;
lck_rw_t *pf_perim_lock = &pf_perim_lock_data;

/* state tables (RB trees keyed on lan/ext and ext/gwy endpoints) */
struct pf_state_tree_lan_ext	 pf_statetbl_lan_ext;
struct pf_state_tree_ext_gwy	 pf_statetbl_ext_gwy;

struct pf_palist	 pf_pabuf;	/* staging list for pool addresses */
struct pf_status	 pf_status;	/* global pf status and counters */

#if PF_ALTQ
/* Two ALTQ queue sets (active/inactive) plus their ruleset tickets. */
struct pf_altqqueue	 pf_altqs[2];
struct pf_altqqueue	*pf_altqs_active;
struct pf_altqqueue	*pf_altqs_inactive;
u_int32_t		 ticket_altqs_active;
u_int32_t		 ticket_altqs_inactive;
int			 altqs_inactive_open;
#endif /* PF_ALTQ */
u_int32_t		 ticket_pabuf;

/* Secret state for TCP initial-sequence-number generation (pf_tcp_iss()). */
static MD5_CTX		 pf_tcp_secret_ctx;
static u_char		 pf_tcp_secret[16];
static int		 pf_tcp_secret_init;
static int		 pf_tcp_iss_off;

/* Explicit stack for anchor ruleset traversal; depth is capped at 64. */
static struct pf_anchor_stackframe {
	struct pf_ruleset			*rs;
	struct pf_rule				*r;
	struct pf_anchor_node			*parent;
	struct pf_anchor			*child;
} pf_anchor_stack[64];

/* Memory pools for pf objects. */
struct pool		 pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
struct pool		 pf_state_pl, pf_state_key_pl;
#if PF_ALTQ
struct pool		 pf_altq_pl;
#endif /* PF_ALTQ */

/*
 * Generic hook-list infrastructure (see hook_establish()/hook_runloop()).
 */
typedef void (*hook_fn_t)(void *);

struct hook_desc {
	TAILQ_ENTRY(hook_desc) hd_list;
	hook_fn_t hd_fn;	/* callback to invoke */
	void *hd_arg;		/* opaque argument passed to hd_fn */
};

/* Flags controlling hook_runloop() behavior. */
#define	HOOK_REMOVE	0x01	/* unlink descriptor from the list */
#define	HOOK_FREE	0x02	/* free the descriptor */
#define	HOOK_ABORT	0x04	/* skip invoking the callback */

static void		*hook_establish(struct hook_desc_head *, int,
			    hook_fn_t, void *);
static void		hook_runloop(struct hook_desc_head *, int flags);
200struct pool		 pf_app_state_pl;
201static void		 pf_print_addr(struct pf_addr *addr, sa_family_t af);
202static void		 pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
203			    u_int8_t);
204
205static void		 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
206
207static void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
208			    u_int32_t);
209static void		 pf_add_threshold(struct pf_threshold *);
210static int		 pf_check_threshold(struct pf_threshold *);
211
212static void		 pf_change_ap(int, struct mbuf *, struct pf_addr *,
213			    u_int16_t *, u_int16_t *, u_int16_t *,
214			    struct pf_addr *, u_int16_t, u_int8_t, sa_family_t);
215static int		 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
216			    struct tcphdr *, struct pf_state_peer *);
217#if INET6
218static void		 pf_change_a6(struct pf_addr *, u_int16_t *,
219			    struct pf_addr *, u_int8_t);
220#endif /* INET6 */
221static void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
222			    struct pf_addr *, struct pf_addr *, u_int16_t,
223			    u_int16_t *, u_int16_t *, u_int16_t *,
224			    u_int16_t *, u_int8_t, sa_family_t);
225static void		 pf_send_tcp(const struct pf_rule *, sa_family_t,
226			    const struct pf_addr *, const struct pf_addr *,
227			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
228			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
229			    u_int16_t, struct ether_header *, struct ifnet *);
230static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
231			    sa_family_t, struct pf_rule *);
232static struct pf_rule	*pf_match_translation(struct pf_pdesc *, struct mbuf *,
233			    int, int, struct pfi_kif *, struct pf_addr *,
234			    union pf_state_xport *, struct pf_addr *,
235			    union pf_state_xport *, int);
236static struct pf_rule	*pf_get_translation_aux(struct pf_pdesc *,
237			    struct mbuf *, int, int, struct pfi_kif *,
238			    struct pf_src_node **, struct pf_addr *,
239			    union pf_state_xport *, struct pf_addr *,
240			    union pf_state_xport *, struct pf_addr *,
241			    union pf_state_xport *);
242static void		 pf_attach_state(struct pf_state_key *,
243			    struct pf_state *, int);
244static void		 pf_detach_state(struct pf_state *, int);
245static u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
246static int		 pf_test_rule(struct pf_rule **, struct pf_state **,
247			    int, struct pfi_kif *, struct mbuf *, int,
248			    void *, struct pf_pdesc *, struct pf_rule **,
249			    struct pf_ruleset **, struct ifqueue *);
250#if DUMMYNET
251static int		 pf_test_dummynet(struct pf_rule **, int,
252			    struct pfi_kif *, struct mbuf **,
253			    struct pf_pdesc *, struct ip_fw_args *);
254#endif /* DUMMYNET */
255static int		 pf_test_fragment(struct pf_rule **, int,
256			    struct pfi_kif *, struct mbuf *, void *,
257			    struct pf_pdesc *, struct pf_rule **,
258			    struct pf_ruleset **);
259static int		 pf_test_state_tcp(struct pf_state **, int,
260			    struct pfi_kif *, struct mbuf *, int,
261			    void *, struct pf_pdesc *, u_short *);
262static int		 pf_test_state_udp(struct pf_state **, int,
263			    struct pfi_kif *, struct mbuf *, int,
264			    void *, struct pf_pdesc *, u_short *);
265static int		 pf_test_state_icmp(struct pf_state **, int,
266			    struct pfi_kif *, struct mbuf *, int,
267			    void *, struct pf_pdesc *, u_short *);
268static int		 pf_test_state_other(struct pf_state **, int,
269			    struct pfi_kif *, struct pf_pdesc *);
270static int		 pf_match_tag(struct mbuf *, struct pf_rule *,
271			    struct pf_mtag *, int *);
272static void		 pf_hash(struct pf_addr *, struct pf_addr *,
273			    struct pf_poolhashkey *, sa_family_t);
274static int		 pf_map_addr(u_int8_t, struct pf_rule *,
275			    struct pf_addr *, struct pf_addr *,
276			    struct pf_addr *, struct pf_src_node **);
277static int		 pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
278			    struct pf_rule *, struct pf_addr *,
279			    union pf_state_xport *, struct pf_addr *,
280			    union pf_state_xport *, struct pf_addr *,
281			    union pf_state_xport *, struct pf_src_node **);
282static void		 pf_route(struct mbuf **, struct pf_rule *, int,
283			    struct ifnet *, struct pf_state *,
284			    struct pf_pdesc *);
285#if INET6
286static void		 pf_route6(struct mbuf **, struct pf_rule *, int,
287			    struct ifnet *, struct pf_state *,
288			    struct pf_pdesc *);
289#endif /* INET6 */
290static u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
291			    sa_family_t);
292static u_int16_t	 pf_get_mss(struct mbuf *, int, u_int16_t,
293			    sa_family_t);
294static u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
295				u_int16_t);
296static void		 pf_set_rt_ifp(struct pf_state *,
297			    struct pf_addr *);
298static int		 pf_check_proto_cksum(struct mbuf *, int, int,
299			    u_int8_t, sa_family_t);
300static int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
301			    struct pf_addr_wrap *);
302static struct pf_state	*pf_find_state(struct pfi_kif *,
303			    struct pf_state_key_cmp *, u_int);
304static int		 pf_src_connlimit(struct pf_state **);
305static void		 pf_stateins_err(const char *, struct pf_state *,
306			    struct pfi_kif *);
307static int		 pf_check_congestion(struct ifqueue *);
308
309#if 0
310static const char *pf_pptp_ctrl_type_name(u_int16_t code);
311#endif
312static void		pf_pptp_handler(struct pf_state *, int, int,
313			    struct pf_pdesc *, struct pfi_kif *);
314static void		pf_pptp_unlink(struct pf_state *);
315static void		pf_grev1_unlink(struct pf_state *);
316static int		pf_test_state_grev1(struct pf_state **, int,
317			    struct pfi_kif *, int, struct pf_pdesc *);
318static int		pf_ike_compare(struct pf_app_state *,
319			    struct pf_app_state *);
320static int		pf_test_state_esp(struct pf_state **, int,
321			    struct pfi_kif *, int, struct pf_pdesc *);
322
/* Defined elsewhere (pf table / IP code). */
extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;
extern int path_mtu_discovery;

/*
 * Default high-water marks for the pf memory pools, indexed by the
 * PF_LIMIT_* constants.
 */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT },
	{ &pf_app_state_pl, PFAPPSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT },
};
335
/*
 * Lazily make the first 'len' bytes of mbuf 'm' writable.
 *
 * pd->lmw caches the longest prefix already made writable; a negative
 * value records a prior failure.  When 'len' exceeds the cached value,
 * m_makewritable() is called; on failure pd->lmw is set to -1 so later
 * calls bail out immediately.  If the mbuf chain head changed as a
 * result of the copy, the cached pointers in 'pd' (mp, pf_mtag, and the
 * per-family src/dst/ip_sum header pointers) are re-derived from the
 * new chain.
 *
 * Returns the (possibly new) mbuf on success, or NULL on failure.
 */
struct mbuf *
pf_lazy_makewritable(struct pf_pdesc *pd, struct mbuf *m, int len)
{
	/* A previous attempt already failed; give up immediately. */
	if (pd->lmw < 0)
		return (0);

	VERIFY(m == pd->mp);

	if (len > pd->lmw) {
		if (m_makewritable(&m, 0, len, M_DONTWAIT))
			len = -1;	/* failure; recorded in pd->lmw below */
		pd->lmw = len;
		if (len >= 0 && m != pd->mp) {
			/* Chain head changed: refresh cached pointers. */
			pd->mp = m;
			pd->pf_mtag = pf_find_mtag(m);

			switch (pd->af) {
			case AF_INET: {
				struct ip *h = mtod(m, struct ip *);
				pd->src = (struct pf_addr *)&h->ip_src;
				pd->dst = (struct pf_addr *)&h->ip_dst;
				pd->ip_sum = &h->ip_sum;
				break;
			}
#if INET6
			case AF_INET6: {
				struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
				pd->src = (struct pf_addr *)&h->ip6_src;
				pd->dst = (struct pf_addr *)&h->ip6_dst;
				break;
			}
#endif /* INET6 */
			}
		}
	}

	return (len < 0 ? 0 : m);
}
374
375static const int *
376pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
377	int direction, int *action)
378{
379	if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
380		*action = PF_DROP;
381		return (action);
382	}
383
384	if (direction == PF_OUT &&
385	    (((*state)->rule.ptr->rt == PF_ROUTETO &&
386	    (*state)->rule.ptr->direction == PF_OUT) ||
387	    ((*state)->rule.ptr->rt == PF_REPLYTO &&
388	    (*state)->rule.ptr->direction == PF_IN)) &&
389	    (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
390		*action = PF_PASS;
391		return (action);
392	}
393
394	return (0);
395}
396
/*
 * Look up the state for the current packet.  Relies on 'kif', 'key',
 * 'direction', 'pd' and 'state' being in scope at the expansion site,
 * and returns from the *caller* when pf_state_lookup_aux() reaches a
 * verdict (PF_DROP for a missing/purged state, PF_PASS for the
 * route-to/reply-to short circuit).  Also copies the state's flowhash
 * into pd when pd has none yet.
 */
#define STATE_LOOKUP()							 \
	do {								 \
		int action;						 \
		*state = pf_find_state(kif, &key, direction);		 \
		if (*state != NULL && pd != NULL && 			 \
			pd->flowhash == 0) {				 \
			pd->flowhash = (*state)->state_key->flowhash;	 \
		}							 \
		if (pf_state_lookup_aux(state, kif, direction, &action)) \
			return (action);				 \
	} while (0)

/* True if lan and gwy addresses differ (all four words for AF_INET6). */
#define	STATE_ADDR_TRANSLATE(sk)					\
	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||		\
	((sk)->af == AF_INET6 &&					\
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||	\
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||		\
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3]))

/* True if the state translates the address or the port. */
#define STATE_TRANSLATE(sk)						\
	(STATE_ADDR_TRANSLATE(sk) ||					\
	(sk)->lan.xport.port != (sk)->gwy.xport.port)

/* True if the state translates the address or the GRE call id. */
#define STATE_GRE_TRANSLATE(sk)						\
	(STATE_ADDR_TRANSLATE(sk) ||					\
	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)

/* Interface to bind a state to: kif for if-bound rules, else pfi_all. */
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

/*
 * Bump the state counters on the rule (and optional anchor/NAT rule)
 * that created state 's'; the VERIFYs catch counter overflow.
 */
#define STATE_INC_COUNTERS(s)					\
	do {							\
		s->rule.ptr->states++;				\
		VERIFY(s->rule.ptr->states != 0);		\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states++;		\
			VERIFY(s->anchor.ptr->states != 0);	\
		}						\
		if (s->nat_rule.ptr != NULL) {			\
			s->nat_rule.ptr->states++;		\
			VERIFY(s->nat_rule.ptr->states != 0);	\
		}						\
	} while (0)

/*
 * Undo STATE_INC_COUNTERS, in reverse order; the VERIFYs catch
 * counter underflow.
 */
#define STATE_DEC_COUNTERS(s)					\
	do {							\
		if (s->nat_rule.ptr != NULL) {			\
			VERIFY(s->nat_rule.ptr->states > 0);	\
			s->nat_rule.ptr->states--;		\
		}						\
		if (s->anchor.ptr != NULL) {			\
			VERIFY(s->anchor.ptr->states > 0);	\
			s->anchor.ptr->states--;		\
		}						\
		VERIFY(s->rule.ptr->states > 0);		\
		s->rule.ptr->states--;				\
	} while (0)
454
/* RB-tree comparison routines; definitions follow below. */
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);

/* Global tree of tracked source nodes. */
struct pf_src_tree tree_src_tracking;

/* States indexed by id, plus the linear list of all states. */
struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

/* Instantiate the RB-tree operations for the trees above. */
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
    entry_lan_ext, pf_state_compare_lan_ext);
RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
    entry_ext_gwy, pf_state_compare_ext_gwy);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

/*
 * Flags to skip removal from the lan/ext or ext/gwy state tree.
 * NOTE(review): inferred from the names — confirm against
 * pf_detach_state().
 */
#define	PF_DT_SKIP_LANEXT	0x01
#define	PF_DT_SKIP_EXTGWY	0x02
478
/*
 * PPTP application-level gateway support.  The structures below mirror
 * the on-the-wire layout of the PPTP control-connection messages
 * (RFC 2637), carried over TCP port 1723.
 */
static const u_int16_t PF_PPTP_PORT = 1723;
static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;

/* Common header present at the start of every PPTP message. */
struct pf_pptp_hdr {
	u_int16_t	length;
	u_int16_t	type;
	u_int32_t	magic;
};

/* Control-message sub-header that follows pf_pptp_hdr. */
struct pf_pptp_ctrl_hdr {
	u_int16_t	type;
	u_int16_t	reserved_0;
};

/* Generic view of a control-message body as 16-bit words. */
struct pf_pptp_ctrl_generic {
	u_int16_t	data[0];
};

#define PF_PPTP_CTRL_TYPE_START_REQ	1
/* Start-Control-Connection-Request (RFC 2637 §2.1) */
struct pf_pptp_ctrl_start_req {
	u_int16_t	protocol_version;
	u_int16_t	reserved_1;
	u_int32_t	framing_capabilities;
	u_int32_t	bearer_capabilities;
	u_int16_t	maximum_channels;
	u_int16_t	firmware_revision;
	u_int8_t	host_name[64];
	u_int8_t	vendor_string[64];
};

#define PF_PPTP_CTRL_TYPE_START_RPY	2
/* Start-Control-Connection-Reply */
struct pf_pptp_ctrl_start_rpy {
	u_int16_t	protocol_version;
	u_int8_t	result_code;
	u_int8_t	error_code;
	u_int32_t	framing_capabilities;
	u_int32_t	bearer_capabilities;
	u_int16_t	maximum_channels;
	u_int16_t	firmware_revision;
	u_int8_t	host_name[64];
	u_int8_t	vendor_string[64];
};

#define PF_PPTP_CTRL_TYPE_STOP_REQ	3
/* Stop-Control-Connection-Request */
struct pf_pptp_ctrl_stop_req {
	u_int8_t	reason;
	u_int8_t	reserved_1;
	u_int16_t	reserved_2;
};

#define PF_PPTP_CTRL_TYPE_STOP_RPY	4
/* Stop-Control-Connection-Reply */
struct pf_pptp_ctrl_stop_rpy {
	u_int8_t	reason;
	u_int8_t	error_code;
	u_int16_t	reserved_1;
};

#define PF_PPTP_CTRL_TYPE_ECHO_REQ	5
/* Echo-Request (keepalive) */
struct pf_pptp_ctrl_echo_req {
	u_int32_t	identifier;
};

#define PF_PPTP_CTRL_TYPE_ECHO_RPY	6
/* Echo-Reply */
struct pf_pptp_ctrl_echo_rpy {
	u_int32_t	identifier;
	u_int8_t	result_code;
	u_int8_t	error_code;
	u_int16_t	reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ	7
/* Outgoing-Call-Request: carries the caller's call_id. */
struct pf_pptp_ctrl_call_out_req {
	u_int16_t	call_id;
	u_int16_t	call_sernum;
	u_int32_t	min_bps;
	u_int32_t	bearer_type;
	u_int32_t	framing_type;
	u_int16_t	rxwindow_size;
	u_int16_t	proc_delay;
	u_int8_t	phone_num[64];
	u_int8_t	sub_addr[64];
};

#define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY	8
/* Outgoing-Call-Reply: pairs call_id with peer_call_id. */
struct pf_pptp_ctrl_call_out_rpy {
	u_int16_t	call_id;
	u_int16_t	peer_call_id;
	u_int8_t	result_code;
	u_int8_t	error_code;
	u_int16_t	cause_code;
	u_int32_t	connect_speed;
	u_int16_t	rxwindow_size;
	u_int16_t	proc_delay;
	u_int32_t	phy_channel_id;
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_1ST	9
/* Incoming-Call-Request */
struct pf_pptp_ctrl_call_in_1st {
	u_int16_t	call_id;
	u_int16_t	call_sernum;
	u_int32_t	bearer_type;
	u_int32_t	phy_channel_id;
	u_int16_t	dialed_number_len;
	u_int16_t	dialing_number_len;
	u_int8_t	dialed_num[64];
	u_int8_t	dialing_num[64];
	u_int8_t	sub_addr[64];
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_2ND	10
/* Incoming-Call-Reply */
struct pf_pptp_ctrl_call_in_2nd {
	u_int16_t	call_id;
	u_int16_t	peer_call_id;
	u_int8_t	result_code;
	u_int8_t	error_code;
	u_int16_t	rxwindow_size;
	u_int16_t	txdelay;
	u_int16_t	reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_3RD	11
/* Incoming-Call-Connected */
struct pf_pptp_ctrl_call_in_3rd {
	u_int16_t	call_id;
	u_int16_t	reserved_1;
	u_int32_t	connect_speed;
	u_int16_t	rxwindow_size;
	u_int16_t	txdelay;
	u_int32_t	framing_type;
};

#define PF_PPTP_CTRL_TYPE_CALL_CLR	12
/* Call-Clear-Request */
struct pf_pptp_ctrl_call_clr {
	u_int16_t	call_id;
	u_int16_t	reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_DISC	13
/* Call-Disconnect-Notify */
struct pf_pptp_ctrl_call_disc {
	u_int16_t	call_id;
	u_int8_t	result_code;
	u_int8_t	error_code;
	u_int16_t	cause_code;
	u_int16_t	reserved_1;
	u_int8_t	statistics[128];
};

#define PF_PPTP_CTRL_TYPE_ERROR	14
/* WAN-Error-Notify */
struct pf_pptp_ctrl_error {
	u_int16_t	peer_call_id;
	u_int16_t	reserved_1;
	u_int32_t	crc_errors;
	u_int32_t	fr_errors;
	u_int32_t	hw_errors;
	u_int32_t	buf_errors;
	u_int32_t	tim_errors;
	u_int32_t	align_errors;
};

#define PF_PPTP_CTRL_TYPE_SET_LINKINFO	15
/* Set-Link-Info */
struct pf_pptp_ctrl_set_linkinfo {
	u_int16_t	peer_call_id;
	u_int16_t	reserved_1;
	u_int32_t	tx_accm;
	u_int32_t	rx_accm;
};
644
#if 0
/*
 * Return a human-readable name for a PPTP control-message type (debug
 * helper, currently compiled out).  'code' arrives in network byte
 * order.  For out-of-range codes the name is formatted into static
 * storage, so the function is not reentrant — acceptable for
 * debug-only use.
 */
static const char *pf_pptp_ctrl_type_name(u_int16_t code)
{
	code = ntohs(code);

	if (code < PF_PPTP_CTRL_TYPE_START_REQ ||
	    code > PF_PPTP_CTRL_TYPE_SET_LINKINFO) {
		/* Room for "reserved-" plus up to 4 hex digits and NUL. */
		static char reserved[] = "reserved-0000";

		/*
		 * Use snprintf: the previous sprintf could overflow the
		 * buffer, as a 16-bit code may need four hex digits.
		 */
		snprintf(&reserved[9], sizeof (reserved) - 9, "%02x", code);
		return (reserved);
	} else {
		static const char *name[] = {
			"start_req", "start_rpy", "stop_req", "stop_rpy",
			"echo_req", "echo_rpy", "call_out_req", "call_out_rpy",
			"call_in_1st", "call_in_2nd", "call_in_3rd",
			"call_clr", "call_disc", "error", "set_linkinfo"
		};

		return (name[code - 1]);
	}
}
#endif
668
/*
 * Smallest number of bytes a well-formed PPTP control message can
 * occupy: both fixed headers plus the smallest per-type body.
 */
static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
	sizeof (struct pf_pptp_hdr) +
	sizeof (struct pf_pptp_ctrl_hdr) +
	MIN(sizeof (struct pf_pptp_ctrl_start_req),
	MIN(sizeof (struct pf_pptp_ctrl_start_rpy),
	MIN(sizeof (struct pf_pptp_ctrl_stop_req),
	MIN(sizeof (struct pf_pptp_ctrl_stop_rpy),
	MIN(sizeof (struct pf_pptp_ctrl_echo_req),
	MIN(sizeof (struct pf_pptp_ctrl_echo_rpy),
	MIN(sizeof (struct pf_pptp_ctrl_call_out_req),
	MIN(sizeof (struct pf_pptp_ctrl_call_out_rpy),
	MIN(sizeof (struct pf_pptp_ctrl_call_in_1st),
	MIN(sizeof (struct pf_pptp_ctrl_call_in_2nd),
	MIN(sizeof (struct pf_pptp_ctrl_call_in_3rd),
	MIN(sizeof (struct pf_pptp_ctrl_call_clr),
	MIN(sizeof (struct pf_pptp_ctrl_call_disc),
	MIN(sizeof (struct pf_pptp_ctrl_error),
	sizeof (struct pf_pptp_ctrl_set_linkinfo)
	))))))))))))));

/* Overlay of every control-message body layout. */
union pf_pptp_ctrl_msg_union {
	struct pf_pptp_ctrl_start_req		start_req;
	struct pf_pptp_ctrl_start_rpy		start_rpy;
	struct pf_pptp_ctrl_stop_req		stop_req;
	struct pf_pptp_ctrl_stop_rpy		stop_rpy;
	struct pf_pptp_ctrl_echo_req		echo_req;
	struct pf_pptp_ctrl_echo_rpy		echo_rpy;
	struct pf_pptp_ctrl_call_out_req	call_out_req;
	struct pf_pptp_ctrl_call_out_rpy	call_out_rpy;
	struct pf_pptp_ctrl_call_in_1st		call_in_1st;
	struct pf_pptp_ctrl_call_in_2nd		call_in_2nd;
	struct pf_pptp_ctrl_call_in_3rd		call_in_3rd;
	struct pf_pptp_ctrl_call_clr		call_clr;
	struct pf_pptp_ctrl_call_disc		call_disc;
	struct pf_pptp_ctrl_error			error;
	struct pf_pptp_ctrl_set_linkinfo	set_linkinfo;
	u_int8_t							data[0];
};

/* A complete control message: headers plus type-specific body. */
struct pf_pptp_ctrl_msg {
	struct pf_pptp_hdr				hdr;
	struct pf_pptp_ctrl_hdr			ctrl;
	union pf_pptp_ctrl_msg_union	msg;
};

/* GREv1 header bits used by the PPTP data channel (RFC 2637 §4). */
#define PF_GRE_FLAG_CHECKSUM_PRESENT	0x8000
#define PF_GRE_FLAG_VERSION_MASK		0x0007
#define PF_GRE_PPP_ETHERTYPE			0x880B

struct pf_grev1_hdr {
	u_int16_t flags;
	u_int16_t protocol_type;
	u_int16_t payload_length;
	u_int16_t call_id;
	/*
	u_int32_t seqno;
	u_int32_t ackno;
	*/
	/* NOTE(review): seqno/ackno are optional trailing fields whose
	 * presence depends on bits in 'flags'; they are deliberately
	 * not part of the fixed header. */
};

/* IKE/ISAKMP runs over UDP port 500. */
static const u_int16_t PF_IKE_PORT = 500;

/* Fixed ISAKMP/IKE header layout, shared by IKEv1 and IKEv2. */
struct pf_ike_hdr {
	u_int64_t initiator_cookie, responder_cookie;
	u_int8_t next_payload, version, exchange_type, flags;
	u_int32_t message_id, length;
};

#define PF_IKE_PACKET_MINSIZE	(sizeof (struct pf_ike_hdr))

/* Exchange-type values for IKEv1 (1-5) and IKEv2 (34-37). */
#define PF_IKEv1_EXCHTYPE_BASE				 1
#define PF_IKEv1_EXCHTYPE_ID_PROTECT		 2
#define PF_IKEv1_EXCHTYPE_AUTH_ONLY			 3
#define PF_IKEv1_EXCHTYPE_AGGRESSIVE		 4
#define PF_IKEv1_EXCHTYPE_INFORMATIONAL		 5
#define PF_IKEv2_EXCHTYPE_SA_INIT			34
#define PF_IKEv2_EXCHTYPE_AUTH				35
#define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA	36
#define PF_IKEv2_EXCHTYPE_INFORMATIONAL		37

/* Header flag bits for IKEv1 and IKEv2. */
#define PF_IKEv1_FLAG_E		0x01
#define PF_IKEv1_FLAG_C		0x02
#define PF_IKEv1_FLAG_A		0x04
#define PF_IKEv2_FLAG_I		0x08
#define PF_IKEv2_FLAG_V		0x10
#define PF_IKEv2_FLAG_R		0x20

/* ESP header: SPI and sequence number precede the opaque payload. */
struct pf_esp_hdr {
	u_int32_t spi;
	u_int32_t seqno;
	u_int8_t payload[];
};
761
762static __inline int
763pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
764{
765	int	diff;
766
767	if (a->rule.ptr > b->rule.ptr)
768		return (1);
769	if (a->rule.ptr < b->rule.ptr)
770		return (-1);
771	if ((diff = a->af - b->af) != 0)
772		return (diff);
773	switch (a->af) {
774#if INET
775	case AF_INET:
776		if (a->addr.addr32[0] > b->addr.addr32[0])
777			return (1);
778		if (a->addr.addr32[0] < b->addr.addr32[0])
779			return (-1);
780		break;
781#endif /* INET */
782#if INET6
783	case AF_INET6:
784		if (a->addr.addr32[3] > b->addr.addr32[3])
785			return (1);
786		if (a->addr.addr32[3] < b->addr.addr32[3])
787			return (-1);
788		if (a->addr.addr32[2] > b->addr.addr32[2])
789			return (1);
790		if (a->addr.addr32[2] < b->addr.addr32[2])
791			return (-1);
792		if (a->addr.addr32[1] > b->addr.addr32[1])
793			return (1);
794		if (a->addr.addr32[1] < b->addr.addr32[1])
795			return (-1);
796		if (a->addr.addr32[0] > b->addr.addr32[0])
797			return (1);
798		if (a->addr.addr32[0] < b->addr.addr32[0])
799			return (-1);
800		break;
801#endif /* INET6 */
802	}
803	return (0);
804}
805
/*
 * RB-tree comparison for the lan/ext state table.  Orders keys by
 * protocol, then address family, then protocol-specific ports/ids,
 * then the lan (and, depending on the UDP extfilter variant, ext)
 * addresses, and finally by the optional application-state callback.
 * Returns <0, 0 or >0.
 */
static __inline int
pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;
	int	extfilter;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);

	/* Default: compare both addresses and both ports. */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys carry their id in the lan port slot. */
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0)
			return (diff);
		break;

	case IPPROTO_TCP:
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0)
			return (diff);
		if ((diff = a->ext.xport.port - b->ext.xport.port) != 0)
			return (diff);
		break;

	case IPPROTO_UDP:
		/* UDP filtering looseness lives in proto_variant. */
		if ((diff = a->proto_variant - b->proto_variant))
			return (diff);
		extfilter = a->proto_variant;
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0)
			return (diff);
		/* Looser modes (>= PF_EXTFILTER_AD) ignore the ext port. */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext.xport.port - b->ext.xport.port) != 0)
			return (diff);
		break;

	case IPPROTO_GRE:
		/* Only PPTP-variant GRE states compare call ids. */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->ext.xport.call_id -
			    b->ext.xport.call_id))
				return (diff);
		}
		break;

	case IPPROTO_ESP:
		if (!!(diff = a->ext.xport.spi - b->ext.xport.spi))
			return (diff);
		break;

	default:
		break;
	}

	switch (a->af) {
#if INET
	case AF_INET:
		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
			return (1);
		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
			return (-1);
		/* Modes >= PF_EXTFILTER_EI skip the ext address. */
		if (extfilter < PF_EXTFILTER_EI) {
			if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
				return (1);
			if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
				return (-1);
		}
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
			return (1);
		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
			return (-1);
		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
			return (1);
		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
			return (-1);
		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
			return (1);
		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
			return (-1);
		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
			return (1);
		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
			return (-1);
		/* Ext address compared unless loose mode with a zero key. */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext.addr, AF_INET6)) {
			if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
				return (1);
			if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
				return (-1);
			if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
				return (1);
			if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
				return (-1);
			if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
				return (1);
			if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
				return (-1);
			if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
				return (1);
			if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
				return (-1);
		}
		break;
#endif /* INET6 */
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_lan_ext &&
		    b->app_state->compare_lan_ext) {
			/* Order by callback identity first, then by the
			 * callback's own comparison of the app states. */
			diff = (const char *)b->app_state->compare_lan_ext -
			    (const char *)a->app_state->compare_lan_ext;
			if (diff != 0)
				return (diff);
			diff = a->app_state->compare_lan_ext(a->app_state,
			    b->app_state);
			if (diff != 0)
				return (diff);
		}
	}

	return (0);
}
934
/*
 * RB-tree comparator for the ext/gwy state table (the tree searched
 * for PF_IN lookups; see pf_find_state()).  Keys are ordered by
 * protocol, address family, protocol-specific ports/ids, the gwy
 * address and - depending on the UDP extfilter mode - the ext
 * address/port.  When both keys carry matching app_state comparators,
 * those break any remaining tie.  Returns <0, 0 or >0, memcmp-style.
 */
static __inline int
pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;
	int	extfilter;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);

	if ((diff = a->af - b->af) != 0)
		return (diff);

	/* strictest mode by default; a UDP key supplies its own below */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys only carry a gwy-side id/port */
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0)
			return (diff);
		break;

	case IPPROTO_TCP:
		if ((diff = a->ext.xport.port - b->ext.xport.port) != 0)
			return (diff);
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0)
			return (diff);
		break;

	case IPPROTO_UDP:
		/* the UDP proto_variant doubles as the extfilter mode */
		if ((diff = a->proto_variant - b->proto_variant))
			return (diff);
		extfilter = a->proto_variant;
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0)
			return (diff);
		/* only modes below PF_EXTFILTER_AD key on the ext port */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext.xport.port - b->ext.xport.port) != 0)
			return (diff);
		break;

	case IPPROTO_GRE:
		/* only PPTP-variant GRE keys compare call ids */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->gwy.xport.call_id -
			    b->gwy.xport.call_id))
				return (diff);
		}
		break;

	case IPPROTO_ESP:
		if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi))
			return (diff);
		break;

	default:
		break;
	}

	switch (a->af) {
#if INET
	case AF_INET:
		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
			return (1);
		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
			return (-1);
		/* only modes below PF_EXTFILTER_EI key on the ext address */
		if (extfilter < PF_EXTFILTER_EI) {
			if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
				return (1);
			if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
				return (-1);
		}
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		/* low-order 32-bit word is compared first */
		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
			return (1);
		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
			return (-1);
		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
			return (1);
		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
			return (-1);
		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
			return (1);
		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
			return (-1);
		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
			return (1);
		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
			return (-1);
		/*
		 * Compare ext addresses in strict modes, or whenever the
		 * search key (b) carries a non-zero ext address.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext.addr, AF_INET6)) {
			if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
				return (1);
			if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
				return (-1);
			if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
				return (1);
			if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
				return (-1);
			if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
				return (1);
			if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
				return (-1);
			if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
				return (1);
			if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
				return (-1);
		}
		break;
#endif /* INET6 */
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_ext_gwy &&
		    b->app_state->compare_ext_gwy) {
			/* order keys with different comparators apart first */
			diff = (const char *)b->app_state->compare_ext_gwy -
			    (const char *)a->app_state->compare_ext_gwy;
			if (diff != 0)
				return (diff);
			diff = a->app_state->compare_ext_gwy(a->app_state,
			    b->app_state);
			if (diff != 0)
				return (diff);
		}
	}

	return (0);
}
1064
1065static __inline int
1066pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1067{
1068	if (a->id > b->id)
1069		return (1);
1070	if (a->id < b->id)
1071		return (-1);
1072	if (a->creatorid > b->creatorid)
1073		return (1);
1074	if (a->creatorid < b->creatorid)
1075		return (-1);
1076
1077	return (0);
1078}
1079
#if INET6
/*
 * Copy a pf_addr; only the words meaningful for the given address
 * family are transferred (one for IPv4, four for IPv6).
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
#if INET
	case AF_INET:
		/* IPv4: a single 32-bit word */
		dst->addr32[0] = src->addr32[0];
		break;
#endif /* INET */
	case AF_INET6: {
		int w;

		/* IPv6: all four 32-bit words */
		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
		break;
	}
	}
}
#endif /* INET6 */
1099
/*
 * Look a state up by its (id, creatorid) key in the global id tree.
 * Returns NULL when no such state exists.
 */
struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	/* account the lookup before searching */
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id,
	    (struct pf_state *)(void *)key));
}
1108
/*
 * Find the state matching 'key' for traffic in direction 'dir',
 * accepting only states bound to 'kif' or to no interface (pfi_all).
 * PF_OUT searches the lan/ext table, PF_IN the ext/gwy table; any
 * other direction panics.  Returns NULL when nothing matches.
 */
static struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_state_key	*sk = NULL;
	struct pf_state		*s;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
		    (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
		    (struct pf_state_key *)key);
		break;
	default:
		panic("pf_find_state");
	}

	/* list is sorted, if-bound states before floating ones */
	if (sk != NULL)
		TAILQ_FOREACH(s, &sk->states, next)
			if (s->kif == pfi_all || s->kif == kif)
				return (s);

	return (NULL);
}
1138
/*
 * Like pf_find_state(), but interface-agnostic: returns the first
 * state attached to the matching key.  When 'more' is non-NULL the
 * number of states attached to the key is added to *more (the
 * counter is not reset here; the caller owns its initial value).
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk = NULL;
	struct pf_state		*s, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	/* PF_OUT keys live in the lan/ext tree, PF_IN keys in ext/gwy */
	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext,
		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy,
		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
		break;
	default:
		panic("pf_find_state_all");
	}

	if (sk != NULL) {
		ret = TAILQ_FIRST(&sk->states);
		if (more == NULL)
			return (ret);

		TAILQ_FOREACH(s, &sk->states, next)
			(*more)++;
	}

	return (ret);
}
1171
1172static void
1173pf_init_threshold(struct pf_threshold *threshold,
1174    u_int32_t limit, u_int32_t seconds)
1175{
1176	threshold->limit = limit * PF_THRESHOLD_MULT;
1177	threshold->seconds = seconds;
1178	threshold->count = 0;
1179	threshold->last = pf_time_second();
1180}
1181
1182static void
1183pf_add_threshold(struct pf_threshold *threshold)
1184{
1185	u_int32_t t = pf_time_second(), diff = t - threshold->last;
1186
1187	if (diff >= threshold->seconds)
1188		threshold->count = 0;
1189	else
1190		threshold->count -= threshold->count * diff /
1191		    threshold->seconds;
1192	threshold->count += PF_THRESHOLD_MULT;
1193	threshold->last = t;
1194}
1195
1196static int
1197pf_check_threshold(struct pf_threshold *threshold)
1198{
1199	return (threshold->count > threshold->limit);
1200}
1201
/*
 * Charge a new established connection against (*state)'s source node
 * and enforce the rule's max-src-conn and max-src-conn-rate limits.
 * On violation the offending address may be inserted into the rule's
 * overload table and, when the rule requests a flush, matching states
 * are marked for purge; the triggering state itself is always killed.
 * Returns 1 when the state was killed, 0 otherwise.
 */
static int
pf_src_connlimit(struct pf_state **state)
{
	int bad = 0;

	(*state)->src_node->conn++;
	VERIFY((*state)->src_node->conn != 0);
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	/* hard cap on concurrent connections from this source */
	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	/* connection-rate limit for this source */
	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf_src_connlimit: blocking address ");
			pf_print_host(&(*state)->src_node->addr, 0,
			    (*state)->state_key->af);
		}

		/* insert the offender as a host entry (/32 or /128) */
		bzero(&p, sizeof (p));
		p.pfra_af = (*state)->state_key->af;
		switch ((*state)->state_key->af) {
#if INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = (*state)->src_node->addr.v4;
			break;
#endif /* INET */
#if INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = (*state)->src_node->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, pf_calendar_time_second());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->state_key;
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->state_key->af &&
				    (((*state)->state_key->direction ==
				        PF_OUT &&
				    PF_AEQ(&(*state)->src_node->addr,
				        &sk->lan.addr, sk->af)) ||
				    ((*state)->state_key->direction == PF_IN &&
				    PF_AEQ(&(*state)->src_node->addr,
				        &sk->ext.addr, sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* purge thread will reap it */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf(", %u states killed", killed);
		}
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return (1);
}
1302
/*
 * Find or create the source-tracking node for src/af, leaving it in
 * *sn.  The node is additionally keyed on the rule when the rule does
 * its own source tracking (PFRULE_RULESRCTRACK) or uses sticky
 * addresses.  Returns 0 on success, -1 when allocation or tree
 * insertion fails, the rule's max-src-nodes cap is reached, or an
 * existing node already holds max-src-states states.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		/* build a lookup key and search the tracking tree */
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = rule;
		else
			k.rule.ptr = NULL;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* no node yet: allocate one unless the rule's cap is hit */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);
		bzero(*sn, sizeof (struct pf_src_node));

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		/* same keying as the lookup above */
		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR)
			(*sn)->rule.ptr = rule;
		else
			(*sn)->rule.ptr = NULL;
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				printf("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = pf_time_second();
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL)
			(*sn)->rule.ptr->src_nodes++;
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* existing node: enforce the per-source state limit */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}
1366
1367static void
1368pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
1369{
1370	struct pf_state_key	*sk = s->state_key;
1371
1372	if (pf_status.debug >= PF_DEBUG_MISC) {
1373		printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
1374		switch (sk->proto) {
1375		case IPPROTO_TCP:
1376			printf("TCP");
1377			break;
1378		case IPPROTO_UDP:
1379			printf("UDP");
1380			break;
1381		case IPPROTO_ICMP:
1382			printf("ICMP4");
1383			break;
1384		case IPPROTO_ICMPV6:
1385			printf("ICMP6");
1386			break;
1387		default:
1388			printf("PROTO=%u", sk->proto);
1389			break;
1390		}
1391		printf(" lan: ");
1392		pf_print_sk_host(&sk->lan, sk->af, sk->proto,
1393		    sk->proto_variant);
1394		printf(" gwy: ");
1395		pf_print_sk_host(&sk->gwy, sk->af, sk->proto,
1396		    sk->proto_variant);
1397		printf(" ext: ");
1398		pf_print_sk_host(&sk->ext, sk->af, sk->proto,
1399		    sk->proto_variant);
1400		if (s->sync_flags & PFSTATE_FROMSYNC)
1401			printf(" (from sync)");
1402		printf("\n");
1403	}
1404}
1405
/*
 * Insert a fully-constructed state into the lan/ext and ext/gwy state
 * key trees, the id tree and the global state list.  When the state
 * key already exists and no state on it is bound to the same kif, the
 * new state is attached to the existing key instead of inserting a
 * duplicate.  Returns 0 on success, -1 on collision (the state is
 * detached before returning).
 */
int
pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
{
	struct pf_state_key	*cur;
	struct pf_state		*sp;

	VERIFY(s->state_key != NULL);
	s->kif = kif;

	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
	    s->state_key)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(sp, &cur->states, next)
			if (sp->kif == kif) {	/* collision! */
				pf_stateins_err("tree_lan_ext", s, kif);
				pf_detach_state(s,
				    PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
				return (-1);
			}
		pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
	}

	/* if cur != NULL, we already found a state key and attached to it */
	if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
	    &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
		/* must not happen. we must have found the sk above! */
		pf_stateins_err("tree_ext_gwy", s, kif);
		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
		return (-1);
	}

	/* assign a fresh id/creatorid when none was provided */
	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC)
				printf(" (from sync)");
			printf("\n");
		}
		pf_detach_state(s, 0);
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	VERIFY(pf_status.states != 0);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
	pfsync_insert_state(s);
#endif
	return (0);
}
1464
/*
 * Continuation body of the purge thread: expires a fraction of the
 * state table every second and other expired objects (fragments,
 * source nodes) every PFTM_INTERVAL seconds, then re-arms itself via
 * tsleep0().  When pf is not running, everything is purged at once
 * and the thread either terminates (pf_purge_thread == NULL) or
 * sleeps - without timeout once all tables are empty.
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	static u_int32_t nloops = 0;
	int t = 1;	/* 1 second */

	lck_rw_lock_shared(pf_perim_lock);
	lck_mtx_lock(pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(pf_lock);
			lck_rw_done(pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return (0);
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(pf_lock);
	lck_rw_done(pf_perim_lock);

	/* park until the timeout; we resume in this same function */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return (0);
}
1523
/*
 * Entry point of the pf purge kernel thread: immediately parks in
 * tsleep0() with pf_purge_thread_cont as the continuation, which
 * performs all the periodic work.
 */
void
pf_purge_thread_fn(void *v, wait_result_t w)
{
#pragma unused(v, w)
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
	    pf_purge_thread_cont);
	/*
	 * tsleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
}
1536
/*
 * Compute the absolute expiry time (seconds) of 'state'.  The base
 * timeout comes from the state's rule, falling back to the default
 * rule when the rule leaves it at zero.  With adaptive timeouts
 * configured, the timeout shrinks linearly as the state count moves
 * from the adaptive start toward the adaptive end threshold; at or
 * beyond the end threshold the state expires immediately.
 */
u_int64_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t	t;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (pf_time_second());
	if (state->timeout == PFTM_UNTIL_PACKET)
		return (0);
	VERIFY(state->timeout != PFTM_UNLINKED);
	VERIFY(state->timeout < PFTM_MAX);
	t = state->rule.ptr->timeout[state->timeout];
	if (!t)
		t = pf_default_rule.timeout[state->timeout];
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		/* per-rule adaptive settings scale by the rule's states */
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states;
	} else {
		/* otherwise use the global settings and global count */
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end)
			return (state->expire + t * (end - states) /
			    (end - start));
		else
			return (pf_time_second());
	}
	return (state->expire + t);
}
1575
/*
 * Walk the source-tracking tree and free nodes that no longer hold
 * states and whose expiry time has passed.  A rule that is kept alive
 * only through source tracking is torn down with its last node.
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node		*cur, *next;

	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		/* fetch the successor first: cur may be freed below */
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= pf_time_second()) {
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0)
					pf_rm_rule(NULL, cur->rule.ptr);
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}
}
1600
/*
 * Detach 's' from its source node(s): drop the connection/state
 * counters and, when a node's last state goes away, start its
 * PFTM_SRC_NODE expiry timer so pf_purge_expired_src_nodes() can
 * reclaim the node later.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t t;

	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	if (s->src_node != NULL) {
		/* established TCP states hold a connection reference */
		if (s->src.tcp_est) {
			VERIFY(s->src_node->conn > 0);
			--s->src_node->conn;
		}
		VERIFY(s->src_node->states > 0);
		if (--s->src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t)
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			s->src_node->expire = pf_time_second() + t;
		}
	}
	/* the NAT source node may be distinct from the plain one */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		VERIFY(s->nat_src_node->states > 0);
		if (--s->nat_src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t)
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			s->nat_src_node->expire = pf_time_second() + t;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
1632
/*
 * Take 'cur' out of the active state tables (id tree, source nodes,
 * state key) and mark it PFTM_UNLINKED; pf_free_state() finishes the
 * teardown.  A state still in the PF_TCPS_PROXY_DST stage has a
 * RST/ACK sent for it first.
 */
void
pf_unlink_state(struct pf_state *cur)
{
	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		/* abort the half-made proxied connection with a RST */
		pf_send_tcp(cur->rule.ptr, cur->state_key->af,
		    &cur->state_key->ext.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext.xport.port,
		    cur->state_key->lan.xport.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}

	hook_runloop(&cur->unlink_hooks, HOOK_REMOVE|HOOK_FREE);
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	/* only the creator announces the deletion to pfsync peers */
	if (cur->creatorid == pf_status.hostid)
		pfsync_delete_state(cur);
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}
1657
1658/* callers should be at splpf and hold the
1659 * write_lock on pf_consistency_lock */
void
pf_free_state(struct pf_state *cur)
{
	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
#if NPFSYNC
	/* don't free a state pfsync's bulk-send cursor still points at */
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur))
		return;
#endif
	/* must have gone through pf_unlink_state() first */
	VERIFY(cur->timeout == PFTM_UNLINKED);
	/* drop rule references; remove rules nothing else keeps alive */
	VERIFY(cur->rule.ptr->states > 0);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->nat_rule.ptr != NULL) {
		VERIFY(cur->nat_rule.ptr->states > 0);
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0)
			pf_rm_rule(NULL, cur->nat_rule.ptr);
	}
	if (cur->anchor.ptr != NULL) {
		VERIFY(cur->anchor.ptr->states > 0);
		if (--cur->anchor.ptr->states <= 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	VERIFY(pf_status.states > 0);
	pf_status.states--;
}
1696
/*
 * Check up to 'maxcheck' states for expiry, resuming from where the
 * previous call stopped: the cursor is static, so successive calls
 * sweep the whole state list incrementally and wrap around.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;

	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* already unlinked; just release the memory */
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= pf_time_second()) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			pf_free_state(cur);
		}
		cur = next;
	}
}
1726
1727int
1728pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1729{
1730	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
1731
1732	if (aw->type != PF_ADDR_TABLE)
1733		return (0);
1734	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1735		return (1);
1736	return (0);
1737}
1738
1739void
1740pf_tbladdr_remove(struct pf_addr_wrap *aw)
1741{
1742	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
1743
1744	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1745		return;
1746	pfr_detach_table(aw->p.tbl);
1747	aw->p.tbl = NULL;
1748}
1749
1750void
1751pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1752{
1753	struct pfr_ktable *kt = aw->p.tbl;
1754
1755	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
1756
1757	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1758		return;
1759	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1760		kt = kt->pfrkt_root;
1761	aw->p.tbl = NULL;
1762	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1763	    kt->pfrkt_cnt : -1;
1764}
1765
1766static void
1767pf_print_addr(struct pf_addr *addr, sa_family_t af)
1768{
1769	switch (af) {
1770#if INET
1771	case AF_INET: {
1772		u_int32_t a = ntohl(addr->addr32[0]);
1773		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1774		    (a>>8)&255, a&255);
1775		break;
1776	}
1777#endif /* INET */
1778#if INET6
1779	case AF_INET6: {
1780		u_int16_t b;
1781		u_int8_t i, curstart = 255, curend = 0,
1782		    maxstart = 0, maxend = 0;
1783		for (i = 0; i < 8; i++) {
1784			if (!addr->addr16[i]) {
1785				if (curstart == 255)
1786					curstart = i;
1787				else
1788					curend = i;
1789			} else {
1790				if (curstart) {
1791					if ((curend - curstart) >
1792					    (maxend - maxstart)) {
1793						maxstart = curstart;
1794						maxend = curend;
1795						curstart = 255;
1796					}
1797				}
1798			}
1799		}
1800		for (i = 0; i < 8; i++) {
1801			if (i >= maxstart && i <= maxend) {
1802				if (maxend != 7) {
1803					if (i == maxstart)
1804						printf(":");
1805				} else {
1806					if (i == maxend)
1807						printf(":");
1808				}
1809			} else {
1810				b = ntohs(addr->addr16[i]);
1811				printf("%x", b);
1812				if (i < 7)
1813					printf(":");
1814			}
1815		}
1816		break;
1817	}
1818#endif /* INET6 */
1819	}
1820}
1821
1822static void
1823pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1824	u_int8_t proto_variant)
1825{
1826	pf_print_addr(&sh->addr, af);
1827
1828	switch (proto) {
1829	case IPPROTO_ESP:
1830		if (sh->xport.spi)
1831			printf("[%08x]", ntohl(sh->xport.spi));
1832		break;
1833
1834	case IPPROTO_GRE:
1835		if (proto_variant == PF_GRE_PPTP_VARIANT)
1836			printf("[%u]", ntohs(sh->xport.call_id));
1837		break;
1838
1839	case IPPROTO_TCP:
1840	case IPPROTO_UDP:
1841		printf("[%u]", ntohs(sh->xport.port));
1842		break;
1843
1844	default:
1845		break;
1846	}
1847}
1848
1849static void
1850pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1851{
1852	pf_print_addr(addr, af);
1853	if (p)
1854		printf("[%u]", ntohs(p));
1855}
1856
/*
 * Print a one-line summary of 's': the protocol, the lan/gwy/ext
 * hosts of its state key, both peers' sequence-tracking parameters
 * and the src:dst connection states.
 */
void
pf_print_state(struct pf_state *s)
{
	struct pf_state_key *sk = s->state_key;
	switch (sk->proto) {
	case IPPROTO_ESP:
		printf("ESP ");
		break;
	case IPPROTO_GRE:
		printf("GRE%u ", sk->proto_variant);
		break;
	case IPPROTO_TCP:
		printf("TCP ");
		break;
	case IPPROTO_UDP:
		printf("UDP ");
		break;
	case IPPROTO_ICMP:
		printf("ICMP ");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPV6 ");
		break;
	default:
		printf("%u ", sk->proto);
		break;
	}
	pf_print_sk_host(&sk->lan, sk->af, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->gwy, sk->af, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext, sk->af, sk->proto, sk->proto_variant);
	/* source peer sequence-tracking parameters */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
	if (s->src.wscale && s->dst.wscale)
		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
	printf("]");
	/* destination peer sequence-tracking parameters */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
	if (s->src.wscale && s->dst.wscale)
		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
	printf("]");
	printf(" %u:%u", s->src.state, s->dst.state);
}
1901
1902void
1903pf_print_flags(u_int8_t f)
1904{
1905	if (f)
1906		printf(" ");
1907	if (f & TH_FIN)
1908		printf("F");
1909	if (f & TH_SYN)
1910		printf("S");
1911	if (f & TH_RST)
1912		printf("R");
1913	if (f & TH_PUSH)
1914		printf("P");
1915	if (f & TH_ACK)
1916		printf("A");
1917	if (f & TH_URG)
1918		printf("U");
1919	if (f & TH_ECE)
1920		printf("E");
1921	if (f & TH_CWR)
1922		printf("W");
1923}
1924
/*
 * PF_SET_SKIP_STEPS(i): advance head[i] up to 'cur', pointing each
 * passed rule's skip[i] at 'cur' - the first rule that differs in
 * field i.  Only used by pf_calc_skip_steps() below, which supplies
 * the 'head' array and 'cur'.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

/*
 * Precompute the skip-step pointers of every rule in 'rules': for
 * each PF_SKIP_* field, a rule's skip entry points at the next rule
 * with a different value in that field, allowing evaluation to jump
 * over runs of rules that cannot match.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {

		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		/* source port: GRE/ESP rules always step */
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1])
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		/* destination: GRE compares call ids, ESP compares SPIs */
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id)
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				break;
			case IPPROTO_ESP:
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi)
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1])
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* terminate every chain: point remaining rules at the list end */
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
2010
/*
 * Hash a state key into a 32-bit flow hash.  The lan/ext address and
 * xport pairs are placed in canonical (smaller-first) order before
 * hashing so both directions of a flow produce the same value.
 */
u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key *sk)
{
	/* 8-byte-aligned key buffer, zeroed so padding hashes equal */
	struct pf_flowhash_key fh __attribute__((aligned(8)));

	bzero(&fh, sizeof (fh));
	if (PF_ALEQ(&sk->lan.addr, &sk->ext.addr, sk->af)) {
		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
		bcopy(&sk->ext.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
	} else {
		bcopy(&sk->ext.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
	}
	if (sk->lan.xport.spi <= sk->ext.xport.spi) {
		fh.ap1.xport.spi = sk->lan.xport.spi;
		fh.ap2.xport.spi = sk->ext.xport.spi;
	} else {
		fh.ap1.xport.spi = sk->ext.xport.spi;
		fh.ap2.xport.spi = sk->lan.xport.spi;
	}
	fh.af = sk->af;
	fh.proto = sk->proto;

	return (net_flowhash(&fh, sizeof (fh), pf_hash_seed));
}
2036
2037static int
2038pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2039{
2040	if (aw1->type != aw2->type)
2041		return (1);
2042	switch (aw1->type) {
2043	case PF_ADDR_ADDRMASK:
2044	case PF_ADDR_RANGE:
2045		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
2046			return (1);
2047		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
2048			return (1);
2049		return (0);
2050	case PF_ADDR_DYNIFTL:
2051		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
2052	case PF_ADDR_NOROUTE:
2053	case PF_ADDR_URPFFAILED:
2054		return (0);
2055	case PF_ADDR_TABLE:
2056		return (aw1->p.tbl != aw2->p.tbl);
2057	case PF_ADDR_RTLABEL:
2058		return (aw1->v.rtlabel != aw2->v.rtlabel);
2059	default:
2060		printf("invalid address type: %d\n", aw1->type);
2061		return (1);
2062	}
2063}
2064
/*
 * Incrementally patch a 16-bit ones-complement checksum after one
 * 16-bit word changed from 'old' to 'new' (RFC 1624 style).  For UDP
 * (udp != 0) a zero checksum means "no checksum": an input of 0 is
 * left alone and a computed 0 is returned as 0xffff.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0);	/* no checksum present: keep it that way */

	sum = cksum + old - new;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carry/borrow */
	sum &= 0xffff;

	if (udp && sum == 0)
		return (0xffff);	/* 0 is reserved for "no checksum" */
	return (sum);
}
2079
/*
 * Rewrite the address *a and port *p to an/pn in place and patch the
 * IP header checksum *ic and the transport (TCP/UDP) checksum *pc
 * incrementally via pf_cksum_fixup() ('u' selects the UDP zero-
 * checksum semantics there).  Outbound packets whose csum_flags show
 * a pending TCP/UDP checksum get only the address part folded into
 * the complemented pseudo-header sum; see the inline comments.
 */
static void
pf_change_ap(int dir, struct mbuf *m, struct pf_addr *a, u_int16_t *p,
    u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
    u_int8_t u, sa_family_t af)
{
	struct pf_addr	ao;
	u_int16_t	po = *p;

	PF_ACPY(&ao, a, af);
	PF_ACPY(a, an, af);

	*p = pn;

	switch (af) {
#if INET
	case AF_INET:
		/* the IP header checksum covers only the address words */
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ao.addr16[0], an->addr16[0], 0),
		    ao.addr16[1], an->addr16[1], 0);
		*p = pn;
		/*
		 * If the packet is originated from an ALG on the NAT gateway
		 * (source address is loopback or local), in which case the
		 * TCP/UDP checksum field contains the pseudo header checksum
		 * that's not yet complemented.
		 */
		if (dir == PF_OUT && m != NULL &&
		    (m->m_flags & M_PKTHDR) &&
		    (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) {
			/* Pseudo-header checksum does not include ports */
			*pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u);
		} else {
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    po, pn, u);
		}
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		/*
		 * If the packet is originated from an ALG on the NAT gateway
		 * (source address is loopback or local), in which case the
		 * TCP/UDP checksum field contains the pseudo header checksum
		 * that's not yet complemented.
		 */
		if (dir == PF_OUT && m != NULL &&
		    (m->m_flags & M_PKTHDR) &&
		    (m->m_pkthdr.csum_flags & (CSUM_TCPIPV6 | CSUM_UDPIPV6))) {
			/* Pseudo-header checksum does not include ports */
			*pc = ~pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(~*pc,
		    		ao.addr16[0], an->addr16[0], u),
		    		ao.addr16[1], an->addr16[1], u),
		    		ao.addr16[2], an->addr16[2], u),
		    		ao.addr16[3], an->addr16[3], u),
		    		ao.addr16[4], an->addr16[4], u),
		    		ao.addr16[5], an->addr16[5], u),
		    		ao.addr16[6], an->addr16[6], u),
		    		ao.addr16[7], an->addr16[7], u),
		    		po, pn, u);
		} else {
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
		    		ao.addr16[0], an->addr16[0], u),
		    		ao.addr16[1], an->addr16[1], u),
		    		ao.addr16[2], an->addr16[2], u),
		    		ao.addr16[3], an->addr16[3], u),
		    		ao.addr16[4], an->addr16[4], u),
		    		ao.addr16[5], an->addr16[5], u),
		    		ao.addr16[6], an->addr16[6], u),
		    		ao.addr16[7], an->addr16[7], u),
		    		po, pn, u);
		}
		break;
#endif /* INET6 */
	}
}
2163
2164
/*
 * Changes a u_int32_t in place and patches checksum *c accordingly.
 * Takes a void * so there are no alignment restrictions on 'a'.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	old;

	/* unaligned-safe load of the old value, then store the new one */
	memcpy(&old, a, sizeof (old));
	memcpy(a, &an, sizeof (an));
	/* patch the checksum one 16-bit half at a time */
	*c = pf_cksum_fixup(*c, old >> 16, an >> 16, u);
	*c = pf_cksum_fixup(*c, old & 0xffff, an & 0xffff, u);
}
2176
2177#if INET6
/*
 * Rewrite IPv6 address *a to *an in place and patch checksum *c for
 * all eight 16-bit words of the address ('u' is the UDP zero-checksum
 * flag forwarded to pf_cksum_fixup()).
 */
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
2198#endif /* INET6 */
2199
/*
 * Rewrite the addresses/ports embedded in an ICMP error and its quoted
 * inner packet, patching every checksum touched along the way.
 *
 * ia/ip	inner (quoted) address and port, rewritten to na/np
 * oa		outer IP address, also rewritten to na
 * pc		inner protocol (TCP/UDP) checksum; may be NULL
 * h2c		inner IP header checksum (AF_INET)
 * ic		ICMP/ICMPv6 checksum
 * hc		outer IP header checksum (AF_INET)
 * u		UDP zero-checksum flag for pf_cksum_fixup()
 * af		address family
 */
static void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr	oia, ooa;

	PF_ACPY(&oia, ia, af);
	PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t	oip = *ip;
		u_int32_t	opc = 0;

		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		/* the ICMP checksum covers the quoted header too */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t	 oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#if INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
	PF_ACPY(oa, na, af);
	switch (af) {
#if INET
	case AF_INET:
		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
		    ooa.addr16[0], oa->addr16[0], 0),
		    ooa.addr16[1], oa->addr16[1], 0);
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		/* v6 has no header checksum; the outer address is covered
		 * by the ICMPv6 pseudo-header checksum instead */
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ooa.addr16[0], oa->addr16[0], u),
		    ooa.addr16[1], oa->addr16[1], u),
		    ooa.addr16[2], oa->addr16[2], u),
		    ooa.addr16[3], oa->addr16[3], u),
		    ooa.addr16[4], oa->addr16[4], u),
		    ooa.addr16[5], oa->addr16[5], u),
		    ooa.addr16[6], oa->addr16[6], u),
		    ooa.addr16[7], oa->addr16[7], u);
		break;
#endif /* INET6 */
	}
}
2284
2285
2286/*
2287 * Need to modulate the sequence numbers in the TCP SACK option
2288 * (credits to Krzysztof Pfaff for report and patch)
2289 */
static int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* hlen: length of the TCP options area following the header */
	int hlen = (th->th_off << 2) - sizeof (*th), thoptlen = hlen;
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
	/* nothing to do unless at least one SACK block could fit and the
	 * options can be pulled into a contiguous buffer */
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof (*th), opts, hlen, NULL, NULL, pd->af))
		return (0);

	/* walk the TCP options; EOL and NOP are single-byte options */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				/* shift every SACK edge by the peer's
				 * sequence-modulation offset, fixing the
				 * TCP checksum as we go */
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof (sack));
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof (sack));
				}
				copyback = off + sizeof (*th) + thoptlen;
			}
			/* FALLTHROUGH */
		default:
			if (olen < 2)
				olen = 2;
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback) {
		/* write the rewritten options back into the (now
		 * writable) mbuf chain */
		m = pf_lazy_makewritable(pd, m, copyback);
		if (!m)
			return (-1);
		m_copyback(m, off + sizeof (*th), thoptlen, opts);
	}
	return (copyback);
}
2346
/*
 * Build and transmit a bare TCP segment (e.g. an RST for
 * "block return-rst" or a synproxy SYN/ACK).  The segment is
 * constructed from scratch in a fresh mbuf, tagged PF_TAG_GENERATED so
 * pf does not filter its own packet again, and handed directly to
 * ip_output()/ip6_output().  A TCP MSS option is appended when 'mss'
 * is nonzero.
 */
static void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
#pragma unused(eh, ifp)
	struct mbuf	*m;
	int		 len, tlen;
#if INET
	struct ip	*h = NULL;
#endif /* INET */
#if INET6
	struct ip6_hdr	*h6 = NULL;
#endif /* INET6 */
	struct tcphdr	*th = NULL;
	char		*opt;
	struct pf_mtag	*pf_mtag;

	/* maximum segment size tcp option */
	tlen = sizeof (struct tcphdr);
	if (mss)
		tlen += 4;

	switch (af) {
#if INET
	case AF_INET:
		len = sizeof (struct ip) + tlen;
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		len = sizeof (struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	default:
		panic("pf_send_tcp: not AF_INET or AF_INET6!");
		return;
	}

	/* create outgoing mbuf */
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL)
		return;

	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
		m_free(m);
		return;
	}

	if (tag)
		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
	pf_mtag->pftag_tag = rtag;

	if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid))
		pf_mtag->pftag_rtableid = r->rtableid;

#if PF_ALTQ
	if (altq_allowed && r != NULL && r->qid)
		pf_mtag->pftag_qid = r->qid;
#endif /* PF_ALTQ */

	/* add hints for ecn */
	pf_mtag->pftag_hdr = mtod(m, struct ip *);
	/* record address family */
	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
	switch (af) {
#if INET
	case AF_INET:
		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
		break;
#endif /* INET6 */
	}
	/* indicate this is TCP */
	pf_mtag->pftag_flags |= PF_TAG_TCP;

	/* Make sure headers are 32-bit aligned */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	switch (af) {
#if INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)(void *)((caddr_t)h + sizeof (struct ip));
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof (struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof (struct in6_addr));

		th = (struct tcphdr *)(void *)
		    ((caddr_t)h6 + sizeof (struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	if (mss) {
		/* append a 4-byte MSS option right after the header */
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
#if BYTE_ORDER != BIG_ENDIAN
		HTONS(mss);
#endif
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

	switch (af) {
#if INET
	case AF_INET: {
		struct route ro;

		/* TCP checksum */
		th->th_sum = in_cksum(m, len);

		/* Finish the IP header */
		h->ip_v = 4;
		h->ip_hl = sizeof (*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
		/*
		 * ip_output() expects ip_len and ip_off to be in host order.
		 */
		h->ip_len = len;
		h->ip_off = (path_mtu_discovery ? IP_DF : 0);
		h->ip_ttl = ttl ? ttl : ip_defttl;
		h->ip_sum = 0;

		bzero(&ro, sizeof (ro));
		ip_output(m, NULL, &ro, 0, NULL, NULL);
		if (ro.ro_rt != NULL)
			rtfree(ro.ro_rt);
		break;
	}
#endif /* INET */
#if INET6
	case AF_INET6: {
		struct route_in6 ro6;

		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof (struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

		bzero(&ro6, sizeof (ro6));
		ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
		if (ro6.ro_rt != NULL)
			rtfree(ro6.ro_rt);
		break;
	}
#endif /* INET6 */
	}
}
2530
2531static void
2532pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2533    struct pf_rule *r)
2534{
2535	struct mbuf	*m0;
2536	struct pf_mtag	*pf_mtag;
2537
2538	m0 = m_copy(m, 0, M_COPYALL);
2539	if (m0 == NULL)
2540		return;
2541
2542	if ((pf_mtag = pf_get_mtag(m0)) == NULL)
2543		return;
2544
2545	pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2546
2547	if (PF_RTABLEID_IS_VALID(r->rtableid))
2548		pf_mtag->pftag_rtableid = r->rtableid;
2549
2550#if PF_ALTQ
2551	if (altq_allowed && r->qid)
2552		pf_mtag->pftag_qid = r->qid;
2553#endif /* PF_ALTQ */
2554
2555	/* add hints for ecn */
2556	pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2557	/* record address family */
2558	pf_mtag->pftag_flags &=
2559	    ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6 | PF_TAG_TCP);
2560	switch (af) {
2561#if INET
2562	case AF_INET:
2563		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2564		break;
2565#endif /* INET */
2566#if INET6
2567	case AF_INET6:
2568		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2569		break;
2570#endif /* INET6 */
2571	}
2572
2573	switch (af) {
2574#if INET
2575	case AF_INET:
2576		icmp_error(m0, type, code, 0, 0);
2577		break;
2578#endif /* INET */
2579#if INET6
2580	case AF_INET6:
2581		icmp6_error(m0, type, code, 0);
2582		break;
2583#endif /* INET6 */
2584	}
2585}
2586
2587/*
2588 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2589 * If n is 0, they match if they are equal. If n is != 0, they match if they
2590 * are different.
2591 */
2592int
2593pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2594    struct pf_addr *b, sa_family_t af)
2595{
2596	int	match = 0;
2597
2598	switch (af) {
2599#if INET
2600	case AF_INET:
2601		if ((a->addr32[0] & m->addr32[0]) ==
2602		    (b->addr32[0] & m->addr32[0]))
2603			match++;
2604		break;
2605#endif /* INET */
2606#if INET6
2607	case AF_INET6:
2608		if (((a->addr32[0] & m->addr32[0]) ==
2609		     (b->addr32[0] & m->addr32[0])) &&
2610		    ((a->addr32[1] & m->addr32[1]) ==
2611		     (b->addr32[1] & m->addr32[1])) &&
2612		    ((a->addr32[2] & m->addr32[2]) ==
2613		     (b->addr32[2] & m->addr32[2])) &&
2614		    ((a->addr32[3] & m->addr32[3]) ==
2615		     (b->addr32[3] & m->addr32[3])))
2616			match++;
2617		break;
2618#endif /* INET6 */
2619	}
2620	if (match) {
2621		if (n)
2622			return (0);
2623		else
2624			return (1);
2625	} else {
2626		if (n)
2627			return (1);
2628		else
2629			return (0);
2630	}
2631}
2632
2633/*
2634 * Return 1 if b <= a <= e, otherwise return 0.
2635 */
2636int
2637pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2638    struct pf_addr *a, sa_family_t af)
2639{
2640	switch (af) {
2641#if INET
2642	case AF_INET:
2643		if ((a->addr32[0] < b->addr32[0]) ||
2644		    (a->addr32[0] > e->addr32[0]))
2645			return (0);
2646		break;
2647#endif /* INET */
2648#if INET6
2649	case AF_INET6: {
2650		int	i;
2651
2652		/* check a >= b */
2653		for (i = 0; i < 4; ++i)
2654			if (a->addr32[i] > b->addr32[i])
2655				break;
2656			else if (a->addr32[i] < b->addr32[i])
2657				return (0);
2658		/* check a <= e */
2659		for (i = 0; i < 4; ++i)
2660			if (a->addr32[i] < e->addr32[i])
2661				break;
2662			else if (a->addr32[i] > e->addr32[i])
2663				return (0);
2664		break;
2665	}
2666#endif /* INET6 */
2667	}
2668	return (1);
2669}
2670
2671int
2672pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2673{
2674	switch (op) {
2675	case PF_OP_IRG:
2676		return ((p > a1) && (p < a2));
2677	case PF_OP_XRG:
2678		return ((p < a1) || (p > a2));
2679	case PF_OP_RRG:
2680		return ((p >= a1) && (p <= a2));
2681	case PF_OP_EQ:
2682		return (p == a1);
2683	case PF_OP_NE:
2684		return (p != a1);
2685	case PF_OP_LT:
2686		return (p < a1);
2687	case PF_OP_LE:
2688		return (p <= a1);
2689	case PF_OP_GT:
2690		return (p > a1);
2691	case PF_OP_GE:
2692		return (p >= a1);
2693	}
2694	return (0); /* never reached */
2695}
2696
/*
 * Port comparison wrapper: rule bounds (a1, a2) and the packet port (p)
 * arrive in network byte order, so swap all three to host order before
 * the numeric comparison in pf_match().  NTOHS() swaps its argument in
 * place, which is why the locals are modified directly.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(a1);
	NTOHS(a2);
	NTOHS(p);
#endif
	return (pf_match(op, a1, a2, p));
}
2707
2708int
2709pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
2710    union pf_state_xport *sx)
2711{
2712	int d = !0;
2713
2714	if (sx) {
2715		switch (proto) {
2716		case IPPROTO_GRE:
2717			if (proto_variant == PF_GRE_PPTP_VARIANT)
2718				d = (rx->call_id == sx->call_id);
2719			break;
2720
2721		case IPPROTO_ESP:
2722			d = (rx->spi == sx->spi);
2723			break;
2724
2725		case IPPROTO_TCP:
2726		case IPPROTO_UDP:
2727		case IPPROTO_ICMP:
2728		case IPPROTO_ICMPV6:
2729			if (rx->range.op)
2730				d = pf_match_port(rx->range.op,
2731				    rx->range.port[0], rx->range.port[1],
2732				    sx->port);
2733			break;
2734
2735		default:
2736			break;
2737		}
2738	}
2739
2740	return (d);
2741}
2742
2743int
2744pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2745{
2746	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2747		return (0);
2748	return (pf_match(op, a1, a2, u));
2749}
2750
2751int
2752pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2753{
2754	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2755		return (0);
2756	return (pf_match(op, a1, a2, g));
2757}
2758
2759static int
2760pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
2761    int *tag)
2762{
2763#pragma unused(m)
2764	if (*tag == -1)
2765		*tag = pf_mtag->pftag_tag;
2766
2767	return ((!r->match_tag_not && r->match_tag == *tag) ||
2768	    (r->match_tag_not && r->match_tag != *tag));
2769}
2770
2771int
2772pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag,
2773    unsigned int rtableid, struct pf_pdesc *pd)
2774{
2775	if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
2776		(pd == NULL || pd->flowhash == 0))
2777		return (0);
2778
2779	if (pf_mtag == NULL && (pf_mtag = pf_get_mtag(m)) == NULL)
2780		return (1);
2781
2782	if (tag > 0)
2783		pf_mtag->pftag_tag = tag;
2784	if (PF_RTABLEID_IS_VALID(rtableid))
2785		pf_mtag->pftag_rtableid = rtableid;
2786	if (pd != NULL && pd->flowhash != 0) {
2787		pf_mtag->pftag_flags |= PF_TAG_FLOWHASH;
2788		pf_mtag->pftag_flowhash = pd->flowhash;
2789		pf_mtag->pftag_flags |= (pd->flags & PFDESC_FLOW_ADV) ?
2790			PF_TAG_FLOWADV : 0;
2791	}
2792
2793	return (0);
2794}
2795
/*
 * Descend into the anchor attached to rule *r, pushing the current
 * (ruleset, rule) pair onto pf_anchor_stack so pf_step_out_of_anchor()
 * can resume evaluation afterwards.  For wildcard anchors ("name/*")
 * iteration starts at the anchor's first child ruleset.  On return *r
 * points at the first rule of the new ruleset (or NULL when a wildcard
 * anchor has no children); on stack overflow the anchor is skipped.
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a,  int *match)
{
	struct pf_anchor_stackframe	*f;

	(*r)->anchor->match = 0;
	if (match)
		*match = 0;
	if (*depth >= (int)sizeof (pf_anchor_stack) /
	    (int)sizeof (pf_anchor_stack[0])) {
		printf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL)
		*a = *r;
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		/* wildcard anchor: iterate over all child rulesets */
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		/* plain anchor: evaluate its own ruleset only */
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}
2830
2831int
2832pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2833    struct pf_rule **r, struct pf_rule **a, int *match)
2834{
2835	struct pf_anchor_stackframe	*f;
2836	int quick = 0;
2837
2838	do {
2839		if (*depth <= 0)
2840			break;
2841		f = pf_anchor_stack + *depth - 1;
2842		if (f->parent != NULL && f->child != NULL) {
2843			if (f->child->match ||
2844			    (match != NULL && *match)) {
2845				f->r->anchor->match = 1;
2846				*match = 0;
2847			}
2848			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2849			if (f->child != NULL) {
2850				*rs = &f->child->ruleset;
2851				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2852				if (*r == NULL)
2853					continue;
2854				else
2855					break;
2856			}
2857		}
2858		(*depth)--;
2859		if (*depth == 0 && a != NULL)
2860			*a = NULL;
2861		*rs = f->rs;
2862		if (f->r->anchor->match || (match  != NULL && *match))
2863			quick = f->r->quick;
2864		*r = TAILQ_NEXT(f->r, entries);
2865	} while (*r == NULL);
2866
2867	return (quick);
2868}
2869
2870#if INET6
2871void
2872pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2873    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2874{
2875	switch (af) {
2876#if INET
2877	case AF_INET:
2878		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2879		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
2880		break;
2881#endif /* INET */
2882	case AF_INET6:
2883		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2884		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
2885		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2886		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
2887		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2888		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
2889		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2890		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
2891		break;
2892	}
2893}
2894
2895void
2896pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2897{
2898	switch (af) {
2899#if INET
2900	case AF_INET:
2901		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2902		break;
2903#endif /* INET */
2904	case AF_INET6:
2905		if (addr->addr32[3] == 0xffffffff) {
2906			addr->addr32[3] = 0;
2907			if (addr->addr32[2] == 0xffffffff) {
2908				addr->addr32[2] = 0;
2909				if (addr->addr32[1] == 0xffffffff) {
2910					addr->addr32[1] = 0;
2911					addr->addr32[0] =
2912					    htonl(ntohl(addr->addr32[0]) + 1);
2913				} else
2914					addr->addr32[1] =
2915					    htonl(ntohl(addr->addr32[1]) + 1);
2916			} else
2917				addr->addr32[2] =
2918				    htonl(ntohl(addr->addr32[2]) + 1);
2919		} else
2920			addr->addr32[3] =
2921			    htonl(ntohl(addr->addr32[3]) + 1);
2922		break;
2923	}
2924}
2925#endif /* INET6 */
2926
/*
 * One round of Bob Jenkins' 96-bit mix: reversibly stirs a, b and c so
 * that every input bit influences every output word.  Used by pf_hash()
 * below.
 */
#define mix(a, b, c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)
2939
2940/*
2941 * hash function based on bridge_hash in if_bridge.c
2942 */
2943static void
2944pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2945    struct pf_poolhashkey *key, sa_family_t af)
2946{
2947	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2948
2949	switch (af) {
2950#if INET
2951	case AF_INET:
2952		a += inaddr->addr32[0];
2953		b += key->key32[1];
2954		mix(a, b, c);
2955		hash->addr32[0] = c + key->key32[2];
2956		break;
2957#endif /* INET */
2958#if INET6
2959	case AF_INET6:
2960		a += inaddr->addr32[0];
2961		b += inaddr->addr32[2];
2962		mix(a, b, c);
2963		hash->addr32[0] = c;
2964		a += inaddr->addr32[1];
2965		b += inaddr->addr32[3];
2966		c += key->key32[1];
2967		mix(a, b, c);
2968		hash->addr32[1] = c;
2969		a += inaddr->addr32[2];
2970		b += inaddr->addr32[1];
2971		c += key->key32[2];
2972		mix(a, b, c);
2973		hash->addr32[2] = c;
2974		a += inaddr->addr32[3];
2975		b += inaddr->addr32[0];
2976		c += key->key32[3];
2977		mix(a, b, c);
2978		hash->addr32[3] = c;
2979		break;
2980#endif /* INET6 */
2981	}
2982}
2983
/*
 * Select a translation/route address 'naddr' from rule r's address pool
 * according to the pool type (none, bitmask, random, source-hash,
 * round-robin), optionally reusing a previously chosen address via
 * sticky-address source tracking.  'init_addr' records the first
 * address handed out so round-robin/random callers can detect a full
 * wrap.  Returns 0 on success, 1 when no usable address of family 'af'
 * is available.
 */
static int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char		 hash[16];
	struct pf_pool		*rpool = &r->rpool;
	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_src_node	 k;

	/* sticky-address: reuse the address recorded for this source */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, af);
				printf("\n");
			}
			return (0);
		}
	}

	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* interface-derived pool: use the interface's addr/mask */
		switch (af) {
#if INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
#if INET6
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
#endif /* INET6 */
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1); /* unsupported */
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		/* single address: use it verbatim */
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		/* keep the source's host bits under the pool mask */
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			/* first pick: randomize the host part of counter,
			 * stopping at the first fully-masked word */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				if (rmask->addr32[3] != 0xffffffff)
					rpool->counter.addr32[3] =
					    htonl(random());
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					rpool->counter.addr32[2] =
					    htonl(random());
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					rpool->counter.addr32[1] =
					    htonl(random());
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					rpool->counter.addr32[0] =
					    htonl(random());
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
			PF_ACPY(init_addr, naddr, af);

		} else {
			/* subsequent picks walk sequentially from there */
			PF_AINC(&rpool->counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
		}
		break;
	case PF_POOL_SRCHASH:
		/* deterministic pick keyed on the source address */
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/* try to continue from the current pool entry first */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
			goto get_addr;

	try_next:
		/* advance to the next pool entry, wrapping at the end;
		 * giving up once we come back around to 'acur' */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
			rpool->cur = TAILQ_FIRST(&rpool->list);
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, af);
		}

	get_addr:
		PF_ACPY(naddr, &rpool->counter, af);
		if (init_addr != NULL && PF_AZERO(init_addr, af))
			PF_ACPY(init_addr, naddr, af);
		PF_AINC(&rpool->counter, af);
		break;
	}
	/* remember the choice for sticky-address reuse */
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		printf("\n");
	}

	return (0);
}
3166
3167static int
3168pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
3169    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
3170    union pf_state_xport *dxport, struct pf_addr *naddr,
3171    union pf_state_xport *nxport, struct pf_src_node **sn)
3172{
3173#pragma unused(kif)
3174	struct pf_state_key_cmp	key;
3175	struct pf_addr		init_addr;
3176	unsigned int cut;
3177	sa_family_t af = pd->af;
3178	u_int8_t proto = pd->proto;
3179	unsigned int low = r->rpool.proxy_port[0];
3180	unsigned int high = r->rpool.proxy_port[1];
3181
3182	bzero(&init_addr, sizeof (init_addr));
3183	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
3184		return (1);
3185
3186	if (proto == IPPROTO_ICMP) {
3187		low = 1;
3188		high = 65535;
3189	}
3190
3191	if (!nxport)
3192		return (0); /* No output necessary. */
3193
3194	/*--- Special mapping rules for UDP ---*/
3195	if (proto == IPPROTO_UDP) {
3196
3197		/*--- Never float IKE source port ---*/
3198		if (ntohs(sxport->port) == PF_IKE_PORT) {
3199			nxport->port = sxport->port;
3200			return (0);
3201		}
3202
3203		/*--- Apply exterior mapping options ---*/
3204		if (r->extmap > PF_EXTMAP_APD) {
3205			struct pf_state *s;
3206
3207			TAILQ_FOREACH(s, &state_list, entry_list) {
3208				struct pf_state_key *sk = s->state_key;
3209				if (!sk)
3210					continue;
3211				if (s->nat_rule.ptr != r)
3212					continue;
3213				if (sk->proto != IPPROTO_UDP || sk->af != af)
3214					continue;
3215				if (sk->lan.xport.port != sxport->port)
3216					continue;
3217				if (PF_ANEQ(&sk->lan.addr, saddr, af))
3218					continue;
3219				if (r->extmap < PF_EXTMAP_EI &&
3220				    PF_ANEQ(&sk->ext.addr, daddr, af))
3221					continue;
3222
3223				nxport->port = sk->gwy.xport.port;
3224				return (0);
3225			}
3226		}
3227	} else if (proto == IPPROTO_TCP) {
3228		struct pf_state* s;
3229		/*
3230		 * APPLE MODIFICATION: <rdar://problem/6546358>
3231		 * Fix allows....NAT to use a single binding for TCP session
3232		 * with same source IP and source port
3233		 */
3234		TAILQ_FOREACH(s, &state_list, entry_list) {
3235			struct pf_state_key* sk = s->state_key;
3236			if (!sk)
3237				continue;
3238			if (s->nat_rule.ptr != r)
3239				continue;
3240			if (sk->proto != IPPROTO_TCP || sk->af != af)
3241				 continue;
3242			if (sk->lan.xport.port != sxport->port)
3243				continue;
3244			if (!(PF_AEQ(&sk->lan.addr, saddr, af)))
3245				continue;
3246			nxport->port = sk->gwy.xport.port;
3247			return (0);
3248		}
3249	}
3250	do {
3251		key.af = af;
3252		key.proto = proto;
3253		PF_ACPY(&key.ext.addr, daddr, key.af);
3254		PF_ACPY(&key.gwy.addr, naddr, key.af);
3255		switch (proto) {
3256			case IPPROTO_UDP:
3257				key.proto_variant = r->extfilter;
3258				break;
3259			default:
3260				key.proto_variant = 0;
3261				break;
3262		}
3263		if (dxport)
3264			key.ext.xport = *dxport;
3265		else
3266			memset(&key.ext.xport, 0, sizeof (key.ext.xport));
3267		/*
3268		 * port search; start random, step;
3269		 * similar 2 portloop in in_pcbbind
3270		 */
3271		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
3272		    proto == IPPROTO_ICMP)) {
3273			if (dxport)
3274				key.gwy.xport = *dxport;
3275			else
3276				memset(&key.gwy.xport, 0,
3277				    sizeof (key.ext.xport));
3278			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
3279				return (0);
3280		} else if (low == 0 && high == 0) {
3281			key.gwy.xport = *nxport;
3282			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
3283				return (0);
3284		} else if (low == high) {
3285			key.gwy.xport.port = htons(low);
3286			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
3287				nxport->port = htons(low);
3288				return (0);
3289			}
3290		} else {
3291			unsigned int tmp;
3292			if (low > high) {
3293				tmp = low;
3294				low = high;
3295				high = tmp;
3296			}
3297			/* low < high */
3298			cut = htonl(random()) % (1 + high - low) + low;
3299			/* low <= cut <= high */
3300			for (tmp = cut; tmp <= high; ++(tmp)) {
3301				key.gwy.xport.port = htons(tmp);
3302				if (pf_find_state_all(&key, PF_IN, NULL) ==
3303				    NULL) {
3304					nxport->port = htons(tmp);
3305					return (0);
3306				}
3307			}
3308			for (tmp = cut - 1; tmp >= low; --(tmp)) {
3309				key.gwy.xport.port = htons(tmp);
3310				if (pf_find_state_all(&key, PF_IN, NULL) ==
3311				    NULL) {
3312					nxport->port = htons(tmp);
3313					return (0);
3314				}
3315			}
3316		}
3317
3318		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
3319		case PF_POOL_RANDOM:
3320		case PF_POOL_ROUNDROBIN:
3321			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
3322				return (1);
3323			break;
3324		case PF_POOL_NONE:
3325		case PF_POOL_SRCHASH:
3326		case PF_POOL_BITMASK:
3327		default:
3328			return (1);
3329		}
3330	} while (!PF_AEQ(&init_addr, naddr, af));
3331
3332	return (1);					/* none available */
3333}
3334
/*
 * Scan the active ruleset of type 'rs_num' (NAT, RDR or BINAT) for the
 * first translation rule matching the packet described by 'pd'.
 *
 * Rule selectors are re-oriented depending on action/direction: for
 * inbound BINAT the rule's dst selector is matched against the packet
 * source and the pool (translated) address against the destination;
 * for outbound RDR the src/dst selectors are swapped and the pool
 * address/proxy port stand in for the source side.
 *
 * Returns the matching rule, or NULL when nothing matched, when
 * tagging the packet failed, or when the match was an explicit
 * no-nat/no-rdr/no-binat exclusion rule.
 */
static struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr,
    union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, int rs_num)
{
	struct pf_rule		*r, *rm = NULL;
	struct pf_ruleset	*ruleset = NULL;
	int			 tag = -1;
	unsigned int		 rtableid = IFSCOPE_NONE;
	int			 asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;
		struct pf_addr_wrap	*xsrc = NULL;
		union pf_rule_xport	rdrxport;

		if (r->action == PF_BINAT && direction == PF_IN) {
			/* inbound binat: packet src matched against the
			 * rule's dst selector; translated pool addr is
			 * matched as the destination (xdst) */
			src = &r->dst;
			if (r->rpool.cur != NULL)
				xdst = &r->rpool.cur->addr;
		} else if (r->action == PF_RDR && direction == PF_OUT) {
			/* outbound rdr: rules are written from the
			 * inbound viewpoint, so swap selectors; pool
			 * addr/proxy port (xsrc/rdrxport) match the
			 * packet's source side */
			dst = &r->src;
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				rdrxport.range.op = PF_OP_EQ;
				rdrxport.range.port[0] =
				    htons(r->rpool.proxy_port[0]);
				xsrc = &r->rpool.cur->addr;
			}
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/*
		 * Skip-step evaluation: each failed criterion jumps to
		 * the next rule that could still differ in that field
		 * (r->skip[...]), or simply to the next list entry for
		 * criteria without a precomputed skip step.
		 */
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != pd->af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL))
			r = TAILQ_NEXT(r, entries);
		else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif))
			r = TAILQ_NEXT(r, entries);
		else if (xsrc && (!rdrxport.range.port[0] ||
		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
		    sxport)))
			r = TAILQ_NEXT(r, entries);
		else if (!xsrc && !pf_match_xport(r->proto,
		    r->proto_variant, &src->xport, sxport))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL))
			r = TAILQ_NEXT(r, entries);
		else if (dst && !pf_match_xport(r->proto, r->proto_variant,
		    &dst->xport, dxport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
		    off, pd->hdr.tcp), r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		else {
			/* full match: record tag/rtable, then either
			 * take the rule or descend into its anchor */
			if (r->tag)
				tag = r->tag;
			if (PF_RTABLEID_IS_VALID(r->rtableid))
				rtableid = r->rtableid;
			if (r->anchor == NULL) {
				rm = r;
			} else
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
		}
		if (r == NULL)
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
	}
	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, NULL))
		return (NULL);
	/* explicit "no nat/rdr/binat" rules suppress translation */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
		return (NULL);
	return (rm);
}
3431
/*
 * Find the translation rule (if any) that applies to this packet and
 * compute the translated address (*naddr) and port/spi/call-id
 * (*nxport).
 *
 * Ruleset search order depends on direction: outbound tries BINAT,
 * then RDR, then NAT; inbound tries RDR, then BINAT.  Once a rule is
 * found, the translated address is derived per-action (proxy port
 * allocation for NAT, pool/netmask substitution for BINAT, pool
 * mapping plus proxy-port arithmetic for RDR).
 *
 * Returns the rule on success, or NULL when no translation applies,
 * when the match was an explicit no-nat/no-binat/no-rdr rule, or when
 * address/port allocation failed.
 */
static struct pf_rule *
pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
    int direction, struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, struct pf_addr *naddr,
    union pf_state_xport *nxport)
{
	struct pf_rule	*r = NULL;

	if (direction == PF_OUT) {
		/* outbound: BINAT takes precedence, then RDR, then NAT */
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_BINAT);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
	} else {
		/* inbound: RDR first, then BINAT */
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
	}

	if (r != NULL) {
		switch (r->action) {
		case PF_NONAT:
		case PF_NOBINAT:
		case PF_NORDR:
			/* explicit exclusion: no translation */
			return (NULL);
		case PF_NAT:
			/* allocate a proxy source port (and address) */
			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
			    dxport, naddr, nxport, sn)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return (NULL);
			}
			break;
		case PF_BINAT:
			/*
			 * Bidirectional mapping: substitute the pool
			 * address (outbound) or the rule's src address
			 * (inbound) under the configured mask.  Dynamic
			 * interface addresses require at least one
			 * address of the right family to be present.
			 */
			switch (direction) {
			case PF_OUT:
				if (r->rpool.cur->addr.type ==
				    PF_ADDR_DYNIFTL) {
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
#if INET6
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else {
					PF_POOLMASK(naddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				}
				break;
			case PF_IN:
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
#if INET6
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				break;
			}
			break;
		case PF_RDR: {
			switch (direction) {
			case PF_OUT:
				/* outbound rdr: rewrite destination to
				 * the rule's dst address/port */
				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->dst.addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr4,
						    &r->dst.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
#if INET6
					case AF_INET6:
						if (r->dst.addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr6,
						    &r->dst.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else {
					PF_POOLMASK(naddr,
					    &r->dst.addr.v.a.addr,
					    &r->dst.addr.v.a.mask,
					    daddr, pd->af);
				}
				if (nxport && r->dst.xport.range.port[0])
					nxport->port =
					    r->dst.xport.range.port[0];
				break;
			case PF_IN:
				/* inbound rdr: map destination into the
				 * redirection pool */
				if (pf_map_addr(pd->af, r, saddr,
				    naddr, NULL, sn))
					return (NULL);
				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
				    PF_POOL_BITMASK)
					PF_POOLMASK(naddr, naddr,
					    &r->rpool.cur->addr.v.a.mask, daddr,
					    pd->af);

				if (nxport && dxport) {
					if (r->rpool.proxy_port[1]) {
						u_int32_t	tmp_nport;

						/*
						 * Port range redirect:
						 * preserve the offset of
						 * the original port into
						 * the rule's dst range,
						 * modulo the proxy range.
						 */
						tmp_nport =
						    ((ntohs(dxport->port) -
						    ntohs(r->dst.xport.range.
						    port[0])) %
						    (r->rpool.proxy_port[1] -
						    r->rpool.proxy_port[0] +
						    1)) + r->rpool.proxy_port[0];

						/* wrap around if necessary */
						if (tmp_nport > 65535)
							tmp_nport -= 65535;
						nxport->port =
						    htons((u_int16_t)tmp_nport);
					} else if (r->rpool.proxy_port[0]) {
						nxport->port = htons(r->rpool.
						    proxy_port[0]);
					}
				}
				break;
			}
			break;
		}
		default:
			return (NULL);
		}
	}

	return (r);
}
3638
/*
 * Look up the local socket (PCB) that owns this packet's 4-tuple and
 * fill pd->lookup.uid / pd->lookup.gid with the owner's credentials.
 *
 * Only TCP and UDP are supported.  For outbound packets the tuple is
 * reversed so the lookup is always done from the local endpoint's
 * point of view.  For AF_INET, falls back to a lookup with the
 * addresses mapped into IPv4-mapped IPv6 form (and then to wildcard
 * lookups) before giving up.
 *
 * Returns 1 on success, -1 when no matching PCB was found or the
 * protocol/family is unsupported.
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbinfo	*pi;
	int 			inp = 0;

	if (pd == NULL)
		return (-1);
	/* defaults in case the lookup fails */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL)
			return (-1);
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
		pi = &tcbinfo;
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL)
			return (-1);
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		pi = &udbinfo;
		break;
	default:
		return (-1);
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		/* outbound: swap so the lookup matches the local PCB */
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#if INET
	case AF_INET:
		inp = in_pcblookup_hash_exists(pi, saddr->v4, sport, daddr->v4, dport,
		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
#if INET6
		if (inp == 0) {
			/* retry as IPv4-mapped IPv6 (::ffff:a.b.c.d) */
			struct in6_addr s6, d6;

			memset(&s6, 0, sizeof (s6));
			s6.s6_addr16[5] = htons(0xffff);
			memcpy(&s6.s6_addr32[3], &saddr->v4,
			    sizeof (saddr->v4));

			memset(&d6, 0, sizeof (d6));
			d6.s6_addr16[5] = htons(0xffff);
			memcpy(&d6.s6_addr32[3], &daddr->v4,
			    sizeof (daddr->v4));

			inp = in6_pcblookup_hash_exists(pi, &s6, sport,
			    &d6, dport, 0, &pd->lookup.uid, &pd->lookup.gid, NULL);
			if (inp == 0) {
				/* listener lookups (wildcard local addr) */
				inp = in_pcblookup_hash_exists(pi, saddr->v4, sport,
				    daddr->v4, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
				if (inp == 0) {
					inp = in6_pcblookup_hash_exists(pi, &s6, sport,
					    &d6, dport, INPLOOKUP_WILDCARD,
					    &pd->lookup.uid, &pd->lookup.gid, NULL);
					if (inp == 0)
						return (-1);
				}
			}
		}
#else
		if (inp == 0) {
			inp = in_pcblookup_hash_exists(pi, saddr->v4, sport,
			    daddr->v4, dport, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL);
			if (inp == 0)
				return (-1);
		}
#endif /* !INET6 */
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		inp = in6_pcblookup_hash_exists(pi, &saddr->v6, sport, &daddr->v6,
		    dport, 0, &pd->lookup.uid, &pd->lookup.gid, NULL);
		if (inp == 0) {
			inp = in6_pcblookup_hash_exists(pi, &saddr->v6, sport,
			    &daddr->v6, dport, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL);
			if (inp == 0)
				return (-1);
		}
		break;
#endif /* INET6 */

	default:
		return (-1);
	}

	return (1);
}
3747
3748static u_int8_t
3749pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3750{
3751	int		 hlen;
3752	u_int8_t	 hdr[60];
3753	u_int8_t	*opt, optlen;
3754	u_int8_t	 wscale = 0;
3755
3756	hlen = th_off << 2;		/* hlen <= sizeof (hdr) */
3757	if (hlen <= (int)sizeof (struct tcphdr))
3758		return (0);
3759	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3760		return (0);
3761	opt = hdr + sizeof (struct tcphdr);
3762	hlen -= sizeof (struct tcphdr);
3763	while (hlen >= 3) {
3764		switch (*opt) {
3765		case TCPOPT_EOL:
3766		case TCPOPT_NOP:
3767			++opt;
3768			--hlen;
3769			break;
3770		case TCPOPT_WINDOW:
3771			wscale = opt[2];
3772			if (wscale > TCP_MAX_WINSHIFT)
3773				wscale = TCP_MAX_WINSHIFT;
3774			wscale |= PF_WSCALE_FLAG;
3775			/* FALLTHROUGH */
3776		default:
3777			optlen = opt[1];
3778			if (optlen < 2)
3779				optlen = 2;
3780			hlen -= optlen;
3781			opt += optlen;
3782			break;
3783		}
3784	}
3785	return (wscale);
3786}
3787
3788static u_int16_t
3789pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3790{
3791	int		 hlen;
3792	u_int8_t	 hdr[60];
3793	u_int8_t	*opt, optlen;
3794	u_int16_t	 mss = tcp_mssdflt;
3795
3796	hlen = th_off << 2;	/* hlen <= sizeof (hdr) */
3797	if (hlen <= (int)sizeof (struct tcphdr))
3798		return (0);
3799	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3800		return (0);
3801	opt = hdr + sizeof (struct tcphdr);
3802	hlen -= sizeof (struct tcphdr);
3803	while (hlen >= TCPOLEN_MAXSEG) {
3804		switch (*opt) {
3805		case TCPOPT_EOL:
3806		case TCPOPT_NOP:
3807			++opt;
3808			--hlen;
3809			break;
3810		case TCPOPT_MAXSEG:
3811			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3812#if BYTE_ORDER != BIG_ENDIAN
3813			NTOHS(mss);
3814#endif
3815			/* FALLTHROUGH */
3816		default:
3817			optlen = opt[1];
3818			if (optlen < 2)
3819				optlen = 2;
3820			hlen -= optlen;
3821			opt += optlen;
3822			break;
3823		}
3824	}
3825	return (mss);
3826}
3827
3828static u_int16_t
3829pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
3830{
3831#if INET
3832	struct sockaddr_in	*dst;
3833	struct route		 ro;
3834#endif /* INET */
3835#if INET6
3836	struct sockaddr_in6	*dst6;
3837	struct route_in6	 ro6;
3838#endif /* INET6 */
3839	struct rtentry		*rt = NULL;
3840	int			 hlen;
3841	u_int16_t		 mss = tcp_mssdflt;
3842
3843	switch (af) {
3844#if INET
3845	case AF_INET:
3846		hlen = sizeof (struct ip);
3847		bzero(&ro, sizeof (ro));
3848		dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
3849		dst->sin_family = AF_INET;
3850		dst->sin_len = sizeof (*dst);
3851		dst->sin_addr = addr->v4;
3852		rtalloc(&ro);
3853		rt = ro.ro_rt;
3854		break;
3855#endif /* INET */
3856#if INET6
3857	case AF_INET6:
3858		hlen = sizeof (struct ip6_hdr);
3859		bzero(&ro6, sizeof (ro6));
3860		dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
3861		dst6->sin6_family = AF_INET6;
3862		dst6->sin6_len = sizeof (*dst6);
3863		dst6->sin6_addr = addr->v6;
3864		rtalloc((struct route *)&ro);
3865		rt = ro6.ro_rt;
3866		break;
3867#endif /* INET6 */
3868	default:
3869		panic("pf_calc_mss: not AF_INET or AF_INET6!");
3870		return (0);
3871	}
3872
3873	if (rt && rt->rt_ifp) {
3874		mss = rt->rt_ifp->if_mtu - hlen - sizeof (struct tcphdr);
3875		mss = max(tcp_mssdflt, mss);
3876		RTFREE(rt);
3877	}
3878	mss = min(mss, offer);
3879	mss = max(mss, 64);		/* sanity - at least max opt space */
3880	return (mss);
3881}
3882
/*
 * Resolve the route-to interface for a new state.  For route-to/
 * reply-to rules (other than fastroute), map the source address
 * through the rule's pool and record the pool's current interface
 * in s->rt_kif; otherwise leave s->rt_kif NULL.
 */
static void
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
{
	struct pf_rule *r = s->rule.ptr;

	s->rt_kif = NULL;
	/* nothing to do unless the rule routes via a pool */
	if (!r->rt || r->rt == PF_FASTROUTE)
		return;
	switch (s->state_key->af) {
#if INET
	case AF_INET:
		/* pf_map_addr also picks r->rpool.cur, read below */
		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
		    &s->nat_src_node);
		s->rt_kif = r->rpool.cur->kif;
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
		    &s->nat_src_node);
		s->rt_kif = r->rpool.cur->kif;
		break;
#endif /* INET6 */
	}
}
3908
3909static void
3910pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
3911{
3912	s->state_key = sk;
3913	sk->refcnt++;
3914
3915	/* list is sorted, if-bound states before floating */
3916	if (tail)
3917		TAILQ_INSERT_TAIL(&sk->states, s, next);
3918	else
3919		TAILQ_INSERT_HEAD(&sk->states, s, next);
3920}
3921
/*
 * Detach state 's' from its state key and drop the key reference.
 * When the last reference goes away the key is removed from the
 * lookup trees (unless the caller asks to skip a tree via 'flags',
 * e.g. because it is already unlinking there) and freed, along with
 * any attached app state.
 */
static void
pf_detach_state(struct pf_state *s, int flags)
{
	struct pf_state_key	*sk = s->state_key;

	if (sk == NULL)
		return;

	s->state_key = NULL;
	TAILQ_REMOVE(&sk->states, s, next);
	if (--sk->refcnt == 0) {
		/* last reference: unlink from the state tables */
		if (!(flags & PF_DT_SKIP_EXTGWY))
			RB_REMOVE(pf_state_tree_ext_gwy,
			    &pf_statetbl_ext_gwy, sk);
		if (!(flags & PF_DT_SKIP_LANEXT))
			RB_REMOVE(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext, sk);
		if (sk->app_state)
			pool_put(&pf_app_state_pl, sk->app_state);
		pool_put(&pf_state_key_pl, sk);
	}
}
3944
/*
 * Allocate a fresh state key, attach state 's' to it (head insertion),
 * and, when a prototype key 'psk' is supplied, copy its address/port
 * tuples and identity fields into the new key.  Tree linkage, the
 * state list and the refcount are deliberately NOT copied.
 *
 * Returns the new key, or NULL if the pool allocation failed.
 * NOTE(review): PR_WAITOK suggests the allocation may block -- confirm
 * callers hold no locks that forbid sleeping.
 */
struct pf_state_key *
pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL)
		return (NULL);
	bzero(sk, sizeof (*sk));
	TAILQ_INIT(&sk->states);
	pf_attach_state(sk, s, 0);

	/* initialize state key from psk, if provided */
	if (psk != NULL) {
		bcopy(&psk->lan, &sk->lan, sizeof (sk->lan));
		bcopy(&psk->gwy, &sk->gwy, sizeof (sk->gwy));
		bcopy(&psk->ext, &sk->ext, sizeof (sk->ext));
		sk->af = psk->af;
		sk->proto = psk->proto;
		sk->direction = psk->direction;
		sk->proto_variant = psk->proto_variant;
		VERIFY(psk->app_state == NULL);
		sk->flowhash = psk->flowhash;
		/* don't touch tree entries, states and refcnt on sk */
	}

	return (sk);
}
3972
/*
 * Generate a TCP initial sequence number for a proxied connection:
 * MD5 over (lazily initialized random secret, ports, addresses),
 * plus a random perturbation and a monotonically advancing offset
 * so successive calls for the same 4-tuple still move forward.
 */
static u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
	MD5_CTX ctx;
	u_int32_t digest[4];

	if (pf_tcp_secret_init == 0) {
		/* first use: seed the secret and pre-hash it once */
		read_random(pf_tcp_secret, sizeof (pf_tcp_secret));
		MD5Init(&pf_tcp_secret_ctx);
		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
		    sizeof (pf_tcp_secret));
		pf_tcp_secret_init = 1;
	}
	/* start from the pre-hashed secret context */
	ctx = pf_tcp_secret_ctx;

	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof (u_short));
	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof (u_short));
	if (pd->af == AF_INET6) {
		MD5Update(&ctx, (char *)&pd->src->v6, sizeof (struct in6_addr));
		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof (struct in6_addr));
	} else {
		MD5Update(&ctx, (char *)&pd->src->v4, sizeof (struct in_addr));
		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof (struct in_addr));
	}
	MD5Final((u_char *)digest, &ctx);
	pf_tcp_iss_off += 4096;
	return (digest[0] + random() + pf_tcp_iss_off);
}
4001
4002static int
4003pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4004    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
4005    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4006    struct ifqueue *ifq)
4007{
4008#pragma unused(h)
4009	struct pf_rule		*nr = NULL;
4010	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
4011	sa_family_t		 af = pd->af;
4012	struct pf_rule		*r, *a = NULL;
4013	struct pf_ruleset	*ruleset = NULL;
4014	struct pf_src_node	*nsn = NULL;
4015	struct tcphdr		*th = pd->hdr.tcp;
4016	u_short			 reason;
4017	int			 rewrite = 0, hdrlen = 0;
4018	int			 tag = -1;
4019	unsigned int		 rtableid = IFSCOPE_NONE;
4020	int			 asd = 0;
4021	int			 match = 0;
4022	int			 state_icmp = 0;
4023	u_int16_t		 mss = tcp_mssdflt;
4024	u_int8_t		 icmptype = 0, icmpcode = 0;
4025
4026	struct pf_grev1_hdr	*grev1 = pd->hdr.grev1;
4027	union pf_state_xport bxport, nxport, sxport, dxport;
4028	struct pf_state_key	 psk;
4029
4030	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
4031
4032	if (direction == PF_IN && pf_check_congestion(ifq)) {
4033		REASON_SET(&reason, PFRES_CONGEST);
4034		return (PF_DROP);
4035	}
4036
4037	hdrlen = 0;
4038	sxport.spi = 0;
4039	dxport.spi = 0;
4040	nxport.spi = 0;
4041
4042	switch (pd->proto) {
4043	case IPPROTO_TCP:
4044		sxport.port = th->th_sport;
4045		dxport.port = th->th_dport;
4046		hdrlen = sizeof (*th);
4047		break;
4048	case IPPROTO_UDP:
4049		sxport.port = pd->hdr.udp->uh_sport;
4050		dxport.port = pd->hdr.udp->uh_dport;
4051		hdrlen = sizeof (*pd->hdr.udp);
4052		break;
4053#if INET
4054	case IPPROTO_ICMP:
4055		if (pd->af != AF_INET)
4056			break;
4057		sxport.port = dxport.port = pd->hdr.icmp->icmp_id;
4058		hdrlen = ICMP_MINLEN;
4059		icmptype = pd->hdr.icmp->icmp_type;
4060		icmpcode = pd->hdr.icmp->icmp_code;
4061
4062		if (icmptype == ICMP_UNREACH ||
4063		    icmptype == ICMP_SOURCEQUENCH ||
4064		    icmptype == ICMP_REDIRECT ||
4065		    icmptype == ICMP_TIMXCEED ||
4066		    icmptype == ICMP_PARAMPROB)
4067			state_icmp++;
4068		break;
4069#endif /* INET */
4070#if INET6
4071	case IPPROTO_ICMPV6:
4072		if (pd->af != AF_INET6)
4073			break;
4074		sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id;
4075		hdrlen = sizeof (*pd->hdr.icmp6);
4076		icmptype = pd->hdr.icmp6->icmp6_type;
4077		icmpcode = pd->hdr.icmp6->icmp6_code;
4078
4079		if (icmptype == ICMP6_DST_UNREACH ||
4080		    icmptype == ICMP6_PACKET_TOO_BIG ||
4081		    icmptype == ICMP6_TIME_EXCEEDED ||
4082		    icmptype == ICMP6_PARAM_PROB)
4083			state_icmp++;
4084		break;
4085#endif /* INET6 */
4086	case IPPROTO_GRE:
4087		if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
4088			sxport.call_id = dxport.call_id =
4089			    pd->hdr.grev1->call_id;
4090			hdrlen = sizeof (*pd->hdr.grev1);
4091		}
4092		break;
4093	case IPPROTO_ESP:
4094		sxport.spi = 0;
4095		dxport.spi = pd->hdr.esp->spi;
4096		hdrlen = sizeof (*pd->hdr.esp);
4097		break;
4098	}
4099
4100	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4101
4102	if (direction == PF_OUT) {
4103		bxport = nxport = sxport;
4104		/* check outgoing packet for BINAT/NAT */
4105		if ((nr = pf_get_translation_aux(pd, m, off, PF_OUT, kif, &nsn,
4106		    saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) !=
4107		    NULL) {
4108			PF_ACPY(&pd->baddr, saddr, af);
4109			switch (pd->proto) {
4110			case IPPROTO_TCP:
4111				pf_change_ap(direction, pd->mp, saddr,
4112				    &th->th_sport, pd->ip_sum, &th->th_sum,
4113				    &pd->naddr, nxport.port, 0, af);
4114				sxport.port = th->th_sport;
4115				rewrite++;
4116				break;
4117			case IPPROTO_UDP:
4118				pf_change_ap(direction, pd->mp, saddr,
4119				    &pd->hdr.udp->uh_sport, pd->ip_sum,
4120				    &pd->hdr.udp->uh_sum, &pd->naddr,
4121				    nxport.port, 1, af);
4122				sxport.port = pd->hdr.udp->uh_sport;
4123				rewrite++;
4124				break;
4125#if INET
4126			case IPPROTO_ICMP:
4127				if (pd->af == AF_INET) {
4128					pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
4129					    pd->naddr.v4.s_addr, 0);
4130					pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
4131					    pd->hdr.icmp->icmp_cksum, sxport.port,
4132					    nxport.port, 0);
4133					pd->hdr.icmp->icmp_id = nxport.port;
4134					++rewrite;
4135				}
4136				break;
4137#endif /* INET */
4138#if INET6
4139			case IPPROTO_ICMPV6:
4140				if (pd->af == AF_INET6) {
4141					pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
4142					    &pd->naddr, 0);
4143					rewrite++;
4144				}
4145				break;
4146#endif /* INET */
4147			case IPPROTO_GRE:
4148				switch (af) {
4149#if INET
4150				case AF_INET:
4151					pf_change_a(&saddr->v4.s_addr,
4152					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4153					break;
4154#endif /* INET */
4155#if INET6
4156				case AF_INET6:
4157					PF_ACPY(saddr, &pd->naddr, AF_INET6);
4158					break;
4159#endif /* INET6 */
4160				}
4161				++rewrite;
4162				break;
4163			case IPPROTO_ESP:
4164				bxport.spi = 0;
4165				switch (af) {
4166#if INET
4167				case AF_INET:
4168					pf_change_a(&saddr->v4.s_addr,
4169					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4170					break;
4171#endif /* INET */
4172#if INET6
4173				case AF_INET6:
4174					PF_ACPY(saddr, &pd->naddr, AF_INET6);
4175					break;
4176#endif /* INET6 */
4177				}
4178				break;
4179			default:
4180				switch (af) {
4181#if INET
4182				case AF_INET:
4183					pf_change_a(&saddr->v4.s_addr,
4184					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4185					break;
4186#endif /* INET */
4187#if INET6
4188				case AF_INET6:
4189					PF_ACPY(saddr, &pd->naddr, af);
4190					break;
4191#endif /* INET */
4192				}
4193				break;
4194			}
4195
4196			if (nr->natpass)
4197				r = NULL;
4198			pd->nat_rule = nr;
4199		}
4200	} else {
4201		bxport.port = nxport.port = dxport.port;
4202		/* check incoming packet for BINAT/RDR */
4203		if ((nr = pf_get_translation_aux(pd, m, off, PF_IN, kif, &nsn,
4204		    saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) !=
4205		    NULL) {
4206			PF_ACPY(&pd->baddr, daddr, af);
4207			switch (pd->proto) {
4208			case IPPROTO_TCP:
4209				pf_change_ap(direction, pd->mp, daddr,
4210				    &th->th_dport, pd->ip_sum, &th->th_sum,
4211				    &pd->naddr, nxport.port, 0, af);
4212				dxport.port = th->th_dport;
4213				rewrite++;
4214				break;
4215			case IPPROTO_UDP:
4216				pf_change_ap(direction, pd->mp, daddr,
4217				    &pd->hdr.udp->uh_dport, pd->ip_sum,
4218				    &pd->hdr.udp->uh_sum, &pd->naddr,
4219				    nxport.port, 1, af);
4220				dxport.port = pd->hdr.udp->uh_dport;
4221				rewrite++;
4222				break;
4223#if INET
4224			case IPPROTO_ICMP:
4225				if (pd->af == AF_INET) {
4226					pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
4227					    pd->naddr.v4.s_addr, 0);
4228				}
4229				break;
4230#endif /* INET */
4231#if INET6
4232			case IPPROTO_ICMPV6:
4233				if (pd->af == AF_INET6) {
4234					pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
4235					    &pd->naddr, 0);
4236					rewrite++;
4237				}
4238				break;
4239#endif /* INET6 */
4240			case IPPROTO_GRE:
4241				if (pd->proto_variant == PF_GRE_PPTP_VARIANT)
4242					grev1->call_id = nxport.call_id;
4243
4244				switch (af) {
4245#if INET
4246				case AF_INET:
4247					pf_change_a(&daddr->v4.s_addr,
4248					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4249					break;
4250#endif /* INET */
4251#if INET6
4252				case AF_INET6:
4253					PF_ACPY(daddr, &pd->naddr, AF_INET6);
4254					break;
4255#endif /* INET6 */
4256				}
4257				++rewrite;
4258				break;
4259			case IPPROTO_ESP:
4260				switch (af) {
4261#if INET
4262				case AF_INET:
4263					pf_change_a(&daddr->v4.s_addr,
4264					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4265					break;
4266#endif /* INET */
4267#if INET6
4268				case AF_INET6:
4269					PF_ACPY(daddr, &pd->naddr, AF_INET6);
4270					break;
4271#endif /* INET6 */
4272				}
4273				break;
4274			default:
4275				switch (af) {
4276#if INET
4277				case AF_INET:
4278					pf_change_a(&daddr->v4.s_addr,
4279					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4280					break;
4281#endif /* INET */
4282#if INET6
4283				case AF_INET6:
4284					PF_ACPY(daddr, &pd->naddr, af);
4285					break;
4286#endif /* INET */
4287				}
4288				break;
4289			}
4290
4291			if (nr->natpass)
4292				r = NULL;
4293			pd->nat_rule = nr;
4294		}
4295	}
4296
4297	if (nr && nr->tag > 0)
4298		tag = nr->tag;
4299
4300	while (r != NULL) {
4301		r->evaluations++;
4302		if (pfi_kif_match(r->kif, kif) == r->ifnot)
4303			r = r->skip[PF_SKIP_IFP].ptr;
4304		else if (r->direction && r->direction != direction)
4305			r = r->skip[PF_SKIP_DIR].ptr;
4306		else if (r->af && r->af != af)
4307			r = r->skip[PF_SKIP_AF].ptr;
4308		else if (r->proto && r->proto != pd->proto)
4309			r = r->skip[PF_SKIP_PROTO].ptr;
4310		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
4311		    r->src.neg, kif))
4312			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4313		/* tcp/udp only. port_op always 0 in other cases */
4314		else if (r->proto == pd->proto &&
4315		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
4316		    r->src.xport.range.op &&
4317		    !pf_match_port(r->src.xport.range.op,
4318		    r->src.xport.range.port[0], r->src.xport.range.port[1],
4319		    th->th_sport))
4320			r = r->skip[PF_SKIP_SRC_PORT].ptr;
4321		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
4322		    r->dst.neg, NULL))
4323			r = r->skip[PF_SKIP_DST_ADDR].ptr;
4324		/* tcp/udp only. port_op always 0 in other cases */
4325		else if (r->proto == pd->proto &&
4326		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
4327		    r->dst.xport.range.op &&
4328		    !pf_match_port(r->dst.xport.range.op,
4329		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
4330		    th->th_dport))
4331			r = r->skip[PF_SKIP_DST_PORT].ptr;
4332		/* icmp only. type always 0 in other cases */
4333		else if (r->type && r->type != icmptype + 1)
4334			r = TAILQ_NEXT(r, entries);
4335		/* icmp only. type always 0 in other cases */
4336		else if (r->code && r->code != icmpcode + 1)
4337			r = TAILQ_NEXT(r, entries);
4338		else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
4339		    !(r->tos & pd->tos))
4340			r = TAILQ_NEXT(r, entries);
4341		else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
4342		    !(r->tos & (pd->tos & DSCP_MASK)))
4343			r = TAILQ_NEXT(r, entries);
4344		else if ((r->rule_flag & PFRULE_SC) && r->tos &&
4345		    ((r->tos & SCIDX_MASK) != pd->sc))
4346			r = TAILQ_NEXT(r, entries);
4347		else if (r->rule_flag & PFRULE_FRAGMENT)
4348			r = TAILQ_NEXT(r, entries);
4349		else if (pd->proto == IPPROTO_TCP &&
4350		    (r->flagset & th->th_flags) != r->flags)
4351			r = TAILQ_NEXT(r, entries);
4352		/* tcp/udp only. uid.op always 0 in other cases */
4353		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
4354		    pf_socket_lookup(direction, pd), 1)) &&
4355		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
4356		    pd->lookup.uid))
4357			r = TAILQ_NEXT(r, entries);
4358		/* tcp/udp only. gid.op always 0 in other cases */
4359		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
4360		    pf_socket_lookup(direction, pd), 1)) &&
4361		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
4362		    pd->lookup.gid))
4363			r = TAILQ_NEXT(r, entries);
4364		else if (r->prob && r->prob <= (random() % (UINT_MAX - 1) + 1))
4365			r = TAILQ_NEXT(r, entries);
4366		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
4367			r = TAILQ_NEXT(r, entries);
4368		else if (r->os_fingerprint != PF_OSFP_ANY &&
4369		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
4370		    pf_osfp_fingerprint(pd, m, off, th),
4371		    r->os_fingerprint)))
4372			r = TAILQ_NEXT(r, entries);
4373		else {
4374			if (r->tag)
4375				tag = r->tag;
4376			if (PF_RTABLEID_IS_VALID(r->rtableid))
4377				rtableid = r->rtableid;
4378			if (r->anchor == NULL) {
4379				match = 1;
4380				*rm = r;
4381				*am = a;
4382				*rsm = ruleset;
4383				if ((*rm)->quick)
4384					break;
4385				r = TAILQ_NEXT(r, entries);
4386			} else
4387				pf_step_into_anchor(&asd, &ruleset,
4388				    PF_RULESET_FILTER, &r, &a, &match);
4389		}
4390		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4391		    PF_RULESET_FILTER, &r, &a, &match))
4392			break;
4393	}
4394	r = *rm;
4395	a = *am;
4396	ruleset = *rsm;
4397
4398	REASON_SET(&reason, PFRES_MATCH);
4399
4400	if (r->log || (nr != NULL && nr->log)) {
4401		if (rewrite > 0) {
4402			if (rewrite < off + hdrlen)
4403				rewrite = off + hdrlen;
4404
4405			m = pf_lazy_makewritable(pd, m, rewrite);
4406			if (!m) {
4407				REASON_SET(&reason, PFRES_MEMORY);
4408				return (PF_DROP);
4409			}
4410
4411			m_copyback(m, off, hdrlen, pd->hdr.any);
4412		}
4413		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
4414		    a, ruleset, pd);
4415	}
4416
4417	if ((r->action == PF_DROP) &&
4418	    ((r->rule_flag & PFRULE_RETURNRST) ||
4419	    (r->rule_flag & PFRULE_RETURNICMP) ||
4420	    (r->rule_flag & PFRULE_RETURN))) {
4421		/* undo NAT changes, if they have taken place */
4422		if (nr != NULL) {
4423			if (direction == PF_OUT) {
4424				switch (pd->proto) {
4425				case IPPROTO_TCP:
4426					pf_change_ap(direction, pd->mp, saddr,
4427					    &th->th_sport, pd->ip_sum,
4428					    &th->th_sum, &pd->baddr,
4429					    bxport.port, 0, af);
4430					sxport.port = th->th_sport;
4431					rewrite++;
4432					break;
4433				case IPPROTO_UDP:
4434					pf_change_ap(direction, pd->mp, saddr,
4435					    &pd->hdr.udp->uh_sport, pd->ip_sum,
4436					    &pd->hdr.udp->uh_sum, &pd->baddr,
4437					    bxport.port, 1, af);
4438					sxport.port = pd->hdr.udp->uh_sport;
4439					rewrite++;
4440					break;
4441				case IPPROTO_ICMP:
4442#if INET6
4443				case IPPROTO_ICMPV6:
4444#endif
4445					/* nothing! */
4446					break;
4447				case IPPROTO_GRE:
4448					PF_ACPY(&pd->baddr, saddr, af);
4449					++rewrite;
4450					switch (af) {
4451#if INET
4452					case AF_INET:
4453						pf_change_a(&saddr->v4.s_addr,
4454						    pd->ip_sum,
4455						    pd->baddr.v4.s_addr, 0);
4456						break;
4457#endif /* INET */
4458#if INET6
4459					case AF_INET6:
4460						PF_ACPY(saddr, &pd->baddr,
4461						    AF_INET6);
4462						break;
4463#endif /* INET6 */
4464					}
4465					break;
4466				case IPPROTO_ESP:
4467					PF_ACPY(&pd->baddr, saddr, af);
4468					switch (af) {
4469#if INET
4470					case AF_INET:
4471						pf_change_a(&saddr->v4.s_addr,
4472						    pd->ip_sum,
4473						    pd->baddr.v4.s_addr, 0);
4474						break;
4475#endif /* INET */
4476#if INET6
4477					case AF_INET6:
4478						PF_ACPY(saddr, &pd->baddr,
4479						    AF_INET6);
4480						break;
4481#endif /* INET6 */
4482					}
4483					break;
4484				default:
4485					switch (af) {
4486					case AF_INET:
4487						pf_change_a(&saddr->v4.s_addr,
4488						    pd->ip_sum,
4489						    pd->baddr.v4.s_addr, 0);
4490						break;
4491					case AF_INET6:
4492						PF_ACPY(saddr, &pd->baddr, af);
4493						break;
4494					}
4495				}
4496			} else {
4497				switch (pd->proto) {
4498				case IPPROTO_TCP:
4499					pf_change_ap(direction, pd->mp, daddr,
4500					    &th->th_dport, pd->ip_sum,
4501					    &th->th_sum, &pd->baddr,
4502					    bxport.port, 0, af);
4503					dxport.port = th->th_dport;
4504					rewrite++;
4505					break;
4506				case IPPROTO_UDP:
4507					pf_change_ap(direction, pd->mp, daddr,
4508					    &pd->hdr.udp->uh_dport, pd->ip_sum,
4509					    &pd->hdr.udp->uh_sum, &pd->baddr,
4510					    bxport.port, 1, af);
4511					dxport.port = pd->hdr.udp->uh_dport;
4512					rewrite++;
4513					break;
4514				case IPPROTO_ICMP:
4515#if INET6
4516				case IPPROTO_ICMPV6:
4517#endif
4518					/* nothing! */
4519					break;
4520				case IPPROTO_GRE:
4521					if (pd->proto_variant ==
4522					    PF_GRE_PPTP_VARIANT)
4523						grev1->call_id = bxport.call_id;
4524					++rewrite;
4525					switch (af) {
4526#if INET
4527					case AF_INET:
4528						pf_change_a(&daddr->v4.s_addr,
4529						    pd->ip_sum,
4530						    pd->baddr.v4.s_addr, 0);
4531						break;
4532#endif /* INET */
4533#if INET6
4534					case AF_INET6:
4535						PF_ACPY(daddr, &pd->baddr,
4536						    AF_INET6);
4537						break;
4538#endif /* INET6 */
4539					}
4540					break;
4541				case IPPROTO_ESP:
4542					switch (af) {
4543#if INET
4544					case AF_INET:
4545						pf_change_a(&daddr->v4.s_addr,
4546						    pd->ip_sum,
4547						    pd->baddr.v4.s_addr, 0);
4548						break;
4549#endif /* INET */
4550#if INET6
4551					case AF_INET6:
4552						PF_ACPY(daddr, &pd->baddr,
4553						    AF_INET6);
4554						break;
4555#endif /* INET6 */
4556					}
4557					break;
4558				default:
4559					switch (af) {
4560					case AF_INET:
4561						pf_change_a(&daddr->v4.s_addr,
4562						    pd->ip_sum,
4563						    pd->baddr.v4.s_addr, 0);
4564						break;
4565#if INET6
4566					case AF_INET6:
4567						PF_ACPY(daddr, &pd->baddr, af);
4568						break;
4569#endif /* INET6 */
4570					}
4571				}
4572			}
4573		}
4574		if (pd->proto == IPPROTO_TCP &&
4575		    ((r->rule_flag & PFRULE_RETURNRST) ||
4576		    (r->rule_flag & PFRULE_RETURN)) &&
4577		    !(th->th_flags & TH_RST)) {
4578			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
4579			int		 len = 0;
4580			struct ip	*h4;
4581#if INET6
4582			struct ip6_hdr	*h6;
4583#endif /* INET6 */
4584
4585			switch (af) {
4586			case AF_INET:
4587				h4 = mtod(m, struct ip *);
4588				len = ntohs(h4->ip_len) - off;
4589				break;
4590#if INET6
4591			case AF_INET6:
4592				h6 = mtod(m, struct ip6_hdr *);
4593				len = ntohs(h6->ip6_plen) -
4594				    (off - sizeof (*h6));
4595				break;
4596#endif /* INET6 */
4597			}
4598
4599			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
4600				REASON_SET(&reason, PFRES_PROTCKSUM);
4601			else {
4602				if (th->th_flags & TH_SYN)
4603					ack++;
4604				if (th->th_flags & TH_FIN)
4605					ack++;
4606				pf_send_tcp(r, af, pd->dst,
4607				    pd->src, th->th_dport, th->th_sport,
4608				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
4609				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
4610			}
4611		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
4612		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
4613		    r->return_icmp)
4614			pf_send_icmp(m, r->return_icmp >> 8,
4615			    r->return_icmp & 255, af, r);
4616		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
4617		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
4618		    r->return_icmp6)
4619			pf_send_icmp(m, r->return_icmp6 >> 8,
4620			    r->return_icmp6 & 255, af, r);
4621	}
4622
4623	if (r->action == PF_DROP)
4624		return (PF_DROP);
4625
4626	/* prepare state key, for flowhash and/or the state (if created) */
4627	bzero(&psk, sizeof (psk));
4628	psk.proto = pd->proto;
4629	psk.direction = direction;
4630	psk.af = af;
4631	if (pd->proto == IPPROTO_UDP) {
4632		if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
4633		    ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
4634			psk.proto_variant = PF_EXTFILTER_APD;
4635		} else {
4636			psk.proto_variant = nr ? nr->extfilter : r->extfilter;
4637			if (psk.proto_variant < PF_EXTFILTER_APD)
4638				psk.proto_variant = PF_EXTFILTER_APD;
4639		}
4640	} else if (pd->proto == IPPROTO_GRE) {
4641		psk.proto_variant = pd->proto_variant;
4642	}
4643	if (direction == PF_OUT) {
4644		PF_ACPY(&psk.gwy.addr, saddr, af);
4645		PF_ACPY(&psk.ext.addr, daddr, af);
4646		switch (pd->proto) {
4647		case IPPROTO_UDP:
4648			psk.gwy.xport = sxport;
4649			psk.ext.xport = dxport;
4650			break;
4651		case IPPROTO_ESP:
4652			psk.gwy.xport.spi = 0;
4653			psk.ext.xport.spi = pd->hdr.esp->spi;
4654			break;
4655		case IPPROTO_ICMP:
4656#if INET6
4657		case IPPROTO_ICMPV6:
4658#endif
4659			psk.gwy.xport.port = nxport.port;
4660			psk.ext.xport.spi = 0;
4661			break;
4662		default:
4663			psk.gwy.xport = sxport;
4664			psk.ext.xport = dxport;
4665			break;
4666		}
4667		if (nr != NULL) {
4668			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
4669			psk.lan.xport = bxport;
4670		} else {
4671			PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
4672			psk.lan.xport = psk.gwy.xport;
4673		}
4674	} else {
4675		PF_ACPY(&psk.lan.addr, daddr, af);
4676		PF_ACPY(&psk.ext.addr, saddr, af);
4677		switch (pd->proto) {
4678		case IPPROTO_ICMP:
4679#if INET6
4680		case IPPROTO_ICMPV6:
4681#endif
4682			psk.lan.xport = nxport;
4683			psk.ext.xport.spi = 0;
4684			break;
4685		case IPPROTO_ESP:
4686			psk.ext.xport.spi = 0;
4687			psk.lan.xport.spi = pd->hdr.esp->spi;
4688			break;
4689		default:
4690			psk.lan.xport = dxport;
4691			psk.ext.xport = sxport;
4692			break;
4693		}
4694		if (nr != NULL) {
4695			PF_ACPY(&psk.gwy.addr, &pd->baddr, af);
4696			psk.gwy.xport = bxport;
4697		} else {
4698			PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
4699			psk.gwy.xport = psk.lan.xport;
4700		}
4701	}
4702	if (pd->flowhash != 0) {
4703		/* flowhash was already computed by upper layers */
4704		psk.flowhash = pd->flowhash;
4705	} else {
4706		psk.flowhash = pf_calc_state_key_flowhash(&psk);
4707		pd->flowhash = psk.flowhash;
4708	}
4709
4710	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, pd)) {
4711		REASON_SET(&reason, PFRES_MEMORY);
4712		return (PF_DROP);
4713	}
4714
4715	if (!state_icmp && (r->keep_state || nr != NULL ||
4716	    (pd->flags & PFDESC_TCP_NORM))) {
4717		/* create new state */
4718		struct pf_state	*s = NULL;
4719		struct pf_state_key *sk = NULL;
4720		struct pf_src_node *sn = NULL;
4721		struct pf_ike_hdr ike;
4722
4723		if (pd->proto == IPPROTO_UDP) {
4724			struct udphdr *uh = pd->hdr.udp;
4725			size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
4726
4727			if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
4728			    ntohs(uh->uh_dport) == PF_IKE_PORT &&
4729			    plen >= PF_IKE_PACKET_MINSIZE) {
4730				if (plen > PF_IKE_PACKET_MINSIZE)
4731					plen = PF_IKE_PACKET_MINSIZE;
4732				m_copydata(m, off + sizeof (*uh), plen, &ike);
4733			}
4734		}
4735
4736		if (nr != NULL && pd->proto == IPPROTO_ESP &&
4737		    direction == PF_OUT) {
4738			struct pf_state_key_cmp	sk0;
4739			struct pf_state *s0;
4740
4741			/*
4742			 * <jhw@apple.com>
4743			 * This squelches state creation if the external
4744			 * address matches an existing incomplete state with a
4745			 * different internal address.  Only one 'blocking'
4746			 * partial state is allowed for each external address.
4747			 */
4748			memset(&sk0, 0, sizeof (sk0));
4749			sk0.af = pd->af;
4750			sk0.proto = IPPROTO_ESP;
4751			PF_ACPY(&sk0.gwy.addr, saddr, sk0.af);
4752			PF_ACPY(&sk0.ext.addr, daddr, sk0.af);
4753			s0 = pf_find_state(kif, &sk0, PF_IN);
4754
4755			if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
4756			    pd->src, pd->af)) {
4757				nsn = 0;
4758				goto cleanup;
4759			}
4760		}
4761
4762		/* check maximums */
4763		if (r->max_states && (r->states >= r->max_states)) {
4764			pf_status.lcounters[LCNT_STATES]++;
4765			REASON_SET(&reason, PFRES_MAXSTATES);
4766			goto cleanup;
4767		}
4768		/* src node for filter rule */
4769		if ((r->rule_flag & PFRULE_SRCTRACK ||
4770		    r->rpool.opts & PF_POOL_STICKYADDR) &&
4771		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
4772			REASON_SET(&reason, PFRES_SRCLIMIT);
4773			goto cleanup;
4774		}
4775		/* src node for translation rule */
4776		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4777		    ((direction == PF_OUT &&
4778		    nr->action != PF_RDR &&
4779		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
4780		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
4781			REASON_SET(&reason, PFRES_SRCLIMIT);
4782			goto cleanup;
4783		}
4784		s = pool_get(&pf_state_pl, PR_WAITOK);
4785		if (s == NULL) {
4786			REASON_SET(&reason, PFRES_MEMORY);
4787cleanup:
4788			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4789				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4790				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4791				pf_status.src_nodes--;
4792				pool_put(&pf_src_tree_pl, sn);
4793			}
4794			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
4795			    nsn->expire == 0) {
4796				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4797				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4798				pf_status.src_nodes--;
4799				pool_put(&pf_src_tree_pl, nsn);
4800			}
4801			if (sk != NULL) {
4802				if (sk->app_state)
4803					pool_put(&pf_app_state_pl,
4804					    sk->app_state);
4805				pool_put(&pf_state_key_pl, sk);
4806			}
4807			return (PF_DROP);
4808		}
4809		bzero(s, sizeof (*s));
4810		TAILQ_INIT(&s->unlink_hooks);
4811		s->rule.ptr = r;
4812		s->nat_rule.ptr = nr;
4813		s->anchor.ptr = a;
4814		STATE_INC_COUNTERS(s);
4815		s->allow_opts = r->allow_opts;
4816		s->log = r->log & PF_LOG_ALL;
4817		if (nr != NULL)
4818			s->log |= nr->log & PF_LOG_ALL;
4819		switch (pd->proto) {
4820		case IPPROTO_TCP:
4821			s->src.seqlo = ntohl(th->th_seq);
4822			s->src.seqhi = s->src.seqlo + pd->p_len + 1;
4823			if ((th->th_flags & (TH_SYN|TH_ACK)) ==
4824			    TH_SYN && r->keep_state == PF_STATE_MODULATE) {
4825				/* Generate sequence number modulator */
4826				if ((s->src.seqdiff = pf_tcp_iss(pd) -
4827				    s->src.seqlo) == 0)
4828					s->src.seqdiff = 1;
4829				pf_change_a(&th->th_seq, &th->th_sum,
4830				    htonl(s->src.seqlo + s->src.seqdiff), 0);
4831				rewrite = off + sizeof (*th);
4832			} else
4833				s->src.seqdiff = 0;
4834			if (th->th_flags & TH_SYN) {
4835				s->src.seqhi++;
4836				s->src.wscale = pf_get_wscale(m, off,
4837				    th->th_off, af);
4838			}
4839			s->src.max_win = MAX(ntohs(th->th_win), 1);
4840			if (s->src.wscale & PF_WSCALE_MASK) {
4841				/* Remove scale factor from initial window */
4842				int win = s->src.max_win;
4843				win += 1 << (s->src.wscale & PF_WSCALE_MASK);
4844				s->src.max_win = (win - 1) >>
4845				    (s->src.wscale & PF_WSCALE_MASK);
4846			}
4847			if (th->th_flags & TH_FIN)
4848				s->src.seqhi++;
4849			s->dst.seqhi = 1;
4850			s->dst.max_win = 1;
4851			s->src.state = TCPS_SYN_SENT;
4852			s->dst.state = TCPS_CLOSED;
4853			s->timeout = PFTM_TCP_FIRST_PACKET;
4854			break;
4855		case IPPROTO_UDP:
4856			s->src.state = PFUDPS_SINGLE;
4857			s->dst.state = PFUDPS_NO_TRAFFIC;
4858			s->timeout = PFTM_UDP_FIRST_PACKET;
4859			break;
4860		case IPPROTO_ICMP:
4861#if INET6
4862		case IPPROTO_ICMPV6:
4863#endif
4864			s->timeout = PFTM_ICMP_FIRST_PACKET;
4865			break;
4866		case IPPROTO_GRE:
4867			s->src.state = PFGRE1S_INITIATING;
4868			s->dst.state = PFGRE1S_NO_TRAFFIC;
4869			s->timeout = PFTM_GREv1_INITIATING;
4870			break;
4871		case IPPROTO_ESP:
4872			s->src.state = PFESPS_INITIATING;
4873			s->dst.state = PFESPS_NO_TRAFFIC;
4874			s->timeout = PFTM_ESP_FIRST_PACKET;
4875			break;
4876		default:
4877			s->src.state = PFOTHERS_SINGLE;
4878			s->dst.state = PFOTHERS_NO_TRAFFIC;
4879			s->timeout = PFTM_OTHER_FIRST_PACKET;
4880		}
4881
4882		s->creation = pf_time_second();
4883		s->expire = pf_time_second();
4884
4885		if (sn != NULL) {
4886			s->src_node = sn;
4887			s->src_node->states++;
4888			VERIFY(s->src_node->states != 0);
4889		}
4890		if (nsn != NULL) {
4891			PF_ACPY(&nsn->raddr, &pd->naddr, af);
4892			s->nat_src_node = nsn;
4893			s->nat_src_node->states++;
4894			VERIFY(s->nat_src_node->states != 0);
4895		}
4896		if (pd->proto == IPPROTO_TCP) {
4897			if ((pd->flags & PFDESC_TCP_NORM) &&
4898			    pf_normalize_tcp_init(m, off, pd, th, &s->src,
4899			    &s->dst)) {
4900				REASON_SET(&reason, PFRES_MEMORY);
4901				pf_src_tree_remove_state(s);
4902				STATE_DEC_COUNTERS(s);
4903				pool_put(&pf_state_pl, s);
4904				return (PF_DROP);
4905			}
4906			if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
4907			    pf_normalize_tcp_stateful(m, off, pd, &reason,
4908			    th, s, &s->src, &s->dst, &rewrite)) {
4909				/* This really shouldn't happen!!! */
4910				DPFPRINTF(PF_DEBUG_URGENT,
4911				    ("pf_normalize_tcp_stateful failed on "
4912				    "first pkt"));
4913				pf_normalize_tcp_cleanup(s);
4914				pf_src_tree_remove_state(s);
4915				STATE_DEC_COUNTERS(s);
4916				pool_put(&pf_state_pl, s);
4917				return (PF_DROP);
4918			}
4919		}
4920
4921		/* allocate state key and import values from psk */
4922		if ((sk = pf_alloc_state_key(s, &psk)) == NULL) {
4923			REASON_SET(&reason, PFRES_MEMORY);
4924			goto cleanup;
4925		}
4926
4927		pf_set_rt_ifp(s, saddr);	/* needs s->state_key set */
4928
4929		m = pd->mp;
4930
4931		if (sk->app_state == 0) {
4932			switch (pd->proto) {
4933			case IPPROTO_TCP: {
4934				u_int16_t dport = (direction == PF_OUT) ?
4935				    sk->ext.xport.port : sk->gwy.xport.port;
4936
4937				if (nr != NULL &&
4938				    ntohs(dport) == PF_PPTP_PORT) {
4939					struct pf_app_state *as;
4940
4941					as = pool_get(&pf_app_state_pl,
4942					    PR_WAITOK);
4943					if (!as) {
4944						REASON_SET(&reason,
4945						    PFRES_MEMORY);
4946						goto cleanup;
4947					}
4948
4949					bzero(as, sizeof (*as));
4950					as->handler = pf_pptp_handler;
4951					as->compare_lan_ext = 0;
4952					as->compare_ext_gwy = 0;
4953					as->u.pptp.grev1_state = 0;
4954					sk->app_state = as;
4955					(void) hook_establish(&s->unlink_hooks,
4956					    0, (hook_fn_t) pf_pptp_unlink, s);
4957				}
4958				break;
4959			}
4960
4961			case IPPROTO_UDP: {
4962				struct udphdr *uh = pd->hdr.udp;
4963
4964				if (nr != NULL &&
4965				    ntohs(uh->uh_sport) == PF_IKE_PORT &&
4966				    ntohs(uh->uh_dport) == PF_IKE_PORT) {
4967					struct pf_app_state *as;
4968
4969					as = pool_get(&pf_app_state_pl,
4970					    PR_WAITOK);
4971					if (!as) {
4972						REASON_SET(&reason,
4973						    PFRES_MEMORY);
4974						goto cleanup;
4975					}
4976
4977					bzero(as, sizeof (*as));
4978					as->compare_lan_ext = pf_ike_compare;
4979					as->compare_ext_gwy = pf_ike_compare;
4980					as->u.ike.cookie = ike.initiator_cookie;
4981					sk->app_state = as;
4982				}
4983				break;
4984			}
4985
4986			default:
4987				break;
4988			}
4989		}
4990
4991		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
4992			if (pd->proto == IPPROTO_TCP)
4993				pf_normalize_tcp_cleanup(s);
4994			REASON_SET(&reason, PFRES_STATEINS);
4995			pf_src_tree_remove_state(s);
4996			STATE_DEC_COUNTERS(s);
4997			pool_put(&pf_state_pl, s);
4998			return (PF_DROP);
4999		} else
5000			*sm = s;
5001		if (tag > 0) {
5002			pf_tag_ref(tag);
5003			s->tag = tag;
5004		}
5005		if (pd->proto == IPPROTO_TCP &&
5006		    (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
5007		    r->keep_state == PF_STATE_SYNPROXY) {
5008			s->src.state = PF_TCPS_PROXY_SRC;
5009			if (nr != NULL) {
5010				if (direction == PF_OUT) {
5011					pf_change_ap(direction, pd->mp, saddr,
5012					    &th->th_sport, pd->ip_sum,
5013					    &th->th_sum, &pd->baddr,
5014					    bxport.port, 0, af);
5015					sxport.port = th->th_sport;
5016				} else {
5017					pf_change_ap(direction, pd->mp, daddr,
5018					    &th->th_dport, pd->ip_sum,
5019					    &th->th_sum, &pd->baddr,
5020					    bxport.port, 0, af);
5021					sxport.port = th->th_dport;
5022				}
5023			}
5024			s->src.seqhi = htonl(random());
5025			/* Find mss option */
5026			mss = pf_get_mss(m, off, th->th_off, af);
5027			mss = pf_calc_mss(saddr, af, mss);
5028			mss = pf_calc_mss(daddr, af, mss);
5029			s->src.mss = mss;
5030			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
5031			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
5032			    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
5033			REASON_SET(&reason, PFRES_SYNPROXY);
5034			return (PF_SYNPROXY_DROP);
5035		}
5036
5037		if (sk->app_state && sk->app_state->handler) {
5038			int offx = off;
5039
5040			switch (pd->proto) {
5041			case IPPROTO_TCP:
5042				offx += th->th_off << 2;
5043				break;
5044			case IPPROTO_UDP:
5045				offx += pd->hdr.udp->uh_ulen << 2;
5046				break;
5047			default:
5048				/* ALG handlers only apply to TCP and UDP rules */
5049				break;
5050			}
5051
5052			if (offx > off) {
5053				sk->app_state->handler(s, direction, offx,
5054				    pd, kif);
5055				if (pd->lmw < 0) {
5056					REASON_SET(&reason, PFRES_MEMORY);
5057					return (PF_DROP);
5058				}
5059				m = pd->mp;
5060			}
5061		}
5062	}
5063
5064	/* copy back packet headers if we performed NAT operations */
5065	if (rewrite) {
5066		if (rewrite < off + hdrlen)
5067			rewrite = off + hdrlen;
5068
5069		m = pf_lazy_makewritable(pd, pd->mp, rewrite);
5070		if (!m) {
5071			REASON_SET(&reason, PFRES_MEMORY);
5072			return (PF_DROP);
5073		}
5074
5075		m_copyback(m, off, hdrlen, pd->hdr.any);
5076	}
5077
5078	return (PF_PASS);
5079}
5080
5081#if DUMMYNET
5082/*
5083 * When pf_test_dummynet() returns PF_PASS, the rule matching parameter "rm"
5084 * remains unchanged, meaning the packet did not match a dummynet rule.
5085 * when the packet does match a dummynet rule, pf_test_dummynet() returns
5086 * PF_PASS and zero out the mbuf rule as the packet is effectively siphoned
5087 * out by dummynet.
5088 */
5089static int
5090pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
5091    struct mbuf **m0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
5092{
5093	struct mbuf		*m = *m0;
5094	struct pf_rule		*am = NULL;
5095	struct pf_ruleset	*rsm = NULL;
5096	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
5097	sa_family_t		 af = pd->af;
5098	struct pf_rule		*r, *a = NULL;
5099	struct pf_ruleset	*ruleset = NULL;
5100	struct tcphdr		*th = pd->hdr.tcp;
5101	u_short			 reason;
5102	int			 hdrlen = 0;
5103	int			 tag = -1;
5104	unsigned int		 rtableid = IFSCOPE_NONE;
5105	int			 asd = 0;
5106	int			 match = 0;
5107	u_int8_t		 icmptype = 0, icmpcode = 0;
5108	struct ip_fw_args	dnflow;
5109	struct pf_rule		*prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
5110	int			found_prev_rule = (prev_matching_rule) ? 0 : 1;
5111
5112	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
5113
5114	if (!DUMMYNET_LOADED)
5115		return (PF_PASS);
5116
5117	if (TAILQ_EMPTY(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr))
5118		return (PF_PASS);
5119
5120	bzero(&dnflow, sizeof(dnflow));
5121
5122	hdrlen = 0;
5123
5124	/* Fragments don't gave protocol headers */
5125	if (!(pd->flags & PFDESC_IP_FRAG))
5126		switch (pd->proto) {
5127		case IPPROTO_TCP:
5128			dnflow.fwa_id.flags = pd->hdr.tcp->th_flags;
5129			dnflow.fwa_id.dst_port = ntohs(pd->hdr.tcp->th_dport);
5130			dnflow.fwa_id.src_port = ntohs(pd->hdr.tcp->th_sport);
5131			hdrlen = sizeof (*th);
5132			break;
5133		case IPPROTO_UDP:
5134			dnflow.fwa_id.dst_port = ntohs(pd->hdr.udp->uh_dport);
5135			dnflow.fwa_id.src_port = ntohs(pd->hdr.udp->uh_sport);
5136			hdrlen = sizeof (*pd->hdr.udp);
5137			break;
5138#if INET
5139		case IPPROTO_ICMP:
5140			if (af != AF_INET)
5141				break;
5142			hdrlen = ICMP_MINLEN;
5143			icmptype = pd->hdr.icmp->icmp_type;
5144			icmpcode = pd->hdr.icmp->icmp_code;
5145			break;
5146#endif /* INET */
5147#if INET6
5148		case IPPROTO_ICMPV6:
5149			if (af != AF_INET6)
5150				break;
5151			hdrlen = sizeof (*pd->hdr.icmp6);
5152			icmptype = pd->hdr.icmp6->icmp6_type;
5153			icmpcode = pd->hdr.icmp6->icmp6_code;
5154			break;
5155#endif /* INET6 */
5156		case IPPROTO_GRE:
5157			if (pd->proto_variant == PF_GRE_PPTP_VARIANT)
5158				hdrlen = sizeof (*pd->hdr.grev1);
5159			break;
5160		case IPPROTO_ESP:
5161			hdrlen = sizeof (*pd->hdr.esp);
5162			break;
5163		}
5164
5165	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);
5166
5167	while (r != NULL) {
5168		r->evaluations++;
5169		if (pfi_kif_match(r->kif, kif) == r->ifnot)
5170			r = r->skip[PF_SKIP_IFP].ptr;
5171		else if (r->direction && r->direction != direction)
5172			r = r->skip[PF_SKIP_DIR].ptr;
5173		else if (r->af && r->af != af)
5174			r = r->skip[PF_SKIP_AF].ptr;
5175		else if (r->proto && r->proto != pd->proto)
5176			r = r->skip[PF_SKIP_PROTO].ptr;
5177		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
5178		    r->src.neg, kif))
5179			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5180		/* tcp/udp only. port_op always 0 in other cases */
5181		else if (r->proto == pd->proto &&
5182		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5183		    ((pd->flags & PFDESC_IP_FRAG) ||
5184		    ((r->src.xport.range.op &&
5185		    !pf_match_port(r->src.xport.range.op,
5186		    r->src.xport.range.port[0], r->src.xport.range.port[1],
5187		    th->th_sport)))))
5188			r = r->skip[PF_SKIP_SRC_PORT].ptr;
5189		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
5190		    r->dst.neg, NULL))
5191			r = r->skip[PF_SKIP_DST_ADDR].ptr;
5192		/* tcp/udp only. port_op always 0 in other cases */
5193		else if (r->proto == pd->proto &&
5194		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5195		    r->dst.xport.range.op &&
5196		    ((pd->flags & PFDESC_IP_FRAG) ||
5197		    !pf_match_port(r->dst.xport.range.op,
5198		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5199		    th->th_dport)))
5200			r = r->skip[PF_SKIP_DST_PORT].ptr;
5201		/* icmp only. type always 0 in other cases */
5202		else if (r->type &&
5203			((pd->flags & PFDESC_IP_FRAG) ||
5204			r->type != icmptype + 1))
5205			r = TAILQ_NEXT(r, entries);
5206		/* icmp only. type always 0 in other cases */
5207		else if (r->code &&
5208			((pd->flags & PFDESC_IP_FRAG) ||
5209			r->code != icmpcode + 1))
5210			r = TAILQ_NEXT(r, entries);
5211		else if (r->tos && !(r->tos == pd->tos))
5212			r = TAILQ_NEXT(r, entries);
5213		else if (r->rule_flag & PFRULE_FRAGMENT)
5214			r = TAILQ_NEXT(r, entries);
5215		else if (pd->proto == IPPROTO_TCP &&
5216		    ((pd->flags & PFDESC_IP_FRAG) ||
5217		    (r->flagset & th->th_flags) != r->flags))
5218			r = TAILQ_NEXT(r, entries);
5219		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
5220			r = TAILQ_NEXT(r, entries);
5221		else {
5222			/*
5223			 * Need to go past the previous dummynet matching rule
5224			 */
5225			if (r->anchor == NULL) {
5226				if (found_prev_rule) {
5227					if (r->tag)
5228						tag = r->tag;
5229					if (PF_RTABLEID_IS_VALID(r->rtableid))
5230						rtableid = r->rtableid;
5231					match = 1;
5232					*rm = r;
5233					am = a;
5234					rsm = ruleset;
5235					if ((*rm)->quick)
5236						break;
5237				} else if (r == prev_matching_rule) {
5238					found_prev_rule = 1;
5239				}
5240				r = TAILQ_NEXT(r, entries);
5241			} else {
5242				pf_step_into_anchor(&asd, &ruleset,
5243				    PF_RULESET_DUMMYNET, &r, &a, &match);
5244			}
5245		}
5246		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5247		    PF_RULESET_DUMMYNET, &r, &a, &match))
5248			break;
5249	}
5250	r = *rm;
5251	a = am;
5252	ruleset = rsm;
5253
5254	if (!match)
5255		return (PF_PASS);
5256
5257	REASON_SET(&reason, PFRES_DUMMYNET);
5258
5259	if (r->log) {
5260		PFLOG_PACKET(kif, h, m, af, direction, reason, r,
5261		    a, ruleset, pd);
5262	}
5263
5264	if (r->action == PF_NODUMMYNET) {
5265		int dirndx = (direction == PF_OUT);
5266
5267		r->packets[dirndx]++;
5268		r->bytes[dirndx] += pd->tot_len;
5269
5270		return (PF_PASS);
5271	}
5272	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, pd)) {
5273		REASON_SET(&reason, PFRES_MEMORY);
5274
5275		return (PF_DROP);
5276	}
5277
5278	if (r->dnpipe && ip_dn_io_ptr != NULL) {
5279		int dirndx = (direction == PF_OUT);
5280
5281		r->packets[dirndx]++;
5282		r->bytes[dirndx] += pd->tot_len;
5283
5284		dnflow.fwa_cookie = r->dnpipe;
5285		dnflow.fwa_pf_rule = r;
5286		dnflow.fwa_id.proto = pd->proto;
5287		dnflow.fwa_flags = r->dntype;
5288		switch (af) {
5289			case AF_INET:
5290				dnflow.fwa_id.addr_type = 4;
5291				dnflow.fwa_id.src_ip = ntohl(saddr->v4.s_addr);
5292				dnflow.fwa_id.dst_ip = ntohl(daddr->v4.s_addr);
5293				break;
5294			case AF_INET6:
5295				dnflow.fwa_id.addr_type = 6;
5296				dnflow.fwa_id.src_ip6 = saddr->v6;
5297				dnflow.fwa_id.dst_ip6 = saddr->v6;
5298				break;
5299			}
5300
5301		if (fwa != NULL) {
5302			dnflow.fwa_oif = fwa->fwa_oif;
5303			dnflow.fwa_oflags = fwa->fwa_oflags;
5304			/*
5305			 * Note that fwa_ro, fwa_dst and fwa_ipoa are
5306			 * actually in a union so the following does work
5307			 * for both IPv4 and IPv6
5308			 */
5309			dnflow.fwa_ro = fwa->fwa_ro;
5310			dnflow.fwa_dst = fwa->fwa_dst;
5311			dnflow.fwa_ipoa = fwa->fwa_ipoa;
5312			dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
5313			dnflow.fwa_origifp = fwa->fwa_origifp;
5314			dnflow.fwa_mtu = fwa->fwa_mtu;
5315			dnflow.fwa_alwaysfrag = fwa->fwa_alwaysfrag;
5316			dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
5317			dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
5318		}
5319
5320		if (af == AF_INET) {
5321			struct ip *iphdr = mtod(m, struct ip *);
5322			NTOHS(iphdr->ip_len);
5323			NTOHS(iphdr->ip_off);
5324		}
5325		/*
5326		 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
5327		 * allows for recursive behavior
5328		 */
5329		ip_dn_io_ptr(m,
5330			dnflow.fwa_cookie,
5331			af == AF_INET ?
5332				direction == PF_IN ? DN_TO_IP_IN : DN_TO_IP_OUT :
5333				direction == PF_IN ? DN_TO_IP6_IN : DN_TO_IP6_OUT,
5334			&dnflow, DN_CLIENT_PF);
5335
5336		/*
5337		 * The packet is siphoned out by dummynet so return a NULL
5338		 * mbuf so the caller can still return success.
5339		 */
5340		*m0 = NULL;
5341
5342		return (PF_PASS);
5343	}
5344
5345	return (PF_PASS);
5346}
5347#endif /* DUMMYNET */
5348
/*
 * Evaluate the active filter ruleset against an IP fragment, for which no
 * transport-layer header is available.  Walks the rule list using the
 * precomputed skip-step pointers, records the last matching non-anchor rule
 * in *rm / *am / *rsm, and returns PF_PASS or PF_DROP per that rule's
 * action.  Rules that require L4 information (ports, TCP flags, ICMP
 * type/code, OS fingerprints) can never match a fragment and are stepped
 * over explicitly below.
 */
static int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
#pragma unused(h)
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	sa_family_t		 af = pd->af;
	u_short			 reason;
	int			 tag = -1;
	int			 asd = 0;	/* anchor stack depth */
	int			 match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/* skip[] pointers jump past runs of rules sharing a field */
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
                else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
		    !(r->tos & pd->tos))
			r = TAILQ_NEXT(r, entries);
                else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
		    !(r->tos & (pd->tos & DSCP_MASK)))
			r = TAILQ_NEXT(r, entries);
                else if ((r->rule_flag & PFRULE_SC) && r->tos &&
		    ((r->tos & SCIDX_MASK) != pd->sc))
			r = TAILQ_NEXT(r, entries);
		/* fragments carry no L4 header: rules needing one can't match */
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_UDP &&
		    (r->src.xport.range.op || r->dst.xport.range.op))
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_TCP &&
		    (r->src.xport.range.op || r->dst.xport.range.op ||
		    r->flagset))
			r = TAILQ_NEXT(r, entries);
		else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code))
			r = TAILQ_NEXT(r, entries);
		/* probabilistic match: drop rule with probability 1 - r->prob */
		else if (r->prob && r->prob <= (random() % (UINT_MAX - 1) + 1))
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				/* plain rule: remember it; stop if "quick" */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				/* descend into the anchor's sub-ruleset */
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
		}
		/* end of a sub-ruleset: pop back up; stop on quick match */
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match))
			break;
	}
	/* adopt the last matching rule (callers preload *rm with a default) */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log)
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
		    pd);

	if (r->action != PF_PASS)
		return (PF_DROP);

	/* tag the mbuf so later stages / other rules can match on it */
	if (pf_tag_packet(m, pd->pf_mtag, tag, -1, NULL)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	return (PF_PASS);
}
5443
5444static void
5445pf_pptp_handler(struct pf_state *s, int direction, int off,
5446    struct pf_pdesc *pd, struct pfi_kif *kif)
5447{
5448#pragma unused(direction)
5449	struct tcphdr *th;
5450	struct pf_pptp_state *pptps;
5451	struct pf_pptp_ctrl_msg cm;
5452	size_t plen;
5453	struct pf_state *gs;
5454	u_int16_t ct;
5455	u_int16_t *pac_call_id;
5456	u_int16_t *pns_call_id;
5457	u_int16_t *spoof_call_id;
5458	u_int8_t *pac_state;
5459	u_int8_t *pns_state;
5460	enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
5461	struct mbuf *m;
5462	struct pf_state_key *sk;
5463	struct pf_state_key *gsk;
5464	struct pf_app_state *gas;
5465
5466	sk = s->state_key;
5467	pptps = &sk->app_state->u.pptp;
5468	gs = pptps->grev1_state;
5469
5470	if (gs)
5471		gs->expire = pf_time_second();
5472
5473	m = pd->mp;
5474	plen = min(sizeof (cm), m->m_pkthdr.len - off);
5475	if (plen < PF_PPTP_CTRL_MSG_MINSIZE)
5476		return;
5477
5478	m_copydata(m, off, plen, &cm);
5479
5480	if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER)
5481		return;
5482	if (ntohs(cm.hdr.type) != 1)
5483		return;
5484
5485	if (!gs) {
5486		gs = pool_get(&pf_state_pl, PR_WAITOK);
5487		if (!gs)
5488			return;
5489
5490		memcpy(gs, s, sizeof (*gs));
5491
5492		memset(&gs->entry_id, 0, sizeof (gs->entry_id));
5493		memset(&gs->entry_list, 0, sizeof (gs->entry_list));
5494
5495		TAILQ_INIT(&gs->unlink_hooks);
5496		gs->rt_kif = NULL;
5497		gs->creation = 0;
5498		gs->pfsync_time = 0;
5499		gs->packets[0] = gs->packets[1] = 0;
5500		gs->bytes[0] = gs->bytes[1] = 0;
5501		gs->timeout = PFTM_UNLINKED;
5502		gs->id = gs->creatorid = 0;
5503		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
5504		gs->src.scrub = gs->dst.scrub = 0;
5505
5506		gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
5507		if (!gas) {
5508			pool_put(&pf_state_pl, gs);
5509			return;
5510		}
5511
5512		gsk = pf_alloc_state_key(gs, NULL);
5513		if (!gsk) {
5514			pool_put(&pf_app_state_pl, gas);
5515			pool_put(&pf_state_pl, gs);
5516			return;
5517		}
5518
5519		memcpy(&gsk->lan, &sk->lan, sizeof (gsk->lan));
5520		memcpy(&gsk->gwy, &sk->gwy, sizeof (gsk->gwy));
5521		memcpy(&gsk->ext, &sk->ext, sizeof (gsk->ext));
5522		gsk->af = sk->af;
5523		gsk->proto = IPPROTO_GRE;
5524		gsk->proto_variant = PF_GRE_PPTP_VARIANT;
5525		gsk->app_state = gas;
5526		gsk->lan.xport.call_id = 0;
5527		gsk->gwy.xport.call_id = 0;
5528		gsk->ext.xport.call_id = 0;
5529		gsk->flowhash = pf_calc_state_key_flowhash(gsk);
5530		memset(gas, 0, sizeof (*gas));
5531		gas->u.grev1.pptp_state = s;
5532		STATE_INC_COUNTERS(gs);
5533		pptps->grev1_state = gs;
5534		(void) hook_establish(&gs->unlink_hooks, 0,
5535		    (hook_fn_t) pf_grev1_unlink, gs);
5536	} else {
5537		gsk = gs->state_key;
5538	}
5539
5540	switch (sk->direction) {
5541	case PF_IN:
5542		pns_call_id = &gsk->ext.xport.call_id;
5543		pns_state = &gs->dst.state;
5544		pac_call_id = &gsk->lan.xport.call_id;
5545		pac_state = &gs->src.state;
5546		break;
5547
5548	case PF_OUT:
5549		pns_call_id = &gsk->lan.xport.call_id;
5550		pns_state = &gs->src.state;
5551		pac_call_id = &gsk->ext.xport.call_id;
5552		pac_state = &gs->dst.state;
5553		break;
5554
5555	default:
5556		DPFPRINTF(PF_DEBUG_URGENT,
5557		    ("pf_pptp_handler: bad directional!\n"));
5558		return;
5559	}
5560
5561	spoof_call_id = 0;
5562	op = PF_PPTP_PASS;
5563
5564	ct = ntohs(cm.ctrl.type);
5565
5566	switch (ct) {
5567	case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
5568		*pns_call_id = cm.msg.call_out_req.call_id;
5569		*pns_state = PFGRE1S_INITIATING;
5570		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id)
5571			spoof_call_id = &cm.msg.call_out_req.call_id;
5572		break;
5573
5574	case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
5575		*pac_call_id = cm.msg.call_out_rpy.call_id;
5576		if (s->nat_rule.ptr)
5577			spoof_call_id =
5578			    (pac_call_id == &gsk->lan.xport.call_id) ?
5579			    &cm.msg.call_out_rpy.call_id :
5580			    &cm.msg.call_out_rpy.peer_call_id;
5581		if (gs->timeout == PFTM_UNLINKED) {
5582			*pac_state = PFGRE1S_INITIATING;
5583			op = PF_PPTP_INSERT_GRE;
5584		}
5585		break;
5586
5587	case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
5588		*pns_call_id = cm.msg.call_in_1st.call_id;
5589		*pns_state = PFGRE1S_INITIATING;
5590		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id)
5591			spoof_call_id = &cm.msg.call_in_1st.call_id;
5592		break;
5593
5594	case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
5595		*pac_call_id = cm.msg.call_in_2nd.call_id;
5596		*pac_state = PFGRE1S_INITIATING;
5597		if (s->nat_rule.ptr)
5598			spoof_call_id =
5599			    (pac_call_id == &gsk->lan.xport.call_id) ?
5600			    &cm.msg.call_in_2nd.call_id :
5601			    &cm.msg.call_in_2nd.peer_call_id;
5602		break;
5603
5604	case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
5605		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id)
5606			spoof_call_id = &cm.msg.call_in_3rd.call_id;
5607		if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
5608			break;
5609		}
5610		if (gs->timeout == PFTM_UNLINKED)
5611			op = PF_PPTP_INSERT_GRE;
5612		break;
5613
5614	case PF_PPTP_CTRL_TYPE_CALL_CLR:
5615		if (cm.msg.call_clr.call_id != *pns_call_id)
5616			op = PF_PPTP_REMOVE_GRE;
5617		break;
5618
5619	case PF_PPTP_CTRL_TYPE_CALL_DISC:
5620		if (cm.msg.call_clr.call_id != *pac_call_id)
5621			op = PF_PPTP_REMOVE_GRE;
5622		break;
5623
5624	case PF_PPTP_CTRL_TYPE_ERROR:
5625		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id)
5626			spoof_call_id = &cm.msg.error.peer_call_id;
5627		break;
5628
5629	case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
5630		if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id)
5631			spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
5632		break;
5633
5634	default:
5635		op = PF_PPTP_PASS;
5636		break;
5637	}
5638
5639	if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
5640		gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
5641		if (spoof_call_id) {
5642			u_int16_t call_id = 0;
5643			int n = 0;
5644			struct pf_state_key_cmp key;
5645
5646			key.af = gsk->af;
5647			key.proto = IPPROTO_GRE;
5648			key.proto_variant = PF_GRE_PPTP_VARIANT;
5649			PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af);
5650			PF_ACPY(&key.ext.addr, &gsk->ext.addr, key.af);
5651			key.gwy.xport.call_id = gsk->gwy.xport.call_id;
5652			key.ext.xport.call_id = gsk->ext.xport.call_id;
5653			do {
5654				call_id = htonl(random());
5655			} while (!call_id);
5656
5657			while (pf_find_state_all(&key, PF_IN, 0)) {
5658				call_id = ntohs(call_id);
5659				--call_id;
5660				if (--call_id == 0) call_id = 0xffff;
5661				call_id = htons(call_id);
5662
5663				key.gwy.xport.call_id = call_id;
5664
5665				if (++n > 65535) {
5666					DPFPRINTF(PF_DEBUG_URGENT,
5667					    ("pf_pptp_handler: failed to spoof "
5668					    "call id\n"));
5669					key.gwy.xport.call_id = 0;
5670					break;
5671				}
5672			}
5673
5674			gsk->gwy.xport.call_id = call_id;
5675		}
5676	}
5677
5678	th = pd->hdr.tcp;
5679
5680	if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
5681		if (*spoof_call_id == gsk->gwy.xport.call_id) {
5682			*spoof_call_id = gsk->lan.xport.call_id;
5683			th->th_sum = pf_cksum_fixup(th->th_sum,
5684			    gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
5685		} else {
5686			*spoof_call_id = gsk->gwy.xport.call_id;
5687			th->th_sum = pf_cksum_fixup(th->th_sum,
5688			    gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
5689		}
5690
5691		m = pf_lazy_makewritable(pd, m, off + plen);
5692		if (!m) {
5693			pptps->grev1_state = NULL;
5694			STATE_DEC_COUNTERS(gs);
5695			pool_put(&pf_state_pl, gs);
5696			return;
5697		}
5698		m_copyback(m, off, plen, &cm);
5699	}
5700
5701	switch (op) {
5702	case PF_PPTP_REMOVE_GRE:
5703		gs->timeout = PFTM_PURGE;
5704		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
5705		gsk->lan.xport.call_id = 0;
5706		gsk->gwy.xport.call_id = 0;
5707		gsk->ext.xport.call_id = 0;
5708		gs->id = gs->creatorid = 0;
5709		break;
5710
5711	case PF_PPTP_INSERT_GRE:
5712		gs->creation = pf_time_second();
5713		gs->expire = pf_time_second();
5714		gs->timeout = PFTM_TCP_ESTABLISHED;
5715		if (gs->src_node != NULL) {
5716			++gs->src_node->states;
5717			VERIFY(gs->src_node->states != 0);
5718		}
5719		if (gs->nat_src_node != NULL) {
5720			++gs->nat_src_node->states;
5721			VERIFY(gs->nat_src_node->states != 0);
5722		}
5723		pf_set_rt_ifp(gs, &sk->lan.addr);
5724		if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
5725
5726			/*
5727			 * <jhw@apple.com>
5728			 * FIX ME: insertion can fail when multiple PNS
5729			 * behind the same NAT open calls to the same PAC
5730			 * simultaneously because spoofed call ID numbers
5731			 * are chosen before states are inserted.  This is
5732			 * hard to fix and happens infrequently enough that
5733			 * users will normally try again and this ALG will
5734			 * succeed.  Failures are expected to be rare enough
5735			 * that fixing this is a low priority.
5736			 */
5737			pptps->grev1_state = NULL;
5738			pd->lmw = -1;	/* Force PF_DROP on PFRES_MEMORY */
5739			pf_src_tree_remove_state(gs);
5740			STATE_DEC_COUNTERS(gs);
5741			pool_put(&pf_state_pl, gs);
5742			DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
5743			    "inserting GREv1 state.\n"));
5744		}
5745		break;
5746
5747	default:
5748		break;
5749	}
5750}
5751
5752static void
5753pf_pptp_unlink(struct pf_state *s)
5754{
5755	struct pf_app_state *as = s->state_key->app_state;
5756	struct pf_state *grev1s = as->u.pptp.grev1_state;
5757
5758	if (grev1s) {
5759		struct pf_app_state *gas = grev1s->state_key->app_state;
5760
5761		if (grev1s->timeout < PFTM_MAX)
5762			grev1s->timeout = PFTM_PURGE;
5763		gas->u.grev1.pptp_state = NULL;
5764		as->u.pptp.grev1_state = NULL;
5765	}
5766}
5767
5768static void
5769pf_grev1_unlink(struct pf_state *s)
5770{
5771	struct pf_app_state *as = s->state_key->app_state;
5772	struct pf_state *pptps = as->u.grev1.pptp_state;
5773
5774	if (pptps) {
5775		struct pf_app_state *pas = pptps->state_key->app_state;
5776
5777		pas->u.pptp.grev1_state = NULL;
5778		as->u.grev1.pptp_state = NULL;
5779	}
5780}
5781
5782static int
5783pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
5784{
5785	int64_t d = a->u.ike.cookie - b->u.ike.cookie;
5786	return ((d > 0) ? 1 : ((d < 0) ? -1 : 0));
5787}
5788
/*
 * pf_test_state_tcp: match a TCP packet against an existing state entry
 * and run the stateful tracking machinery: synproxy handshake handling,
 * sequence-window validation (Guido van Rooij's algorithm), sequence
 * number modulation/demodulation, TCP state transitions, app-state
 * handlers (e.g. PPTP) and NAT address/port translation.
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP; *reason is set on drop.
 * On PF_PASS the packet may have been rewritten (copyback at the end).
 */
static int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
#pragma unused(h)
	struct pf_state_key_cmp	 key;
	struct tcphdr		*th = pd->hdr.tcp;
	u_int16_t		 win = ntohs(th->th_win);
	u_int32_t		 ack, end, seq, orig_seq;
	u_int8_t		 sws, dws;
	int			 ackskew;
	int			 copyback = 0;
	struct pf_state_peer	*src, *dst;

	/* build the lookup key: ext/gwy for inbound, lan/ext for outbound */
	key.app_state = 0;
	key.af = pd->af;
	key.proto = IPPROTO_TCP;
	if (direction == PF_IN)	{
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.ext.xport.port = th->th_sport;
		key.gwy.xport.port = th->th_dport;
	} else {
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.lan.xport.port = th->th_sport;
		key.ext.xport.port = th->th_dport;
	}

	STATE_LOOKUP();

	/* src/dst are the peers as seen from the packet's direction */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* synproxy phase 1: complete the handshake with the client */
	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
		if (direction != (*state)->state_key->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		} else
			(*state)->src.state = PF_TCPS_PROXY_DST;
	}
	/* synproxy phase 2: open the connection to the real destination */
	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		struct pf_state_host *psrc, *pdst;

		if (direction == PF_OUT) {
			psrc = &(*state)->state_key->gwy;
			pdst = &(*state)->state_key->ext;
		} else {
			psrc = &(*state)->state_key->ext;
			pdst = &(*state)->state_key->lan;
		}
		if (direction == (*state)->state_key->direction) {
			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			if ((*state)->dst.seqhi == 1)
				(*state)->dst.seqhi = htonl(random());
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    (*state)->dst.seqhi, 0, TH_SYN, 0,
			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
		    (TH_SYN|TH_ACK)) ||
		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			/* server answered: ACK both sides, splice windows */
			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
			(*state)->dst.seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
			    0, NULL, NULL);
			(*state)->src.seqdiff = (*state)->dst.seqhi -
			    (*state)->src.seqlo;
			(*state)->dst.seqdiff = (*state)->src.seqhi -
			    (*state)->dst.seqlo;
			(*state)->src.seqhi = (*state)->src.seqlo +
			    (*state)->dst.max_win;
			(*state)->dst.seqhi = (*state)->dst.seqlo +
			    (*state)->src.max_win;
			(*state)->src.wscale = (*state)->dst.wscale = 0;
			(*state)->src.state = (*state)->dst.state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}

	/* a fresh SYN on a closed-down state: let the peer recreate it */
	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
	    dst->state >= TCPS_FIN_WAIT_2 &&
	    src->state >= TCPS_FIN_WAIT_2) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state reuse ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n");
		}
		/* XXX make sure it's the same direction ?? */
		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
		pf_unlink_state(*state);
		*state = NULL;
		return (PF_DROP);
	}

	/* window scale only applies once both sides negotiated it */
	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else
		sws = dws = 0;

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof (*th);
		} else {
			ack = ntohl(th->th_ack);
		}

		/* end = highest sequence number the segment occupies */
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/*
					 * Window scale negotiation has failed,
					 * therefore we must restore the window
					 * scale in the state record that we
					 * optimistically removed in
					 * pf_test_rule().  Care is required to
					 * prevent arithmetic overflow from
					 * zeroing the window when it's
					 * truncated down to 16-bits.
					 */
					u_int32_t max_win = dst->max_win;
					max_win <<=
					    dst->wscale & PF_WSCALE_MASK;
					dst->max_win = MIN(0xffff, max_win);
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
		    src->seqhi))
			src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		/* established flow: demodulate and compute segment bounds */
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off+ sizeof (*th);
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > (int)sizeof (struct tcphdr)) {
		copyback = pf_modulate_sack(m, off, pd, th, dst);
		if (copyback == -1) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}

		m = pd->mp;
	}


#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
	    /* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, &copyback))
				return (PF_DROP);

			m = pd->mp;
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);

		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				src->state = TCPS_SYN_SENT;
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				dst->state = TCPS_FIN_WAIT_2;
		}
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* update expire time */
		(*state)->expire = pf_time_second();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			(*state)->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			(*state)->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_CLOSING;
		else
			(*state)->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */

	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
	    /* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == (*state)->state_key->direction ?
			    "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, &copyback))
				return (PF_DROP);
			m = pd->mp;
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* Fall through to PASS packet */

	} else {
		/* sequence checks failed: bad state / possible attack */
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (*state)->packets[0], (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == (*state)->state_key->direction ?
			    "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	/* Any packets which have gotten here are to be passed */

	/* run the application-layer handler (e.g. PPTP), if any */
	if ((*state)->state_key->app_state &&
	    (*state)->state_key->app_state->handler) {
		(*state)->state_key->app_state->handler(*state, direction,
		    off + (th->th_off << 2), pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		m = pd->mp;
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT)
			pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
			    pd->ip_sum, &th->th_sum,
			    &(*state)->state_key->gwy.addr,
			    (*state)->state_key->gwy.xport.port, 0, pd->af);
		else
			pf_change_ap(direction, pd->mp, pd->dst, &th->th_dport,
			    pd->ip_sum, &th->th_sum,
			    &(*state)->state_key->lan.addr,
			    (*state)->state_key->lan.xport.port, 0, pd->af);
		copyback = off + sizeof (*th);
	}

	if (copyback) {
		m = pf_lazy_makewritable(pd, m, copyback);
		if (!m) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}

		/* Copyback sequence modulation or stateful scrub changes */
		m_copyback(m, off, sizeof (*th), th);
	}

	return (PF_PASS);
}
6312
/*
 * pf_test_state_udp: match a UDP packet against an existing pf state,
 * refresh the state's peer bookkeeping and timers, and apply any NAT
 * translation recorded in the state key.
 *
 * Returns PF_PASS on success, or PF_DROP with *reason set for memory
 * failures (pf_state_lookup_aux supplies its own action on lookup
 * failure).
 */
static int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_state_peer	*src, *dst;
	struct pf_state_key_cmp	 key;
	struct udphdr		*uh = pd->hdr.udp;
	struct pf_app_state as;
	int dx, action, extfilter;
	key.app_state = 0;
	/* Start with the strictest endpoint filter; loosened below on miss. */
	key.proto_variant = PF_EXTFILTER_APD;

	key.af = pd->af;
	key.proto = IPPROTO_UDP;
	/* Build the lookup key; which field is ext/gwy/lan depends on direction. */
	if (direction == PF_IN)	{
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.ext.xport.port = uh->uh_sport;
		key.gwy.xport.port = uh->uh_dport;
		dx = PF_IN;
	} else {
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.lan.xport.port = uh->uh_sport;
		key.ext.xport.port = uh->uh_dport;
		dx = PF_OUT;
	}

	/*
	 * IKE/ISAKMP traffic (both ports == PF_IKE_PORT): distinguish
	 * concurrent IKE exchanges between the same address pair by the
	 * initiator cookie, via the app_state comparison hooks.
	 */
	if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
	    ntohs(uh->uh_dport) == PF_IKE_PORT) {
		struct pf_ike_hdr ike;
		size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
		if (plen < PF_IKE_PACKET_MINSIZE) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE message too small.\n"));
			return (PF_DROP);
		}

		if (plen > sizeof (ike))
			plen = sizeof (ike);
		m_copydata(m, off + sizeof (*uh), plen, &ike);

		if (ike.initiator_cookie) {
			key.app_state = &as;
			as.compare_lan_ext = pf_ike_compare;
			as.compare_ext_gwy = pf_ike_compare;
			as.u.ike.cookie = ike.initiator_cookie;
		} else {
			/*
			 * <http://tools.ietf.org/html/\
			 *    draft-ietf-ipsec-nat-t-ike-01>
			 * Support non-standard NAT-T implementations that
			 * push the ESP packet over the top of the IKE packet.
			 * Do not drop packet.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE initiator cookie = 0.\n"));
		}
	}

	*state = pf_find_state(kif, &key, dx);

	/*
	 * No exact match: retry with progressively looser external
	 * endpoint filtering (address-dependent, then endpoint-
	 * independent) — but only for plain UDP (no app_state) states.
	 */
	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_AD;
		*state = pf_find_state(kif, &key, dx);
	}

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_EI;
		*state = pf_find_state(kif, &key, dx);
	}

	/* Propagate the state's flow hash to the packet descriptor. */
	if ((*state) != NULL && pd != NULL &&
		pd->flowhash == 0)
		pd->flowhash = (*state)->state_key->flowhash;

	if (pf_state_lookup_aux(state, kif, direction, &action))
		return (action);

	/* Orient src/dst peers relative to the state's creation direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE)
		src->state = PFUDPS_SINGLE;
	if (dst->state == PFUDPS_SINGLE)
		dst->state = PFUDPS_MULTIPLE;

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	else
		(*state)->timeout = PFTM_UDP_SINGLE;

	/*
	 * Loose filter variants matched with wildcarded external
	 * endpoint fields: record the endpoint actually seen so replies
	 * translate consistently.
	 */
	extfilter = (*state)->state_key->proto_variant;
	if (extfilter > PF_EXTFILTER_APD) {
		(*state)->state_key->ext.xport.port = key.ext.xport.port;
		if (extfilter > PF_EXTFILTER_AD)
			PF_ACPY(&(*state)->state_key->ext.addr,
			    &key.ext.addr, key.af);
	}

	if ((*state)->state_key->app_state &&
	    (*state)->state_key->app_state->handler) {
		/*
		 * NOTE(review): uh_ulen is still in network byte order here,
		 * so "off + uh->uh_ulen" looks wrong on little-endian hosts —
		 * confirm what offset the app handlers actually expect.
		 */
		(*state)->state_key->app_state->handler(*state, direction,
		    off + uh->uh_ulen, pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		/* the handler may have replaced the mbuf */
		m = pd->mp;
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE((*state)->state_key)) {
		m = pf_lazy_makewritable(pd, m, off + sizeof (*uh));
		if (!m) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}

		if (direction == PF_OUT)
			pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
			    pd->ip_sum, &uh->uh_sum,
			    &(*state)->state_key->gwy.addr,
			    (*state)->state_key->gwy.xport.port, 1, pd->af);
		else
			pf_change_ap(direction, pd->mp, pd->dst, &uh->uh_dport,
			    pd->ip_sum, &uh->uh_sum,
			    &(*state)->state_key->lan.addr,
			    (*state)->state_key->lan.xport.port, 1, pd->af);
		/* write the rewritten UDP header back to the mbuf */
		m_copyback(m, off, sizeof (*uh), uh);
	}

	return (PF_PASS);
}
6456
6457static int
6458pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
6459    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
6460{
6461#pragma unused(h)
6462	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
6463	u_int16_t	 icmpid = 0, *icmpsum;
6464	u_int8_t	 icmptype;
6465	int		 state_icmp = 0;
6466	struct pf_state_key_cmp key;
6467
6468	struct pf_app_state as;
6469	key.app_state = 0;
6470
6471	switch (pd->proto) {
6472#if INET
6473	case IPPROTO_ICMP:
6474		icmptype = pd->hdr.icmp->icmp_type;
6475		icmpid = pd->hdr.icmp->icmp_id;
6476		icmpsum = &pd->hdr.icmp->icmp_cksum;
6477
6478		if (icmptype == ICMP_UNREACH ||
6479		    icmptype == ICMP_SOURCEQUENCH ||
6480		    icmptype == ICMP_REDIRECT ||
6481		    icmptype == ICMP_TIMXCEED ||
6482		    icmptype == ICMP_PARAMPROB)
6483			state_icmp++;
6484		break;
6485#endif /* INET */
6486#if INET6
6487	case IPPROTO_ICMPV6:
6488		icmptype = pd->hdr.icmp6->icmp6_type;
6489		icmpid = pd->hdr.icmp6->icmp6_id;
6490		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
6491
6492		if (icmptype == ICMP6_DST_UNREACH ||
6493		    icmptype == ICMP6_PACKET_TOO_BIG ||
6494		    icmptype == ICMP6_TIME_EXCEEDED ||
6495		    icmptype == ICMP6_PARAM_PROB)
6496			state_icmp++;
6497		break;
6498#endif /* INET6 */
6499	}
6500
6501	if (!state_icmp) {
6502
6503		/*
6504		 * ICMP query/reply message not related to a TCP/UDP packet.
6505		 * Search for an ICMP state.
6506		 */
6507		key.af = pd->af;
6508		key.proto = pd->proto;
6509		if (direction == PF_IN)	{
6510			PF_ACPY(&key.ext.addr, pd->src, key.af);
6511			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
6512			key.ext.xport.port = 0;
6513			key.gwy.xport.port = icmpid;
6514		} else {
6515			PF_ACPY(&key.lan.addr, pd->src, key.af);
6516			PF_ACPY(&key.ext.addr, pd->dst, key.af);
6517			key.lan.xport.port = icmpid;
6518			key.ext.xport.port = 0;
6519		}
6520
6521		STATE_LOOKUP();
6522
6523		(*state)->expire = pf_time_second();
6524		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
6525
6526		/* translate source/destination address, if necessary */
6527		if (STATE_TRANSLATE((*state)->state_key)) {
6528			if (direction == PF_OUT) {
6529				switch (pd->af) {
6530#if INET
6531				case AF_INET:
6532					pf_change_a(&saddr->v4.s_addr,
6533					    pd->ip_sum,
6534					    (*state)->state_key->gwy.addr.v4.s_addr, 0);
6535					pd->hdr.icmp->icmp_cksum =
6536					    pf_cksum_fixup(
6537					    pd->hdr.icmp->icmp_cksum, icmpid,
6538					    (*state)->state_key->gwy.xport.port, 0);
6539					pd->hdr.icmp->icmp_id =
6540					    (*state)->state_key->gwy.xport.port;
6541					m = pf_lazy_makewritable(pd, m,
6542					    off + ICMP_MINLEN);
6543					if (!m)
6544						return (PF_DROP);
6545					m_copyback(m, off, ICMP_MINLEN,
6546					    pd->hdr.icmp);
6547					break;
6548#endif /* INET */
6549#if INET6
6550				case AF_INET6:
6551					pf_change_a6(saddr,
6552					    &pd->hdr.icmp6->icmp6_cksum,
6553					    &(*state)->state_key->gwy.addr, 0);
6554					m = pf_lazy_makewritable(pd, m,
6555					    off + sizeof (struct icmp6_hdr));
6556					if (!m)
6557						return (PF_DROP);
6558					m_copyback(m, off,
6559					    sizeof (struct icmp6_hdr),
6560					    pd->hdr.icmp6);
6561					break;
6562#endif /* INET6 */
6563				}
6564			} else {
6565				switch (pd->af) {
6566#if INET
6567				case AF_INET:
6568					pf_change_a(&daddr->v4.s_addr,
6569					    pd->ip_sum,
6570					    (*state)->state_key->lan.addr.v4.s_addr, 0);
6571					pd->hdr.icmp->icmp_cksum =
6572					    pf_cksum_fixup(
6573					    pd->hdr.icmp->icmp_cksum, icmpid,
6574					    (*state)->state_key->lan.xport.port, 0);
6575					pd->hdr.icmp->icmp_id =
6576					    (*state)->state_key->lan.xport.port;
6577					m = pf_lazy_makewritable(pd, m,
6578					    off + ICMP_MINLEN);
6579					if (!m)
6580						return (PF_DROP);
6581					m_copyback(m, off, ICMP_MINLEN,
6582					    pd->hdr.icmp);
6583					break;
6584#endif /* INET */
6585#if INET6
6586				case AF_INET6:
6587					pf_change_a6(daddr,
6588					    &pd->hdr.icmp6->icmp6_cksum,
6589					    &(*state)->state_key->lan.addr, 0);
6590					m = pf_lazy_makewritable(pd, m,
6591					    off + sizeof (struct icmp6_hdr));
6592					if (!m)
6593						return (PF_DROP);
6594					m_copyback(m, off,
6595					    sizeof (struct icmp6_hdr),
6596					    pd->hdr.icmp6);
6597					break;
6598#endif /* INET6 */
6599				}
6600			}
6601		}
6602
6603		return (PF_PASS);
6604
6605	} else {
6606		/*
6607		 * ICMP error message in response to a TCP/UDP packet.
6608		 * Extract the inner TCP/UDP header and search for that state.
6609		 */
6610
6611		struct pf_pdesc	pd2;
6612#if INET
6613		struct ip	h2;
6614#endif /* INET */
6615#if INET6
6616		struct ip6_hdr	h2_6;
6617		int		terminal = 0;
6618#endif /* INET6 */
6619		int		ipoff2 = 0;
6620		int		off2 = 0;
6621
6622		memset(&pd2, 0, sizeof (pd2));
6623
6624		pd2.af = pd->af;
6625		switch (pd->af) {
6626#if INET
6627		case AF_INET:
6628			/* offset of h2 in mbuf chain */
6629			ipoff2 = off + ICMP_MINLEN;
6630
6631			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof (h2),
6632			    NULL, reason, pd2.af)) {
6633				DPFPRINTF(PF_DEBUG_MISC,
6634				    ("pf: ICMP error message too short "
6635				    "(ip)\n"));
6636				return (PF_DROP);
6637			}
6638			/*
6639			 * ICMP error messages don't refer to non-first
6640			 * fragments
6641			 */
6642			if (h2.ip_off & htons(IP_OFFMASK)) {
6643				REASON_SET(reason, PFRES_FRAG);
6644				return (PF_DROP);
6645			}
6646
6647			/* offset of protocol header that follows h2 */
6648			off2 = ipoff2 + (h2.ip_hl << 2);
6649
6650			pd2.proto = h2.ip_p;
6651			pd2.src = (struct pf_addr *)&h2.ip_src;
6652			pd2.dst = (struct pf_addr *)&h2.ip_dst;
6653			pd2.ip_sum = &h2.ip_sum;
6654			break;
6655#endif /* INET */
6656#if INET6
6657		case AF_INET6:
6658			ipoff2 = off + sizeof (struct icmp6_hdr);
6659
6660			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof (h2_6),
6661			    NULL, reason, pd2.af)) {
6662				DPFPRINTF(PF_DEBUG_MISC,
6663				    ("pf: ICMP error message too short "
6664				    "(ip6)\n"));
6665				return (PF_DROP);
6666			}
6667			pd2.proto = h2_6.ip6_nxt;
6668			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
6669			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
6670			pd2.ip_sum = NULL;
6671			off2 = ipoff2 + sizeof (h2_6);
6672			do {
6673				switch (pd2.proto) {
6674				case IPPROTO_FRAGMENT:
6675					/*
6676					 * ICMPv6 error messages for
6677					 * non-first fragments
6678					 */
6679					REASON_SET(reason, PFRES_FRAG);
6680					return (PF_DROP);
6681				case IPPROTO_AH:
6682				case IPPROTO_HOPOPTS:
6683				case IPPROTO_ROUTING:
6684				case IPPROTO_DSTOPTS: {
6685					/* get next header and header length */
6686					struct ip6_ext opt6;
6687
6688					if (!pf_pull_hdr(m, off2, &opt6,
6689					    sizeof (opt6), NULL, reason,
6690					    pd2.af)) {
6691						DPFPRINTF(PF_DEBUG_MISC,
6692						    ("pf: ICMPv6 short opt\n"));
6693						return (PF_DROP);
6694					}
6695					if (pd2.proto == IPPROTO_AH)
6696						off2 += (opt6.ip6e_len + 2) * 4;
6697					else
6698						off2 += (opt6.ip6e_len + 1) * 8;
6699					pd2.proto = opt6.ip6e_nxt;
6700					/* goto the next header */
6701					break;
6702				}
6703				default:
6704					terminal++;
6705					break;
6706				}
6707			} while (!terminal);
6708			break;
6709#endif /* INET6 */
6710		}
6711
6712		switch (pd2.proto) {
6713		case IPPROTO_TCP: {
6714			struct tcphdr		 th;
6715			u_int32_t		 seq;
6716			struct pf_state_peer	*src, *dst;
6717			u_int8_t		 dws;
6718			int			 copyback = 0;
6719
6720			/*
6721			 * Only the first 8 bytes of the TCP header can be
6722			 * expected. Don't access any TCP header fields after
6723			 * th_seq, an ackskew test is not possible.
6724			 */
6725			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
6726			    pd2.af)) {
6727				DPFPRINTF(PF_DEBUG_MISC,
6728				    ("pf: ICMP error message too short "
6729				    "(tcp)\n"));
6730				return (PF_DROP);
6731			}
6732
6733			key.af = pd2.af;
6734			key.proto = IPPROTO_TCP;
6735			if (direction == PF_IN)	{
6736				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
6737				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
6738				key.ext.xport.port = th.th_dport;
6739				key.gwy.xport.port = th.th_sport;
6740			} else {
6741				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
6742				PF_ACPY(&key.ext.addr, pd2.src, key.af);
6743				key.lan.xport.port = th.th_dport;
6744				key.ext.xport.port = th.th_sport;
6745			}
6746
6747			STATE_LOOKUP();
6748
6749			if (direction == (*state)->state_key->direction) {
6750				src = &(*state)->dst;
6751				dst = &(*state)->src;
6752			} else {
6753				src = &(*state)->src;
6754				dst = &(*state)->dst;
6755			}
6756
6757			if (src->wscale && dst->wscale)
6758				dws = dst->wscale & PF_WSCALE_MASK;
6759			else
6760				dws = 0;
6761
6762			/* Demodulate sequence number */
6763			seq = ntohl(th.th_seq) - src->seqdiff;
6764			if (src->seqdiff) {
6765				pf_change_a(&th.th_seq, icmpsum,
6766				    htonl(seq), 0);
6767				copyback = 1;
6768			}
6769
6770			if (!SEQ_GEQ(src->seqhi, seq) ||
6771			    !SEQ_GEQ(seq,
6772			    src->seqlo - ((u_int32_t)dst->max_win << dws))) {
6773				if (pf_status.debug >= PF_DEBUG_MISC) {
6774					printf("pf: BAD ICMP %d:%d ",
6775					    icmptype, pd->hdr.icmp->icmp_code);
6776					pf_print_host(pd->src, 0, pd->af);
6777					printf(" -> ");
6778					pf_print_host(pd->dst, 0, pd->af);
6779					printf(" state: ");
6780					pf_print_state(*state);
6781					printf(" seq=%u\n", seq);
6782				}
6783				REASON_SET(reason, PFRES_BADSTATE);
6784				return (PF_DROP);
6785			}
6786
6787			if (STATE_TRANSLATE((*state)->state_key)) {
6788				if (direction == PF_IN) {
6789					pf_change_icmp(pd2.src, &th.th_sport,
6790					    daddr, &(*state)->state_key->lan.addr,
6791					    (*state)->state_key->lan.xport.port, NULL,
6792					    pd2.ip_sum, icmpsum,
6793					    pd->ip_sum, 0, pd2.af);
6794				} else {
6795					pf_change_icmp(pd2.dst, &th.th_dport,
6796					    saddr, &(*state)->state_key->gwy.addr,
6797					    (*state)->state_key->gwy.xport.port, NULL,
6798					    pd2.ip_sum, icmpsum,
6799					    pd->ip_sum, 0, pd2.af);
6800				}
6801				copyback = 1;
6802			}
6803
6804			if (copyback) {
6805				m = pf_lazy_makewritable(pd, m, off2 + 8);
6806				if (!m)
6807					return (PF_DROP);
6808				switch (pd2.af) {
6809#if INET
6810				case AF_INET:
6811					m_copyback(m, off, ICMP_MINLEN,
6812					    pd->hdr.icmp);
6813					m_copyback(m, ipoff2, sizeof (h2),
6814					    &h2);
6815					break;
6816#endif /* INET */
6817#if INET6
6818				case AF_INET6:
6819					m_copyback(m, off,
6820					    sizeof (struct icmp6_hdr),
6821					    pd->hdr.icmp6);
6822					m_copyback(m, ipoff2, sizeof (h2_6),
6823					    &h2_6);
6824					break;
6825#endif /* INET6 */
6826				}
6827				m_copyback(m, off2, 8, &th);
6828			}
6829
6830			return (PF_PASS);
6831			break;
6832		}
6833		case IPPROTO_UDP: {
6834			struct udphdr		uh;
6835			int dx, action;
6836			if (!pf_pull_hdr(m, off2, &uh, sizeof (uh),
6837			    NULL, reason, pd2.af)) {
6838				DPFPRINTF(PF_DEBUG_MISC,
6839				    ("pf: ICMP error message too short "
6840				    "(udp)\n"));
6841				return (PF_DROP);
6842			}
6843
6844			key.af = pd2.af;
6845			key.proto = IPPROTO_UDP;
6846			if (direction == PF_IN)	{
6847				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
6848				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
6849				key.ext.xport.port = uh.uh_dport;
6850				key.gwy.xport.port = uh.uh_sport;
6851				dx = PF_IN;
6852			} else {
6853				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
6854				PF_ACPY(&key.ext.addr, pd2.src, key.af);
6855				key.lan.xport.port = uh.uh_dport;
6856				key.ext.xport.port = uh.uh_sport;
6857				dx = PF_OUT;
6858			}
6859
6860			key.proto_variant = PF_EXTFILTER_APD;
6861
6862			if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
6863			    ntohs(uh.uh_dport) == PF_IKE_PORT) {
6864				struct pf_ike_hdr ike;
6865				size_t plen =
6866				    m->m_pkthdr.len - off2 - sizeof (uh);
6867				if (direction == PF_IN &&
6868				    plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
6869					DPFPRINTF(PF_DEBUG_MISC, ("pf: "
6870					    "ICMP error, embedded IKE message "
6871					    "too small.\n"));
6872					return (PF_DROP);
6873				}
6874
6875				if (plen > sizeof (ike))
6876					plen = sizeof (ike);
6877				m_copydata(m, off + sizeof (uh), plen, &ike);
6878
6879				key.app_state = &as;
6880				as.compare_lan_ext = pf_ike_compare;
6881				as.compare_ext_gwy = pf_ike_compare;
6882				as.u.ike.cookie = ike.initiator_cookie;
6883			}
6884
6885			*state = pf_find_state(kif, &key, dx);
6886
6887			if (key.app_state && *state == 0) {
6888				key.app_state = 0;
6889				*state = pf_find_state(kif, &key, dx);
6890			}
6891
6892			if (*state == 0) {
6893				key.proto_variant = PF_EXTFILTER_AD;
6894				*state = pf_find_state(kif, &key, dx);
6895			}
6896
6897			if (*state == 0) {
6898				key.proto_variant = PF_EXTFILTER_EI;
6899				*state = pf_find_state(kif, &key, dx);
6900			}
6901
6902			if (*state != NULL && pd != NULL &&
6903				pd->flowhash == 0)
6904				pd->flowhash = (*state)->state_key->flowhash;
6905
6906			if (pf_state_lookup_aux(state, kif, direction, &action))
6907				return (action);
6908
6909			if (STATE_TRANSLATE((*state)->state_key)) {
6910				if (direction == PF_IN) {
6911					pf_change_icmp(pd2.src, &uh.uh_sport,
6912					    daddr, &(*state)->state_key->lan.addr,
6913					    (*state)->state_key->lan.xport.port, &uh.uh_sum,
6914					    pd2.ip_sum, icmpsum,
6915					    pd->ip_sum, 1, pd2.af);
6916				} else {
6917					pf_change_icmp(pd2.dst, &uh.uh_dport,
6918					    saddr, &(*state)->state_key->gwy.addr,
6919					    (*state)->state_key->gwy.xport.port, &uh.uh_sum,
6920					    pd2.ip_sum, icmpsum,
6921					    pd->ip_sum, 1, pd2.af);
6922				}
6923				m = pf_lazy_makewritable(pd, m,
6924				    off2 + sizeof (uh));
6925				if (!m)
6926					return (PF_DROP);
6927				switch (pd2.af) {
6928#if INET
6929				case AF_INET:
6930					m_copyback(m, off, ICMP_MINLEN,
6931					    pd->hdr.icmp);
6932					m_copyback(m, ipoff2, sizeof (h2), &h2);
6933					break;
6934#endif /* INET */
6935#if INET6
6936				case AF_INET6:
6937					m_copyback(m, off,
6938					    sizeof (struct icmp6_hdr),
6939					    pd->hdr.icmp6);
6940					m_copyback(m, ipoff2, sizeof (h2_6),
6941					    &h2_6);
6942					break;
6943#endif /* INET6 */
6944				}
6945				m_copyback(m, off2, sizeof (uh), &uh);
6946			}
6947
6948			return (PF_PASS);
6949			break;
6950		}
6951#if INET
6952		case IPPROTO_ICMP: {
6953			struct icmp		iih;
6954
6955			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
6956			    NULL, reason, pd2.af)) {
6957				DPFPRINTF(PF_DEBUG_MISC,
6958				    ("pf: ICMP error message too short i"
6959				    "(icmp)\n"));
6960				return (PF_DROP);
6961			}
6962
6963			key.af = pd2.af;
6964			key.proto = IPPROTO_ICMP;
6965			if (direction == PF_IN)	{
6966				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
6967				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
6968				key.ext.xport.port = 0;
6969				key.gwy.xport.port = iih.icmp_id;
6970			} else {
6971				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
6972				PF_ACPY(&key.ext.addr, pd2.src, key.af);
6973				key.lan.xport.port = iih.icmp_id;
6974				key.ext.xport.port = 0;
6975			}
6976
6977			STATE_LOOKUP();
6978
6979			if (STATE_TRANSLATE((*state)->state_key)) {
6980				if (direction == PF_IN) {
6981					pf_change_icmp(pd2.src, &iih.icmp_id,
6982					    daddr, &(*state)->state_key->lan.addr,
6983					    (*state)->state_key->lan.xport.port, NULL,
6984					    pd2.ip_sum, icmpsum,
6985					    pd->ip_sum, 0, AF_INET);
6986				} else {
6987					pf_change_icmp(pd2.dst, &iih.icmp_id,
6988					    saddr, &(*state)->state_key->gwy.addr,
6989					    (*state)->state_key->gwy.xport.port, NULL,
6990					    pd2.ip_sum, icmpsum,
6991					    pd->ip_sum, 0, AF_INET);
6992				}
6993				m = pf_lazy_makewritable(pd, m, off2 + ICMP_MINLEN);
6994				if (!m)
6995					return (PF_DROP);
6996				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
6997				m_copyback(m, ipoff2, sizeof (h2), &h2);
6998				m_copyback(m, off2, ICMP_MINLEN, &iih);
6999			}
7000
7001			return (PF_PASS);
7002			break;
7003		}
7004#endif /* INET */
7005#if INET6
7006		case IPPROTO_ICMPV6: {
7007			struct icmp6_hdr	iih;
7008
7009			if (!pf_pull_hdr(m, off2, &iih,
7010			    sizeof (struct icmp6_hdr), NULL, reason, pd2.af)) {
7011				DPFPRINTF(PF_DEBUG_MISC,
7012				    ("pf: ICMP error message too short "
7013				    "(icmp6)\n"));
7014				return (PF_DROP);
7015			}
7016
7017			key.af = pd2.af;
7018			key.proto = IPPROTO_ICMPV6;
7019			if (direction == PF_IN)	{
7020				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
7021				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
7022				key.ext.xport.port = 0;
7023				key.gwy.xport.port = iih.icmp6_id;
7024			} else {
7025				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
7026				PF_ACPY(&key.ext.addr, pd2.src, key.af);
7027				key.lan.xport.port = iih.icmp6_id;
7028				key.ext.xport.port = 0;
7029			}
7030
7031			STATE_LOOKUP();
7032
7033			if (STATE_TRANSLATE((*state)->state_key)) {
7034				if (direction == PF_IN) {
7035					pf_change_icmp(pd2.src, &iih.icmp6_id,
7036					    daddr, &(*state)->state_key->lan.addr,
7037					    (*state)->state_key->lan.xport.port, NULL,
7038					    pd2.ip_sum, icmpsum,
7039					    pd->ip_sum, 0, AF_INET6);
7040				} else {
7041					pf_change_icmp(pd2.dst, &iih.icmp6_id,
7042					    saddr, &(*state)->state_key->gwy.addr,
7043					    (*state)->state_key->gwy.xport.port, NULL,
7044					    pd2.ip_sum, icmpsum,
7045					    pd->ip_sum, 0, AF_INET6);
7046				}
7047				m = pf_lazy_makewritable(pd, m, off2 +
7048				    sizeof (struct icmp6_hdr));
7049				if (!m)
7050					return (PF_DROP);
7051				m_copyback(m, off, sizeof (struct icmp6_hdr),
7052				    pd->hdr.icmp6);
7053				m_copyback(m, ipoff2, sizeof (h2_6), &h2_6);
7054				m_copyback(m, off2, sizeof (struct icmp6_hdr),
7055				    &iih);
7056			}
7057
7058			return (PF_PASS);
7059			break;
7060		}
7061#endif /* INET6 */
7062		default: {
7063			key.af = pd2.af;
7064			key.proto = pd2.proto;
7065			if (direction == PF_IN)	{
7066				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
7067				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
7068				key.ext.xport.port = 0;
7069				key.gwy.xport.port = 0;
7070			} else {
7071				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
7072				PF_ACPY(&key.ext.addr, pd2.src, key.af);
7073				key.lan.xport.port = 0;
7074				key.ext.xport.port = 0;
7075			}
7076
7077			STATE_LOOKUP();
7078
7079			if (STATE_TRANSLATE((*state)->state_key)) {
7080				if (direction == PF_IN) {
7081					pf_change_icmp(pd2.src, NULL,
7082					    daddr, &(*state)->state_key->lan.addr,
7083					    0, NULL,
7084					    pd2.ip_sum, icmpsum,
7085					    pd->ip_sum, 0, pd2.af);
7086				} else {
7087					pf_change_icmp(pd2.dst, NULL,
7088					    saddr, &(*state)->state_key->gwy.addr,
7089					    0, NULL,
7090					    pd2.ip_sum, icmpsum,
7091					    pd->ip_sum, 0, pd2.af);
7092				}
7093				switch (pd2.af) {
7094#if INET
7095				case AF_INET:
7096					m = pf_lazy_makewritable(pd, m,
7097					    ipoff2 + sizeof (h2));
7098					if (!m)
7099						return (PF_DROP);
7100#endif /* INET */
7101#if INET6
7102				case AF_INET6:
7103					m = pf_lazy_makewritable(pd, m,
7104					    ipoff2 + sizeof (h2_6));
7105					if (!m)
7106						return (PF_DROP);
7107					m_copyback(m, off,
7108					    sizeof (struct icmp6_hdr),
7109					    pd->hdr.icmp6);
7110					m_copyback(m, ipoff2, sizeof (h2_6),
7111					    &h2_6);
7112					break;
7113#endif /* INET6 */
7114				}
7115			}
7116
7117			return (PF_PASS);
7118			break;
7119		}
7120		}
7121	}
7122}
7123
/*
 * pf_test_state_grev1: match a GREv1 (PPTP data channel) packet
 * against pf state by call id, refresh its timers, and rewrite
 * addresses/call id per the NAT recorded in the state key.
 * Returns PF_PASS or PF_DROP.
 */
static int
pf_test_state_grev1(struct pf_state **state, int direction,
    struct pfi_kif *kif, int off, struct pf_pdesc *pd)
{
	struct pf_state_peer *src;
	struct pf_state_peer *dst;
	struct pf_state_key_cmp key;
	struct pf_grev1_hdr *grev1 = pd->hdr.grev1;
	struct mbuf *m;

	key.app_state = 0;
	key.af = pd->af;
	key.proto = IPPROTO_GRE;
	key.proto_variant = PF_GRE_PPTP_VARIANT;
	if (direction == PF_IN)	{
		/* inbound: match on the gateway-side call id */
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.gwy.xport.call_id = grev1->call_id;
	} else {
		/* outbound: match on the external-side call id */
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.ext.xport.call_id = grev1->call_id;
	}

	STATE_LOOKUP();

	/* Orient src/dst peers relative to the state's creation direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFGRE1S_INITIATING)
		src->state = PFGRE1S_INITIATING;

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFGRE1S_INITIATING &&
	    dst->state >= PFGRE1S_INITIATING) {
		/* don't demote a timeout already promoted elsewhere */
		if ((*state)->timeout != PFTM_TCP_ESTABLISHED)
			(*state)->timeout = PFTM_GREv1_ESTABLISHED;
		src->state = PFGRE1S_ESTABLISHED;
		dst->state = PFGRE1S_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_GREv1_INITIATING;
	}

	/* keep the parent PPTP control state alive while data flows */
	if ((*state)->state_key->app_state)
		(*state)->state_key->app_state->u.grev1.pptp_state->expire =
		    pf_time_second();

	/* translate source/destination address, if necessary */
	if (STATE_GRE_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4.s_addr, 0);
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
#endif /* INET6 */
			}
		} else {
			/* inbound: also restore the LAN host's original call id */
			grev1->call_id = (*state)->state_key->lan.xport.call_id;

			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4.s_addr, 0);
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
#endif /* INET6 */
			}
		}

		/* write the (possibly rewritten) GRE header back to the mbuf */
		m = pf_lazy_makewritable(pd, pd->mp, off + sizeof (*grev1));
		if (!m)
			return (PF_DROP);
		m_copyback(m, off, sizeof (*grev1), grev1);
	}

	return (PF_PASS);
}
7224
/*
 * pf_test_state_esp: match an ESP packet against pf state by SPI,
 * refresh its timers, and apply address-only NAT translation.
 *
 * If no state matches, look for a "blocking" state created with SPI 0
 * (before the real SPI was known) and re-key it to this packet's SPI;
 * if the re-keyed entry collides with an existing tree node, the state
 * is torn down and the packet dropped.  Returns PF_PASS or PF_DROP.
 */
static int
pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
    int off, struct pf_pdesc *pd)
{
#pragma unused(off)
	struct pf_state_peer *src;
	struct pf_state_peer *dst;
	struct pf_state_key_cmp key;
	struct pf_esp_hdr *esp = pd->hdr.esp;
	int action;

	memset(&key, 0, sizeof (key));
	key.af = pd->af;
	key.proto = IPPROTO_ESP;
	if (direction == PF_IN)	{
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.gwy.xport.spi = esp->spi;
	} else {
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.ext.xport.spi = esp->spi;
	}

	*state = pf_find_state(kif, &key, direction);

	if (*state == 0) {
		struct pf_state *s;

		/*
		 * <jhw@apple.com>
		 * No matching state.  Look for a blocking state.  If we find
		 * one, then use that state and move it so that it's keyed to
		 * the SPI in the current packet.
		 */
		if (direction == PF_IN) {
			key.gwy.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* re-key under the real SPI: remove, edit, re-insert */
				RB_REMOVE(pf_state_tree_ext_gwy,
				    &pf_statetbl_ext_gwy, sk);
				sk->lan.xport.spi = sk->gwy.xport.spi =
				    esp->spi;

				/* RB_INSERT returns non-NULL on duplicate key */
				if (RB_INSERT(pf_state_tree_ext_gwy,
				    &pf_statetbl_ext_gwy, sk))
					pf_detach_state(s, PF_DT_SKIP_EXTGWY);
				else
					*state = s;
			}
		} else {
			key.ext.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				RB_REMOVE(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk);
				sk->ext.xport.spi = esp->spi;

				if (RB_INSERT(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk))
					pf_detach_state(s, PF_DT_SKIP_LANEXT);
				else
					*state = s;
			}
		}

		/* re-key collided: the blocking state is unusable, free it */
		if (s) {
			if (*state == 0) {
#if NPFSYNC
				if (s->creatorid == pf_status.hostid)
					pfsync_delete_state(s);
#endif
				s->timeout = PFTM_UNLINKED;
				hook_runloop(&s->unlink_hooks,
				    HOOK_REMOVE|HOOK_FREE);
				pf_src_tree_remove_state(s);
				pf_free_state(s);
				return (PF_DROP);
			}
		}
	}

	/* Propagate the state's flow hash to the packet descriptor. */
	if (*state != NULL && pd != NULL &&
		pd->flowhash == 0) {
		pd->flowhash = (*state)->state_key->flowhash;
	}

	if (pf_state_lookup_aux(state, kif, direction, &action))
		return (action);

	/* Orient src/dst peers relative to the state's creation direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFESPS_INITIATING)
		src->state = PFESPS_INITIATING;

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFESPS_INITIATING &&
	    dst->state >= PFESPS_INITIATING) {
		(*state)->timeout = PFTM_ESP_ESTABLISHED;
		src->state = PFESPS_ESTABLISHED;
		dst->state = PFESPS_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_ESP_INITIATING;
	}
	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4.s_addr, 0);
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
#endif /* INET6 */
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4.s_addr, 0);
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
#endif /* INET6 */
			}
		}
	}

	return (PF_PASS);
}
7382
/*
 * pf_test_state_other: state tracking for protocols without port or
 * SPI semantics.  Matches on addresses only, updates the SINGLE/
 * MULTIPLE peer states and timers, and applies address-only NAT.
 * Returns PF_PASS or PF_DROP.
 */
static int
pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct pf_pdesc *pd)
{
	struct pf_state_peer	*src, *dst;
	struct pf_state_key_cmp	 key;

	/*
	 * NOTE(review): unlike pf_test_state_udp/grev1, key.proto_variant is
	 * never initialized here — confirm the state compare ignores it for
	 * non-UDP/GRE/ESP protocols.
	 */
	key.app_state = 0;
	key.af = pd->af;
	key.proto = pd->proto;
	if (direction == PF_IN)	{
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.ext.xport.port = 0;
		key.gwy.xport.port = 0;
	} else {
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.lan.xport.port = 0;
		key.ext.xport.port = 0;
	}

	STATE_LOOKUP();

	/* Orient src/dst peers relative to the state's creation direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFOTHERS_SINGLE)
		src->state = PFOTHERS_SINGLE;
	if (dst->state == PFOTHERS_SINGLE)
		dst->state = PFOTHERS_MULTIPLE;

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
		(*state)->timeout = PFTM_OTHER_MULTIPLE;
	else
		(*state)->timeout = PFTM_OTHER_SINGLE;

	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4.s_addr,
				    0);
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				PF_ACPY(pd->src,
				    &(*state)->state_key->gwy.addr, pd->af);
				break;
#endif /* INET6 */
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4.s_addr,
				    0);
				break;
#endif /* INET */
#if INET6
			case AF_INET6:
				PF_ACPY(pd->dst,
				    &(*state)->state_key->lan.addr, pd->af);
				break;
#endif /* INET6 */
			}
		}
	}

	return (PF_PASS);
}
7469
7470/*
7471 * ipoff and off are measured from the start of the mbuf chain.
7472 * h must be at "ipoff" on the mbuf chain.
7473 */
void *
pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	/*
	 * Copy "len" bytes of header starting at mbuf-chain offset "off"
	 * into caller-supplied buffer "p", after sanity-checking the
	 * request against the IP/IPv6 header's declared length and the
	 * actual packet length.  On failure, NULL is returned and
	 * *actionp / *reasonp are set for the caller.
	 */
	switch (af) {
#if INET
	case AF_INET: {
		struct ip	*h = mtod(m, struct ip *);
		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

		if (fragoff) {
			/*
			 * Non-first fragment: the requested header is not in
			 * this packet.  Pass the fragment through if it lies
			 * entirely beyond the header, otherwise drop it as a
			 * (possibly malicious) header-splitting fragment.
			 */
			if (fragoff >= len) {
				ACTION_SET(actionp, PF_PASS);
			} else {
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return (NULL);
		}
		/* both the mbuf data and the declared IP length must cover it */
		if (m->m_pkthdr.len < off + len ||
		    ntohs(h->ip_len) < off + len) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return (NULL);
		}
		break;
	}
#endif /* INET */
#if INET6
	case AF_INET6: {
		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);

		/* ip6_plen excludes the fixed header, hence the addition */
		if (m->m_pkthdr.len < off + len ||
		    (ntohs(h->ip6_plen) + sizeof (struct ip6_hdr)) <
		    (unsigned)(off + len)) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return (NULL);
		}
		break;
	}
#endif /* INET6 */
	}
	m_copydata(m, off, len, p);
	return (p);
}
7520
7521int
7522pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
7523{
7524#pragma unused(kif)
7525	struct sockaddr_in	*dst;
7526	int			 ret = 1;
7527#if INET6
7528	struct sockaddr_in6	*dst6;
7529	struct route_in6	 ro;
7530#else
7531	struct route		 ro;
7532#endif
7533
7534	bzero(&ro, sizeof (ro));
7535	switch (af) {
7536	case AF_INET:
7537		dst = satosin(&ro.ro_dst);
7538		dst->sin_family = AF_INET;
7539		dst->sin_len = sizeof (*dst);
7540		dst->sin_addr = addr->v4;
7541		break;
7542#if INET6
7543	case AF_INET6:
7544		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
7545		dst6->sin6_family = AF_INET6;
7546		dst6->sin6_len = sizeof (*dst6);
7547		dst6->sin6_addr = addr->v6;
7548		break;
7549#endif /* INET6 */
7550	default:
7551		return (0);
7552	}
7553
7554	/* XXX: IFT_ENC is not currently used by anything*/
7555	/* Skip checks for ipsec interfaces */
7556	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
7557		goto out;
7558
7559	rtalloc((struct route *)&ro);
7560
7561out:
7562	if (ro.ro_rt != NULL)
7563		RTFREE(ro.ro_rt);
7564	return (ret);
7565}
7566
int
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
{
#pragma unused(aw)
	/*
	 * Route-label matching stub.  "ret" is initialized to 0 and never
	 * modified, so this always reports "no match" for supported
	 * address families (and 0 for unknown af as well).  The route
	 * lookup is still performed and the route released, but its
	 * result is unused — presumably route labels are not supported
	 * on this platform (note "aw" is also unused).
	 */
	struct sockaddr_in	*dst;
#if INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro;
#else
	struct route		 ro;
#endif
	int			 ret = 0;

	/* build a destination sockaddr for the route lookup */
	bzero(&ro, sizeof (ro));
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof (*dst);
		dst->sin_addr = addr->v4;
		break;
#if INET6
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof (*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	rtalloc((struct route *)&ro);

	if (ro.ro_rt != NULL) {
		RTFREE(ro.ro_rt);
	}

	return (ret);
}
7608
7609#if INET
/*
 * IPv4 output path for route-to / reply-to / dup-to / fastroute rules.
 * Selects the outgoing interface and next hop (from the rule's address
 * pool, the state's cached route address, or a plain route lookup for
 * PF_FASTROUTE), re-runs pf_test() when the interface changes, handles
 * checksum offload fixups and IP fragmentation, then transmits via
 * ifnet_output().  For PF_DUPTO a copy is sent and *m is left intact;
 * otherwise *m is consumed (set to NULL via the done label).
 */
static void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	/* NOTE(review): pd IS referenced below (pd->pf_mtag); the pragma
	 * looks stale — confirm against the build's compiler behavior. */
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
	int			 sw_csum = 0;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* guard against route-to loops: drop after 4 reroutes */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		m0 = *m;
		*m = NULL;
		goto bad;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to: operate on a copy, leave the original untouched */
		if ((m0 = m_copym(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
			return;
	} else {
		/* route-to applies on the rule's direction, reply-to on the
		 * opposite one; otherwise nothing to do here */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < (int)sizeof (struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof (struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	/* default destination: the packet's own IP destination */
	ro = &iproute;
	bzero((caddr_t)ro, sizeof (*ro));
	dst = satosin((void *)&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof (*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: ordinary routing-table lookup */
		rtalloc(ro);
		if (ro->ro_rt == 0) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		RT_LOCK(ro->ro_rt);
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin((void *)ro->ro_rt->rt_gateway);
		RT_UNLOCK(ro->ro_rt);
	} else {
		/* route-to/reply-to/dup-to: next hop comes from the rule's
		 * address pool, or from the state's cached rt_addr/rt_kif */
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	/* interface changed: re-filter the packet as outbound on it */
	if (oifp != ifp) {
		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < (int)sizeof (struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof (struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Copied from ip_output. */

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	sw_csum = m0->m_pkthdr.csum_flags &
	    ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);

	if (ifp->if_hwassist & CSUM_TCP_SUM16) {
		/*
		 * Special case code for GMACE
		 * frames that can be checksumed by GMACE SUM16 HW:
		 * frame >64, no fragments, no UDP
		 */
		if (apple_hwcksum_tx && (m0->m_pkthdr.csum_flags & CSUM_TCP) &&
		    (ntohs(ip->ip_len) > 50) &&
		    (ntohs(ip->ip_len) <= ifp->if_mtu)) {
			/*
			 * Apple GMAC HW, expects:
			 *	STUFF_OFFSET << 16 | START_OFFSET
			 */
			/* IP+Enet header length */
			u_short offset = ((ip->ip_hl) << 2) + 14;
			u_short csumprev = m0->m_pkthdr.csum_data & 0xffff;
			m0->m_pkthdr.csum_flags = CSUM_DATA_VALID |
			    CSUM_TCP_SUM16; /* for GMAC */
			m0->m_pkthdr.csum_data = (csumprev + offset)  << 16 ;
			m0->m_pkthdr.csum_data += offset;
			/* do IP hdr chksum in software */
			sw_csum = CSUM_DELAY_IP;
		} else {
			/* let the software handle any UDP or TCP checksums */
			sw_csum |= (CSUM_DELAY_DATA & m0->m_pkthdr.csum_flags);
		}
	} else if (apple_hwcksum_tx == 0) {
		sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
		    m0->m_pkthdr.csum_flags;
	}

	/* compute TCP/UDP checksum in software if the hardware won't */
	if (sw_csum & CSUM_DELAY_DATA) {
		in_delayed_cksum(m0);
		sw_csum &= ~CSUM_DELAY_DATA;
		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}

	if (apple_hwcksum_tx != 0) {
		m0->m_pkthdr.csum_flags &=
		    IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
	} else {
		m0->m_pkthdr.csum_flags = 0;
	}

	/* fits in the MTU (or HW fragments): finish IP checksum and send */
	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
	    (ifp->if_hwassist & CSUM_FRAGMENT)) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP)
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
		/* DF set: reply with "fragmentation needed" unless dup-to */
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
			goto done;
		} else
			goto bad;
	}

	m1 = m0;

	/* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_off);
	NTOHS(ip->ip_len);
#endif
	error = ip_fragment(m0, ifp, ifp->if_mtu, sw_csum);

	if (error) {
		/* ip_fragment freed the chain on error */
		m0 = NULL;
		goto bad;
	}

	/* transmit each fragment; free the remainder after a failure */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0)
			error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
			    sintosa(dst));
		else
			m_freem(m0);
	}

	if (error == 0)
		ipstat.ips_fragmented++;

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
7822#endif /* INET */
7823
7824#if INET6
7825static void
7826pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
7827    struct pf_state *s, struct pf_pdesc *pd)
7828{
7829#pragma unused(pd)
7830	struct mbuf		*m0;
7831	struct route_in6	 ip6route;
7832	struct route_in6	*ro;
7833	struct sockaddr_in6	*dst;
7834	struct ip6_hdr		*ip6;
7835	struct ifnet		*ifp = NULL;
7836	struct pf_addr		 naddr;
7837	struct pf_src_node	*sn = NULL;
7838	int			 error = 0;
7839
7840	if (m == NULL || *m == NULL || r == NULL ||
7841	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
7842		panic("pf_route6: invalid parameters");
7843
7844	if (pd->pf_mtag->pftag_routed++ > 3) {
7845		m0 = *m;
7846		*m = NULL;
7847		goto bad;
7848	}
7849
7850	if (r->rt == PF_DUPTO) {
7851		if ((m0 = m_copym(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
7852			return;
7853	} else {
7854		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
7855			return;
7856		m0 = *m;
7857	}
7858
7859	if (m0->m_len < (int)sizeof (struct ip6_hdr)) {
7860		DPFPRINTF(PF_DEBUG_URGENT,
7861		    ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
7862		goto bad;
7863	}
7864	ip6 = mtod(m0, struct ip6_hdr *);
7865
7866	ro = &ip6route;
7867	bzero((caddr_t)ro, sizeof (*ro));
7868	dst = (struct sockaddr_in6 *)&ro->ro_dst;
7869	dst->sin6_family = AF_INET6;
7870	dst->sin6_len = sizeof (*dst);
7871	dst->sin6_addr = ip6->ip6_dst;
7872
7873	/* Cheat. XXX why only in the v6 case??? */
7874	if (r->rt == PF_FASTROUTE) {
7875		struct pf_mtag *pf_mtag;
7876
7877		if ((pf_mtag = pf_get_mtag(m0)) == NULL)
7878			goto bad;
7879		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
7880		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
7881		return;
7882	}
7883
7884	if (TAILQ_EMPTY(&r->rpool.list)) {
7885		DPFPRINTF(PF_DEBUG_URGENT,
7886		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
7887		goto bad;
7888	}
7889	if (s == NULL) {
7890		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
7891		    &naddr, NULL, &sn);
7892		if (!PF_AZERO(&naddr, AF_INET6))
7893			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
7894			    &naddr, AF_INET6);
7895		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
7896	} else {
7897		if (!PF_AZERO(&s->rt_addr, AF_INET6))
7898			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
7899			    &s->rt_addr, AF_INET6);
7900		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
7901	}
7902	if (ifp == NULL)
7903		goto bad;
7904
7905	if (oifp != ifp) {
7906		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS)
7907			goto bad;
7908		else if (m0 == NULL)
7909			goto done;
7910		if (m0->m_len < (int)sizeof (struct ip6_hdr)) {
7911			DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
7912			    "< sizeof (struct ip6_hdr)\n"));
7913			goto bad;
7914		}
7915		ip6 = mtod(m0, struct ip6_hdr *);
7916	}
7917
7918	/*
7919	 * If the packet is too large for the outgoing interface,
7920	 * send back an icmp6 error.
7921	 */
7922	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
7923		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
7924	if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
7925		error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
7926	} else {
7927		in6_ifstat_inc(ifp, ifs6_in_toobig);
7928		if (r->rt != PF_DUPTO)
7929			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
7930		else
7931			goto bad;
7932	}
7933
7934done:
7935	if (r->rt != PF_DUPTO)
7936		*m = NULL;
7937	return;
7938
7939bad:
7940	m_freem(m0);
7941	goto done;
7942}
7943#endif /* INET6 */
7944
7945
7946/*
7947 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
7948 *   off is the offset where the protocol header starts
7949 *   len is the total length of protocol header plus payload
7950 * returns 0 when the checksum is valid, otherwise returns 1.
7951 */
static int
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
    sa_family_t af)
{
	u_int16_t sum;

	switch (p) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/*
		 * Optimize for the common case; if the hardware calculated
		 * value doesn't include pseudo-header checksum, or if it
		 * is partially-computed (only 16-bit summation), do it in
		 * software below.
		 */
		if (apple_hwcksum_rx && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) &&
		    (m->m_pkthdr.csum_data ^ 0xffff) == 0) {
			/* hardware already verified a full checksum */
			return (0);
		}
		break;
	case IPPROTO_ICMP:
#if INET6
	case IPPROTO_ICMPV6:
#endif /* INET6 */
		break;
	default:
		/* unknown protocol: report bad checksum */
		return (1);
	}
	/* sanity: header must start past the IP header and cover at least
	 * a UDP header, and the packet must actually contain it */
	if (off < (int)sizeof (struct ip) || len < (int)sizeof (struct udphdr))
		return (1);
	if (m->m_pkthdr.len < off + len)
		return (1);
	switch (af) {
#if INET
	case AF_INET:
		if (p == IPPROTO_ICMP) {
			/*
			 * ICMPv4 has no pseudo-header: temporarily advance
			 * the mbuf data pointer to the ICMP header, sum the
			 * payload, then restore the mbuf.
			 */
			if (m->m_len < off)
				return (1);
			m->m_data += off;
			m->m_len -= off;
			sum = in_cksum(m, len);
			m->m_data -= off;
			m->m_len += off;
		} else {
			/* TCP/UDP include the IPv4 pseudo-header */
			if (m->m_len < (int)sizeof (struct ip))
				return (1);
			sum = inet_cksum(m, p, off, len);
		}
		break;
#endif /* INET */
#if INET6
	case AF_INET6:
		/* IPv6 checksums (incl. ICMPv6) use the v6 pseudo-header */
		if (m->m_len < (int)sizeof (struct ip6_hdr))
			return (1);
		sum = inet6_cksum(m, p, off, len);
		break;
#endif /* INET6 */
	default:
		return (1);
	}
	if (sum) {
		/* bad checksum: bump the per-protocol error counter */
		switch (p) {
		case IPPROTO_TCP:
			tcpstat.tcps_rcvbadsum++;
			break;
		case IPPROTO_UDP:
			udpstat.udps_badsum++;
			break;
		case IPPROTO_ICMP:
			icmpstat.icps_checksum++;
			break;
#if INET6
		case IPPROTO_ICMPV6:
			icmp6stat.icp6s_checksum++;
			break;
#endif /* INET6 */
		}
		return (1);
	}
	return (0);
}
8034
8035#if INET
/*
 * The pf_normalize/pf_test_state_* helpers may replace the working mbuf
 * (tracked in pd.mp).  This macro resynchronizes pf_test()'s local
 * m/h/pf_mtag variables with pd.mp so subsequent code never touches a
 * stale mbuf or IP header pointer.
 */
#define PF_APPLE_UPDATE_PDESC_IPv4()				\
	do {							\
		if (m && pd.mp && m != pd.mp) {			\
			m = pd.mp;				\
			h = mtod(m, struct ip *);		\
			pd.pf_mtag = pf_get_mtag(m);		\
		}						\
	} while (0)
8044
/*
 * Main IPv4 entry point for pf.  Runs the packet in *m0 (arriving or
 * leaving on ifp, direction dir) through IP normalization/reassembly,
 * dummynet (if built in), the per-protocol state machines and the rule
 * set; then applies logging, tagging, statistics and optional
 * route-to/reply-to/dup-to rerouting.  Returns the pf verdict
 * (PF_PASS, PF_DROP, ...); may replace or free *m0 along the way.
 * Must be called with the pf lock held.
 */
int
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif		*kif;
	u_short			 action = PF_PASS, reason = 0, log = 0;
	struct mbuf		*m = *m0;
	struct ip		*h = 0;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_state_key	*sk = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx, pqid = 0;

	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	/* pf disabled: pass everything */
	if (!pf_status.running)
		return (PF_PASS);

	memset(&pd, 0, sizeof (pd));

	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: pf_get_mtag returned NULL\n"));
		return (PF_DROP);
	}

	/* packets generated by pf itself are never re-filtered */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED)
		return (PF_PASS);

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
		return (PF_DROP);
	}
	/* interface marked "set skip": pass without filtering */
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	/* initialize enough of pd for the done label */
	h = mtod(m, struct ip *);
	pd.mp = m;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag(m);
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.proto_variant = 0;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;

	/* packet shorter than a minimal IP header: drop */
	if (m->m_pkthdr.len < (int)sizeof (*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

#if DUMMYNET
	/* packet re-injected by dummynet: normalization already done */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL)
		goto nonormalize;
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip(m0, dir, kif, &reason, &pd);
	pd.mp = m = *m0;
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip *);

	/* validate the header length field */
	off = h->ip_hl << 2;
	if (off < (int)sizeof (*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* re-derive pd from the (possibly reassembled) packet */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.proto_variant = 0;
	pd.mp = m;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag(m);
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.sc = MBUF_SCIDX(mbuf_get_service_class(m));
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;
	if (pd.pf_mtag != NULL && pd.pf_mtag->pftag_flowhash != 0) {
		pd.flowhash = pd.pf_mtag->pftag_flowhash;
		pd.flags |= (m->m_pkthdr.m_fhflags & PF_TAG_FLOWADV) ?
		    PFDESC_FLOW_ADV : 0;
	}

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
		pd.flags |= PFDESC_IP_FRAG;
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	/*
	 * Per-protocol dispatch.  Each case: pull the protocol header,
	 * pass through dummynet (if built), try to match an existing
	 * state, and fall back to a full rule-set evaluation when no
	 * state matched.
	 */
	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;
		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof (th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* pure ACKs are queued with priority (ALTQ pqid) */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof (uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* sanity-check the UDP length field against the packet */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof (struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp	ih;

		pd.hdr.icmp = &ih;
		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr	esp;

		pd.hdr.esp = &esp;
		if (!pf_pull_hdr(m, off, &esp, sizeof (esp), &action, &reason,
		    AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr	grev1;
		pd.hdr.grev1 = &grev1;
		if (!pf_pull_hdr(m, off, &grev1, sizeof (grev1), &action,
		    &reason, AF_INET)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		/* GREv1 carrying PPP (PPTP) gets its own state machine */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    m->m_pkthdr.len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			pd.proto_variant = PF_GRE_PPTP_VARIANT;
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0) goto done;
			PF_APPLE_UPDATE_PDESC_IPv4();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, m, off,
				    h, &pd, &a, &ruleset, &ipintrq);
				if (action == PF_PASS)
					break;
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
	}
	/* FALLTHRU to default for non-PPTP GRE */

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, &ipintrq);
		break;
	}

done:
	*m0 = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv4();

	/* IP options are only allowed when the rule/state says so */
	if (action == PF_PASS && h->ip_hl > 5 &&
	    !((s && s->allow_opts) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with ip options [hlen=%u]\n",
		    (unsigned int) h->ip_hl));
	}

	/* propagate tag/rtable/flowhash info onto the mbuf */
	if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
	    pd.flowhash != 0)
		(void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0,
		    r->rtableid, &pd);

	if (action == PF_PASS) {
#if PF_ALTQ
		if (altq_allowed && r->qid) {
			/* pure ACKs / low-delay traffic use the priority queue */
			if (pqid || (pd.tos & IPTOS_LOWDELAY))
				pd.pf_mtag->pftag_qid = r->pqid;
			else
				pd.pf_mtag->pftag_qid = r->qid;
		}
#endif /* PF_ALTQ */
		/* add hints for ecn */
		pd.pf_mtag->pftag_hdr = h;
		/* record address family */
		pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
		pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
		/* record TCP vs. non-TCP */
		if (pd.proto == IPPROTO_TCP)
			pd.pf_mtag->pftag_flags |= PF_TAG_TCP;
		else
			pd.pf_mtag->pftag_flags &= ~PF_TAG_TCP;
	}

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
		pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;

	if (log) {
		/* prefer the NAT rule for logging if it logs everything */
		struct pf_rule *lr;

		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* per-interface counters */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	if (action == PF_PASS || r->action == PF_DROP) {
		/* per-rule / per-anchor / per-state / per-src-node counters */
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (sk == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else
				x = (sk == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			if (x == &pd.baddr || s == NULL) {
				/* we need to change the address */
				if (dir == PF_OUT)
					pd.src = x;
				else
					pd.dst = x;
			}
		}
		/* update table statistics for table-based rule addresses */
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ?
			    pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
	}

	VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);

	if (*m0) {
		if (pd.lmw < 0) {
			/* an mbuf operation failed along the way */
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		if (action == PF_DROP) {
			m_freem(*m0);
			*m0 = NULL;
			return (PF_DROP);
		}

		*m0 = m;
	}

	if (action == PF_SYNPROXY_DROP) {
		/* synproxy consumed the packet; report PASS upstream */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt)
		/* pf_route can free the mbuf causing *m0 to become NULL */
		pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);

	return (action);
}
8566#endif /* INET */
8567
8568#if INET6
/*
 * IPv6 counterpart of PF_APPLE_UPDATE_PDESC_IPv4(): resynchronize
 * pf_test6()'s local m/h pointers (and the saved "n" mbuf, when it
 * aliased m) with pd.mp after a helper may have replaced the working
 * mbuf.
 */
#define PF_APPLE_UPDATE_PDESC_IPv6()				\
	do {							\
		if (m && pd.mp && m != pd.mp) {			\
			if (n == m)				\
				n = pd.mp;			\
			m = pd.mp;				\
			h = mtod(m, struct ip6_hdr *);		\
		}						\
	} while (0)
8578
/*
 * IPv6 entry point of the packet filter.  Inspects the packet *m0 seen on
 * interface `ifp` in direction `dir` (PF_IN/PF_OUT): runs normalization/
 * reassembly, walks the extension-header chain, performs per-protocol
 * state lookup and, failing that, rule evaluation, then updates counters,
 * tags and (optionally) logs.  Returns PF_PASS or PF_DROP; *m0 may be
 * replaced, or freed and set to NULL, as a side effect.  `eh` is the
 * optional Ethernet header used for matching; `fwa` carries dummynet
 * re-entry state.  Must be called with pf_lock held.
 */
int
pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif		*kif;
	u_short			 action = PF_PASS, reason = 0, log = 0;
	struct mbuf		*m = *m0, *n = NULL;
	struct ip6_hdr		*h;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_state_key	*sk = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, terminal = 0, dirndx, rh_cnt = 0;
	u_int8_t		 nxt;

	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);

	/* Filter disabled: pass everything untouched. */
	if (!pf_status.running)
		return (PF_PASS);

	memset(&pd, 0, sizeof (pd));

	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: pf_get_mtag returned NULL\n"));
		return (PF_DROP);
	}

	/* Never re-filter packets pf itself generated (e.g. synproxy RSTs). */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED)
		return (PF_PASS);

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
		return (PF_DROP);
	}
	/* Interface marked "set skip": pass unfiltered. */
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test6");
#endif /* DIAGNOSTIC */

	h = mtod(m, struct ip6_hdr *);

	/* Initialize the packet descriptor from the IPv6 header. */
	nxt = h->ip6_nxt;
	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
	pd.mp = m;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag(m);
	pd.src = (struct pf_addr *)&h->ip6_src;
	pd.dst = (struct pf_addr *)&h->ip6_dst;
	/* Remember the pre-translation address for table stats (see done:). */
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.proto = nxt;
	pd.proto_variant = 0;
	pd.tos = 0;
	pd.sc = MBUF_SCIDX(mbuf_get_service_class(m));
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	/* Propagate an existing flow hash / flow-advisory flag. */
	if (pd.pf_mtag->pftag_flowhash != 0) {
		pd.flowhash = pd.pf_mtag->pftag_flowhash;
		pd.flags |= (m->m_pkthdr.m_fhflags & PF_TAG_FLOWADV) ?
		    PFDESC_FLOW_ADV : 0;
	}

	/* Runt packet: shorter than a basic IPv6 header. */
	if (m->m_pkthdr.len < (int)sizeof (*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

#if DUMMYNET
	/* Re-entry from dummynet: normalization already ran the first time. */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL)
		goto nonormalize;
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip6(m0, dir, kif, &reason, &pd);
	pd.mp = m = *m0;
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	/* Normalization may have replaced the mbuf; refetch the header. */
	h = mtod(m, struct ip6_hdr *);

#if 1
	/*
	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
		goto done;
	}
#endif

	/* Re-initialize descriptor fields against the (possibly new) mbuf. */
	pd.src = (struct pf_addr *)&h->ip6_src;
	pd.dst = (struct pf_addr *)&h->ip6_dst;
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof (struct ip6_hdr);
	pd.eh = eh;

	off = ((caddr_t)h - m->m_data) + sizeof (struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	pd.proto_variant = 0;
	pd.mp = m;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag(m);

	/*
	 * Walk the extension-header chain to locate the upper-layer
	 * protocol, advancing `off` past each option header and counting
	 * routing headers in rh_cnt (checked below as "dangerous").
	 */
	do {
		switch (nxt) {
		case IPPROTO_FRAGMENT: {
			struct ip6_frag ip6f;

			pd.flags |= PFDESC_IP_FRAG;
			if (!pf_pull_hdr(m, off, &ip6f, sizeof ip6f, NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short fragment header\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			pd.proto = nxt = ip6f.ip6f_nxt;
#if DUMMYNET
			/* Traffic goes through dummynet first */
			action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
			if (action == PF_DROP || m == NULL) {
				*m0 = NULL;
				return (action);
			}
#endif /* DUMMYNET */
			/* Fragments get their own rule pass; no state. */
			action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a,
			    &ruleset);
			if (action == PF_DROP) {
				REASON_SET(&reason, PFRES_FRAG);
				log = 1;
			}
			goto done;
		}
		case IPPROTO_ROUTING:
			++rh_cnt;
			/* FALL THROUGH */

		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext	opt6;

			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/* AH length is in 4-byte units, others in 8-byte. */
			if (pd.proto == IPPROTO_AH)
				off += (opt6.ip6e_len + 2) * 4;
			else
				off += (opt6.ip6e_len + 1) * 8;
			nxt = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);

	/* if there's no routing header, use unmodified mbuf for checksumming */
	if (!n)
		n = m;

	/* Dispatch on the final upper-layer protocol. */
	switch (pd.proto) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof (th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			/* State matched: adopt its rule/anchor/log flags. */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof (uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		/* Sanity-check the UDP length against the actual payload. */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof (struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
		break;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr	ih;

		pd.hdr.icmp6 = &ih;
		if (!pf_pull_hdr(m, off, &ih, sizeof (ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif,
		    m, off, h, &pd, &reason);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr	esp;

		pd.hdr.esp = &esp;
		if (!pf_pull_hdr(m, off, &esp, sizeof (esp), &action, &reason,
		    AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr	grev1;

		pd.hdr.grev1 = &grev1;
		if (!pf_pull_hdr(m, off, &grev1, sizeof (grev1), &action,
		    &reason, AF_INET6)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		/* GREv1 carrying PPP (PPTP data channel) gets state. */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    m->m_pkthdr.len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0)
				goto done;
			PF_APPLE_UPDATE_PDESC_IPv6();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, m, off,
				    h, &pd, &a, &ruleset, &ip6intrq);
				if (action == PF_PASS)
					break;
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
	}

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;
			return (action);
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0)
			goto done;
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, &ip6intrq);
		break;
	}

done:
	/* The helpers may have replaced the mbuf; publish it and resync. */
	*m0 = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv6();

	/* Release the checksumming mbuf if it diverged from the working one. */
	if (n != m) {
		m_freem(n);
		n = NULL;
	}

	/* handle dangerous IPv6 extension headers. */
	if (action == PF_PASS && rh_cnt &&
	    !((s && s->allow_opts) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with dangerous v6 headers\n"));
	}

	/* Attach tag/rtable/flow-hash metadata for later consumers. */
	if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) || pd.flowhash != 0)
		(void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0,
		    r->rtableid, &pd);

	if (action == PF_PASS) {
#if PF_ALTQ
		if (altq_allowed && r->qid) {
			if (pd.tos & IPTOS_LOWDELAY)
				pd.pf_mtag->pftag_qid = r->pqid;
			else
				pd.pf_mtag->pftag_qid = r->qid;
		}
#endif /* PF_ALTQ */
		/* add hints for ecn */
		pd.pf_mtag->pftag_hdr = h;
		/* record address family */
		pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
		pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
		/* record TCP vs. non-TCP */
		if (pd.proto == IPPROTO_TCP)
			pd.pf_mtag->pftag_flags |= PF_TAG_TCP;
		else
			pd.pf_mtag->pftag_flags &= ~PF_TAG_TCP;
	}

	/* Mark inbound TCP/UDP that was RDR/BINAT-translated to loopback. */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
		pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;

	if (log) {
		struct pf_rule *lr;

		/* Prefer the NAT rule for logging when it logs all packets. */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* Per-interface counters; index 1 selects the IPv6 bucket. */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	/* Rule / state / source-node / table accounting. */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else {
				x = (s == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			}
			if (x == &pd.baddr || s == NULL) {
				if (dir == PF_OUT)
					pd.src = x;
				else
					pd.dst = x;
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
	}

#if 0
	if (action == PF_SYNPROXY_DROP) {
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt)
		/* pf_route6 can free the mbuf causing *m0 to become NULL */
		pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
#else
	VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);

	if (*m0) {
		/* A failed copy-on-write inside a helper forces a drop. */
		if (pd.lmw < 0) {
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		if (action == PF_DROP) {
			m_freem(*m0);
			*m0 = NULL;
			return (PF_DROP);
		}

		*m0 = m;
	}

	if (action == PF_SYNPROXY_DROP) {
		/* Synproxy consumed the packet; report it as passed. */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		if (action == PF_PASS) {
			m = *m0;
			h = mtod(m, struct ip6_hdr *);
		}

		/* pf_route6 can free the mbuf causing *m0 to become NULL */
		pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
	}
#endif /* 0 */

	return (action);
}
9175#endif /* INET6 */
9176
/*
 * Interface-queue congestion probe.  Not implemented on this platform;
 * always reports "not congested".
 */
static int
pf_check_congestion(struct ifqueue *ifq)
{
	(void) ifq;
	return (0);
}
9183
9184void
9185pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
9186    int flags, const char *wchan, void *palloc)
9187{
9188#pragma unused(align, ioff, flags, palloc)
9189	bzero(pp, sizeof (*pp));
9190	pp->pool_zone = zinit(size, 1024 * size, PAGE_SIZE, wchan);
9191	if (pp->pool_zone != NULL) {
9192		zone_change(pp->pool_zone, Z_EXPAND, TRUE);
9193		zone_change(pp->pool_zone, Z_CALLERACCT, FALSE);
9194		pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
9195		pp->pool_name = wchan;
9196	}
9197}
9198
/*
 * Tear down a pool.  Kernel zones cannot currently be destroyed, so
 * there is nothing to release here.
 */
void
pool_destroy(struct pool *pp)
{
	(void) pp;
}
9205
9206void
9207pool_sethiwat(struct pool *pp, int n)
9208{
9209	pp->pool_hiwat = n;	/* Currently unused */
9210}
9211
9212void
9213pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
9214{
9215#pragma unused(warnmess, ratecap)
9216	pp->pool_limit = n;
9217}
9218
9219void *
9220pool_get(struct pool *pp, int flags)
9221{
9222	void *buf;
9223
9224	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
9225
9226	if (pp->pool_count > pp->pool_limit) {
9227		DPFPRINTF(PF_DEBUG_NOISY,
9228		    ("pf: pool %s hard limit reached (%d)\n",
9229		    pp->pool_name != NULL ? pp->pool_name : "unknown",
9230		    pp->pool_limit));
9231		pp->pool_fails++;
9232		return (NULL);
9233	}
9234
9235	buf = zalloc_canblock(pp->pool_zone, (flags & (PR_NOWAIT | PR_WAITOK)));
9236	if (buf != NULL) {
9237		pp->pool_count++;
9238		VERIFY(pp->pool_count != 0);
9239	}
9240	return (buf);
9241}
9242
9243void
9244pool_put(struct pool *pp, void *v)
9245{
9246	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
9247
9248	zfree(pp->pool_zone, v);
9249	VERIFY(pp->pool_count != 0);
9250	pp->pool_count--;
9251}
9252
9253struct pf_mtag *
9254pf_find_mtag(struct mbuf *m)
9255{
9256	if (!(m->m_flags & M_PKTHDR))
9257		return (NULL);
9258
9259	return (m_pftag(m));
9260}
9261
9262struct pf_mtag *
9263pf_get_mtag(struct mbuf *m)
9264{
9265	return (pf_find_mtag(m));
9266}
9267
9268uint64_t
9269pf_time_second(void)
9270{
9271	struct timeval t;
9272
9273	microuptime(&t);
9274	return (t.tv_sec);
9275}
9276
9277uint64_t
9278pf_calendar_time_second(void)
9279{
9280	struct timeval t;
9281
9282	microtime(&t);
9283	return (t.tv_sec);
9284}
9285
9286static void *
9287hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
9288{
9289	struct hook_desc *hd;
9290
9291	hd = _MALLOC(sizeof(*hd), M_DEVBUF, M_WAITOK);
9292	if (hd == NULL)
9293		return (NULL);
9294
9295	hd->hd_fn = fn;
9296	hd->hd_arg = arg;
9297	if (tail)
9298		TAILQ_INSERT_TAIL(head, hd, hd_list);
9299	else
9300		TAILQ_INSERT_HEAD(head, hd, hd_list);
9301
9302	return (hd);
9303}
9304
9305static void
9306hook_runloop(struct hook_desc_head *head, int flags)
9307{
9308	struct hook_desc *hd;
9309
9310	if (!(flags & HOOK_REMOVE)) {
9311		if (!(flags & HOOK_ABORT))
9312			TAILQ_FOREACH(hd, head, hd_list)
9313				hd->hd_fn(hd->hd_arg);
9314	} else {
9315		while (!!(hd = TAILQ_FIRST(head))) {
9316			TAILQ_REMOVE(head, hd, hd_list);
9317			if (!(flags & HOOK_ABORT))
9318				hd->hd_fn(hd->hd_arg);
9319			if (flags & HOOK_FREE)
9320				_FREE(hd, M_DEVBUF);
9321		}
9322	}
9323}
9324