/*-
 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/netinet/ip_fw.h 223666 2011-06-29 10:06:58Z ae $
 */

#ifndef _IPFW2_H
#define _IPFW2_H

/*
 * The default rule number.  By the design of ip_fw, the default rule
 * is the last one, so its number can also serve as the highest number
 * allowed for a rule.  The ip_fw code relies on both meanings of this
 * constant.
 */
#define	IPFW_DEFAULT_RULE	65535

/*
 * The number of ipfw tables.  The maximum allowed table number is
 * (IPFW_TABLES_MAX - 1).
 */
#define	IPFW_TABLES_MAX		128

/*
 * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit
 * argument between 1 and 65534. The value 0 is unused, the value
 * 65535 (IP_FW_TABLEARG) is used to represent 'tablearg', i.e. the
 * result of the most recent table() lookup.
 * Note that 16bit is only a historical limit, resulting from
 * the use of a 16-bit field for that value. In reality, we can have
 * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg.
 */
#define	IPFW_ARG_MIN		1
#define	IPFW_ARG_MAX		65534
#define IP_FW_TABLEARG		65535	/* XXX should use 0 */

/*
 * Number of entries in the call stack of the call/return commands.
 * The call stack is currently a uint16_t array holding rule numbers.
 */
#define	IPFW_CALLSTACK_SIZE	16

/*
 * The kernel representation of ipfw rules is made of a list of
 * 'instructions' (for all practical purposes equivalent to BPF
 * instructions), which specify which fields of the packet
 * (or its metadata) should be analysed.
 *
 * Each instruction is stored in a structure which begins with
 * "ipfw_insn", and can contain extra fields depending on the
 * instruction type (listed below).
 * Note that the code is written so that individual instructions
 * have a size which is a multiple of 32 bits. This means that, if
 * such structures contain pointers or other 64-bit entities,
 * (there is just one instance now) they may end up unaligned on
 * 64-bit architectures, so they must be handled with care.
 *
 * "enum ipfw_opcodes" are the opcodes supported. We can have up
 * to 256 different opcodes. When adding new opcodes, they should
 * be appended to the end of the opcode list before O_LAST_OPCODE,
 * this will prevent the ABI from being broken, otherwise users
 * will have to recompile ipfw(8) when they update the kernel.
 */

enum ipfw_opcodes {		/* arguments (4 byte each)	*/
	O_NOP,

	O_IP_SRC,		/* u32 = IP			*/
	O_IP_SRC_MASK,		/* ip = IP/mask			*/
	O_IP_SRC_ME,		/* none				*/
	O_IP_SRC_SET,		/* u32=base, arg1=len, bitmap	*/

	O_IP_DST,		/* u32 = IP			*/
	O_IP_DST_MASK,		/* ip = IP/mask			*/
	O_IP_DST_ME,		/* none				*/
	O_IP_DST_SET,		/* u32=base, arg1=len, bitmap	*/

	O_IP_SRCPORT,		/* (n)port list:mask 4 byte ea	*/
	O_IP_DSTPORT,		/* (n)port list:mask 4 byte ea	*/
	O_PROTO,		/* arg1=protocol		*/

	O_MACADDR2,		/* 2 mac addr:mask		*/
	O_MAC_TYPE,		/* same as srcport		*/

	O_LAYER2,		/* none				*/
	O_IN,			/* none				*/
	O_FRAG,			/* none				*/

	O_RECV,			/* none				*/
	O_XMIT,			/* none				*/
	O_VIA,			/* none				*/

	O_IPOPT,		/* arg1 = 2*u8 bitmap		*/
	O_IPLEN,		/* arg1 = len			*/
	O_IPID,			/* arg1 = id			*/

	O_IPTOS,		/* arg1 = id			*/
	O_IPPRECEDENCE,		/* arg1 = precedence << 5	*/
	O_IPTTL,		/* arg1 = TTL			*/

	O_IPVER,		/* arg1 = version		*/
	O_UID,			/* u32 = id			*/
	O_GID,			/* u32 = id			*/
	O_ESTAB,		/* none (tcp established)	*/
	O_TCPFLAGS,		/* arg1 = 2*u8 bitmap		*/
	O_TCPWIN,		/* arg1 = desired win		*/
	O_TCPSEQ,		/* u32 = desired seq.		*/
	O_TCPACK,		/* u32 = desired seq.		*/
	O_ICMPTYPE,		/* u32 = icmp bitmap		*/
	O_TCPOPTS,		/* arg1 = 2*u8 bitmap		*/

	O_VERREVPATH,		/* none				*/
	O_VERSRCREACH,		/* none				*/

	O_PROBE_STATE,		/* none				*/
	O_KEEP_STATE,		/* none				*/
	O_LIMIT,		/* ipfw_insn_limit		*/
	O_LIMIT_PARENT,		/* dyn_type, not an opcode.	*/

	/*
	 * These are really 'actions'.
	 */

	O_LOG,			/* ipfw_insn_log		*/
	O_PROB,			/* u32 = match probability	*/

	O_CHECK_STATE,		/* none				*/
	O_ACCEPT,		/* none				*/
	O_DENY,			/* none 			*/
	O_REJECT,		/* arg1=icmp arg (same as deny)	*/
	O_COUNT,		/* none				*/
	O_SKIPTO,		/* arg1=next rule number	*/
	O_PIPE,			/* arg1=pipe number		*/
	O_QUEUE,		/* arg1=queue number		*/
	O_DIVERT,		/* arg1=port number		*/
	O_TEE,			/* arg1=port number		*/
	O_FORWARD_IP,		/* fwd sockaddr			*/
	O_FORWARD_MAC,		/* fwd mac			*/
	O_NAT,                  /* nope                         */
	O_REASS,                /* none                         */

	/*
	 * More opcodes.
	 */
	O_IPSEC,		/* has ipsec history 		*/
	O_IP_SRC_LOOKUP,	/* arg1=table number, u32=value	*/
	O_IP_DST_LOOKUP,	/* arg1=table number, u32=value	*/
	O_ANTISPOOF,		/* none				*/
	O_JAIL,			/* u32 = id			*/
	O_ALTQ,			/* u32 = altq classif. qid	*/
	O_DIVERTED,		/* arg1=bitmap (1:loop, 2:out)	*/
	O_TCPDATALEN,		/* arg1 = tcp data len		*/
	O_IP6_SRC,		/* address without mask		*/
	O_IP6_SRC_ME,		/* my addresses			*/
	O_IP6_SRC_MASK,		/* address with the mask	*/
	O_IP6_DST,
	O_IP6_DST_ME,
	O_IP6_DST_MASK,
	O_FLOW6ID,		/* for flow id tag in the ipv6 pkt */
	O_ICMP6TYPE,		/* icmp6 packet type filtering	*/
	O_EXT_HDR,		/* filtering for ipv6 extension header */
	O_IP6,

	/*
	 * actions for ng_ipfw
	 */
	O_NETGRAPH,		/* send to ng_ipfw		*/
	O_NGTEE,		/* copy to ng_ipfw		*/

	O_IP4,

	O_UNREACH6,		/* arg1=icmpv6 code arg (deny)  */

	O_TAG,   		/* arg1=tag number */
	O_TAGGED,		/* arg1=tag number */

	O_SETFIB,		/* arg1=FIB number */
	O_FIB,			/* arg1=FIB desired fib number */

	O_SOCKARG,		/* socket argument */

	O_CALLRETURN,		/* arg1=called rule number */

	O_LAST_OPCODE		/* not an opcode!		*/
};


/*
 * The extension headers are filtered only for presence, using a bit
 * vector with one flag per header.
 */
#define EXT_FRAGMENT	0x1
#define EXT_HOPOPTS	0x2
#define EXT_ROUTING	0x4
#define EXT_AH		0x8
#define EXT_ESP		0x10
#define EXT_DSTOPTS	0x20
#define EXT_RTHDR0		0x40
#define EXT_RTHDR2		0x80

/*
 * Template for instructions.
 *
 * ipfw_insn is used for all instructions which require no operands,
 * a single 16-bit value (arg1), or a couple of 8-bit values.
 *
 * For other instructions which require different/larger arguments
 * we have derived structures, ipfw_insn_*.
 *
 * The size of the instruction (in 32-bit words) is in the low
 * 6 bits of "len". The 2 remaining bits are used to implement
 * NOT and OR on individual instructions. Given a type, you can
 * compute the length to be put in "len" using F_INSN_SIZE(t)
 *
 * F_NOT	negates the match result of the instruction.
 *
 * F_OR		is used to build or blocks. By default, instructions
 *		are evaluated as part of a logical AND. An "or" block
 *		{ X or Y or Z } contains F_OR set in all but the last
 *		instruction of the block. A match will cause the code
 *		to skip past the last instruction of the block.
 *
 * F_LEN(cmd)	extracts the length (low 6 bits of "len") from a
 *		pointer to an instruction.
 *
 * NOTA BENE: in a couple of places we assume that
 *	sizeof(ipfw_insn) == sizeof(u_int32_t)
 * this needs to be fixed.
 *
 */
typedef struct	_ipfw_insn {	/* template for instructions */
	u_int8_t 	opcode;
	u_int8_t	len;	/* number of 32-bit words */
#define	F_NOT		0x80
#define	F_OR		0x40
#define	F_LEN_MASK	0x3f
#define	F_LEN(cmd)	((cmd)->len & F_LEN_MASK)

	u_int16_t	arg1;
} ipfw_insn;

/*
 * F_INSN_SIZE(type) computes the size of a given type in 4-byte
 * (u_int32_t) words, using integer division.
 */
#define	F_INSN_SIZE(t)	((sizeof (t))/sizeof(u_int32_t))

267/*
268 * This is used to store an array of 16-bit entries (ports etc.)
269 */
270typedef struct	_ipfw_insn_u16 {
271	ipfw_insn o;
272	u_int16_t ports[2];	/* there may be more */
273} ipfw_insn_u16;
274
275/*
276 * This is used to store an array of 32-bit entries
277 * (uid, single IPv4 addresses etc.)
278 */
279typedef struct	_ipfw_insn_u32 {
280	ipfw_insn o;
281	u_int32_t d[1];	/* one or more */
282} ipfw_insn_u32;
283
284/*
285 * This is used to store IP addr-mask pairs.
286 */
287typedef struct	_ipfw_insn_ip {
288	ipfw_insn o;
289	struct in_addr	addr;
290	struct in_addr	mask;
291} ipfw_insn_ip;
292
293/*
294 * This is used to forward to a given address (ip).
295 */
296typedef struct  _ipfw_insn_sa {
297	ipfw_insn o;
298	struct sockaddr_in sa;
299} ipfw_insn_sa;
300
301/*
302 * This is used for MAC addr-mask pairs.
303 */
304typedef struct	_ipfw_insn_mac {
305	ipfw_insn o;
306	u_char addr[12];	/* dst[6] + src[6] */
307	u_char mask[12];	/* dst[6] + src[6] */
308} ipfw_insn_mac;
309
310/*
311 * This is used for interface match rules (recv xx, xmit xx).
312 */
313typedef struct	_ipfw_insn_if {
314	ipfw_insn o;
315	union {
316		struct in_addr ip;
317		int glob;
318	} p;
319	char name[IFNAMSIZ];
320} ipfw_insn_if;
321
322/*
323 * This is used for storing an altq queue id number.
324 */
325typedef struct _ipfw_insn_altq {
326	ipfw_insn	o;
327	u_int32_t	qid;
328} ipfw_insn_altq;
329
330/*
331 * This is used for limit rules.
332 */
333typedef struct	_ipfw_insn_limit {
334	ipfw_insn o;
335	u_int8_t _pad;
336	u_int8_t limit_mask;	/* combination of DYN_* below	*/
337#define	DYN_SRC_ADDR	0x1
338#define	DYN_SRC_PORT	0x2
339#define	DYN_DST_ADDR	0x4
340#define	DYN_DST_PORT	0x8
341
342	u_int16_t conn_limit;
343} ipfw_insn_limit;
344
345/*
346 * This is used for log instructions.
347 */
348typedef struct  _ipfw_insn_log {
349        ipfw_insn o;
350	u_int32_t max_log;	/* how many do we log -- 0 = all */
351	u_int32_t log_left;	/* how many left to log 	*/
352} ipfw_insn_log;
353
/*
 * Data structures required by both ipfw(8) and ipfw(4) but not part of the
 * management API are protected by IPFW_INTERNAL.
 */
#ifdef IPFW_INTERNAL
/* Server pool support (LSNAT). */
struct cfg_spool {
	LIST_ENTRY(cfg_spool)   _next;          /* chain of spool instances */
	struct in_addr          addr;
	u_short                 port;
};
#endif

/* Redirect mode ids. */
#define REDIR_ADDR      0x01
#define REDIR_PORT      0x02
#define REDIR_PROTO     0x04

#ifdef IPFW_INTERNAL
/* Nat redirect configuration. */
struct cfg_redir {
	LIST_ENTRY(cfg_redir)   _next;          /* chain of redir instances */
	u_int16_t               mode;           /* type of redirect mode */
	struct in_addr	        laddr;          /* local ip address */
	struct in_addr	        paddr;          /* public ip address */
	struct in_addr	        raddr;          /* remote ip address */
	u_short                 lport;          /* local port */
	u_short                 pport;          /* public port */
	u_short                 rport;          /* remote port  */
	u_short                 pport_cnt;      /* number of public ports */
	u_short                 rport_cnt;      /* number of remote ports */
	int                     proto;          /* protocol: tcp/udp */
	struct alias_link       **alink;
	/* number of entries in spool chain */
	u_int16_t               spool_cnt;
	/* chain of spool instances */
	LIST_HEAD(spool_chain, cfg_spool) spool_chain;
};
#endif

#ifdef IPFW_INTERNAL
/* Nat configuration data struct. */
struct cfg_nat {
	/* chain of nat instances */
	LIST_ENTRY(cfg_nat)     _next;
	int                     id;                     /* nat id */
	struct in_addr          ip;                     /* nat ip address */
	char                    if_name[IF_NAMESIZE];   /* interface name */
	int                     mode;                   /* aliasing mode */
	struct libalias	        *lib;                   /* libalias instance */
	/* number of entries in redir chain */
	int                     redir_cnt;
	/* chain of redir instances */
	LIST_HEAD(redir_chain, cfg_redir) redir_chain;
};
#endif

/*
 * Sizes of the NAT configuration structures.
 * NOTE(review): the structures themselves are only declared when
 * IPFW_INTERNAL is defined, so these macros are only usable there.
 */
#define SOF_NAT         sizeof(struct cfg_nat)
#define SOF_REDIR       sizeof(struct cfg_redir)
#define SOF_SPOOL       sizeof(struct cfg_spool)

415/* Nat command. */
416typedef struct	_ipfw_insn_nat {
417 	ipfw_insn	o;
418 	struct cfg_nat *nat;
419} ipfw_insn_nat;
420
/*
 * Apply an IPv6 mask to an IPv6 address in place: each of the four
 * 32-bit words of *addr is ANDed with the matching word of *mask.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement, e.g. as the body of an unbraced if or loop.
 * Both arguments are evaluated four times, so they must not have side
 * effects.  Invoke as a statement: APPLY_MASK(&a, &m);
 */
#define APPLY_MASK(addr,mask)	do {					\
    (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
    (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \
    (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \
    (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; \
} while (0)

428/* Structure for ipv6 */
429typedef struct _ipfw_insn_ip6 {
430       ipfw_insn o;
431       struct in6_addr addr6;
432       struct in6_addr mask6;
433} ipfw_insn_ip6;
434
435/* Used to support icmp6 types */
436typedef struct _ipfw_insn_icmp6 {
437       ipfw_insn o;
438       uint32_t d[7]; /* XXX This number si related to the netinet/icmp6.h
439                       *     define ICMP6_MAXTYPE
440                       *     as follows: n = ICMP6_MAXTYPE/32 + 1
441                        *     Actually is 203
442                       */
443} ipfw_insn_icmp6;
444
445/*
446 * Here we have the structure representing an ipfw rule.
447 *
448 * It starts with a general area (with link fields and counters)
449 * followed by an array of one or more instructions, which the code
450 * accesses as an array of 32-bit values.
451 *
452 * Given a rule pointer  r:
453 *
454 *  r->cmd		is the start of the first instruction.
455 *  ACTION_PTR(r)	is the start of the first action (things to do
456 *			once a rule matched).
457 *
458 * When assembling instruction, remember the following:
459 *
460 *  + if a rule has a "keep-state" (or "limit") option, then the
461 *	first instruction (at r->cmd) MUST BE an O_PROBE_STATE
462 *  + if a rule has a "log" option, then the first action
463 *	(at ACTION_PTR(r)) MUST be O_LOG
464 *  + if a rule has an "altq" option, it comes after "log"
465 *  + if a rule has an O_TAG option, it comes after "log" and "altq"
466 *
467 * NOTE: we use a simple linked list of rules because we never need
468 * 	to delete a rule without scanning the list. We do not use
469 *	queue(3) macros for portability and readability.
470 */
471
472struct ip_fw {
473	struct ip_fw	*x_next;	/* linked list of rules		*/
474	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
475	/* 'next_rule' is used to pass up 'set_disable' status		*/
476
477	uint16_t	act_ofs;	/* offset of action in 32-bit units */
478	uint16_t	cmd_len;	/* # of 32-bit words in cmd	*/
479	uint16_t	rulenum;	/* rule number			*/
480	uint8_t	set;		/* rule set (0..31)		*/
481#define	RESVD_SET	31	/* set for default and persistent rules */
482	uint8_t		_pad;		/* padding			*/
483	uint32_t	id;		/* rule id */
484
485	/* These fields are present in all rules.			*/
486	uint64_t	pcnt;		/* Packet counter		*/
487	uint64_t	bcnt;		/* Byte counter			*/
488	uint32_t	timestamp;	/* tv_sec of last match		*/
489
490	ipfw_insn	cmd[1];		/* storage for commands		*/
491};
492
/*
 * ACTION_PTR(rule) yields an ipfw_insn pointer to the first action of
 * a rule: (rule)->cmd advanced by (rule)->act_ofs 32-bit words.
 * The whole expansion is parenthesized so that postfix operators bind
 * to the resulting pointer, e.g. ACTION_PTR(r)->opcode.
 * The argument is evaluated twice, so it must not have side effects.
 */
#define ACTION_PTR(rule)				\
	((ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ))

/*
 * RULESIZE(rule) is the size in bytes of a rule: sizeof(struct ip_fw)
 * plus cmd_len 32-bit instruction words, minus the 4 bytes of the
 * cmd[1] placeholder already counted in sizeof(struct ip_fw).
 */
#define RULESIZE(rule)  (sizeof(struct ip_fw) + \
	((struct ip_fw *)(rule))->cmd_len * 4 - 4)

#if 1 // should be moved to in.h
/*
 * This structure is used as a flow mask and a flow id for various
 * parts of the code.
 * addr_type is used in userland and kernel to mark the address type.
 * fib is used in the kernel to record the fib in use.
 * _flags is used in the kernel to store tcp flags for dynamic rules.
 */
struct ipfw_flow_id {
	uint32_t	dst_ip;
	uint32_t	src_ip;
	uint16_t	dst_port;
	uint16_t	src_port;
	uint8_t		fib;
	uint8_t		proto;
	uint8_t		_flags;	/* protocol-specific flags */
	uint8_t		addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
	struct in6_addr dst_ip6;
	struct in6_addr src_ip6;
	uint32_t	flow_id6;
	uint32_t	extra; /* queue/pipe or frag_id */
};
#endif

/* True when the flow id pointed to by 'id' has addr_type 6 (ip6). */
#define IS_IP6_FLOW_ID(id)	((id)->addr_type == 6)

525/*
526 * Dynamic ipfw rule.
527 */
528typedef struct _ipfw_dyn_rule ipfw_dyn_rule;
529
530struct _ipfw_dyn_rule {
531	ipfw_dyn_rule	*next;		/* linked list of rules.	*/
532	struct ip_fw *rule;		/* pointer to rule		*/
533	/* 'rule' is used to pass up the rule number (from the parent)	*/
534
535	ipfw_dyn_rule *parent;		/* pointer to parent rule	*/
536	u_int64_t	pcnt;		/* packet match counter		*/
537	u_int64_t	bcnt;		/* byte match counter		*/
538	struct ipfw_flow_id id;		/* (masked) flow id		*/
539	u_int32_t	expire;		/* expire time			*/
540	u_int32_t	bucket;		/* which bucket in hash table	*/
541	u_int32_t	state;		/* state of this rule (typically a
542					 * combination of TCP flags)
543					 */
544	u_int32_t	ack_fwd;	/* most recent ACKs in forward	*/
545	u_int32_t	ack_rev;	/* and reverse directions (used	*/
546					/* to generate keepalives)	*/
547	u_int16_t	dyn_type;	/* rule type			*/
548	u_int16_t	count;		/* refcount			*/
549};
550
/*
 * Definitions for IP option names.
 */
#define	IP_FW_IPOPT_LSRR	0x01
#define	IP_FW_IPOPT_SSRR	0x02
#define	IP_FW_IPOPT_RR		0x04
#define	IP_FW_IPOPT_TS		0x08

/*
 * Definitions for TCP option names.
 */
#define	IP_FW_TCPOPT_MSS	0x01
#define	IP_FW_TCPOPT_WINDOW	0x02
#define	IP_FW_TCPOPT_SACK	0x04
#define	IP_FW_TCPOPT_TS		0x08
#define	IP_FW_TCPOPT_CC		0x10

#define	ICMP_REJECT_RST		0x100	/* fake ICMP code (send a TCP RST) */
#define	ICMP6_UNREACH_RST	0x100	/* fake ICMPv6 code (send a TCP RST) */

/*
 * These are used for lookup tables.
 */
typedef struct	_ipfw_table_entry {
	in_addr_t	addr;		/* network address		*/
	u_int32_t	value;		/* value			*/
	u_int16_t	tbl;		/* table number			*/
	u_int8_t	masklen;	/* mask length			*/
} ipfw_table_entry;

581typedef struct	_ipfw_table {
582	u_int32_t	size;		/* size of entries in bytes	*/
583	u_int32_t	cnt;		/* # of entries			*/
584	u_int16_t	tbl;		/* table number			*/
585	ipfw_table_entry ent[0];	/* entries			*/
586} ipfw_table;
587
#endif /* _IPFW2_H */