1/* Minor modifications to fit on compatibility framework:
2   Rusty.Russell@rustcorp.com.au
3*/
4
5/*
6 * This code is heavily based on the code on the old ip_fw.c code; see below for
7 * copyrights and attributions of the old code.  This code is basically GPL.
8 *
9 * 15-Aug-1997: Major changes to allow graphs for firewall rules.
10 *              Paul Russell <Paul.Russell@rustcorp.com.au> and
11 *		Michael Neuling <Michael.Neuling@rustcorp.com.au>
12 * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
13 *              Added explicit RETURN from chains.
14 *              Removed TOS mangling (done in ipchains 1.0.1).
15 *              Fixed read & reset bug by reworking proc handling.
16 *              Paul Russell <Paul.Russell@rustcorp.com.au>
17 * 28-Sep-1997: Added packet marking for net sched code.
18 *              Removed fw_via comparisons: all done on device name now,
19 *              similar to changes in ip_fw.c in DaveM's CVS970924 tree.
20 *              Paul Russell <Paul.Russell@rustcorp.com.au>
21 * 2-Nov-1997:  Moved types across to __u16, etc.
22 *              Added inverse flags.
23 *              Fixed fragment bug (in args to port_match).
24 *              Changed mark to only one flag (MARKABS).
25 * 21-Nov-1997: Added ability to test ICMP code.
26 * 19-Jan-1998: Added wildcard interfaces.
27 * 6-Feb-1998:  Merged 2.0 and 2.1 versions.
28 *              Initialised ip_masq for 2.0.x version.
29 *              Added explicit NETLINK option for 2.1.x version.
30 *              Added packet and byte counters for policy matches.
31 * 26-Feb-1998: Fixed race conditions, added SMP support.
32 * 18-Mar-1998: Fix SMP, fix race condition fix.
33 * 1-May-1998:  Remove caching of device pointer.
34 * 12-May-1998: Allow tiny fragment case for TCP/UDP.
35 * 15-May-1998: Treat short packets as fragments, don't just block.
36 * 3-Jan-1999:  Fixed serious procfs security hole -- users should never
37 *              be allowed to view the chains!
38 *              Marc Santoro <ultima@snicker.emoti.com>
39 * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash
40 *              during dump_packet. --RR.
41 * 19-May-1999: Star Wars: The Phantom Menace opened.  Rule num
42 *		printed in log (modified from Michael Hasenstein's patch).
43 *		Added SYN in log message. --RR
44 * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998.
45 *              John McDonald <jm@dataprotect.com>
46 *              Thomas Lopatic <tl@dataprotect.com>
47 */
48
49/*
50 *
51 * The origina Linux port was done Alan Cox, with changes/fixes from
52 * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
53 * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
54 *
55 * Copyright from the original FreeBSD version follows:
56 *
57 * Copyright (c) 1993 Daniel Boulet
58 * Copyright (c) 1994 Ugen J.S.Antsilevich
59 *
60 * Redistribution and use in source forms, with and without modification,
61 * are permitted provided that this entire comment appears intact.
62 *
63 * Redistribution in binary form may occur without any restrictions.
64 * Obviously, it would be nice if you gave credit where credit is due
65 * but requiring it would be too onerous.
66 *
67 * This software is provided ``AS IS'' without any warranties of any kind.  */
68
69#include <linux/config.h>
70
71#include <asm/uaccess.h>
72#include <asm/system.h>
73#include <linux/types.h>
74#include <linux/sched.h>
75#include <linux/string.h>
76#include <linux/errno.h>
77#include <linux/module.h>
78
79#include <linux/socket.h>
80#include <linux/sockios.h>
81#include <linux/in.h>
82#include <linux/inet.h>
83#include <linux/netdevice.h>
84#include <linux/icmp.h>
85#include <linux/udp.h>
86#include <net/ip.h>
87#include <net/protocol.h>
88#include <net/route.h>
89#include <net/tcp.h>
90#include <net/udp.h>
91#include <net/sock.h>
92#include <net/icmp.h>
93#include <linux/netlink.h>
94#include <linux/netfilter.h>
95#include <linux/netfilter_ipv4/compat_firewall.h>
96#include <linux/netfilter_ipv4/ipchains_core.h>
97
98#include <net/checksum.h>
99#include <linux/proc_fs.h>
100#include <linux/stat.h>
101
102/* Understanding locking in this code: (thanks to Alan Cox for using
103 * little words to explain this to me). -- PR
104 *
105 * In UP, there can be two packets traversing the chains:
106 * 1) A packet from the current userspace context
107 * 2) A packet off the bh handlers (timer or net).
108 *
109 * For SMP (kernel v2.1+), multiply this by # CPUs.
110 *
 * [Note that this is not correct for 2.2 - because the socket code always
112 *  uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
113 *  only run on one CPU at a time.  This will probably change for 2.3.
114 *  It is still good to use spinlocks because that avoids the global cli()
115 *  for updating the tables, which is rather costly in SMP kernels -AK]
116 *
117 * This means counters and backchains can get corrupted if no precautions
118 * are taken.
119 *
120 * To actually alter a chain on UP, we need only do a cli(), as this will
121 * stop a bh handler firing, as we are in the current userspace context
122 * (coming from a setsockopt()).
123 *
124 * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
125 * UP.
126 *
127 * For backchains and counters, we use an array, indexed by
128 * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of
129 * size [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
130 * confident of uniqueness, we modify counters even though we only
131 * have a read lock (to read the counters, you need a write lock,
132 * though).  */
133
134/* Why I didn't use straight locking... -- PR
135 *
136 * The backchains can be separated out of the ip_chains structure, and
137 * allocated as needed inside ip_fw_check().
138 *
139 * The counters, however, can't.  Trying to lock these means blocking
140 * interrupts every time we want to access them.  This would suck HARD
141 * performance-wise.  Not locking them leads to possible corruption,
142 * made worse on 32-bit machines (counters are 64-bit).  */
143
144/*#define DEBUG_IP_FIREWALL*/
145/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
146/*#define DEBUG_IP_FIREWALL_USER*/
147/*#define DEBUG_IP_FIREWALL_LOCKING*/
148
/* Netlink socket handed to netlink_broadcast() by ip_fw_domatch() for
 * rules flagged IP_FW_F_NETLINK.  Only declared when the netlink device
 * is configured in or built as a module. */
#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
static struct sock *ipfwsk;
#endif
152
/* SLOT_NUMBER(): index of the slot belonging to the currently running
 * context.  Each CPU gets two consecutive slots; the second of the pair
 * is selected when !in_interrupt() is true.  Without CONFIG_SMP only
 * slots 0 and 1 are ever produced. */
#ifdef CONFIG_SMP
#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
#else /* !SMP */
#define SLOT_NUMBER() (!in_interrupt())
#endif /* CONFIG_SMP */
/* Total number of slots: two per CPU. */
#define NUM_SLOTS (smp_num_cpus*2)

/* Allocation sizes for the two structures that end in a zero-length
 * array: the fixed part plus NUM_SLOTS trailing elements. */
#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
				+ NUM_SLOTS*sizeof(struct ip_reent))
#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \
				    + NUM_SLOTS*sizeof(struct ip_counters))
164
/* Lock debugging helpers.  With DEBUG_IP_FIREWALL_LOCKING defined, a bit
 * per slot (1 << SLOT_NUMBER()) is kept in fwc_rlocks / fwc_wlocks and
 * the macros below printk() a complaint when the recorded state is not
 * what the caller expects.  Without it they all expand to empty
 * statements. */
#ifdef DEBUG_IP_FIREWALL_LOCKING
static unsigned int fwc_rlocks, fwc_wlocks;
/* Record that the current slot now holds the lock tracked in `d';
 * complains first if the bit was already set.  `d' must be an lvalue. */
#define FWC_DEBUG_LOCK(d)			\
do {						\
	FWC_DONT_HAVE_LOCK(d);			\
	d |= (1 << SLOT_NUMBER());		\
} while (0)

/* Record that the current slot released the lock tracked in `d';
 * complains first if the bit was not set.  `d' must be an lvalue. */
#define FWC_DEBUG_UNLOCK(d)			\
do {						\
	FWC_HAVE_LOCK(d);			\
	d &= ~(1 << SLOT_NUMBER());		\
} while (0)

/* Complain if the current slot's bit is already set in `d'. */
#define FWC_DONT_HAVE_LOCK(d)					\
do {								\
	if ((d) & (1 << SLOT_NUMBER()))				\
		printk("%s:%i: Got lock on %i already!\n", 	\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while(0)

/* Complain if the current slot's bit is NOT set in `d'. */
#define FWC_HAVE_LOCK(d)				\
do {							\
	if (!((d) & (1 << SLOT_NUMBER())))		\
	printk("%s:%i:No lock on %i!\n", 		\
	       __FILE__, __LINE__, SLOT_NUMBER());	\
} while (0)

#else
#define FWC_DEBUG_LOCK(d) do { } while(0)
#define FWC_DEBUG_UNLOCK(d) do { } while(0)
#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
#define FWC_HAVE_LOCK(d) do { } while(0)
#endif /*DEBUG_IP_FIREWALL_LOCKING*/
199
/* Locking wrappers: each one first performs the debug bookkeeping
 * (FWC_DEBUG_LOCK / FWC_DEBUG_UNLOCK on fwc_rlocks for readers and
 * fwc_wlocks for writers) and then calls the matching rwlock primitive
 * on `l'.  The _IRQ variants take the flags word `f' used by the
 * irqsave / irqrestore primitives. */
#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
208
209struct ip_chain;
210
/* One pair of 64-bit traffic counters.  Rules and chains each carry one
 * of these per slot (see struct ip_fwkernel and struct ip_reent). */
struct ip_counters
{
	__u64 pcnt, bcnt;			/* Packet and byte counters */
};
215
/* In-kernel form of one firewall rule: the rule description (ipfw), the
 * link to the following rule, the target to take on a match, and a
 * trailing array of per-slot counters (allocated with
 * SIZEOF_STRUCT_IP_FW_KERNEL, i.e. NUM_SLOTS entries). */
struct ip_fwkernel
{
	struct ip_fw ipfw;
	struct ip_fwkernel *next;	/* where to go next if current
					 * rule doesn't match */
	struct ip_chain *branch;	/* which branch to jump to if
					 * current rule matches */
	int simplebranch;		/* Use this if branch == NULL */
	struct ip_counters counters[0]; /* Actually several of these */
};
226
/* Per-slot traversal state of a chain: where to resume when the chain
 * is left again (set by ip_fw_check() when it jumps into the chain),
 * plus the counters ip_fw_check() bumps when the chain's policy is
 * applied. */
struct ip_reent
{
	struct ip_chain *prevchain;	/* Pointer to referencing chain */
	struct ip_fwkernel *prevrule;	/* Pointer to referencing rule */
	struct ip_counters counters;
};
233
/* A named chain of rules.  Chains are linked together through `next'
 * (list head: ip_fw_chains).  The trailing reent[] array holds one
 * entry per slot (allocated with SIZEOF_STRUCT_IP_CHAIN, i.e. NUM_SLOTS
 * entries). */
struct ip_chain
{
	ip_chainlabel label;	    /* Defines the label for each block */
 	struct ip_chain *next;	    /* Pointer to next block */
	struct ip_fwkernel *chain;  /* Pointer to first rule in block */
	__u32 refcount; 	    /* Number of references to block */
	int policy;		    /* Default rule for chain.  Only *
				     * used in built in chains */
	struct ip_reent reent[0];   /* Actually several of these */
};
244
245/*
246 *	Implement IP packet firewall
247 */
248
/* dprintf(): printk() when DEBUG_IP_FIREWALL is defined, otherwise
 * expands to nothing (arguments are not evaluated). */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf(): same idea, controlled by DEBUG_IP_FIREWALL_USER. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
260
/* Lock around ip_fw_chains linked list structure */
rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;

/* Head of linked list of fw rules */
static struct ip_chain *ip_fw_chains;

/* The first three entries of the ip_fw_chains list are addressed by
 * position as the input, forward and output chains, in that order.
 * The macros dereference the list unconditionally, so those entries
 * must exist before the FORWARD/OUTPUT forms are used. */
#define IP_FW_INPUT_CHAIN ip_fw_chains
#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
270
271/* Returns 1 if the port is matched by the range, 0 otherwise */
272extern inline int port_match(__u16 min, __u16 max, __u16 port,
273			     int frag, int invert)
274{
275	if (frag) /* Fragments fail ANY port test. */
276		return (min == 0 && max == 0xFFFF);
277	else return (port >= min && port <= max) ^ invert;
278}
279
280/* Returns whether matches rule or not. */
281static int ip_rule_match(struct ip_fwkernel *f,
282			 const char *ifname,
283			 struct iphdr *ip,
284			 char tcpsyn,
285			 __u16 src_port, __u16 dst_port,
286			 char isfrag)
287{
288#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
289	/*
290	 *	This is a bit simpler as we don't have to walk
291	 *	an interface chain as you do in BSD - same logic
292	 *	however.
293	 */
294
295	if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
296		  IP_FW_INV_SRCIP)
297	    || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
298		     IP_FW_INV_DSTIP)) {
299		dprintf("Source or dest mismatch.\n");
300
301		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
302			f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
303			f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
304		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
305			f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
306			f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
307		return 0;
308	}
309
310	/*
311	 *	Look for a VIA device match
312	 */
313	if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
314	    if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
315			      strlen(f->ipfw.fw_vianame)) != 0,
316		      IP_FW_INV_VIA)) {
317		dprintf("Wildcard interface mismatch.%s\n",
318			f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
319		return 0;	/* Mismatch */
320	    }
321	}
322	else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
323		       IP_FW_INV_VIA)) {
324	    dprintf("Interface name does not match.%s\n",
325		    f->ipfw.fw_invflg & IP_FW_INV_VIA
326		    ? " (INV)" : "");
327	    return 0;	/* Mismatch */
328	}
329
330	/*
331	 *	Ok the chain addresses match.
332	 */
333
334	/* If we have a fragment rule but the packet is not a fragment
335	 * the we return zero */
336	if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
337		dprintf("Fragment rule but not fragment.%s\n",
338			f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
339		return 0;
340	}
341
342	/* Fragment NEVER passes a SYN test, even an inverted one. */
343	if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
344	    || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
345		dprintf("Rule requires SYN and packet has no SYN.%s\n",
346			f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
347		return 0;
348	}
349
350	if (f->ipfw.fw_proto) {
351		/*
352		 *	Specific firewall - packet's protocol
353		 *	must match firewall's.
354		 */
355
356		if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
357			dprintf("Packet protocol %hi does not match %hi.%s\n",
358				ip->protocol, f->ipfw.fw_proto,
359				f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
360			return 0;
361		}
362
363		/* For non TCP/UDP/ICMP, port range is max anyway. */
364		if (!port_match(f->ipfw.fw_spts[0],
365				f->ipfw.fw_spts[1],
366				src_port, isfrag,
367				!!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
368		    || !port_match(f->ipfw.fw_dpts[0],
369				   f->ipfw.fw_dpts[1],
370				   dst_port, isfrag,
371				   !!(f->ipfw.fw_invflg
372				      &IP_FW_INV_DSTPT))) {
373		    dprintf("Port match failed.\n");
374		    return 0;
375		}
376	}
377
378	dprintf("Match succeeded.\n");
379	return 1;
380}
381
382static const char *branchname(struct ip_chain *branch,int simplebranch)
383{
384	if (branch)
385		return branch->label;
386	switch (simplebranch)
387	{
388	case FW_BLOCK: return IP_FW_LABEL_BLOCK;
389	case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
390	case FW_REJECT: return IP_FW_LABEL_REJECT;
391	case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
392	case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
393	case FW_SKIP: return "-";
394	case FW_SKIP+1: return IP_FW_LABEL_RETURN;
395	default:
396		return "UNKNOWN";
397	}
398}
399
400/*
401 * VERY ugly piece of code which actually
402 * makes kernel printf for matching packets...
403 */
404static void dump_packet(const struct iphdr *ip,
405			const char *ifname,
406			struct ip_fwkernel *f,
407			const ip_chainlabel chainlabel,
408			__u16 src_port,
409			__u16 dst_port,
410			unsigned int count,
411			int syn)
412{
413	__u32 *opt = (__u32 *) (ip + 1);
414	int opti;
415
416	if (f) {
417		printk(KERN_INFO "Packet log: %s ",chainlabel);
418		printk("%s ",branchname(f->branch,f->simplebranch));
419		if (f->simplebranch==FW_REDIRECT)
420			printk("%d ",f->ipfw.fw_redirpt);
421	}
422
423	printk("%s PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
424	       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
425	       ifname, ip->protocol, NIPQUAD(ip->saddr),
426	       src_port, NIPQUAD(ip->daddr),
427	       dst_port,
428	       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
429	       ntohs(ip->frag_off), ip->ttl);
430
431	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
432		printk(" O=0x%8.8X", *opt++);
433	printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count);
434}
435
436/* function for checking chain labels for user space. */
437static int check_label(ip_chainlabel label)
438{
439	unsigned int i;
440	/* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
441	for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
442		if (label[i] == '\0') return 1;
443
444	return 0;
445}
446
447/*	This function returns a pointer to the first chain with a label
448 *	that matches the one given. */
449static struct ip_chain *find_label(ip_chainlabel label)
450{
451	struct ip_chain *tmp;
452	FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
453	for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
454		if (strcmp(tmp->label,label) == 0)
455			break;
456	return tmp;
457}
458
459/* This function returns a boolean which when true sets answer to one
460   of the FW_*. */
461static int find_special(ip_chainlabel label, int *answer)
462{
463	if (label[0] == '\0') {
464		*answer = FW_SKIP; /* => pass-through rule */
465		return 1;
466	} else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
467		*answer = FW_ACCEPT;
468		return 1;
469	} else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
470		*answer = FW_BLOCK;
471		return 1;
472	} else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
473		*answer = FW_REJECT;
474		return 1;
475	} else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
476		*answer = FW_REDIRECT;
477		return 1;
478	} else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
479		*answer = FW_MASQUERADE;
480		return 1;
481	} else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
482		*answer = FW_SKIP+1;
483		return 1;
484	} else {
485		return 0;
486	}
487}
488
489/* This function cleans up the prevchain and prevrule.  If the verbose
490 * flag is set then he names of the chains will be printed as it
491 * cleans up.  */
492static void cleanup(struct ip_chain *chain,
493		    const int verbose,
494		    unsigned int slot)
495{
496	struct ip_chain *tmpchain = chain->reent[slot].prevchain;
497	if (verbose)
498		printk(KERN_ERR "Chain backtrace: ");
499	while (tmpchain) {
500		if (verbose)
501			printk("%s<-",chain->label);
502		chain->reent[slot].prevchain = NULL;
503		chain = tmpchain;
504		tmpchain = chain->reent[slot].prevchain;
505	}
506	if (verbose)
507		printk("%s\n",chain->label);
508}
509
510static inline int
511ip_fw_domatch(struct ip_fwkernel *f,
512	      struct iphdr *ip,
513	      const char *rif,
514	      const ip_chainlabel label,
515	      struct sk_buff *skb,
516	      unsigned int slot,
517	      __u16 src_port, __u16 dst_port,
518	      unsigned int count,
519	      int tcpsyn)
520{
521	f->counters[slot].bcnt+=ntohs(ip->tot_len);
522	f->counters[slot].pcnt++;
523	if (f->ipfw.fw_flg & IP_FW_F_PRN) {
524		dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn);
525	}
526	ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
527
528/* This functionality is useless in stock 2.0.x series, but we don't
529 * discard the mark thing altogether, to avoid breaking ipchains (and,
530 * more importantly, the ipfwadm wrapper) --PR */
531	if (f->ipfw.fw_flg & IP_FW_F_MARKABS) {
532		skb->nfmark = f->ipfw.fw_mark;
533	} else {
534		skb->nfmark += f->ipfw.fw_mark;
535	}
536	if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
537#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
538		size_t len = min_t(unsigned int, f->ipfw.fw_outputsize, ntohs(ip->tot_len))
539			+ sizeof(__u32) + sizeof(skb->nfmark) + IFNAMSIZ;
540		struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
541
542		duprintf("Sending packet out NETLINK (length = %u).\n",
543			 (unsigned int)len);
544		if (outskb) {
545			/* Prepend length, mark & interface */
546			skb_put(outskb, len);
547			*((__u32 *)outskb->data) = (__u32)len;
548			*((__u32 *)(outskb->data+sizeof(__u32))) = skb->nfmark;
549			strcpy(outskb->data+sizeof(__u32)*2, rif);
550			memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip,
551			       len-(sizeof(__u32)*2+IFNAMSIZ));
552			netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_ATOMIC);
553		}
554		else {
555#endif
556			if (net_ratelimit())
557				printk(KERN_WARNING "ip_fw: packet drop due to "
558				       "netlink failure\n");
559			return 0;
560#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
561		}
562#endif
563	}
564	return 1;
565}
566
567/*
568 *	Returns one of the generic firewall policies, like FW_ACCEPT.
569 *
570 *	The testing is either false for normal firewall mode or true for
571 *	user checking mode (counters are not updated, TOS & mark not done).
572 */
573static int
574ip_fw_check(struct iphdr *ip,
575	    const char *rif,
576	    __u16 *redirport,
577	    struct ip_chain *chain,
578	    struct sk_buff *skb,
579	    unsigned int slot,
580	    int testing)
581{
582	struct tcphdr		*tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
583	struct udphdr		*udp=(struct udphdr *)((__u32 *)ip+ip->ihl);
584	struct icmphdr		*icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl);
585	__u32			src, dst;
586	__u16			src_port = 0xFFFF, dst_port = 0xFFFF;
587	char			tcpsyn=0;
588	__u16			offset;
589	unsigned char		oldtos;
590	struct ip_fwkernel	*f;
591	int			ret = FW_SKIP+2;
592	unsigned int		count;
593
594	/* We handle fragments by dealing with the first fragment as
595	 * if it was a normal packet.  All other fragments are treated
596	 * normally, except that they will NEVER match rules that ask
597	 * things we don't know, ie. tcp syn flag or ports).  If the
598	 * rule is also a fragment-specific rule, non-fragments won't
599	 * match it. */
600
601	offset = ntohs(ip->frag_off) & IP_OFFSET;
602
603	/*
604	 *	Don't allow a fragment of TCP 8 bytes in. Nobody
605	 *	normal causes this. Its a cracker trying to break
606	 *	in by doing a flag overwrite to pass the direction
607	 *	checks.
608	 */
609	if (offset == 1 && ip->protocol == IPPROTO_TCP)	{
610		if (!testing && net_ratelimit()) {
611			printk("Suspect TCP fragment.\n");
612			dump_packet(ip,rif,NULL,NULL,0,0,0,0);
613		}
614		return FW_BLOCK;
615	}
616
617	/* If we can't investigate ports, treat as fragment.  It's
618	 * either a trucated whole packet, or a truncated first
619	 * fragment, or a TCP first fragment of length 8-15, in which
620	 * case the above rule stops reassembly.
621	 */
622	if (offset == 0) {
623		unsigned int size_req;
624		switch (ip->protocol) {
625		case IPPROTO_TCP:
626			/* Don't care about things past flags word */
627			size_req = 16;
628			break;
629
630		case IPPROTO_UDP:
631		case IPPROTO_ICMP:
632			size_req = 8;
633			break;
634
635		default:
636			size_req = 0;
637		}
638
639		/* If it is a truncated first fragment then it can be
640		 * used to rewrite port information, and thus should
641		 * be blocked.
642		 */
643		if (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req) {
644			if (!testing && net_ratelimit()) {
645				printk("Suspect short first fragment.\n");
646				dump_packet(ip,rif,NULL,NULL,0,0,0,0);
647			}
648			return FW_BLOCK;
649		}
650	}
651
652	src = ip->saddr;
653	dst = ip->daddr;
654	oldtos = ip->tos;
655
656	/*
657	 *	If we got interface from which packet came
658	 *	we can use the address directly. Linux 2.1 now uses address
659	 *	chains per device too, but unlike BSD we first check if the
660	 *	incoming packet matches a device address and the routing
661	 *	table before calling the firewall.
662	 */
663
664	dprintf("Packet ");
665	switch(ip->protocol)
666	{
667		case IPPROTO_TCP:
668			dprintf("TCP ");
669			if (!offset) {
670				src_port=ntohs(tcp->source);
671				dst_port=ntohs(tcp->dest);
672
673				/* Connection initilisation can only
674				 * be made when the syn bit is set and
675				 * neither of the ack or reset is
676				 * set. */
677				if(tcp->syn && !(tcp->ack || tcp->rst))
678					tcpsyn=1;
679			}
680			break;
681		case IPPROTO_UDP:
682			dprintf("UDP ");
683			if (!offset) {
684				src_port=ntohs(udp->source);
685				dst_port=ntohs(udp->dest);
686			}
687			break;
688		case IPPROTO_ICMP:
689			if (!offset) {
690				src_port=(__u16)icmp->type;
691				dst_port=(__u16)icmp->code;
692			}
693			dprintf("ICMP ");
694			break;
695		default:
696			dprintf("p=%d ",ip->protocol);
697			break;
698	}
699#ifdef DEBUG_IP_FIREWALL
700	print_ip(ip->saddr);
701
702	if (offset)
703		dprintf(":fragment (%i) ", ((int)offset)<<2);
704	else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP
705		 || ip->protocol==IPPROTO_ICMP)
706		dprintf(":%hu:%hu", src_port, dst_port);
707	dprintf("\n");
708#endif
709
710	if (!testing) FWC_READ_LOCK(&ip_fw_lock);
711	else FWC_HAVE_LOCK(fwc_rlocks);
712
713	f = chain->chain;
714	do {
715		count = 0;
716		for (; f; f = f->next) {
717			count++;
718			if (ip_rule_match(f,rif,ip,
719					  tcpsyn,src_port,dst_port,offset)) {
720				if (!testing
721				    && !ip_fw_domatch(f, ip, rif, chain->label,
722						      skb, slot,
723						      src_port, dst_port,
724						      count, tcpsyn)) {
725					ret = FW_BLOCK;
726					cleanup(chain, 0, slot);
727					goto out;
728				}
729				break;
730			}
731		}
732		if (f) {
733			if (f->branch) {
734				/* Do sanity check to see if we have
735                                 * already set prevchain and if so we
736                                 * must be in a loop */
737				if (f->branch->reent[slot].prevchain) {
738					if (!testing) {
739						printk(KERN_ERR
740						       "IP firewall: "
741						       "Loop detected "
742						       "at `%s'.\n",
743						       f->branch->label);
744						cleanup(chain, 1, slot);
745						ret = FW_BLOCK;
746					} else {
747						cleanup(chain, 0, slot);
748						ret = FW_SKIP+1;
749					}
750				}
751				else {
752					f->branch->reent[slot].prevchain
753						= chain;
754					f->branch->reent[slot].prevrule
755						= f->next;
756					chain = f->branch;
757					f = chain->chain;
758				}
759			}
760			else if (f->simplebranch == FW_SKIP)
761				f = f->next;
762			else if (f->simplebranch == FW_SKIP+1) {
763				/* Just like falling off the chain */
764				goto fall_off_chain;
765			} else {
766				cleanup(chain, 0, slot);
767				ret = f->simplebranch;
768			}
769		} /* f == NULL */
770		else {
771		fall_off_chain:
772			if (chain->reent[slot].prevchain) {
773				struct ip_chain *tmp = chain;
774				f = chain->reent[slot].prevrule;
775				chain = chain->reent[slot].prevchain;
776				tmp->reent[slot].prevchain = NULL;
777			}
778			else {
779				ret = chain->policy;
780				if (!testing) {
781					chain->reent[slot].counters.pcnt++;
782					chain->reent[slot].counters.bcnt
783						+= ntohs(ip->tot_len);
784				}
785			}
786		}
787	} while (ret == FW_SKIP+2);
788
789 out:
790	if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
791
792	/* Recalculate checksum if not going to reject, and TOS changed. */
793	if (ip->tos != oldtos
794	    && ret != FW_REJECT && ret != FW_BLOCK
795	    && !testing)
796		ip_send_check(ip);
797
798	if (ret == FW_REDIRECT && redirport) {
799		if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
800			/* Wildcard redirection.
801			 * Note that redirport will become
802			 * 0xFFFF for non-TCP/UDP packets.
803			 */
804			*redirport = htons(dst_port);
805		}
806	}
807
808#ifdef DEBUG_ALLOW_ALL
809	return (testing ? ret : FW_ACCEPT);
810#else
811	return ret;
812#endif
813}
814
815/* Must have write lock & interrupts off for any of these */
816
817/* This function sets all the byte counters in a chain to zero.  The
818 * input is a pointer to the chain required for zeroing */
819static int zero_fw_chain(struct ip_chain *chainptr)
820{
821	struct ip_fwkernel *i;
822
823	FWC_HAVE_LOCK(fwc_wlocks);
824	for (i = chainptr->chain; i; i = i->next)
825		memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
826	return 0;
827}
828
829static int clear_fw_chain(struct ip_chain *chainptr)
830{
831	struct ip_fwkernel *i= chainptr->chain;
832
833	FWC_HAVE_LOCK(fwc_wlocks);
834	chainptr->chain=NULL;
835
836	while (i) {
837		struct ip_fwkernel *tmp = i->next;
838		if (i->branch)
839			i->branch->refcount--;
840		kfree(i);
841		i = tmp;
842		MOD_DEC_USE_COUNT;
843	}
844	return 0;
845}
846
847static int replace_in_chain(struct ip_chain *chainptr,
848			    struct ip_fwkernel *frwl,
849			    __u32 position)
850{
851	struct ip_fwkernel *f = chainptr->chain;
852
853	FWC_HAVE_LOCK(fwc_wlocks);
854
855	while (--position && f != NULL) f = f->next;
856	if (f == NULL)
857		return EINVAL;
858
859	if (f->branch) f->branch->refcount--;
860	if (frwl->branch) frwl->branch->refcount++;
861
862	frwl->next = f->next;
863	memcpy(f,frwl,sizeof(struct ip_fwkernel));
864	kfree(frwl);
865	return 0;
866}
867
868static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
869{
870	struct ip_fwkernel *i;
871
872	FWC_HAVE_LOCK(fwc_wlocks);
873	/* Special case if no rules already present */
874	if (chainptr->chain == NULL) {
875
876		/* If pointer writes are atomic then turning off
877		 * interrupts is not necessary. */
878		chainptr->chain = rule;
879		if (rule->branch) rule->branch->refcount++;
880		goto append_successful;
881	}
882
883	/* Find the rule before the end of the chain */
884	for (i = chainptr->chain; i->next; i = i->next);
885	i->next = rule;
886	if (rule->branch) rule->branch->refcount++;
887
888append_successful:
889	MOD_INC_USE_COUNT;
890	return 0;
891}
892
893/* This function inserts a rule at the position of position in the
894 * chain refenced by chainptr.  If position is 1 then this rule will
895 * become the new rule one. */
896static int insert_in_chain(struct ip_chain *chainptr,
897			   struct ip_fwkernel *frwl,
898			   __u32 position)
899{
900	struct ip_fwkernel *f = chainptr->chain;
901
902	FWC_HAVE_LOCK(fwc_wlocks);
903	/* special case if the position is number 1 */
904	if (position == 1) {
905		frwl->next = chainptr->chain;
906		if (frwl->branch) frwl->branch->refcount++;
907		chainptr->chain = frwl;
908		goto insert_successful;
909	}
910	position--;
911	while (--position && f != NULL) f = f->next;
912	if (f == NULL)
913		return EINVAL;
914	if (frwl->branch) frwl->branch->refcount++;
915	frwl->next = f->next;
916
917	f->next = frwl;
918
919insert_successful:
920	MOD_INC_USE_COUNT;
921	return 0;
922}
923
924/* This function deletes the a rule from a given rulenum and chain.
925 * With rulenum = 1 is the first rule is deleted. */
926
927static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
928{
929	struct ip_fwkernel *i=chainptr->chain,*tmp;
930
931	FWC_HAVE_LOCK(fwc_wlocks);
932
933	if (!chainptr->chain)
934		return ENOENT;
935
936	/* Need a special case for the first rule */
937	if (rulenum == 1) {
938		/* store temp to allow for freeing up of memory */
939		tmp = chainptr->chain;
940	        if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
941		chainptr->chain = chainptr->chain->next;
942		kfree(tmp); /* free memory that is now unused */
943	} else {
944		rulenum--;
945		while (--rulenum && i->next ) i = i->next;
946		if (!i->next)
947			return ENOENT;
948		tmp = i->next;
949		if (i->next->branch)
950			i->next->branch->refcount--;
951		i->next = i->next->next;
952		kfree(tmp);
953	}
954
955	MOD_DEC_USE_COUNT;
956	return 0;
957}
958
959
960/* This function deletes the a rule from a given rule and chain.
961 * The rule that is deleted is the first occursance of that rule. */
962static int del_rule_from_chain(struct ip_chain *chainptr,
963			       struct ip_fwkernel *frwl)
964{
965	struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
966	int was_found;
967
968	FWC_HAVE_LOCK(fwc_wlocks);
969
970	/* Sure, we should compare marks, but since the `ipfwadm'
971	 * script uses it for an unholy hack... well, life is easier
972	 * this way.  We also mask it out of the flags word. --PR */
973	for (ltmp=NULL, was_found=0;
974	     !was_found && ftmp != NULL;
975	     ltmp = ftmp,ftmp = ftmp->next) {
976		if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr
977		    || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
978		    || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
979		    || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
980		    || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS)
981			!= (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
982		    || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
983		    || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
984		    || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
985		    || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
986		    || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
987		    || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
988		    || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
989		    || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
990			duprintf("del_rule_from_chain: mismatch:"
991				 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
992				 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
993				 "mark:%u/%u "
994				 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
995				 "outputsize:%hu-%hu\n",
996				 ftmp->ipfw.fw_src.s_addr,
997				 frwl->ipfw.fw_src.s_addr,
998				 ftmp->ipfw.fw_dst.s_addr,
999				 frwl->ipfw.fw_dst.s_addr,
1000				 ftmp->ipfw.fw_smsk.s_addr,
1001				 frwl->ipfw.fw_smsk.s_addr,
1002				 ftmp->ipfw.fw_dmsk.s_addr,
1003				 frwl->ipfw.fw_dmsk.s_addr,
1004				 ftmp->ipfw.fw_flg,
1005				 frwl->ipfw.fw_flg,
1006				 ftmp->ipfw.fw_invflg,
1007				 frwl->ipfw.fw_invflg,
1008				 ftmp->ipfw.fw_proto,
1009				 frwl->ipfw.fw_proto,
1010				 ftmp->ipfw.fw_mark,
1011				 frwl->ipfw.fw_mark,
1012				 ftmp->ipfw.fw_spts[0],
1013				 frwl->ipfw.fw_spts[0],
1014				 ftmp->ipfw.fw_spts[1],
1015				 frwl->ipfw.fw_spts[1],
1016				 ftmp->ipfw.fw_dpts[0],
1017				 frwl->ipfw.fw_dpts[0],
1018				 ftmp->ipfw.fw_dpts[1],
1019				 frwl->ipfw.fw_dpts[1],
1020				 ftmp->ipfw.fw_outputsize,
1021				 frwl->ipfw.fw_outputsize);
1022			continue;
1023		}
1024
1025		if (strncmp(ftmp->ipfw.fw_vianame,
1026			    frwl->ipfw.fw_vianame,
1027			    IFNAMSIZ)) {
1028			duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
1029				 ftmp->ipfw.fw_vianame,
1030				 frwl->ipfw.fw_vianame);
1031		        continue;
1032		}
1033		if (ftmp->branch != frwl->branch) {
1034			duprintf("del_rule_from_chain: branch mismatch: "
1035				 "%s/%s\n",
1036				 ftmp->branch?ftmp->branch->label:"(null)",
1037				 frwl->branch?frwl->branch->label:"(null)");
1038			continue;
1039		}
1040		if (ftmp->branch == NULL
1041		    && ftmp->simplebranch != frwl->simplebranch) {
1042			duprintf("del_rule_from_chain: simplebranch mismatch: "
1043				 "%i/%i\n",
1044				 ftmp->simplebranch, frwl->simplebranch);
1045			continue;
1046		}
1047		was_found = 1;
1048		if (ftmp->branch)
1049			ftmp->branch->refcount--;
1050		if (ltmp)
1051			ltmp->next = ftmp->next;
1052		else
1053			chainptr->chain = ftmp->next;
1054		kfree(ftmp);
1055		MOD_DEC_USE_COUNT;
1056		break;
1057	}
1058
1059	if (was_found)
1060		return 0;
1061	else {
1062		duprintf("del_rule_from_chain: no matching rule found\n");
1063		return EINVAL;
1064	}
1065}
1066
1067/* This function takes the label of a chain and deletes the first
1068 * chain with that name.  No special cases required for the built in
1069 * chains as they have their refcount initilised to 1 so that they are
1070 * never deleted.  */
1071static int del_chain(ip_chainlabel label)
1072{
1073	struct ip_chain *tmp,*tmp2;
1074
1075	FWC_HAVE_LOCK(fwc_wlocks);
1076	/* Corner case: return EBUSY not ENOENT for first elem ("input") */
1077	if (strcmp(label, ip_fw_chains->label) == 0)
1078		return EBUSY;
1079
1080	for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1081		if(strcmp(tmp->next->label,label) == 0)
1082			break;
1083
1084	tmp2 = tmp->next;
1085	if (!tmp2)
1086		return ENOENT;
1087
1088	if (tmp2->refcount)
1089		return EBUSY;
1090
1091	if (tmp2->chain)
1092		return ENOTEMPTY;
1093
1094	tmp->next = tmp2->next;
1095	kfree(tmp2);
1096
1097	MOD_DEC_USE_COUNT;
1098	return 0;
1099}
1100
1101/* This is a function to initilise a chain.  Built in rules start with
1102 * refcount = 1 so that they cannot be deleted.  User defined rules
1103 * start with refcount = 0 so they can be deleted. */
1104static struct ip_chain *ip_init_chain(ip_chainlabel name,
1105				      __u32 ref,
1106				      int policy)
1107{
1108	unsigned int i;
1109	struct ip_chain *label
1110		= kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
1111	if (label == NULL)
1112		panic("Can't kmalloc for firewall chains.\n");
1113	strcpy(label->label,name);
1114	label->next = NULL;
1115	label->chain = NULL;
1116	label->refcount = ref;
1117	label->policy = policy;
1118	for (i = 0; i < smp_num_cpus*2; i++) {
1119		label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
1120			= 0;
1121		label->reent[i].prevchain = NULL;
1122		label->reent[i].prevrule = NULL;
1123	}
1124
1125	return label;
1126}
1127
1128/* This is a function for reating a new chain.  The chains is not
1129 * created if a chain of the same name already exists */
1130static int create_chain(ip_chainlabel label)
1131{
1132	struct ip_chain *tmp;
1133
1134	if (!check_label(label))
1135		return EINVAL;
1136
1137	FWC_HAVE_LOCK(fwc_wlocks);
1138	for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1139		if (strcmp(tmp->label,label) == 0)
1140			return EEXIST;
1141
1142	if (strcmp(tmp->label,label) == 0)
1143		return EEXIST;
1144
1145	tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
1146					      * zero since this is a
1147					      * user defined chain *
1148					      * and therefore can be
1149					      * deleted */
1150	MOD_INC_USE_COUNT;
1151	return 0;
1152}
1153
1154/* This function simply changes the policy on one of the built in
1155 * chains.  checking must be done before this is call to ensure that
1156 * chainptr is pointing to one of the three possible chains */
1157static int change_policy(struct ip_chain *chainptr, int policy)
1158{
1159	FWC_HAVE_LOCK(fwc_wlocks);
1160	chainptr->policy = policy;
1161	return 0;
1162}
1163
1164/* This function takes an ip_fwuser and converts it to a ip_fwkernel.  It also
1165 * performs some checks in the structure. */
1166static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
1167{
1168	struct ip_fwkernel *fwkern;
1169
1170	if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
1171		duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
1172			 fwuser->ipfw.fw_flg);
1173		*errno = EINVAL;
1174		return NULL;
1175	}
1176
1177#ifdef DEBUG_IP_FIREWALL_USER
1178	/* These are sanity checks that don't really matter.
1179	 * We can get rid of these once testing is complete.
1180	 */
1181	if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1182	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1183		|| fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
1184		duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
1185		*errno = EINVAL;
1186		return NULL;
1187	}
1188
1189	if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
1190	    && fwuser->ipfw.fw_redirpt != 0) {
1191		duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
1192		*errno = EINVAL;
1193		return NULL;
1194	}
1195
1196	if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1197	     && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
1198	    || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1199		&& (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
1200		duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
1201		*errno = EINVAL;
1202		return NULL;
1203	}
1204
1205	if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
1206	     && fwuser->ipfw.fw_spts[0] == 0
1207	     && fwuser->ipfw.fw_spts[1] == 0xFFFF)
1208	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
1209		&& fwuser->ipfw.fw_dpts[0] == 0
1210		&& fwuser->ipfw.fw_dpts[1] == 0xFFFF)
1211	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
1212		&& (fwuser->ipfw.fw_vianame)[0] == '\0')
1213	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
1214		&& fwuser->ipfw.fw_smsk.s_addr == 0)
1215	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
1216		&& fwuser->ipfw.fw_dmsk.s_addr == 0)) {
1217		duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
1218		*errno = EINVAL;
1219		return NULL;
1220	}
1221
1222	if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1223	    && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
1224	    && (fwuser->ipfw.fw_spts[0] != 0
1225		|| fwuser->ipfw.fw_spts[1] != 0xFFFF
1226		|| fwuser->ipfw.fw_dpts[0] != 0
1227		|| fwuser->ipfw.fw_dpts[1] != 0xFFFF
1228		|| (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
1229		duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
1230		*errno = EINVAL;
1231		return NULL;
1232	}
1233#endif
1234
1235	if ((fwuser->ipfw.fw_spts[0] != 0
1236	     || fwuser->ipfw.fw_spts[1] != 0xFFFF
1237	     || fwuser->ipfw.fw_dpts[0] != 0
1238	     || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
1239	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1240		|| (fwuser->ipfw.fw_proto != IPPROTO_TCP
1241		    && fwuser->ipfw.fw_proto != IPPROTO_UDP
1242		    && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
1243		duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
1244		*errno = EINVAL;
1245		return NULL;
1246	}
1247
1248	fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_ATOMIC);
1249	if (!fwkern) {
1250		duprintf("convert_ipfw: kmalloc failed!\n");
1251		*errno = ENOMEM;
1252		return NULL;
1253	}
1254	memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
1255
1256	if (!find_special(fwuser->label, &fwkern->simplebranch)) {
1257		fwkern->branch = find_label(fwuser->label);
1258		if (!fwkern->branch) {
1259			duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
1260				 fwuser->label);
1261			kfree(fwkern);
1262			*errno = ENOENT;
1263			return NULL;
1264		} else if (fwkern->branch == IP_FW_INPUT_CHAIN
1265			   || fwkern->branch == IP_FW_FORWARD_CHAIN
1266			   || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
1267			duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
1268				 fwuser->label);
1269			kfree(fwkern);
1270			*errno = ENOENT;
1271			return NULL;
1272		}
1273	} else
1274		fwkern->branch = NULL;
1275	memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
1276
1277	/* Handle empty vianame by making it a wildcard */
1278	if ((fwkern->ipfw.fw_vianame)[0] == '\0')
1279	    fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
1280
1281	fwkern->next = NULL;
1282	return fwkern;
1283}
1284
/* Control entry point: carry out firewall command `cmd' on the argument
 * block `m', which is `len' bytes long.
 *
 * Each command first checks that `len' equals the size of the structure
 * it expects.  Commands that name a chain inside a structure validate
 * that label with check_label() before looking the chain up.
 *
 * Returns 0 on success or a positive errno value.  IP_FW_CHECK reuses
 * errno values to report the verdict reached for the test packet:
 * 0 = accept, ECONNABORTED = redirect, ECONNRESET = masquerade,
 * ECONNREFUSED = reject, ELOOP / ENFILE = the two diagnostic results
 * (FW_SKIP+1 / FW_SKIP), ETIMEDOUT = anything else (block).
 *
 * All commands run under the ip_fw_lock write lock, except IP_FW_CHECK
 * which drops it and takes the read lock instead. */
int ip_fw_ctl(int cmd, void *m, int len)
{
	int ret;
	struct ip_chain *chain;
	unsigned long flags;

	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);

	switch (cmd) {
	case IP_FW_FLUSH:
		if (len != sizeof(ip_chainlabel) || !check_label(m))
			ret = EINVAL;
		else if ((chain = find_label(m)) == NULL)
			ret = ENOENT;
		else ret = clear_fw_chain(chain);
		break;

	case IP_FW_ZERO:
		if (len != sizeof(ip_chainlabel) || !check_label(m))
			ret = EINVAL;
		else if ((chain = find_label(m)) == NULL)
			ret = ENOENT;
		else ret = zero_fw_chain(chain);
		break;

	case IP_FW_CHECK: {
		struct ip_fwtest *new = m;
		struct iphdr *ip;

		/* Don't need write lock. */
		FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);

		/* Validate the label member itself (not the start of the
		 * structure), as every other command does; it is the
		 * string handed to find_label() below. */
		if (len != sizeof(struct ip_fwtest)
		    || !check_label(new->fwt_label))
			return EINVAL;

		/* Need readlock to do find_label */
		FWC_READ_LOCK(&ip_fw_lock);

		if ((chain = find_label(new->fwt_label)) == NULL)
			ret = ENOENT;
		else {
			ip = &(new->fwt_packet.fwp_iph);

			/* Only headers without options are accepted. */
			if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
			    duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
				     ip->ihl,
				     (int)(sizeof(struct iphdr) / sizeof(int)));
			    ret = EINVAL;
			}
			else {
				ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame,
						  NULL, chain,
						  NULL, SLOT_NUMBER(), 1);
				/* Translate the verdict into an errno. */
				switch (ret) {
				case FW_ACCEPT:
					ret = 0; break;
				case FW_REDIRECT:
					ret = ECONNABORTED; break;
				case FW_MASQUERADE:
					ret = ECONNRESET; break;
				case FW_REJECT:
					ret = ECONNREFUSED; break;
					/* Hack to help diag; these only get
					   returned when testing. */
				case FW_SKIP+1:
					ret = ELOOP; break;
				case FW_SKIP:
					ret = ENFILE; break;
				default: /* FW_BLOCK */
					ret = ETIMEDOUT; break;
				}
			}
		}
		FWC_READ_UNLOCK(&ip_fw_lock);
		return ret;
	}

	case IP_FW_MASQ_TIMEOUTS: {
		ret = ip_fw_masq_timeouts(m, len);
	}
	break;

	/* For REPLACE, APPEND, INSERT and DELETE: when convert_ipfw()
	 * fails it returns NULL and stores the error in `ret' itself. */
	case IP_FW_REPLACE: {
		struct ip_fwkernel *ip_fwkern;
		struct ip_fwnew *new = m;

		if (len != sizeof(struct ip_fwnew)
		    || !check_label(new->fwn_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwn_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
			 != NULL)
			ret = replace_in_chain(chain, ip_fwkern,
					       new->fwn_rulenum);
	}
	break;

	case IP_FW_APPEND: {
		struct ip_fwchange *new = m;
		struct ip_fwkernel *ip_fwkern;

		if (len != sizeof(struct ip_fwchange)
		    || !check_label(new->fwc_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwc_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
			 != NULL)
			ret = append_to_chain(chain, ip_fwkern);
	}
	break;

	case IP_FW_INSERT: {
		struct ip_fwkernel *ip_fwkern;
		struct ip_fwnew *new = m;

		if (len != sizeof(struct ip_fwnew)
		    || !check_label(new->fwn_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwn_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
			 != NULL)
			ret = insert_in_chain(chain, ip_fwkern,
					      new->fwn_rulenum);
	}
	break;

	case IP_FW_DELETE: {
		struct ip_fwchange *new = m;
		struct ip_fwkernel *ip_fwkern;

		if (len != sizeof(struct ip_fwchange)
		    || !check_label(new->fwc_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwc_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
			 != NULL) {
			/* The converted rule is only a search pattern;
			 * release it once the lookup is done. */
			ret = del_rule_from_chain(chain, ip_fwkern);
			kfree(ip_fwkern);
		}
	}
	break;

	case IP_FW_DELETE_NUM: {
		struct ip_fwdelnum *new = m;

		if (len != sizeof(struct ip_fwdelnum)
		    || !check_label(new->fwd_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwd_label)) == NULL)
			ret = ENOENT;
		else ret = del_num_from_chain(chain, new->fwd_rulenum);
	}
	break;

	case IP_FW_CREATECHAIN: {
		if (len != sizeof(ip_chainlabel)) {
			duprintf("create_chain: bad size %i\n", len);
			ret = EINVAL;
		}
		else ret = create_chain(m);
	}
	break;

	case IP_FW_DELETECHAIN: {
		if (len != sizeof(ip_chainlabel)) {
			duprintf("delete_chain: bad size %i\n", len);
			ret = EINVAL;
		}
		else ret = del_chain(m);
	}
	break;

	case IP_FW_POLICY: {
		struct ip_fwpolicy *new = m;

		if (len != sizeof(struct ip_fwpolicy)
		    || !check_label(new->fwp_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwp_label)) == NULL)
			ret = ENOENT;
		else if (chain != IP_FW_INPUT_CHAIN
			 && chain != IP_FW_FORWARD_CHAIN
			 && chain != IP_FW_OUTPUT_CHAIN) {
			duprintf("change_policy: can't change policy on user"
				 " defined chain.\n");
			ret = EINVAL;
		}
		else {
			/* FW_SKIP is not an accepted policy, so a name
			 * that find_special() leaves untouched falls into
			 * the default case below. */
		        int pol = FW_SKIP;
			find_special(new->fwp_policy, &pol);

			switch(pol) {
			case FW_MASQUERADE:
				/* Only allowed on the forward chain. */
				if (chain != IP_FW_FORWARD_CHAIN) {
					ret = EINVAL;
					break;
				}
				/* Fall thru... */
			case FW_BLOCK:
			case FW_ACCEPT:
			case FW_REJECT:
				ret = change_policy(chain, pol);
				break;
			default:
			        duprintf("change_policy: bad policy `%s'\n",
					 new->fwp_policy);
				ret = EINVAL;
			}
		}
		break;
	}
	default:
		duprintf("ip_fw_ctl:  unknown request %d\n",cmd);
		ret = ENOPROTOOPT;
	}

	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
	return ret;
}
1508
1509/* Returns bytes used - doesn't NUL terminate */
1510static int dump_rule(char *buffer,
1511		     const char *chainlabel,
1512		     const struct ip_fwkernel *rule)
1513{
1514	int len;
1515	unsigned int i;
1516	__u64 packets = 0, bytes = 0;
1517
1518	FWC_HAVE_LOCK(fwc_wlocks);
1519	for (i = 0; i < NUM_SLOTS; i++) {
1520		packets += rule->counters[i].pcnt;
1521		bytes += rule->counters[i].bcnt;
1522	}
1523
1524	len=sprintf(buffer,
1525		    "%9s "			/* Chain name */
1526		    "%08X/%08X->%08X/%08X "	/* Source & Destination IPs */
1527		    "%.16s "			/* Interface */
1528		    "%X %X "			/* fw_flg and fw_invflg fields */
1529		    "%u "			/* Protocol */
1530		    "%-9u %-9u %-9u %-9u "	/* Packet & byte counters */
1531		    "%u-%u %u-%u "		/* Source & Dest port ranges */
1532		    "A%02X X%02X "		/* TOS and and xor masks */
1533		    "%08X "			/* Redirection port */
1534		    "%u "			/* fw_mark field */
1535		    "%u "			/* output size */
1536		    "%9s\n",			/* Target */
1537		    chainlabel,
1538		    ntohl(rule->ipfw.fw_src.s_addr),
1539		    ntohl(rule->ipfw.fw_smsk.s_addr),
1540		    ntohl(rule->ipfw.fw_dst.s_addr),
1541		    ntohl(rule->ipfw.fw_dmsk.s_addr),
1542		    (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
1543		    rule->ipfw.fw_flg,
1544		    rule->ipfw.fw_invflg,
1545		    rule->ipfw.fw_proto,
1546		    (__u32)(packets >> 32), (__u32)packets,
1547		    (__u32)(bytes >> 32), (__u32)bytes,
1548		    rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
1549		    rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
1550		    rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
1551		    rule->ipfw.fw_redirpt,
1552		    rule->ipfw.fw_mark,
1553		    rule->ipfw.fw_outputsize,
1554		    branchname(rule->branch,rule->simplebranch));
1555
1556	duprintf("dump_rule: %i bytes done.\n", len);
1557	return len;
1558}
1559
1560/* File offset is actually in records, not bytes. */
1561static int ip_chain_procinfo(char *buffer, char **start,
1562			     off_t offset, int length
1563#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29)
1564			     , int reset
1565#endif
1566	)
1567{
1568#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29)
1569	int reset = 0;
1570#endif
1571	struct ip_chain *i;
1572	struct ip_fwkernel *j = ip_fw_chains->chain;
1573	unsigned long flags;
1574	int len = 0;
1575	int last_len = 0;
1576	off_t upto = 0;
1577
1578	duprintf("Offset starts at %lu\n", offset);
1579	duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
1580
1581	/* Need a write lock to lock out ``readers'' which update counters. */
1582	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1583
1584	for (i = ip_fw_chains; i; i = i->next) {
1585	    for (j = i->chain; j; j = j->next) {
1586		if (upto == offset) break;
1587		duprintf("Skipping rule in chain `%s'\n",
1588			 i->label);
1589		upto++;
1590	    }
1591	    if (upto == offset) break;
1592	}
1593
1594	/* Don't init j first time, or once i = NULL */
1595	for (; i; (void)((i = i->next) && (j = i->chain))) {
1596		duprintf("Dumping chain `%s'\n", i->label);
1597		for (; j; j = j->next, upto++, last_len = len)
1598		{
1599			len += dump_rule(buffer+len, i->label, j);
1600			if (len > length) {
1601				duprintf("Dumped to %i (past %i).  "
1602					 "Moving back to %i.\n",
1603					 len, length, last_len);
1604				len = last_len;
1605				goto outside;
1606			}
1607			else if (reset)
1608				memset(j->counters, 0,
1609				       sizeof(struct ip_counters)*NUM_SLOTS);
1610		}
1611	}
1612outside:
1613	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1614	buffer[len] = '\0';
1615
1616	duprintf("ip_chain_procinfo: Length = %i (of %i).  Offset = %li.\n",
1617		 len, length, upto);
1618	/* `start' hack - see fs/proc/generic.c line ~165 */
1619	*start=(char *)((unsigned int)upto-offset);
1620	return len;
1621}
1622
1623static int ip_chain_name_procinfo(char *buffer, char **start,
1624				  off_t offset, int length)
1625{
1626	struct ip_chain *i;
1627	int len = 0,last_len = 0;
1628	off_t pos = 0,begin = 0;
1629	unsigned long flags;
1630
1631	/* Need a write lock to lock out ``readers'' which update counters. */
1632	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1633
1634	for (i = ip_fw_chains; i; i = i->next)
1635	{
1636		unsigned int j;
1637		__u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
1638
1639		for (j = 0; j < NUM_SLOTS; j++) {
1640			packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
1641			packetsHi += ((i->reent[j].counters.pcnt >> 32)
1642				      & 0xFFFFFFFF);
1643			bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
1644			bytesHi += ((i->reent[j].counters.bcnt >> 32)
1645				    & 0xFFFFFFFF);
1646		}
1647
1648		/* print the label and the policy */
1649		len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
1650			     i->label,branchname(NULL, i->policy),i->refcount,
1651			     packetsHi, packetsLo, bytesHi, bytesLo);
1652		pos=begin+len;
1653		if(pos<offset) {
1654			len=0;
1655			begin=pos;
1656		}
1657		else if(pos>offset+length) {
1658			len = last_len;
1659			break;
1660		}
1661
1662		last_len = len;
1663	}
1664	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1665
1666	*start = buffer+(offset-begin);
1667	len-=(offset-begin);
1668	if(len>length)
1669		len=length;
1670	return len;
1671}
1672
1673/*
1674 *	Interface to the generic firewall chains.
1675 */
1676int ipfw_input_check(struct firewall_ops *this, int pf,
1677		     struct net_device *dev, void *phdr, void *arg,
1678		     struct sk_buff **pskb)
1679{
1680	return ip_fw_check(phdr, dev->name,
1681			   arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
1682}
1683
1684int ipfw_output_check(struct firewall_ops *this, int pf,
1685		      struct net_device *dev, void *phdr, void *arg,
1686		      struct sk_buff **pskb)
1687{
1688	/* Locally generated bogus packets by root. <SIGH>. */
1689	if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr)
1690	    || (*pskb)->len < sizeof(struct iphdr))
1691		return FW_ACCEPT;
1692	return ip_fw_check(phdr, dev->name,
1693			   arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
1694}
1695
1696int ipfw_forward_check(struct firewall_ops *this, int pf,
1697		       struct net_device *dev, void *phdr, void *arg,
1698		       struct sk_buff **pskb)
1699{
1700	return ip_fw_check(phdr, dev->name,
1701			   arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0);
1702}
1703
1704struct firewall_ops ipfw_ops=
1705{
1706	NULL,
1707	ipfw_forward_check,
1708	ipfw_input_check,
1709	ipfw_output_check,
1710	NULL,
1711	NULL
1712};
1713
1714int ipfw_init_or_cleanup(int init)
1715{
1716	struct proc_dir_entry *proc;
1717	int ret = 0;
1718	unsigned long flags;
1719
1720	if (!init) goto cleanup;
1721
1722#ifdef DEBUG_IP_FIREWALL_LOCKING
1723	fwc_wlocks = fwc_rlocks = 0;
1724#endif
1725
1726#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1727	ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
1728	if (ipfwsk == NULL)
1729		goto cleanup_nothing;
1730#endif
1731
1732	ret = register_firewall(PF_INET, &ipfw_ops);
1733	if (ret < 0)
1734		goto cleanup_netlink;
1735
1736	proc = proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR,
1737			       ip_chain_procinfo);
1738	if (proc) proc->owner = THIS_MODULE;
1739	proc = proc_net_create(IP_FW_PROC_CHAIN_NAMES,
1740			       S_IFREG | S_IRUSR | S_IWUSR,
1741			       ip_chain_name_procinfo);
1742	if (proc) proc->owner = THIS_MODULE;
1743
1744	IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
1745	IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
1746	IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
1747
1748	return ret;
1749
1750 cleanup:
1751	unregister_firewall(PF_INET, &ipfw_ops);
1752
1753	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1754	while (ip_fw_chains) {
1755		struct ip_chain *next = ip_fw_chains->next;
1756
1757		clear_fw_chain(ip_fw_chains);
1758		kfree(ip_fw_chains);
1759		ip_fw_chains = next;
1760	}
1761	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1762
1763	proc_net_remove(IP_FW_PROC_CHAINS);
1764	proc_net_remove(IP_FW_PROC_CHAIN_NAMES);
1765
1766 cleanup_netlink:
1767#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1768	sock_release(ipfwsk->socket);
1769
1770 cleanup_nothing:
1771#endif
1772	return ret;
1773}
1774MODULE_LICENSE("Dual BSD/GPL");
1775