1/*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
6 *
7 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
8 * 	- increase module usage count as soon as we have rules inside
9 * 	  a table
10 */
11#include <linux/config.h>
12#include <linux/cache.h>
13#include <linux/skbuff.h>
14#include <linux/kmod.h>
15#include <linux/vmalloc.h>
16#include <linux/netdevice.h>
17#include <linux/module.h>
18#include <linux/tcp.h>
19#include <linux/udp.h>
20#include <linux/icmp.h>
21#include <net/ip.h>
22#include <asm/uaccess.h>
23#include <asm/semaphore.h>
24#include <linux/proc_fs.h>
25
26#include <linux/netfilter_ipv4/ip_tables.h>
27
28/*#define DEBUG_IP_FIREWALL*/
29/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
30/*#define DEBUG_IP_FIREWALL_USER*/
31
32#ifdef DEBUG_IP_FIREWALL
33#define dprintf(format, args...)  printk(format , ## args)
34#else
35#define dprintf(format, args...)
36#endif
37
38#ifdef DEBUG_IP_FIREWALL_USER
39#define duprintf(format, args...) printk(format , ## args)
40#else
41#define duprintf(format, args...)
42#endif
43
44#ifdef CONFIG_NETFILTER_DEBUG
45#define IP_NF_ASSERT(x)						\
46do {								\
47	if (!(x))						\
48		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
49		       __FUNCTION__, __FILE__, __LINE__);	\
50} while(0)
51#else
52#define IP_NF_ASSERT(x)
53#endif
54#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
55
56/* Mutex protects lists (only traversed in user context). */
57static DECLARE_MUTEX(ipt_mutex);
58
59/* Must have mutex */
60#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
61#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
62#include <linux/netfilter_ipv4/lockhelp.h>
63#include <linux/netfilter_ipv4/listhelp.h>
64
65
66/* Locking is simple: we assume at worst case there will be one packet
67   in user context and one from bottom halves (or soft irq if Alexey's
68   softnet patch was applied).
69
70   We keep a set of rules for each CPU, so we can avoid write-locking
71   them; doing a readlock_bh() stops packets coming through if we're
72   in user context.
73
74   To be cache friendly on SMP, we arrange them like so:
75   [ n-entries ]
76   ... cache-align padding ...
77   [ n-entries ]
78
79   Hence the start of any table is given by get_table() below.  */
80
/* The table itself.
 *
 * One of these is allocated per table (filter, nat, mangle...).  It is
 * the kernel-side translation of a userspace ipt_replace blob, with one
 * private copy of the rule entries per CPU appended at the end. */
struct ipt_table_info
{
	/* Size per table */
	unsigned int size;
	/* Number of rule entries in the table */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows: byte offsets into 'entries' for
	   each netfilter hook this table attaches to.  hook_entry[] is
	   where traversal starts; underflow[] is the builtin policy rule
	   used when a chain "falls off the end". */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU, each SMP_ALIGN(size) bytes apart
	   (see TABLE_OFFSET); cacheline-aligned to avoid false sharing. */
	char entries[0] ____cacheline_aligned;
};
97
98static LIST_HEAD(ipt_target);
99static LIST_HEAD(ipt_match);
100static LIST_HEAD(ipt_tables);
101#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
102
103#ifdef CONFIG_SMP
104#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
105#else
106#define TABLE_OFFSET(t,p) 0
107#endif
108
109
/* Returns whether matches rule or not.
 *
 * Checks the generic IP part of a rule (addresses, interfaces, protocol,
 * fragment flag) against a packet.  'indev'/'outdev' are IFNAMSIZ buffers
 * (possibly all-zero for absent devices); 'isfrag' is non-zero for
 * non-first fragments.  Returns 1 on match, 0 on mismatch. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

/* Evaluates 'bool', flipped when the rule's corresponding inversion
   flag (iptables '!') is set. */
#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

	/* Masked compare of source and destination addresses. */
	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely.
	   Compares the interface name word-by-word; the mask selects
	   which bytes of the name are significant, so 'ret' is non-zero
	   iff some masked byte differs. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	/* Same masked comparison for the output interface. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol; proto == 0 means "any". */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}
188
189static inline int
190ip_checkentry(const struct ipt_ip *ip)
191{
192	if (ip->flags & ~IPT_F_MASK) {
193		duprintf("Unknown flag bits set: %08X\n",
194			 ip->flags & ~IPT_F_MASK);
195		return 0;
196	}
197	if (ip->invflags & ~IPT_INV_MASK) {
198		duprintf("Unknown invflag bits set: %08X\n",
199			 ip->invflags & ~IPT_INV_MASK);
200		return 0;
201	}
202	return 1;
203}
204
205static unsigned int
206ipt_error(struct sk_buff **pskb,
207	  unsigned int hooknum,
208	  const struct net_device *in,
209	  const struct net_device *out,
210	  const void *targinfo,
211	  void *userinfo)
212{
213	if (net_ratelimit())
214		printk("ip_tables: error: `%s'\n", (char *)targinfo);
215
216	return NF_DROP;
217}
218
219static inline
220int do_match(struct ipt_entry_match *m,
221	     const struct sk_buff *skb,
222	     const struct net_device *in,
223	     const struct net_device *out,
224	     int offset,
225	     const void *hdr,
226	     u_int16_t datalen,
227	     int *hotdrop)
228{
229	/* Stop iteration if it doesn't match */
230	if (!m->u.kernel.match->match(skb, in, out, m->data,
231				      offset, hdr, datalen, hotdrop))
232		return 1;
233	else
234		return 0;
235}
236
/* Rule entries are addressed as byte offsets from the table base;
 * translate one such offset into an entry pointer. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *p = base;

	return (struct ipt_entry *)(p + offset);
}
242
/* Returns one of the generic firewall policies, like NF_ACCEPT.
 *
 * Main packet-traversal engine: walks the rule entries of 'table' for
 * the given hook, evaluating matches and targets until a verdict is
 * reached.  Jumps to user chains save a return pointer in the NEXT
 * entry's 'comefrom' field, so RETURN can pop back without recursion.
 * Runs under the table's read lock with BHs disabled. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] = { 0 };
	u_int16_t offset;
	struct iphdr *ip;
	void *protohdr;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	protohdr = (u_int32_t *)ip + ip->ihl;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	/* Select this CPU's private copy of the entries. */
	table_base = (void *)table->private->entries
		+ TABLE_OFFSET(table->private,
			       cpu_number_map(smp_processor_id()));
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check noone else using our table: the first entry's comefrom
	   is abused as a per-CPU "in use" marker in debug builds. */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		(*pskb)->nfcache |= e->nfcache;
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			/* Non-zero means some match extension failed. */
			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, protohdr,
					      datalen, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? (NULL target function means
			   verdict/jump handled inline here) */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? Negative
					   verdicts encode NF_* values as
					   -(verdict)-1, or RETURN. */
					if (v != IPT_RETURN) {
						verdict = (unsigned)(-v) - 1;
						break;
					}
					/* RETURN: pop the saved back
					   pointer and resume there. */
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v
				    != (void *)e + e->next_offset) {
					/* A real jump (not just a
					   fall-through to the next rule):
					   Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
                                   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     hook,
								     in, out,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff
				   (e.g. NAT mangling); re-read the
				   header pointers. */
				ip = (*pskb)->nh.iph;
				protohdr = (u_int32_t *)ip + ip->ihl;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Release the debug "in use" marker. */
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}
401
402/* If it succeeds, returns element and locks mutex */
403static inline void *
404find_inlist_lock_noload(struct list_head *head,
405			const char *name,
406			int *error,
407			struct semaphore *mutex)
408{
409	void *ret;
410
411
412	*error = down_interruptible(mutex);
413	if (*error != 0)
414		return NULL;
415
416	ret = list_named_find(head, name);
417	if (!ret) {
418		*error = -ENOENT;
419		up(mutex);
420	}
421	return ret;
422}
423
#ifndef CONFIG_KMOD
/* Without kmod support there is no autoloading: ignore the prefix. */
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
/* As find_inlist_lock_noload(), but on a miss tries to load the module
 * "<prefix><name>" via kmod and then looks again.  Same locking
 * contract: mutex held on success, released on failure. */
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *ret;

	ret = find_inlist_lock_noload(head, name, error, mutex);
	if (!ret) {
		/* VLA sized for prefix + name; 'name' is bounded by
		   IPT_FUNCTION_MAXNAMELEN by the ipt_* structures. */
		char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1];
		strcpy(modulename, prefix);
		strcat(modulename, name);
		duprintf("find_inlist: loading `%s'.\n", modulename);
		request_module(modulename);
		/* Retry now that the module may be registered. */
		ret = find_inlist_lock_noload(head, name, error, mutex);
	}

	return ret;
}
#endif
449
/* Find a registered table by name (autoloading "iptable_<name>" if
 * needed); on success ipt_mutex is held. */
static inline struct ipt_table *
find_table_lock(const char *name, int *error, struct semaphore *mutex)
{
	return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
}
455
/* Find a registered match extension by name (autoloading "ipt_<name>"
 * if needed); on success ipt_mutex is held. */
static inline struct ipt_match *
find_match_lock(const char *name, int *error, struct semaphore *mutex)
{
	return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
}
461
/* Find a registered target extension by name (autoloading "ipt_<name>"
 * if needed); on success ipt_mutex is held. */
static inline struct ipt_target *
find_target_lock(const char *name, int *error, struct semaphore *mutex)
{
	return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
}
467
468/* All zeroes == unconditional rule. */
469static inline int
470unconditional(const struct ipt_ip *ip)
471{
472	unsigned int i;
473
474	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
475		if (((__u32 *)ip)[i])
476			return 0;
477
478	return 1;
479}
480
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom.

   Iterative depth-first walk of the rule graph starting from each
   valid hook entry point.  No recursion: back pointers for the walk
   are stashed in each entry's counters.pcnt (restored to 0 on the way
   back out), and bit NF_IP_NUMHOOKS of comefrom marks "currently on
   the walk stack" so a revisit means a loop. */
static int
mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(newinfo->entries + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			/* Already on the walk stack => loop. */
			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			/* Record "reachable from this hook" + stack mark. */
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					/* Leaving this entry: clear its
					   stack mark, restore pcnt. */
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(newinfo->entries + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(newinfo->entries + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				/* Descend: record back pointer in the
				   destination and continue from there. */
				e = (struct ipt_entry *)
					(newinfo->entries + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
		next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
576
577static inline int
578cleanup_match(struct ipt_entry_match *m, unsigned int *i)
579{
580	if (i && (*i)-- == 0)
581		return 1;
582
583	if (m->u.kernel.match->destroy)
584		m->u.kernel.match->destroy(m->data,
585					   m->u.match_size - sizeof(*m));
586
587	if (m->u.kernel.match->me)
588		__MOD_DEC_USE_COUNT(m->u.kernel.match->me);
589
590	return 0;
591}
592
/* Validate a standard target from userspace: exact target size, and a
 * verdict that is either a plausible in-table jump offset (when >= 0)
 * or a recognized negative verdict code.  Returns 1 if OK, 0 if bad. */
static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	/* Non-negative verdict is a jump offset: must stay inside the
	   table (max_offset is the table size). */
	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	/* Negative verdicts encode NF_* codes as -(code)-1; anything
	   below -NF_MAX_VERDICT-1 is garbage. */
	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}
622
/* Resolve and validate one match extension of a new rule: look up the
 * registered match by name (find_match_lock leaves ipt_mutex held on
 * success), pin its module, bind it into the entry, then run its
 * checkentry hook.  Increments *i (count of successfully set-up
 * matches, used for unwinding).  Returns 0 or a negative errno. */
static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	int ret;
	struct ipt_match *match;

	match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
	if (!match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return ret;
	}
	/* Grab the module ref while still under ipt_mutex, so the match
	   cannot be unregistered between lookup and use. */
	if (match->me)
		__MOD_INC_USE_COUNT(match->me);
	m->u.kernel.match = match;
	up(&ipt_mutex);

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		/* checkentry rejected the rule: drop the ref we took. */
		if (m->u.kernel.match->me)
			__MOD_DEC_USE_COUNT(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}
657
658static struct ipt_target ipt_standard_target;
659
/* Fully validate one new rule entry: its IP part, all of its matches
 * (via check_match) and its target.  On any failure all matches set up
 * so far are unwound via cleanup_match.  'size' bounds standard-target
 * jump offsets.  Increments *i on success.  Returns 0 or -errno. */
static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;	/* number of matches successfully checked */

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	/* find_target_lock leaves ipt_mutex held on success; 'ret' is
	   already set with the errno when it fails. */
	target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
	if (!target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		goto cleanup_matches;
	}
	/* Pin the target module while still under the mutex. */
	if (target->me)
		__MOD_INC_USE_COUNT(target->me);
	t->u.kernel.target = target;
	up(&ipt_mutex);

	if (t->u.kernel.target == &ipt_standard_target) {
		/* Built-in standard target: verdict/size checks only. */
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		if (t->u.kernel.target->me)
			__MOD_DEC_USE_COUNT(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	/* Undo the j matches that were successfully set up. */
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}
715
/* First-pass structural check of one entry in a userspace blob:
 * alignment, minimum size, and staying within [base, limit).  Also
 * records which entries sit exactly at the hook entry/underflow
 * offsets the user declared.  Increments *i (entry count). */
static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	/* Every entry must at least hold its header plus a target. */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows: an entry at exactly the declared
	   offset confirms that offset as valid (translate_table later
	   verifies every valid hook got confirmed). */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}


	/* Clear counters and comefrom: kernel-owned fields must not be
	   seeded by userspace. */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}
756
757static inline int
758cleanup_entry(struct ipt_entry *e, unsigned int *i)
759{
760	struct ipt_entry_target *t;
761
762	if (i && (*i)-- == 0)
763		return 1;
764
765	/* Cleanup all matches */
766	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
767	t = ipt_get_target(e);
768	if (t->u.kernel.target->destroy)
769		t->u.kernel.target->destroy(t->data,
770					    t->u.target_size - sizeof(*t));
771	if (t->u.kernel.target->me)
772		__MOD_DEC_USE_COUNT(t->u.kernel.target->me);
773
774	return 0;
775}
776
/* Checks and translates the user-supplied table segment (held in
   newinfo)

   Pipeline: (1) structural pass over every entry, confirming the
   declared hook entry/underflow offsets; (2) loop detection via
   mark_source_chains; (3) semantic pass binding matches/targets
   (check_entry), unwinding on failure; (4) replicate CPU 0's entries
   to every other CPU's slot.  Returns 0 or a negative errno. */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				newinfo->entries,
				newinfo->entries + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	/* Entry count must match what userspace claimed. */
	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned: any still-0xFFFFFFFF slot means the
	   declared offset did not land on an entry boundary. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		/* Unwind the i entries that passed before the failure. */
		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for (i = 1; i < smp_num_cpus; i++) {
		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
		       newinfo->entries,
		       SMP_ALIGN(newinfo->size));
	}

	return ret;
}
858
/* Atomically swap a table's rule set for 'newinfo' under the table
 * write lock.  Returns the old info (caller frees it and harvests its
 * counters) or NULL with *error set if the caller's counter count no
 * longer matches the live table. */
static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		struct ipt_entry *table_base;
		unsigned int i;

		/* Stamp each per-CPU copy "not in use" for the debug
		   reentrancy checks in ipt_do_table. */
		for (i = 0; i < smp_num_cpus; i++) {
			table_base =
				(void *)newinfo->entries
				+ TABLE_OFFSET(newinfo, i);

			table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct?  The table may
	   have been replaced since the caller sampled it. */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	/* Preserve the baseline used for module usage accounting. */
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}
899
900/* Gets counters. */
901static inline int
902add_entry_to_counter(const struct ipt_entry *e,
903		     struct ipt_counters total[],
904		     unsigned int *i)
905{
906	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
907
908	(*i)++;
909	return 0;
910}
911
912static void
913get_counters(const struct ipt_table_info *t,
914	     struct ipt_counters counters[])
915{
916	unsigned int cpu;
917	unsigned int i;
918
919	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
920		i = 0;
921		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
922				  t->size,
923				  add_entry_to_counter,
924				  counters,
925				  &i);
926	}
927}
928
/* Copy the table's rules back to userspace: raw entries from CPU 0,
 * then patch in (a) the summed per-CPU counters and (b) the canonical
 * kernel names of each match/target (the kernel-pointer unions would
 * otherwise leak into the user-visible name fields). */
static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc(countersize);

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... (write lock so no CPU updates them
	   mid-sum). */
	memset(counters, 0, countersize);
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* ... then copy entire thing from CPU 0... */
	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(table->private->entries + off);
		/* Overwrite the raw counters with the summed snapshot. */
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* Rewrite each match's name field from its kernel
		   registration. */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		/* Likewise for the target's name. */
		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}
1006
/* IPT_SO_GET_ENTRIES handler: look up the named table and, if the
 * caller's declared size matches the live table, dump the entries to
 * uptr->entrytable.  Returns 0 or a negative errno (already set in
 * 'ret' by find_table_lock when the lookup fails). */
static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name, &ret, &ipt_mutex);
	if (t) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		/* Size must match exactly: userspace learned it from a
		   prior GET_INFO call. */
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		/* find_table_lock left ipt_mutex held on success. */
		up(&ipt_mutex);
	} else
		duprintf("get_entries: Can't find %s!\n",
			 entries->name);

	return ret;
}
1034
/* IPT_SO_SET_REPLACE handler: validate and translate a complete new
 * rule set from userspace, swap it in under the table lock, harvest
 * the old table's counters back to the user, adjust the table module's
 * usage count, and free the old table.  Returns 0 or -errno. */
static int
do_replace(void *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct ipt_table_info *newinfo, *oldinfo;
	struct ipt_counters *counters;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
		return -ENOMEM;

	/* Room for one SMP_ALIGNed copy of the entries per CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(tmp.size) * smp_num_cpus);
	if (!newinfo)
		return -ENOMEM;

	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	/* Buffer for the old table's counters, returned to userspace. */
	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}
	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	/* Holds ipt_mutex on success. */
	t = find_table_lock(tmp.name, &ret, &ipt_mutex);
	if (!t)
		goto free_newinfo_counters_untrans;

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto free_newinfo_counters_untrans_unlock;
	}

	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto free_newinfo_counters_untrans_unlock;

	/* Update module usage count based on number of rules: the
	   module is pinned while the table has more rules than it was
	   registered with (i.e. user-added rules exist). */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if (t->me && (oldinfo->number <= oldinfo->initial_entries) &&
 	    (newinfo->number > oldinfo->initial_entries))
		__MOD_INC_USE_COUNT(t->me);
	else if (t->me && (oldinfo->number > oldinfo->initial_entries) &&
	 	 (newinfo->number <= oldinfo->initial_entries))
		__MOD_DEC_USE_COUNT(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
	vfree(oldinfo);
	/* Silent error: too late now. */
	copy_to_user(tmp.counters, counters,
		     sizeof(struct ipt_counters) * tmp.num_counters);
	vfree(counters);
	up(&ipt_mutex);
	return 0;

 free_newinfo_counters_untrans_unlock:
	up(&ipt_mutex);
 free_newinfo_counters_untrans:
	/* Drop the refs translate_table took on matches/targets. */
	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	vfree(newinfo);
	return ret;
}
1129
1130/* We're lazy, and add to the first CPU; overflow works its fey magic
1131 * and everything is OK. */
1132static inline int
1133add_counter_to_entry(struct ipt_entry *e,
1134		     const struct ipt_counters addme[],
1135		     unsigned int *i)
1136{
1137
1138	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1139
1140	(*i)++;
1141	return 0;
1142}
1143
1144static int
1145do_add_counters(void *user, unsigned int len)
1146{
1147	unsigned int i;
1148	struct ipt_counters_info tmp, *paddc;
1149	struct ipt_table *t;
1150	int ret;
1151
1152	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1153		return -EFAULT;
1154
1155	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1156		return -EINVAL;
1157
1158	paddc = vmalloc(len);
1159	if (!paddc)
1160		return -ENOMEM;
1161
1162	if (copy_from_user(paddc, user, len) != 0) {
1163		ret = -EFAULT;
1164		goto free;
1165	}
1166
1167	t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1168	if (!t)
1169		goto free;
1170
1171	write_lock_bh(&t->lock);
1172	if (t->private->number != paddc->num_counters) {
1173		ret = -EINVAL;
1174		goto unlock_up_free;
1175	}
1176
1177	i = 0;
1178	IPT_ENTRY_ITERATE(t->private->entries,
1179			  t->private->size,
1180			  add_counter_to_entry,
1181			  paddc->counters,
1182			  &i);
1183 unlock_up_free:
1184	write_unlock_bh(&t->lock);
1185	up(&ipt_mutex);
1186 free:
1187	vfree(paddc);
1188
1189	return ret;
1190}
1191
1192static int
1193do_ipt_set_ctl(struct sock *sk,	int cmd, void *user, unsigned int len)
1194{
1195	int ret;
1196
1197	if (!capable(CAP_NET_ADMIN))
1198		return -EPERM;
1199
1200	switch (cmd) {
1201	case IPT_SO_SET_REPLACE:
1202		ret = do_replace(user, len);
1203		break;
1204
1205	case IPT_SO_SET_ADD_COUNTERS:
1206		ret = do_add_counters(user, len);
1207		break;
1208
1209	default:
1210		duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1211		ret = -EINVAL;
1212	}
1213
1214	return ret;
1215}
1216
/* getsockopt() entry point: IPT_SO_GET_INFO returns table geometry
 * (hook offsets, entry count, total size); IPT_SO_GET_ENTRIES copies
 * the rules themselves.  CAP_NET_ADMIN required: rule contents reveal
 * the firewall policy. */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* Force NUL-termination of the user-supplied name. */
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
		/* On success returns with ipt_mutex held; on failure it
		 * has already stored an errno in ret for us. */
		t = find_table_lock(name, &ret, &ipt_mutex);
		if (t) {
			struct ipt_getinfo info;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			strcpy(info.name, name);

			/* *len == sizeof(info) here (checked above). */
			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;

			up(&ipt_mutex);
		}
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		/* Need the fixed header first to learn get.size, then
		 * the total length must match exactly. */
		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
1289
1290/* Registration hooks for targets. */
1291int
1292ipt_register_target(struct ipt_target *target)
1293{
1294	int ret;
1295
1296	MOD_INC_USE_COUNT;
1297	ret = down_interruptible(&ipt_mutex);
1298	if (ret != 0) {
1299		MOD_DEC_USE_COUNT;
1300		return ret;
1301	}
1302	if (!list_named_insert(&ipt_target, target)) {
1303		duprintf("ipt_register_target: `%s' already in list!\n",
1304			 target->name);
1305		ret = -EINVAL;
1306		MOD_DEC_USE_COUNT;
1307	}
1308	up(&ipt_mutex);
1309	return ret;
1310}
1311
/* Remove a target extension from the global list.  Caller guarantees
 * no table rule still references it. */
void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
	/* Drop the module reference taken in ipt_register_target(). */
	MOD_DEC_USE_COUNT;
}
1320
1321int
1322ipt_register_match(struct ipt_match *match)
1323{
1324	int ret;
1325
1326	MOD_INC_USE_COUNT;
1327	ret = down_interruptible(&ipt_mutex);
1328	if (ret != 0) {
1329		MOD_DEC_USE_COUNT;
1330		return ret;
1331	}
1332	if (!list_named_insert(&ipt_match, match)) {
1333		duprintf("ipt_register_match: `%s' already in list!\n",
1334			 match->name);
1335		MOD_DEC_USE_COUNT;
1336		ret = -EINVAL;
1337	}
1338	up(&ipt_mutex);
1339
1340	return ret;
1341}
1342
/* Remove a match extension from the global list.  Caller guarantees
 * no table rule still references it. */
void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
	/* Drop the module reference taken in ipt_register_match(). */
	MOD_DEC_USE_COUNT;
}
1351
/* Register a table (filter/nat/mangle...): copy in its compiled-in
 * bootstrap ruleset, translate it, and install it under ipt_mutex.
 * Returns 0 on success or a negative errno. */
int ipt_register_table(struct ipt_table *table)
{
	int ret;
	struct ipt_table_info *newinfo;
	/* Dummy table_info so replace_table() always has an "old" info
	 * to swap out, even on first registration. */
	static struct ipt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };

	MOD_INC_USE_COUNT;
	/* Room for one cache-aligned copy of the entries per CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(table->table->size) * smp_num_cpus);
	if (!newinfo) {
		ret = -ENOMEM;
		MOD_DEC_USE_COUNT;
		return ret;
	}
	memcpy(newinfo->entries, table->table->entries, table->table->size);

	/* Verify and fix up the initial ruleset (offsets, hook entry
	 * points, underflows). */
	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, table->table->size,
			      table->table->num_entries,
			      table->table->hook_entry,
			      table->table->underflow);
	if (ret != 0) {
		vfree(newinfo);
		MOD_DEC_USE_COUNT;
		return ret;
	}

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0) {
		vfree(newinfo);
		MOD_DEC_USE_COUNT;
		return ret;
	}

	/* Don't autoload: we'd eat our tail... */
	if (list_named_find(&ipt_tables, table->name)) {
		ret = -EEXIST;
		goto free_unlock;
	}

	/* Simplifies replace_table code. */
	table->private = &bootstrap;
	if (!replace_table(table, 0, newinfo, &ret))
		goto free_unlock;

	duprintf("table->private->number = %u\n",
		 table->private->number);

	/* save number of initial entries */
	table->private->initial_entries = table->private->number;

	table->lock = RW_LOCK_UNLOCKED;
	list_prepend(&ipt_tables, table);

 unlock:
	/* On the success path ret is still 0 from down_interruptible. */
	up(&ipt_mutex);
	return ret;

 free_unlock:
	vfree(newinfo);
	MOD_DEC_USE_COUNT;
	goto unlock;
}
1416
/* Unregister a table: unlink it under the mutex, then release every
 * rule's match/target references and the table memory itself. */
void ipt_unregister_table(struct ipt_table *table)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_tables, table);
	up(&ipt_mutex);

	/* Decrease module usage counts and free resources */
	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
			  cleanup_entry, NULL);
	vfree(table->private);
	MOD_DEC_USE_COUNT;
}
1429
/* Returns 1 if the port is matched by the range, 0 otherwise */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (port >= min) && (port <= max);

	/* Callers pass invert as 0 or 1, so XOR flips the verdict. */
	return in_range ^ invert;
}
1439
1440static int
1441tcp_find_option(u_int8_t option,
1442		const struct tcphdr *tcp,
1443		u_int16_t datalen,
1444		int invert,
1445		int *hotdrop)
1446{
1447	unsigned int i = sizeof(struct tcphdr);
1448	const u_int8_t *opt = (u_int8_t *)tcp;
1449
1450	duprintf("tcp_match: finding option\n");
1451	/* If we don't have the whole header, drop packet. */
1452	if (tcp->doff * 4 > datalen) {
1453		*hotdrop = 1;
1454		return 0;
1455	}
1456
1457	while (i < tcp->doff * 4) {
1458		if (opt[i] == option) return !invert;
1459		if (opt[i] < 2) i++;
1460		else i += opt[i+1]?:1;
1461	}
1462
1463	return invert;
1464}
1465
1466static int
1467tcp_match(const struct sk_buff *skb,
1468	  const struct net_device *in,
1469	  const struct net_device *out,
1470	  const void *matchinfo,
1471	  int offset,
1472	  const void *hdr,
1473	  u_int16_t datalen,
1474	  int *hotdrop)
1475{
1476	const struct tcphdr *tcp = hdr;
1477	const struct ipt_tcp *tcpinfo = matchinfo;
1478
1479	/* To quote Alan:
1480
1481	   Don't allow a fragment of TCP 8 bytes in. Nobody normal
1482	   causes this. Its a cracker trying to break in by doing a
1483	   flag overwrite to pass the direction checks.
1484	*/
1485
1486	if (offset == 1) {
1487		duprintf("Dropping evil TCP offset=1 frag.\n");
1488		*hotdrop = 1;
1489		return 0;
1490	} else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
1491		/* We've been asked to examine this packet, and we
1492		   can't.  Hence, no choice but to drop. */
1493		duprintf("Dropping evil TCP offset=0 tinygram.\n");
1494		*hotdrop = 1;
1495		return 0;
1496	}
1497
1498
1499#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1500
1501	/* Must not be a fragment. */
1502	return !offset
1503		&& port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1504			      ntohs(tcp->source),
1505			      !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
1506		&& port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1507			      ntohs(tcp->dest),
1508			      !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
1509		&& FWINVTCP((((unsigned char *)tcp)[13]
1510			     & tcpinfo->flg_mask)
1511			    == tcpinfo->flg_cmp,
1512			    IPT_TCP_INV_FLAGS)
1513		&& (!tcpinfo->option
1514		    || tcp_find_option(tcpinfo->option, tcp, datalen,
1515				       tcpinfo->invflags
1516				       & IPT_TCP_INV_OPTION,
1517				       hotdrop));
1518}
1519
1520/* Called when user tries to insert an entry of this type. */
1521static int
1522tcp_checkentry(const char *tablename,
1523	       const struct ipt_ip *ip,
1524	       void *matchinfo,
1525	       unsigned int matchsize,
1526	       unsigned int hook_mask)
1527{
1528	const struct ipt_tcp *tcpinfo = matchinfo;
1529
1530	/* Must specify proto == TCP, and no unknown invflags */
1531	return ip->proto == IPPROTO_TCP
1532		&& !(ip->invflags & IPT_INV_PROTO)
1533		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1534		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1535}
1536
1537static int
1538udp_match(const struct sk_buff *skb,
1539	  const struct net_device *in,
1540	  const struct net_device *out,
1541	  const void *matchinfo,
1542	  int offset,
1543	  const void *hdr,
1544	  u_int16_t datalen,
1545	  int *hotdrop)
1546{
1547	const struct udphdr *udp = hdr;
1548	const struct ipt_udp *udpinfo = matchinfo;
1549
1550	if (offset == 0 && datalen < sizeof(struct udphdr)) {
1551		/* We've been asked to examine this packet, and we
1552		   can't.  Hence, no choice but to drop. */
1553		duprintf("Dropping evil UDP tinygram.\n");
1554		*hotdrop = 1;
1555		return 0;
1556	}
1557
1558	/* Must not be a fragment. */
1559	return !offset
1560		&& port_match(udpinfo->spts[0], udpinfo->spts[1],
1561			      ntohs(udp->source),
1562			      !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1563		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1564			      ntohs(udp->dest),
1565			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1566}
1567
1568/* Called when user tries to insert an entry of this type. */
1569static int
1570udp_checkentry(const char *tablename,
1571	       const struct ipt_ip *ip,
1572	       void *matchinfo,
1573	       unsigned int matchinfosize,
1574	       unsigned int hook_mask)
1575{
1576	const struct ipt_udp *udpinfo = matchinfo;
1577
1578	/* Must specify proto == UDP, and no unknown invflags */
1579	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1580		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1581			 IPPROTO_UDP);
1582		return 0;
1583	}
1584	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1585		duprintf("ipt_udp: matchsize %u != %u\n",
1586			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1587		return 0;
1588	}
1589	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1590		duprintf("ipt_udp: unknown flags %X\n",
1591			 udpinfo->invflags);
1592		return 0;
1593	}
1594
1595	return 1;
1596}
1597
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit = (type == test_type)
		&& (code >= min_code)
		&& (code <= max_code);

	/* Callers pass invert as 0 or 1, so XOR flips the verdict. */
	return hit ^ invert;
}
1607
1608static int
1609icmp_match(const struct sk_buff *skb,
1610	   const struct net_device *in,
1611	   const struct net_device *out,
1612	   const void *matchinfo,
1613	   int offset,
1614	   const void *hdr,
1615	   u_int16_t datalen,
1616	   int *hotdrop)
1617{
1618	const struct icmphdr *icmp = hdr;
1619	const struct ipt_icmp *icmpinfo = matchinfo;
1620
1621	if (offset == 0 && datalen < 2) {
1622		/* We've been asked to examine this packet, and we
1623		   can't.  Hence, no choice but to drop. */
1624		duprintf("Dropping evil ICMP tinygram.\n");
1625		*hotdrop = 1;
1626		return 0;
1627	}
1628
1629	/* Must not be a fragment. */
1630	return !offset
1631		&& icmp_type_code_match(icmpinfo->type,
1632					icmpinfo->code[0],
1633					icmpinfo->code[1],
1634					icmp->type, icmp->code,
1635					!!(icmpinfo->invflags&IPT_ICMP_INV));
1636}
1637
1638/* Called when user tries to insert an entry of this type. */
1639static int
1640icmp_checkentry(const char *tablename,
1641	   const struct ipt_ip *ip,
1642	   void *matchinfo,
1643	   unsigned int matchsize,
1644	   unsigned int hook_mask)
1645{
1646	const struct ipt_icmp *icmpinfo = matchinfo;
1647
1648	/* Must specify proto == ICMP, and no unknown invflags */
1649	return ip->proto == IPPROTO_ICMP
1650		&& !(ip->invflags & IPT_INV_PROTO)
1651		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1652		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
1653}
1654
/* The built-in targets: standard (NULL) and error. */
/* NOTE(review): positional initializers — order appears to be
 * { list link, name, target fn, checkentry, destroy }; confirm against
 * the struct definitions in ip_tables.h. */
static struct ipt_target ipt_standard_target
= { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
static struct ipt_target ipt_error_target
= { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };

/* set and get sockopt ranges both start at IPT_BASE_CTL; handled by
 * do_ipt_set_ctl()/do_ipt_get_ctl() above. */
static struct nf_sockopt_ops ipt_sockopts
= { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
    IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL  };

/* Built-in protocol matches, registered in init(). */
static struct ipt_match tcp_matchstruct
= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
static struct ipt_match udp_matchstruct
= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
1671
1672#ifdef CONFIG_PROC_FS
1673static inline int print_name(const char *i,
1674			     off_t start_offset, char *buffer, int length,
1675			     off_t *pos, unsigned int *count)
1676{
1677	if ((*count)++ >= start_offset) {
1678		unsigned int namelen;
1679
1680		namelen = sprintf(buffer + *pos, "%s\n",
1681				  i + sizeof(struct list_head));
1682		if (*pos + namelen > length) {
1683			/* Stop iterating */
1684			return 1;
1685		}
1686		*pos += namelen;
1687	}
1688	return 0;
1689}
1690
/* get_info handler for /proc/net/ip_tables_names: one registered
 * table name per line.  Returns 0 (empty) if the mutex grab is
 * interrupted. */
static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	/* print_name() fills `buffer' and stops when full. */
	LIST_FIND(&ipt_tables, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start=(char *)((unsigned long)count-offset);
	return pos;
}
1708
/* get_info handler for /proc/net/ip_tables_targets: one registered
 * target name per line. */
static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_target, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}
1725
/* get_info handler for /proc/net/ip_tables_matches: one registered
 * match name per line. */
static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_match, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}
1742
/* /proc/net entries created in init() and removed in fini();
 * NULL-name sentinel terminates the array. */
static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
  { "ip_tables_targets", ipt_get_targets },
  { "ip_tables_matches", ipt_get_matches },
  { NULL, NULL} };
1748#endif /*CONFIG_PROC_FS*/
1749
/* Module init: seed the target/match lists with the built-ins,
 * register the sockopt interface, then create the /proc entries.
 * Unwinds everything already done on any failure. */
static int __init init(void)
{
	int ret;

	/* Noone else will be downing sem now, so we won't sleep */
	down(&ipt_mutex);
	/* Built-in targets and protocol matches. */
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
	struct proc_dir_entry *proc;
	int i;

	/* On failure, remove the proc entries created so far and undo
	 * the sockopt registration before bailing out. */
	for (i = 0; ipt_proc_entry[i].name; i++) {
		proc = proc_net_create(ipt_proc_entry[i].name, 0,
				       ipt_proc_entry[i].get_info);
		if (!proc) {
			while (--i >= 0)
				proc_net_remove(ipt_proc_entry[i].name);
			nf_unregister_sockopt(&ipt_sockopts);
			return -ENOMEM;
		}
		proc->owner = THIS_MODULE;
	}
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}
1792
/* Module exit: tear down in reverse of init() — sockopt interface
 * first, then the /proc entries. */
static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);
#ifdef CONFIG_PROC_FS
	{
	int i;
	for (i = 0; ipt_proc_entry[i].name; i++)
		proc_net_remove(ipt_proc_entry[i].name);
	}
#endif
}
1804
/* Public interface used by the per-table modules (iptable_filter etc.)
 * and by match/target extension modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
1816