1/* Minor modifications to fit on compatibility framework: 2 Rusty.Russell@rustcorp.com.au 3*/ 4 5/* 6 * This code is heavily based on the code on the old ip_fw.c code; see below for 7 * copyrights and attributions of the old code. This code is basically GPL. 8 * 9 * 15-Aug-1997: Major changes to allow graphs for firewall rules. 10 * Paul Russell <Paul.Russell@rustcorp.com.au> and 11 * Michael Neuling <Michael.Neuling@rustcorp.com.au> 12 * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP). 13 * Added explicit RETURN from chains. 14 * Removed TOS mangling (done in ipchains 1.0.1). 15 * Fixed read & reset bug by reworking proc handling. 16 * Paul Russell <Paul.Russell@rustcorp.com.au> 17 * 28-Sep-1997: Added packet marking for net sched code. 18 * Removed fw_via comparisons: all done on device name now, 19 * similar to changes in ip_fw.c in DaveM's CVS970924 tree. 20 * Paul Russell <Paul.Russell@rustcorp.com.au> 21 * 2-Nov-1997: Moved types across to __u16, etc. 22 * Added inverse flags. 23 * Fixed fragment bug (in args to port_match). 24 * Changed mark to only one flag (MARKABS). 25 * 21-Nov-1997: Added ability to test ICMP code. 26 * 19-Jan-1998: Added wildcard interfaces. 27 * 6-Feb-1998: Merged 2.0 and 2.1 versions. 28 * Initialised ip_masq for 2.0.x version. 29 * Added explicit NETLINK option for 2.1.x version. 30 * Added packet and byte counters for policy matches. 31 * 26-Feb-1998: Fixed race conditions, added SMP support. 32 * 18-Mar-1998: Fix SMP, fix race condition fix. 33 * 1-May-1998: Remove caching of device pointer. 34 * 12-May-1998: Allow tiny fragment case for TCP/UDP. 35 * 15-May-1998: Treat short packets as fragments, don't just block. 36 * 3-Jan-1999: Fixed serious procfs security hole -- users should never 37 * be allowed to view the chains! 38 * Marc Santoro <ultima@snicker.emoti.com> 39 * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash 40 * during dump_packet. --RR. 
41 * 19-May-1999: Star Wars: The Phantom Menace opened. Rule num 42 * printed in log (modified from Michael Hasenstein's patch). 43 * Added SYN in log message. --RR 44 * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998. 45 * John McDonald <jm@dataprotect.com> 46 * Thomas Lopatic <tl@dataprotect.com> 47 */ 48 49/* 50 * 51 * The origina Linux port was done Alan Cox, with changes/fixes from 52 * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan 53 * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others. 54 * 55 * Copyright from the original FreeBSD version follows: 56 * 57 * Copyright (c) 1993 Daniel Boulet 58 * Copyright (c) 1994 Ugen J.S.Antsilevich 59 * 60 * Redistribution and use in source forms, with and without modification, 61 * are permitted provided that this entire comment appears intact. 62 * 63 * Redistribution in binary form may occur without any restrictions. 64 * Obviously, it would be nice if you gave credit where credit is due 65 * but requiring it would be too onerous. 66 * 67 * This software is provided ``AS IS'' without any warranties of any kind. 
*/ 68 69#include <linux/config.h> 70 71#include <asm/uaccess.h> 72#include <asm/system.h> 73#include <linux/types.h> 74#include <linux/sched.h> 75#include <linux/string.h> 76#include <linux/errno.h> 77#include <linux/module.h> 78 79#include <linux/socket.h> 80#include <linux/sockios.h> 81#include <linux/in.h> 82#include <linux/inet.h> 83#include <linux/netdevice.h> 84#include <linux/icmp.h> 85#include <linux/udp.h> 86#include <net/ip.h> 87#include <net/protocol.h> 88#include <net/route.h> 89#include <net/tcp.h> 90#include <net/udp.h> 91#include <net/sock.h> 92#include <net/icmp.h> 93#include <linux/netlink.h> 94#include <linux/netfilter.h> 95#include <linux/netfilter_ipv4/compat_firewall.h> 96#include <linux/netfilter_ipv4/ipchains_core.h> 97 98#include <net/checksum.h> 99#include <linux/proc_fs.h> 100#include <linux/stat.h> 101 102/* Understanding locking in this code: (thanks to Alan Cox for using 103 * little words to explain this to me). -- PR 104 * 105 * In UP, there can be two packets traversing the chains: 106 * 1) A packet from the current userspace context 107 * 2) A packet off the bh handlers (timer or net). 108 * 109 * For SMP (kernel v2.1+), multiply this by # CPUs. 110 * 111 * [Note that this in not correct for 2.2 - because the socket code always 112 * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs) 113 * only run on one CPU at a time. This will probably change for 2.3. 114 * It is still good to use spinlocks because that avoids the global cli() 115 * for updating the tables, which is rather costly in SMP kernels -AK] 116 * 117 * This means counters and backchains can get corrupted if no precautions 118 * are taken. 119 * 120 * To actually alter a chain on UP, we need only do a cli(), as this will 121 * stop a bh handler firing, as we are in the current userspace context 122 * (coming from a setsockopt()). 123 * 124 * On SMP, we need a write_lock_irqsave(), which is a simple cli() in 125 * UP. 
 *
 * For backchains and counters, we use an array, indexed by
 * [cpu_number_map(smp_processor_id())*2 + !in_interrupt()]; the array is of
 * size [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
 * confident of uniqueness, we modify counters even though we only
 * have a read lock (to read the counters, you need a write lock,
 * though).  */

/* Why I didn't use straight locking... -- PR
 *
 * The backchains can be separated out of the ip_chains structure, and
 * allocated as needed inside ip_fw_check().
 *
 * The counters, however, can't.  Trying to lock these means blocking
 * interrupts every time we want to access them.  This would suck HARD
 * performance-wise.  Not locking them leads to possible corruption,
 * made worse on 32-bit machines (counters are 64-bit).  */

/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/
/*#define DEBUG_IP_FIREWALL_LOCKING*/

#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
static struct sock *ipfwsk;
#endif

/* SLOT_NUMBER() picks the per-context slot used to index the counters[]
 * and reent[] arrays: two slots per CPU, the odd one when we are NOT in
 * interrupt context and the even one when we are. */
#ifdef CONFIG_SMP
#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
#else /* !SMP */
#define SLOT_NUMBER() (!in_interrupt())
#endif /* CONFIG_SMP */
#define NUM_SLOTS (smp_num_cpus*2)

/* Allocation sizes: the fixed structure plus one trailing array element
 * per slot (see the zero-length arrays at the end of each structure). */
#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
				+ NUM_SLOTS*sizeof(struct ip_reent))
#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \
				    + NUM_SLOTS*sizeof(struct ip_counters))

/* Lock debugging.  With DEBUG_IP_FIREWALL_LOCKING defined, fwc_rlocks and
 * fwc_wlocks are bitmasks with one bit per slot recording which slots
 * believe they hold the read/write lock; the macros below set, clear and
 * check those bits and printk on a mismatch.  Without it, every macro
 * expands to nothing and its argument is never evaluated (which is why
 * fwc_rlocks/fwc_wlocks need not exist in that configuration). */
#ifdef DEBUG_IP_FIREWALL_LOCKING
static unsigned int fwc_rlocks, fwc_wlocks;
#define FWC_DEBUG_LOCK(d)			\
do {						\
	FWC_DONT_HAVE_LOCK(d);			\
	d |= (1 << SLOT_NUMBER());		\
} while (0)

#define FWC_DEBUG_UNLOCK(d)			\
do {						\
	FWC_HAVE_LOCK(d);			\
	d &= ~(1 << SLOT_NUMBER());		\
} while (0)

#define FWC_DONT_HAVE_LOCK(d)					\
do {								\
	if ((d) & (1 << SLOT_NUMBER()))				\
		printk("%s:%i: Got lock on %i already!\n",	\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while(0)

#define FWC_HAVE_LOCK(d)					\
do {								\
	if (!((d) & (1 << SLOT_NUMBER())))			\
		printk("%s:%i:No lock on %i!\n",		\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while (0)

#else
#define FWC_DEBUG_LOCK(d) do { } while(0)
#define FWC_DEBUG_UNLOCK(d) do { } while(0)
#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
#define FWC_HAVE_LOCK(d) do { } while(0)
#endif /*DEBUG_IP_FIREWALL_LOCKING*/

/* Lock wrappers: do the debug bookkeeping above, then take or release
 * the rwlock `l' (the _IRQ variants also save/restore flags in `f'). */
#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)

struct ip_chain;

/* One set of counters; rules and chains each keep one of these per slot. */
struct ip_counters
{
	__u64 pcnt, bcnt;		/* Packet and byte counters */
};

/* Kernel-side representation of one firewall rule. */
struct ip_fwkernel
{
	struct ip_fw ipfw;
	struct ip_fwkernel *next;	/* where to go next if current
					 * rule doesn't match */
	struct ip_chain *branch;	/* which branch to jump to if
					 * current rule matches */
	int simplebranch;		/* Use this if branch == NULL */
	struct ip_counters counters[0]; /* Actually several of these */
};

/* Per-slot traversal state of a chain: where to resume once the chain
 * has been walked to its end, plus the chain's own (policy) counters. */
struct ip_reent
{
	struct ip_chain *prevchain;	/* Pointer to referencing chain */
	struct ip_fwkernel *prevrule;	/* Pointer to referencing rule */
	struct ip_counters counters;
};

/* A named chain of rules; chains are linked together through `next'. */
struct ip_chain
{
	ip_chainlabel label;	    /* Defines the label for each block */
	struct ip_chain *next;	    /* Pointer to next block */
	struct ip_fwkernel *chain;  /* Pointer to first rule in block */
	__u32 refcount;		    /* Number of references to block */
	int policy;		    /* Default rule for chain.  Only
				     * used in built in chains */
	struct ip_reent reent[0];   /* Actually several of these */
};

/*
 * Implement IP packet firewall
 */

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* Lock around ip_fw_chains linked list structure */
rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;

/* Head of linked list of fw rules */
static struct ip_chain *ip_fw_chains;

/* The three built-in chains are the first three entries of the list. */
#define IP_FW_INPUT_CHAIN ip_fw_chains
#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)

/* Returns 1 if the port is matched by the range, 0 otherwise.
 *
 * For a non-fragment the result is (min <= port <= max), flipped when
 * `invert' is set.  For a fragment the port is unknown, so the test only
 * succeeds for the full range 0-0xFFFF; note that `invert' is
 * deliberately not applied in that case. */
extern inline int port_match(__u16 min, __u16 max, __u16 port,
			     int frag, int invert)
{
	if (frag) /* Fragments fail ANY port test. */
		return (min == 0 && max == 0xFFFF);
	else return (port >= min && port <= max) ^ invert;
}

/* Returns whether matches rule or not.
 *
 * f:		rule to test against
 * ifname:	name of the interface the packet is associated with
 * ip:		IP header of the packet
 * tcpsyn:	nonzero if the caller classified the packet as a TCP SYN
 * src_port,
 * dst_port:	ports as extracted by the caller (host byte order)
 * isfrag:	nonzero if the packet must be treated as a fragment
 *
 * Returns 1 if every test in the rule passes, 0 on the first mismatch.
 * Each test can be inverted individually through f->ipfw.fw_invflg.
 *
 * NOTE(review): isfrag is a `char'.  A caller that passes a raw
 * fragment offset (wider than 8 bits) gets it truncated on the way in,
 * so callers should pass a 0/1 value here. */
static int ip_rule_match(struct ip_fwkernel *f,
			 const char *ifname,
			 struct iphdr *ip,
			 char tcpsyn,
			 __u16 src_port, __u16 dst_port,
			 char isfrag)
{
/* XOR the outcome of a (mis)match test with the rule's inversion flag. */
#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
	/*
	 *	This is a bit simpler as we don't have to walk
	 *	an interface chain as you do in BSD - same logic
	 *	however.
	 */

	if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
		  IP_FW_INV_SRCIP)
	    || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
		     IP_FW_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
			f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
			f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
			f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
			f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/*
	 *	Look for a VIA device match
	 */
	if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
	    /* Wildcard: only the rule's name is compared, as a prefix. */
	    if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
			      strlen(f->ipfw.fw_vianame)) != 0,
		      IP_FW_INV_VIA)) {
		dprintf("Wildcard interface mismatch.%s\n",
			f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
		return 0;	/* Mismatch */
	    }
	}
	else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
		       IP_FW_INV_VIA)) {
	    dprintf("Interface name does not match.%s\n",
		    f->ipfw.fw_invflg & IP_FW_INV_VIA
		    ? " (INV)" : "");
	    return 0;	/* Mismatch */
	}

	/*
	 *	Ok the chain addresses match.
	 */

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	/* Fragment NEVER passes a SYN test, even an inverted one. */
	if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
	    || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
		dprintf("Rule requires SYN and packet has no SYN.%s\n",
			f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
		return 0;
	}

	if (f->ipfw.fw_proto) {
		/*
		 *	Specific firewall - packet's protocol
		 *	must match firewall's.
		 */

		if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
			dprintf("Packet protocol %hi does not match %hi.%s\n",
				ip->protocol, f->ipfw.fw_proto,
				f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
			return 0;
		}

		/* For non TCP/UDP/ICMP, port range is max anyway. */
		if (!port_match(f->ipfw.fw_spts[0],
				f->ipfw.fw_spts[1],
				src_port, isfrag,
				!!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
		    || !port_match(f->ipfw.fw_dpts[0],
				   f->ipfw.fw_dpts[1],
				   dst_port, isfrag,
				   !!(f->ipfw.fw_invflg
				      &IP_FW_INV_DSTPT))) {
		    dprintf("Port match failed.\n");
		    return 0;
		}
	}

	dprintf("Match succeeded.\n");
	return 1;
}

/* Returns a printable name for a rule's target: the label of the chain
 * it branches to, or the label corresponding to its simple verdict when
 * `branch' is NULL ("UNKNOWN" for an unrecognised value). */
static const char *branchname(struct ip_chain *branch,int simplebranch)
{
	if (branch)
		return branch->label;
	switch (simplebranch)
	{
	case FW_BLOCK: return IP_FW_LABEL_BLOCK;
	case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
	case FW_REJECT: return IP_FW_LABEL_REJECT;
	case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
	case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
	case FW_SKIP: return "-";
	case FW_SKIP+1: return IP_FW_LABEL_RETURN;
	default:
		return "UNKNOWN";
	}
}

/*
 * VERY ugly piece of code which actually
 * makes kernel printf for matching packets...
403 */ 404static void dump_packet(const struct iphdr *ip, 405 const char *ifname, 406 struct ip_fwkernel *f, 407 const ip_chainlabel chainlabel, 408 __u16 src_port, 409 __u16 dst_port, 410 unsigned int count, 411 int syn) 412{ 413 __u32 *opt = (__u32 *) (ip + 1); 414 int opti; 415 416 if (f) { 417 printk(KERN_INFO "Packet log: %s ",chainlabel); 418 printk("%s ",branchname(f->branch,f->simplebranch)); 419 if (f->simplebranch==FW_REDIRECT) 420 printk("%d ",f->ipfw.fw_redirpt); 421 } 422 423 printk("%s PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu" 424 " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", 425 ifname, ip->protocol, NIPQUAD(ip->saddr), 426 src_port, NIPQUAD(ip->daddr), 427 dst_port, 428 ntohs(ip->tot_len), ip->tos, ntohs(ip->id), 429 ntohs(ip->frag_off), ip->ttl); 430 431 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) 432 printk(" O=0x%8.8X", *opt++); 433 printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count); 434} 435 436/* function for checking chain labels for user space. */ 437static int check_label(ip_chainlabel label) 438{ 439 unsigned int i; 440 /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */ 441 for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++) 442 if (label[i] == '\0') return 1; 443 444 return 0; 445} 446 447/* This function returns a pointer to the first chain with a label 448 * that matches the one given. */ 449static struct ip_chain *find_label(ip_chainlabel label) 450{ 451 struct ip_chain *tmp; 452 FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks); 453 for (tmp = ip_fw_chains; tmp; tmp = tmp->next) 454 if (strcmp(tmp->label,label) == 0) 455 break; 456 return tmp; 457} 458 459/* This function returns a boolean which when true sets answer to one 460 of the FW_*. 
*/ 461static int find_special(ip_chainlabel label, int *answer) 462{ 463 if (label[0] == '\0') { 464 *answer = FW_SKIP; /* => pass-through rule */ 465 return 1; 466 } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) { 467 *answer = FW_ACCEPT; 468 return 1; 469 } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) { 470 *answer = FW_BLOCK; 471 return 1; 472 } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) { 473 *answer = FW_REJECT; 474 return 1; 475 } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) { 476 *answer = FW_REDIRECT; 477 return 1; 478 } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) { 479 *answer = FW_MASQUERADE; 480 return 1; 481 } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) { 482 *answer = FW_SKIP+1; 483 return 1; 484 } else { 485 return 0; 486 } 487} 488 489/* This function cleans up the prevchain and prevrule. If the verbose 490 * flag is set then he names of the chains will be printed as it 491 * cleans up. */ 492static void cleanup(struct ip_chain *chain, 493 const int verbose, 494 unsigned int slot) 495{ 496 struct ip_chain *tmpchain = chain->reent[slot].prevchain; 497 if (verbose) 498 printk(KERN_ERR "Chain backtrace: "); 499 while (tmpchain) { 500 if (verbose) 501 printk("%s<-",chain->label); 502 chain->reent[slot].prevchain = NULL; 503 chain = tmpchain; 504 tmpchain = chain->reent[slot].prevchain; 505 } 506 if (verbose) 507 printk("%s\n",chain->label); 508} 509 510static inline int 511ip_fw_domatch(struct ip_fwkernel *f, 512 struct iphdr *ip, 513 const char *rif, 514 const ip_chainlabel label, 515 struct sk_buff *skb, 516 unsigned int slot, 517 __u16 src_port, __u16 dst_port, 518 unsigned int count, 519 int tcpsyn) 520{ 521 f->counters[slot].bcnt+=ntohs(ip->tot_len); 522 f->counters[slot].pcnt++; 523 if (f->ipfw.fw_flg & IP_FW_F_PRN) { 524 dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn); 525 } 526 ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor; 527 528/* This functionality is useless in stock 2.0.x series, but we 
don't 529 * discard the mark thing altogether, to avoid breaking ipchains (and, 530 * more importantly, the ipfwadm wrapper) --PR */ 531 if (f->ipfw.fw_flg & IP_FW_F_MARKABS) { 532 skb->nfmark = f->ipfw.fw_mark; 533 } else { 534 skb->nfmark += f->ipfw.fw_mark; 535 } 536 if (f->ipfw.fw_flg & IP_FW_F_NETLINK) { 537#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) 538 size_t len = min_t(unsigned int, f->ipfw.fw_outputsize, ntohs(ip->tot_len)) 539 + sizeof(__u32) + sizeof(skb->nfmark) + IFNAMSIZ; 540 struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC); 541 542 duprintf("Sending packet out NETLINK (length = %u).\n", 543 (unsigned int)len); 544 if (outskb) { 545 /* Prepend length, mark & interface */ 546 skb_put(outskb, len); 547 *((__u32 *)outskb->data) = (__u32)len; 548 *((__u32 *)(outskb->data+sizeof(__u32))) = skb->nfmark; 549 strcpy(outskb->data+sizeof(__u32)*2, rif); 550 memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip, 551 len-(sizeof(__u32)*2+IFNAMSIZ)); 552 netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_ATOMIC); 553 } 554 else { 555#endif 556 if (net_ratelimit()) 557 printk(KERN_WARNING "ip_fw: packet drop due to " 558 "netlink failure\n"); 559 return 0; 560#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) 561 } 562#endif 563 } 564 return 1; 565} 566 567/* 568 * Returns one of the generic firewall policies, like FW_ACCEPT. 569 * 570 * The testing is either false for normal firewall mode or true for 571 * user checking mode (counters are not updated, TOS & mark not done). 
572 */ 573static int 574ip_fw_check(struct iphdr *ip, 575 const char *rif, 576 __u16 *redirport, 577 struct ip_chain *chain, 578 struct sk_buff *skb, 579 unsigned int slot, 580 int testing) 581{ 582 struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); 583 struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); 584 struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); 585 __u32 src, dst; 586 __u16 src_port = 0xFFFF, dst_port = 0xFFFF; 587 char tcpsyn=0; 588 __u16 offset; 589 unsigned char oldtos; 590 struct ip_fwkernel *f; 591 int ret = FW_SKIP+2; 592 unsigned int count; 593 594 /* We handle fragments by dealing with the first fragment as 595 * if it was a normal packet. All other fragments are treated 596 * normally, except that they will NEVER match rules that ask 597 * things we don't know, ie. tcp syn flag or ports). If the 598 * rule is also a fragment-specific rule, non-fragments won't 599 * match it. */ 600 601 offset = ntohs(ip->frag_off) & IP_OFFSET; 602 603 /* 604 * Don't allow a fragment of TCP 8 bytes in. Nobody 605 * normal causes this. Its a cracker trying to break 606 * in by doing a flag overwrite to pass the direction 607 * checks. 608 */ 609 if (offset == 1 && ip->protocol == IPPROTO_TCP) { 610 if (!testing && net_ratelimit()) { 611 printk("Suspect TCP fragment.\n"); 612 dump_packet(ip,rif,NULL,NULL,0,0,0,0); 613 } 614 return FW_BLOCK; 615 } 616 617 /* If we can't investigate ports, treat as fragment. It's 618 * either a trucated whole packet, or a truncated first 619 * fragment, or a TCP first fragment of length 8-15, in which 620 * case the above rule stops reassembly. 
621 */ 622 if (offset == 0) { 623 unsigned int size_req; 624 switch (ip->protocol) { 625 case IPPROTO_TCP: 626 /* Don't care about things past flags word */ 627 size_req = 16; 628 break; 629 630 case IPPROTO_UDP: 631 case IPPROTO_ICMP: 632 size_req = 8; 633 break; 634 635 default: 636 size_req = 0; 637 } 638 639 /* If it is a truncated first fragment then it can be 640 * used to rewrite port information, and thus should 641 * be blocked. 642 */ 643 if (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req) { 644 if (!testing && net_ratelimit()) { 645 printk("Suspect short first fragment.\n"); 646 dump_packet(ip,rif,NULL,NULL,0,0,0,0); 647 } 648 return FW_BLOCK; 649 } 650 } 651 652 src = ip->saddr; 653 dst = ip->daddr; 654 oldtos = ip->tos; 655 656 /* 657 * If we got interface from which packet came 658 * we can use the address directly. Linux 2.1 now uses address 659 * chains per device too, but unlike BSD we first check if the 660 * incoming packet matches a device address and the routing 661 * table before calling the firewall. 662 */ 663 664 dprintf("Packet "); 665 switch(ip->protocol) 666 { 667 case IPPROTO_TCP: 668 dprintf("TCP "); 669 if (!offset) { 670 src_port=ntohs(tcp->source); 671 dst_port=ntohs(tcp->dest); 672 673 /* Connection initilisation can only 674 * be made when the syn bit is set and 675 * neither of the ack or reset is 676 * set. 
*/ 677 if(tcp->syn && !(tcp->ack || tcp->rst)) 678 tcpsyn=1; 679 } 680 break; 681 case IPPROTO_UDP: 682 dprintf("UDP "); 683 if (!offset) { 684 src_port=ntohs(udp->source); 685 dst_port=ntohs(udp->dest); 686 } 687 break; 688 case IPPROTO_ICMP: 689 if (!offset) { 690 src_port=(__u16)icmp->type; 691 dst_port=(__u16)icmp->code; 692 } 693 dprintf("ICMP "); 694 break; 695 default: 696 dprintf("p=%d ",ip->protocol); 697 break; 698 } 699#ifdef DEBUG_IP_FIREWALL 700 print_ip(ip->saddr); 701 702 if (offset) 703 dprintf(":fragment (%i) ", ((int)offset)<<2); 704 else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP 705 || ip->protocol==IPPROTO_ICMP) 706 dprintf(":%hu:%hu", src_port, dst_port); 707 dprintf("\n"); 708#endif 709 710 if (!testing) FWC_READ_LOCK(&ip_fw_lock); 711 else FWC_HAVE_LOCK(fwc_rlocks); 712 713 f = chain->chain; 714 do { 715 count = 0; 716 for (; f; f = f->next) { 717 count++; 718 if (ip_rule_match(f,rif,ip, 719 tcpsyn,src_port,dst_port,offset)) { 720 if (!testing 721 && !ip_fw_domatch(f, ip, rif, chain->label, 722 skb, slot, 723 src_port, dst_port, 724 count, tcpsyn)) { 725 ret = FW_BLOCK; 726 cleanup(chain, 0, slot); 727 goto out; 728 } 729 break; 730 } 731 } 732 if (f) { 733 if (f->branch) { 734 /* Do sanity check to see if we have 735 * already set prevchain and if so we 736 * must be in a loop */ 737 if (f->branch->reent[slot].prevchain) { 738 if (!testing) { 739 printk(KERN_ERR 740 "IP firewall: " 741 "Loop detected " 742 "at `%s'.\n", 743 f->branch->label); 744 cleanup(chain, 1, slot); 745 ret = FW_BLOCK; 746 } else { 747 cleanup(chain, 0, slot); 748 ret = FW_SKIP+1; 749 } 750 } 751 else { 752 f->branch->reent[slot].prevchain 753 = chain; 754 f->branch->reent[slot].prevrule 755 = f->next; 756 chain = f->branch; 757 f = chain->chain; 758 } 759 } 760 else if (f->simplebranch == FW_SKIP) 761 f = f->next; 762 else if (f->simplebranch == FW_SKIP+1) { 763 /* Just like falling off the chain */ 764 goto fall_off_chain; 765 } else { 766 
cleanup(chain, 0, slot); 767 ret = f->simplebranch; 768 } 769 } /* f == NULL */ 770 else { 771 fall_off_chain: 772 if (chain->reent[slot].prevchain) { 773 struct ip_chain *tmp = chain; 774 f = chain->reent[slot].prevrule; 775 chain = chain->reent[slot].prevchain; 776 tmp->reent[slot].prevchain = NULL; 777 } 778 else { 779 ret = chain->policy; 780 if (!testing) { 781 chain->reent[slot].counters.pcnt++; 782 chain->reent[slot].counters.bcnt 783 += ntohs(ip->tot_len); 784 } 785 } 786 } 787 } while (ret == FW_SKIP+2); 788 789 out: 790 if (!testing) FWC_READ_UNLOCK(&ip_fw_lock); 791 792 /* Recalculate checksum if not going to reject, and TOS changed. */ 793 if (ip->tos != oldtos 794 && ret != FW_REJECT && ret != FW_BLOCK 795 && !testing) 796 ip_send_check(ip); 797 798 if (ret == FW_REDIRECT && redirport) { 799 if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) { 800 /* Wildcard redirection. 801 * Note that redirport will become 802 * 0xFFFF for non-TCP/UDP packets. 803 */ 804 *redirport = htons(dst_port); 805 } 806 } 807 808#ifdef DEBUG_ALLOW_ALL 809 return (testing ? ret : FW_ACCEPT); 810#else 811 return ret; 812#endif 813} 814 815/* Must have write lock & interrupts off for any of these */ 816 817/* This function sets all the byte counters in a chain to zero. 
The 818 * input is a pointer to the chain required for zeroing */ 819static int zero_fw_chain(struct ip_chain *chainptr) 820{ 821 struct ip_fwkernel *i; 822 823 FWC_HAVE_LOCK(fwc_wlocks); 824 for (i = chainptr->chain; i; i = i->next) 825 memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); 826 return 0; 827} 828 829static int clear_fw_chain(struct ip_chain *chainptr) 830{ 831 struct ip_fwkernel *i= chainptr->chain; 832 833 FWC_HAVE_LOCK(fwc_wlocks); 834 chainptr->chain=NULL; 835 836 while (i) { 837 struct ip_fwkernel *tmp = i->next; 838 if (i->branch) 839 i->branch->refcount--; 840 kfree(i); 841 i = tmp; 842 MOD_DEC_USE_COUNT; 843 } 844 return 0; 845} 846 847static int replace_in_chain(struct ip_chain *chainptr, 848 struct ip_fwkernel *frwl, 849 __u32 position) 850{ 851 struct ip_fwkernel *f = chainptr->chain; 852 853 FWC_HAVE_LOCK(fwc_wlocks); 854 855 while (--position && f != NULL) f = f->next; 856 if (f == NULL) 857 return EINVAL; 858 859 if (f->branch) f->branch->refcount--; 860 if (frwl->branch) frwl->branch->refcount++; 861 862 frwl->next = f->next; 863 memcpy(f,frwl,sizeof(struct ip_fwkernel)); 864 kfree(frwl); 865 return 0; 866} 867 868static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule) 869{ 870 struct ip_fwkernel *i; 871 872 FWC_HAVE_LOCK(fwc_wlocks); 873 /* Special case if no rules already present */ 874 if (chainptr->chain == NULL) { 875 876 /* If pointer writes are atomic then turning off 877 * interrupts is not necessary. */ 878 chainptr->chain = rule; 879 if (rule->branch) rule->branch->refcount++; 880 goto append_successful; 881 } 882 883 /* Find the rule before the end of the chain */ 884 for (i = chainptr->chain; i->next; i = i->next); 885 i->next = rule; 886 if (rule->branch) rule->branch->refcount++; 887 888append_successful: 889 MOD_INC_USE_COUNT; 890 return 0; 891} 892 893/* This function inserts a rule at the position of position in the 894 * chain refenced by chainptr. 
If position is 1 then this rule will 895 * become the new rule one. */ 896static int insert_in_chain(struct ip_chain *chainptr, 897 struct ip_fwkernel *frwl, 898 __u32 position) 899{ 900 struct ip_fwkernel *f = chainptr->chain; 901 902 FWC_HAVE_LOCK(fwc_wlocks); 903 /* special case if the position is number 1 */ 904 if (position == 1) { 905 frwl->next = chainptr->chain; 906 if (frwl->branch) frwl->branch->refcount++; 907 chainptr->chain = frwl; 908 goto insert_successful; 909 } 910 position--; 911 while (--position && f != NULL) f = f->next; 912 if (f == NULL) 913 return EINVAL; 914 if (frwl->branch) frwl->branch->refcount++; 915 frwl->next = f->next; 916 917 f->next = frwl; 918 919insert_successful: 920 MOD_INC_USE_COUNT; 921 return 0; 922} 923 924/* This function deletes the a rule from a given rulenum and chain. 925 * With rulenum = 1 is the first rule is deleted. */ 926 927static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum) 928{ 929 struct ip_fwkernel *i=chainptr->chain,*tmp; 930 931 FWC_HAVE_LOCK(fwc_wlocks); 932 933 if (!chainptr->chain) 934 return ENOENT; 935 936 /* Need a special case for the first rule */ 937 if (rulenum == 1) { 938 /* store temp to allow for freeing up of memory */ 939 tmp = chainptr->chain; 940 if (chainptr->chain->branch) chainptr->chain->branch->refcount--; 941 chainptr->chain = chainptr->chain->next; 942 kfree(tmp); /* free memory that is now unused */ 943 } else { 944 rulenum--; 945 while (--rulenum && i->next ) i = i->next; 946 if (!i->next) 947 return ENOENT; 948 tmp = i->next; 949 if (i->next->branch) 950 i->next->branch->refcount--; 951 i->next = i->next->next; 952 kfree(tmp); 953 } 954 955 MOD_DEC_USE_COUNT; 956 return 0; 957} 958 959 960/* This function deletes the a rule from a given rule and chain. 961 * The rule that is deleted is the first occursance of that rule. 
*/ 962static int del_rule_from_chain(struct ip_chain *chainptr, 963 struct ip_fwkernel *frwl) 964{ 965 struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ; 966 int was_found; 967 968 FWC_HAVE_LOCK(fwc_wlocks); 969 970 /* Sure, we should compare marks, but since the `ipfwadm' 971 * script uses it for an unholy hack... well, life is easier 972 * this way. We also mask it out of the flags word. --PR */ 973 for (ltmp=NULL, was_found=0; 974 !was_found && ftmp != NULL; 975 ltmp = ftmp,ftmp = ftmp->next) { 976 if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr 977 || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr 978 || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr 979 || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr 980 || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) 981 != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS)) 982 || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg 983 || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto 984 || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt 985 || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0] 986 || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1] 987 || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0] 988 || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1] 989 || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) { 990 duprintf("del_rule_from_chain: mismatch:" 991 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u " 992 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u " 993 "mark:%u/%u " 994 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu " 995 "outputsize:%hu-%hu\n", 996 ftmp->ipfw.fw_src.s_addr, 997 frwl->ipfw.fw_src.s_addr, 998 ftmp->ipfw.fw_dst.s_addr, 999 frwl->ipfw.fw_dst.s_addr, 1000 ftmp->ipfw.fw_smsk.s_addr, 1001 frwl->ipfw.fw_smsk.s_addr, 1002 ftmp->ipfw.fw_dmsk.s_addr, 1003 frwl->ipfw.fw_dmsk.s_addr, 1004 ftmp->ipfw.fw_flg, 1005 frwl->ipfw.fw_flg, 1006 ftmp->ipfw.fw_invflg, 1007 frwl->ipfw.fw_invflg, 1008 ftmp->ipfw.fw_proto, 1009 frwl->ipfw.fw_proto, 1010 ftmp->ipfw.fw_mark, 1011 frwl->ipfw.fw_mark, 1012 ftmp->ipfw.fw_spts[0], 1013 frwl->ipfw.fw_spts[0], 1014 
ftmp->ipfw.fw_spts[1], 1015 frwl->ipfw.fw_spts[1], 1016 ftmp->ipfw.fw_dpts[0], 1017 frwl->ipfw.fw_dpts[0], 1018 ftmp->ipfw.fw_dpts[1], 1019 frwl->ipfw.fw_dpts[1], 1020 ftmp->ipfw.fw_outputsize, 1021 frwl->ipfw.fw_outputsize); 1022 continue; 1023 } 1024 1025 if (strncmp(ftmp->ipfw.fw_vianame, 1026 frwl->ipfw.fw_vianame, 1027 IFNAMSIZ)) { 1028 duprintf("del_rule_from_chain: if mismatch: %s/%s\n", 1029 ftmp->ipfw.fw_vianame, 1030 frwl->ipfw.fw_vianame); 1031 continue; 1032 } 1033 if (ftmp->branch != frwl->branch) { 1034 duprintf("del_rule_from_chain: branch mismatch: " 1035 "%s/%s\n", 1036 ftmp->branch?ftmp->branch->label:"(null)", 1037 frwl->branch?frwl->branch->label:"(null)"); 1038 continue; 1039 } 1040 if (ftmp->branch == NULL 1041 && ftmp->simplebranch != frwl->simplebranch) { 1042 duprintf("del_rule_from_chain: simplebranch mismatch: " 1043 "%i/%i\n", 1044 ftmp->simplebranch, frwl->simplebranch); 1045 continue; 1046 } 1047 was_found = 1; 1048 if (ftmp->branch) 1049 ftmp->branch->refcount--; 1050 if (ltmp) 1051 ltmp->next = ftmp->next; 1052 else 1053 chainptr->chain = ftmp->next; 1054 kfree(ftmp); 1055 MOD_DEC_USE_COUNT; 1056 break; 1057 } 1058 1059 if (was_found) 1060 return 0; 1061 else { 1062 duprintf("del_rule_from_chain: no matching rule found\n"); 1063 return EINVAL; 1064 } 1065} 1066 1067/* This function takes the label of a chain and deletes the first 1068 * chain with that name. No special cases required for the built in 1069 * chains as they have their refcount initilised to 1 so that they are 1070 * never deleted. 
*/ 1071static int del_chain(ip_chainlabel label) 1072{ 1073 struct ip_chain *tmp,*tmp2; 1074 1075 FWC_HAVE_LOCK(fwc_wlocks); 1076 /* Corner case: return EBUSY not ENOENT for first elem ("input") */ 1077 if (strcmp(label, ip_fw_chains->label) == 0) 1078 return EBUSY; 1079 1080 for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) 1081 if(strcmp(tmp->next->label,label) == 0) 1082 break; 1083 1084 tmp2 = tmp->next; 1085 if (!tmp2) 1086 return ENOENT; 1087 1088 if (tmp2->refcount) 1089 return EBUSY; 1090 1091 if (tmp2->chain) 1092 return ENOTEMPTY; 1093 1094 tmp->next = tmp2->next; 1095 kfree(tmp2); 1096 1097 MOD_DEC_USE_COUNT; 1098 return 0; 1099} 1100 1101/* This is a function to initilise a chain. Built in rules start with 1102 * refcount = 1 so that they cannot be deleted. User defined rules 1103 * start with refcount = 0 so they can be deleted. */ 1104static struct ip_chain *ip_init_chain(ip_chainlabel name, 1105 __u32 ref, 1106 int policy) 1107{ 1108 unsigned int i; 1109 struct ip_chain *label 1110 = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL); 1111 if (label == NULL) 1112 panic("Can't kmalloc for firewall chains.\n"); 1113 strcpy(label->label,name); 1114 label->next = NULL; 1115 label->chain = NULL; 1116 label->refcount = ref; 1117 label->policy = policy; 1118 for (i = 0; i < smp_num_cpus*2; i++) { 1119 label->reent[i].counters.pcnt = label->reent[i].counters.bcnt 1120 = 0; 1121 label->reent[i].prevchain = NULL; 1122 label->reent[i].prevrule = NULL; 1123 } 1124 1125 return label; 1126} 1127 1128/* This is a function for reating a new chain. 
The chains is not 1129 * created if a chain of the same name already exists */ 1130static int create_chain(ip_chainlabel label) 1131{ 1132 struct ip_chain *tmp; 1133 1134 if (!check_label(label)) 1135 return EINVAL; 1136 1137 FWC_HAVE_LOCK(fwc_wlocks); 1138 for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) 1139 if (strcmp(tmp->label,label) == 0) 1140 return EEXIST; 1141 1142 if (strcmp(tmp->label,label) == 0) 1143 return EEXIST; 1144 1145 tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is 1146 * zero since this is a 1147 * user defined chain * 1148 * and therefore can be 1149 * deleted */ 1150 MOD_INC_USE_COUNT; 1151 return 0; 1152} 1153 1154/* This function simply changes the policy on one of the built in 1155 * chains. checking must be done before this is call to ensure that 1156 * chainptr is pointing to one of the three possible chains */ 1157static int change_policy(struct ip_chain *chainptr, int policy) 1158{ 1159 FWC_HAVE_LOCK(fwc_wlocks); 1160 chainptr->policy = policy; 1161 return 0; 1162} 1163 1164/* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also 1165 * performs some checks in the structure. */ 1166static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno) 1167{ 1168 struct ip_fwkernel *fwkern; 1169 1170 if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) { 1171 duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n", 1172 fwuser->ipfw.fw_flg); 1173 *errno = EINVAL; 1174 return NULL; 1175 } 1176 1177#ifdef DEBUG_IP_FIREWALL_USER 1178 /* These are sanity checks that don't really matter. 1179 * We can get rid of these once testing is complete. 
1180 */ 1181 if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) 1182 && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) 1183 || fwuser->ipfw.fw_proto != IPPROTO_TCP)) { 1184 duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n"); 1185 *errno = EINVAL; 1186 return NULL; 1187 } 1188 1189 if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0 1190 && fwuser->ipfw.fw_redirpt != 0) { 1191 duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n"); 1192 *errno = EINVAL; 1193 return NULL; 1194 } 1195 1196 if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) 1197 && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)) 1198 || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) 1199 && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) { 1200 duprintf("convert_ipfw: Can't have INV flag if flag unset!\n"); 1201 *errno = EINVAL; 1202 return NULL; 1203 } 1204 1205 if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) 1206 && fwuser->ipfw.fw_spts[0] == 0 1207 && fwuser->ipfw.fw_spts[1] == 0xFFFF) 1208 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) 1209 && fwuser->ipfw.fw_dpts[0] == 0 1210 && fwuser->ipfw.fw_dpts[1] == 0xFFFF) 1211 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) 1212 && (fwuser->ipfw.fw_vianame)[0] == '\0') 1213 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP) 1214 && fwuser->ipfw.fw_smsk.s_addr == 0) 1215 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP) 1216 && fwuser->ipfw.fw_dmsk.s_addr == 0)) { 1217 duprintf("convert_ipfw: INV flag makes rule unmatchable!\n"); 1218 *errno = EINVAL; 1219 return NULL; 1220 } 1221 1222 if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG) 1223 && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG) 1224 && (fwuser->ipfw.fw_spts[0] != 0 1225 || fwuser->ipfw.fw_spts[1] != 0xFFFF 1226 || fwuser->ipfw.fw_dpts[0] != 0 1227 || fwuser->ipfw.fw_dpts[1] != 0xFFFF 1228 || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) { 1229 duprintf("convert_ipfw: Can't test ports or SYN with frag!\n"); 1230 *errno = EINVAL; 1231 return NULL; 1232 } 1233#endif 1234 1235 if ((fwuser->ipfw.fw_spts[0] != 0 1236 || fwuser->ipfw.fw_spts[1] 
!= 0xFFFF 1237 || fwuser->ipfw.fw_dpts[0] != 0 1238 || fwuser->ipfw.fw_dpts[1] != 0xFFFF) 1239 && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) 1240 || (fwuser->ipfw.fw_proto != IPPROTO_TCP 1241 && fwuser->ipfw.fw_proto != IPPROTO_UDP 1242 && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) { 1243 duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n"); 1244 *errno = EINVAL; 1245 return NULL; 1246 } 1247 1248 fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_ATOMIC); 1249 if (!fwkern) { 1250 duprintf("convert_ipfw: kmalloc failed!\n"); 1251 *errno = ENOMEM; 1252 return NULL; 1253 } 1254 memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw)); 1255 1256 if (!find_special(fwuser->label, &fwkern->simplebranch)) { 1257 fwkern->branch = find_label(fwuser->label); 1258 if (!fwkern->branch) { 1259 duprintf("convert_ipfw: chain doesn't exist `%s'.\n", 1260 fwuser->label); 1261 kfree(fwkern); 1262 *errno = ENOENT; 1263 return NULL; 1264 } else if (fwkern->branch == IP_FW_INPUT_CHAIN 1265 || fwkern->branch == IP_FW_FORWARD_CHAIN 1266 || fwkern->branch == IP_FW_OUTPUT_CHAIN) { 1267 duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n", 1268 fwuser->label); 1269 kfree(fwkern); 1270 *errno = ENOENT; 1271 return NULL; 1272 } 1273 } else 1274 fwkern->branch = NULL; 1275 memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); 1276 1277 /* Handle empty vianame by making it a wildcard */ 1278 if ((fwkern->ipfw.fw_vianame)[0] == '\0') 1279 fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF; 1280 1281 fwkern->next = NULL; 1282 return fwkern; 1283} 1284 1285int ip_fw_ctl(int cmd, void *m, int len) 1286{ 1287 int ret; 1288 struct ip_chain *chain; 1289 unsigned long flags; 1290 1291 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); 1292 1293 switch (cmd) { 1294 case IP_FW_FLUSH: 1295 if (len != sizeof(ip_chainlabel) || !check_label(m)) 1296 ret = EINVAL; 1297 else if ((chain = find_label(m)) == NULL) 1298 ret = ENOENT; 1299 else ret = clear_fw_chain(chain); 1300 break; 1301 1302 case 
IP_FW_ZERO: 1303 if (len != sizeof(ip_chainlabel) || !check_label(m)) 1304 ret = EINVAL; 1305 else if ((chain = find_label(m)) == NULL) 1306 ret = ENOENT; 1307 else ret = zero_fw_chain(chain); 1308 break; 1309 1310 case IP_FW_CHECK: { 1311 struct ip_fwtest *new = m; 1312 struct iphdr *ip; 1313 1314 /* Don't need write lock. */ 1315 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); 1316 1317 if (len != sizeof(struct ip_fwtest) || !check_label(m)) 1318 return EINVAL; 1319 1320 /* Need readlock to do find_label */ 1321 FWC_READ_LOCK(&ip_fw_lock); 1322 1323 if ((chain = find_label(new->fwt_label)) == NULL) 1324 ret = ENOENT; 1325 else { 1326 ip = &(new->fwt_packet.fwp_iph); 1327 1328 if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) { 1329 duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n", 1330 ip->ihl, 1331 sizeof(struct iphdr) / sizeof(int)); 1332 ret = EINVAL; 1333 } 1334 else { 1335 ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame, 1336 NULL, chain, 1337 NULL, SLOT_NUMBER(), 1); 1338 switch (ret) { 1339 case FW_ACCEPT: 1340 ret = 0; break; 1341 case FW_REDIRECT: 1342 ret = ECONNABORTED; break; 1343 case FW_MASQUERADE: 1344 ret = ECONNRESET; break; 1345 case FW_REJECT: 1346 ret = ECONNREFUSED; break; 1347 /* Hack to help diag; these only get 1348 returned when testing. 
*/ 1349 case FW_SKIP+1: 1350 ret = ELOOP; break; 1351 case FW_SKIP: 1352 ret = ENFILE; break; 1353 default: /* FW_BLOCK */ 1354 ret = ETIMEDOUT; break; 1355 } 1356 } 1357 } 1358 FWC_READ_UNLOCK(&ip_fw_lock); 1359 return ret; 1360 } 1361 1362 case IP_FW_MASQ_TIMEOUTS: { 1363 ret = ip_fw_masq_timeouts(m, len); 1364 } 1365 break; 1366 1367 case IP_FW_REPLACE: { 1368 struct ip_fwkernel *ip_fwkern; 1369 struct ip_fwnew *new = m; 1370 1371 if (len != sizeof(struct ip_fwnew) 1372 || !check_label(new->fwn_label)) 1373 ret = EINVAL; 1374 else if ((chain = find_label(new->fwn_label)) == NULL) 1375 ret = ENOENT; 1376 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) 1377 != NULL) 1378 ret = replace_in_chain(chain, ip_fwkern, 1379 new->fwn_rulenum); 1380 } 1381 break; 1382 1383 case IP_FW_APPEND: { 1384 struct ip_fwchange *new = m; 1385 struct ip_fwkernel *ip_fwkern; 1386 1387 if (len != sizeof(struct ip_fwchange) 1388 || !check_label(new->fwc_label)) 1389 ret = EINVAL; 1390 else if ((chain = find_label(new->fwc_label)) == NULL) 1391 ret = ENOENT; 1392 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) 1393 != NULL) 1394 ret = append_to_chain(chain, ip_fwkern); 1395 } 1396 break; 1397 1398 case IP_FW_INSERT: { 1399 struct ip_fwkernel *ip_fwkern; 1400 struct ip_fwnew *new = m; 1401 1402 if (len != sizeof(struct ip_fwnew) 1403 || !check_label(new->fwn_label)) 1404 ret = EINVAL; 1405 else if ((chain = find_label(new->fwn_label)) == NULL) 1406 ret = ENOENT; 1407 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) 1408 != NULL) 1409 ret = insert_in_chain(chain, ip_fwkern, 1410 new->fwn_rulenum); 1411 } 1412 break; 1413 1414 case IP_FW_DELETE: { 1415 struct ip_fwchange *new = m; 1416 struct ip_fwkernel *ip_fwkern; 1417 1418 if (len != sizeof(struct ip_fwchange) 1419 || !check_label(new->fwc_label)) 1420 ret = EINVAL; 1421 else if ((chain = find_label(new->fwc_label)) == NULL) 1422 ret = ENOENT; 1423 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) 1424 
!= NULL) { 1425 ret = del_rule_from_chain(chain, ip_fwkern); 1426 kfree(ip_fwkern); 1427 } 1428 } 1429 break; 1430 1431 case IP_FW_DELETE_NUM: { 1432 struct ip_fwdelnum *new = m; 1433 1434 if (len != sizeof(struct ip_fwdelnum) 1435 || !check_label(new->fwd_label)) 1436 ret = EINVAL; 1437 else if ((chain = find_label(new->fwd_label)) == NULL) 1438 ret = ENOENT; 1439 else ret = del_num_from_chain(chain, new->fwd_rulenum); 1440 } 1441 break; 1442 1443 case IP_FW_CREATECHAIN: { 1444 if (len != sizeof(ip_chainlabel)) { 1445 duprintf("create_chain: bad size %i\n", len); 1446 ret = EINVAL; 1447 } 1448 else ret = create_chain(m); 1449 } 1450 break; 1451 1452 case IP_FW_DELETECHAIN: { 1453 if (len != sizeof(ip_chainlabel)) { 1454 duprintf("delete_chain: bad size %i\n", len); 1455 ret = EINVAL; 1456 } 1457 else ret = del_chain(m); 1458 } 1459 break; 1460 1461 case IP_FW_POLICY: { 1462 struct ip_fwpolicy *new = m; 1463 1464 if (len != sizeof(struct ip_fwpolicy) 1465 || !check_label(new->fwp_label)) 1466 ret = EINVAL; 1467 else if ((chain = find_label(new->fwp_label)) == NULL) 1468 ret = ENOENT; 1469 else if (chain != IP_FW_INPUT_CHAIN 1470 && chain != IP_FW_FORWARD_CHAIN 1471 && chain != IP_FW_OUTPUT_CHAIN) { 1472 duprintf("change_policy: can't change policy on user" 1473 " defined chain.\n"); 1474 ret = EINVAL; 1475 } 1476 else { 1477 int pol = FW_SKIP; 1478 find_special(new->fwp_policy, &pol); 1479 1480 switch(pol) { 1481 case FW_MASQUERADE: 1482 if (chain != IP_FW_FORWARD_CHAIN) { 1483 ret = EINVAL; 1484 break; 1485 } 1486 /* Fall thru... 
*/ 1487 case FW_BLOCK: 1488 case FW_ACCEPT: 1489 case FW_REJECT: 1490 ret = change_policy(chain, pol); 1491 break; 1492 default: 1493 duprintf("change_policy: bad policy `%s'\n", 1494 new->fwp_policy); 1495 ret = EINVAL; 1496 } 1497 } 1498 break; 1499 } 1500 default: 1501 duprintf("ip_fw_ctl: unknown request %d\n",cmd); 1502 ret = ENOPROTOOPT; 1503 } 1504 1505 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); 1506 return ret; 1507} 1508 1509/* Returns bytes used - doesn't NUL terminate */ 1510static int dump_rule(char *buffer, 1511 const char *chainlabel, 1512 const struct ip_fwkernel *rule) 1513{ 1514 int len; 1515 unsigned int i; 1516 __u64 packets = 0, bytes = 0; 1517 1518 FWC_HAVE_LOCK(fwc_wlocks); 1519 for (i = 0; i < NUM_SLOTS; i++) { 1520 packets += rule->counters[i].pcnt; 1521 bytes += rule->counters[i].bcnt; 1522 } 1523 1524 len=sprintf(buffer, 1525 "%9s " /* Chain name */ 1526 "%08X/%08X->%08X/%08X " /* Source & Destination IPs */ 1527 "%.16s " /* Interface */ 1528 "%X %X " /* fw_flg and fw_invflg fields */ 1529 "%u " /* Protocol */ 1530 "%-9u %-9u %-9u %-9u " /* Packet & byte counters */ 1531 "%u-%u %u-%u " /* Source & Dest port ranges */ 1532 "A%02X X%02X " /* TOS and and xor masks */ 1533 "%08X " /* Redirection port */ 1534 "%u " /* fw_mark field */ 1535 "%u " /* output size */ 1536 "%9s\n", /* Target */ 1537 chainlabel, 1538 ntohl(rule->ipfw.fw_src.s_addr), 1539 ntohl(rule->ipfw.fw_smsk.s_addr), 1540 ntohl(rule->ipfw.fw_dst.s_addr), 1541 ntohl(rule->ipfw.fw_dmsk.s_addr), 1542 (rule->ipfw.fw_vianame)[0] ? 
rule->ipfw.fw_vianame : "-", 1543 rule->ipfw.fw_flg, 1544 rule->ipfw.fw_invflg, 1545 rule->ipfw.fw_proto, 1546 (__u32)(packets >> 32), (__u32)packets, 1547 (__u32)(bytes >> 32), (__u32)bytes, 1548 rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1], 1549 rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], 1550 rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, 1551 rule->ipfw.fw_redirpt, 1552 rule->ipfw.fw_mark, 1553 rule->ipfw.fw_outputsize, 1554 branchname(rule->branch,rule->simplebranch)); 1555 1556 duprintf("dump_rule: %i bytes done.\n", len); 1557 return len; 1558} 1559 1560/* File offset is actually in records, not bytes. */ 1561static int ip_chain_procinfo(char *buffer, char **start, 1562 off_t offset, int length 1563#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) 1564 , int reset 1565#endif 1566 ) 1567{ 1568#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) 1569 int reset = 0; 1570#endif 1571 struct ip_chain *i; 1572 struct ip_fwkernel *j = ip_fw_chains->chain; 1573 unsigned long flags; 1574 int len = 0; 1575 int last_len = 0; 1576 off_t upto = 0; 1577 1578 duprintf("Offset starts at %lu\n", offset); 1579 duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains); 1580 1581 /* Need a write lock to lock out ``readers'' which update counters. */ 1582 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); 1583 1584 for (i = ip_fw_chains; i; i = i->next) { 1585 for (j = i->chain; j; j = j->next) { 1586 if (upto == offset) break; 1587 duprintf("Skipping rule in chain `%s'\n", 1588 i->label); 1589 upto++; 1590 } 1591 if (upto == offset) break; 1592 } 1593 1594 /* Don't init j first time, or once i = NULL */ 1595 for (; i; (void)((i = i->next) && (j = i->chain))) { 1596 duprintf("Dumping chain `%s'\n", i->label); 1597 for (; j; j = j->next, upto++, last_len = len) 1598 { 1599 len += dump_rule(buffer+len, i->label, j); 1600 if (len > length) { 1601 duprintf("Dumped to %i (past %i). 
" 1602 "Moving back to %i.\n", 1603 len, length, last_len); 1604 len = last_len; 1605 goto outside; 1606 } 1607 else if (reset) 1608 memset(j->counters, 0, 1609 sizeof(struct ip_counters)*NUM_SLOTS); 1610 } 1611 } 1612outside: 1613 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); 1614 buffer[len] = '\0'; 1615 1616 duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n", 1617 len, length, upto); 1618 /* `start' hack - see fs/proc/generic.c line ~165 */ 1619 *start=(char *)((unsigned int)upto-offset); 1620 return len; 1621} 1622 1623static int ip_chain_name_procinfo(char *buffer, char **start, 1624 off_t offset, int length) 1625{ 1626 struct ip_chain *i; 1627 int len = 0,last_len = 0; 1628 off_t pos = 0,begin = 0; 1629 unsigned long flags; 1630 1631 /* Need a write lock to lock out ``readers'' which update counters. */ 1632 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); 1633 1634 for (i = ip_fw_chains; i; i = i->next) 1635 { 1636 unsigned int j; 1637 __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0; 1638 1639 for (j = 0; j < NUM_SLOTS; j++) { 1640 packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF; 1641 packetsHi += ((i->reent[j].counters.pcnt >> 32) 1642 & 0xFFFFFFFF); 1643 bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF; 1644 bytesHi += ((i->reent[j].counters.bcnt >> 32) 1645 & 0xFFFFFFFF); 1646 } 1647 1648 /* print the label and the policy */ 1649 len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n", 1650 i->label,branchname(NULL, i->policy),i->refcount, 1651 packetsHi, packetsLo, bytesHi, bytesLo); 1652 pos=begin+len; 1653 if(pos<offset) { 1654 len=0; 1655 begin=pos; 1656 } 1657 else if(pos>offset+length) { 1658 len = last_len; 1659 break; 1660 } 1661 1662 last_len = len; 1663 } 1664 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); 1665 1666 *start = buffer+(offset-begin); 1667 len-=(offset-begin); 1668 if(len>length) 1669 len=length; 1670 return len; 1671} 1672 1673/* 1674 * Interface to the generic firewall chains. 
1675 */ 1676int ipfw_input_check(struct firewall_ops *this, int pf, 1677 struct net_device *dev, void *phdr, void *arg, 1678 struct sk_buff **pskb) 1679{ 1680 return ip_fw_check(phdr, dev->name, 1681 arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); 1682} 1683 1684int ipfw_output_check(struct firewall_ops *this, int pf, 1685 struct net_device *dev, void *phdr, void *arg, 1686 struct sk_buff **pskb) 1687{ 1688 /* Locally generated bogus packets by root. <SIGH>. */ 1689 if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr) 1690 || (*pskb)->len < sizeof(struct iphdr)) 1691 return FW_ACCEPT; 1692 return ip_fw_check(phdr, dev->name, 1693 arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); 1694} 1695 1696int ipfw_forward_check(struct firewall_ops *this, int pf, 1697 struct net_device *dev, void *phdr, void *arg, 1698 struct sk_buff **pskb) 1699{ 1700 return ip_fw_check(phdr, dev->name, 1701 arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0); 1702} 1703 1704struct firewall_ops ipfw_ops= 1705{ 1706 NULL, 1707 ipfw_forward_check, 1708 ipfw_input_check, 1709 ipfw_output_check, 1710 NULL, 1711 NULL 1712}; 1713 1714int ipfw_init_or_cleanup(int init) 1715{ 1716 struct proc_dir_entry *proc; 1717 int ret = 0; 1718 unsigned long flags; 1719 1720 if (!init) goto cleanup; 1721 1722#ifdef DEBUG_IP_FIREWALL_LOCKING 1723 fwc_wlocks = fwc_rlocks = 0; 1724#endif 1725 1726#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) 1727 ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); 1728 if (ipfwsk == NULL) 1729 goto cleanup_nothing; 1730#endif 1731 1732 ret = register_firewall(PF_INET, &ipfw_ops); 1733 if (ret < 0) 1734 goto cleanup_netlink; 1735 1736 proc = proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1737 ip_chain_procinfo); 1738 if (proc) proc->owner = THIS_MODULE; 1739 proc = proc_net_create(IP_FW_PROC_CHAIN_NAMES, 1740 S_IFREG | S_IRUSR | S_IWUSR, 1741 ip_chain_name_procinfo); 1742 if (proc) proc->owner = THIS_MODULE; 1743 1744 
IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT); 1745 IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT); 1746 IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT); 1747 1748 return ret; 1749 1750 cleanup: 1751 unregister_firewall(PF_INET, &ipfw_ops); 1752 1753 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); 1754 while (ip_fw_chains) { 1755 struct ip_chain *next = ip_fw_chains->next; 1756 1757 clear_fw_chain(ip_fw_chains); 1758 kfree(ip_fw_chains); 1759 ip_fw_chains = next; 1760 } 1761 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); 1762 1763 proc_net_remove(IP_FW_PROC_CHAINS); 1764 proc_net_remove(IP_FW_PROC_CHAIN_NAMES); 1765 1766 cleanup_netlink: 1767#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) 1768 sock_release(ipfwsk->socket); 1769 1770 cleanup_nothing: 1771#endif 1772 return ret; 1773} 1774MODULE_LICENSE("Dual BSD/GPL"); 1775