/*
 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/random.h>
#include <sys/mcache.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#if INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	struct mbuf *fr_m;
#define fr_ip	fr_u.fru_ipv4
#define fr_ip6	fr_u.fru_ipv6
	union {
		struct ip *fru_ipv4;
		struct ip6_hdr *fru_ipv6;
	} fr_u;
	struct ip6_frag fr_ip6f_opt;
	int fr_ip6f_hlen;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;
	uint16_t	fr_off;
	uint16_t	fr_end;
};

#define PFFRAG_SEENLAST	0x0001	/* Seen the last fragment for this packet */
#define PFFRAG_NOBUFFER	0x0002	/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004	/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct pf_addr	fr_srcx;
	struct pf_addr	fr_dstx;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_max;		/* fragment data max */
#define fr_id	fr_uid.fru_id4
#define fr_id6	fr_uid.fru_id6
	union {
		u_int16_t	fru_id4;
		u_int32_t	fru_id6;
	} fr_uid;
	int		fr_af;
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};

static TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int  pf_frag_compare(struct pf_fragment *,
    struct pf_fragment *);
static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
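
/*
 * Overview: pf keeps two parallel structures for tracking IP fragments.
 * The "buffering" path (pf_frag_tree/pf_fragqueue) holds the actual mbufs
 * in a pf_fragq until the datagram can be put back together by
 * pf_reassemble()/pf_reassemble6().  The "non-buffering" path
 * (pf_cache_tree/pf_cachequeue), used for the fragment crop/drop rule
 * options, only remembers which byte ranges have been seen (pf_frcache
 * entries) and lets the fragments themselves pass through after trimming
 * duplicates and overlaps.  Both trees are keyed by pf_frag_compare():
 * address family, protocol, fragment id, and addresses.
 */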

/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr *,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);

#define DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while (0)

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl;
static struct pool	 pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;

static int		 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

#if 0
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif

int
pf_normalize_isempty(void)
{
	return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
}
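
/*
 * pf_frag_compare() defines the total order for both RB trees.  Note that
 * the fragment ids (and the IPv4 addresses) are compared in network byte
 * order exactly as stored; the resulting order differs from host byte
 * order, but lookups only need a consistent ordering, not a numerically
 * meaningful one.
 */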

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int diff;

	if ((diff = a->fr_af - b->fr_af))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else {
		struct pf_addr *sa = &a->fr_srcx;
		struct pf_addr *sb = &b->fr_srcx;
		struct pf_addr *da = &a->fr_dstx;
		struct pf_addr *db = &b->fr_dstx;

		switch (a->fr_af) {
#ifdef INET
		case AF_INET:
			if ((diff = a->fr_id - b->fr_id))
				return (diff);
			else if (sa->v4.s_addr < sb->v4.s_addr)
				return (-1);
			else if (sa->v4.s_addr > sb->v4.s_addr)
				return (1);
			else if (da->v4.s_addr < db->v4.s_addr)
				return (-1);
			else if (da->v4.s_addr > db->v4.s_addr)
				return (1);
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if ((diff = a->fr_id6 - b->fr_id6))
				return (diff);
			else if (sa->addr32[3] < sb->addr32[3])
				return (-1);
			else if (sa->addr32[3] > sb->addr32[3])
				return (1);
			else if (sa->addr32[2] < sb->addr32[2])
				return (-1);
			else if (sa->addr32[2] > sb->addr32[2])
				return (1);
			else if (sa->addr32[1] < sb->addr32[1])
				return (-1);
			else if (sa->addr32[1] > sb->addr32[1])
				return (1);
			else if (sa->addr32[0] < sb->addr32[0])
				return (-1);
			else if (sa->addr32[0] > sb->addr32[0])
				return (1);
			else if (da->addr32[3] < db->addr32[3])
				return (-1);
			else if (da->addr32[3] > db->addr32[3])
				return (1);
			else if (da->addr32[2] < db->addr32[2])
				return (-1);
			else if (da->addr32[2] > db->addr32[2])
				return (1);
			else if (da->addr32[1] < db->addr32[1])
				return (-1);
			else if (da->addr32[1] > db->addr32[1])
				return (1);
			else if (da->addr32[0] < db->addr32[0])
				return (-1);
			else if (da->addr32[0] > db->addr32[0])
				return (1);
			break;
#endif
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported!");
			break;
		}
	}
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	u_int32_t expire = pf_time_second() -
	    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		VERIFY(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(%p) from queue.\n",
			    ntohs(frag->fr_id), frag));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(%p) from queue.\n",
			    ntohl(frag->fr_id6), frag));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		VERIFY(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(%p) from cache.\n",
			    ntohs(frag->fr_id), frag));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(%p) from cache.\n",
			    ntohl(frag->fr_id6), frag));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */
static void
pf_flush_fragments(void)
{
	struct pf_fragment *frag;
	int goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */
static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent *frent;
	struct pf_frcache *frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			VERIFY(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

static void
pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
    struct ip6_frag *fh)
{
	key->fr_p = fh->ip6f_nxt;
	key->fr_id6 = fh->ip6f_ident;
	key->fr_af = AF_INET6;
	key->fr_srcx.v6 = ip6->ip6_src;
	key->fr_dstx.v6 = ip6->ip6_dst;
}
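
/*
 * pf_ip2key() below (like pf_ip6hdr2key() above) builds a throwaway
 * lookup key on the caller's stack straight from the packet headers; id
 * and addresses are copied as-is in network byte order, matching how live
 * entries were stored when they were inserted into the tree.
 */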

static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_af = AF_INET;
	key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
	key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
}

static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
{
	struct pf_fragment *frag;

	frag = RB_FIND(pf_frag_tree, tree, key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = pf_time_second();
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip2key(&key, ip);
	return (pf_find_fragment_by_key(&key, tree));
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
    struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip6hdr2key(&key, ip6, fh);
	return (pf_find_fragment_by_key(&key, tree));
}

/* Removes a fragment from the fragment queue and frees the fragment */
static void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
static struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m = *m0, *m2;
	struct pf_frent *frea, *next;
	struct pf_frent *frep = NULL;
	struct ip *ip = frent->fr_ip;
	int hlen = ip->ip_hl << 2;
	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t fr_max = ip_len + off;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
		(*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}
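
	/*
	 * Overlap handling, illustrated: suppose the queue already holds a
	 * fragment covering bytes 0-1479 and the new fragment claims
	 * 1472-2951.  The loop below finds frep (0-1479), the "precut"
	 * branch computes precut = 1480 - 1472 = 8, trims 8 bytes from the
	 * front of the new fragment with m_adj(), and bumps its offset by
	 * one 8-byte unit, so the new fragment now covers 1480-2951.  The
	 * symmetric "aftercut" loop trims the tail of queued fragments that
	 * the new one overlaps from the front.
	 */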
	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off) {
		u_int16_t precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next) {
		u_int16_t aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4) {
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);
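
	/*
	 * Completeness scan: walk the queue in offset order, accumulating
	 * payload bytes in 'off'.  Any hole (a successor whose offset is
	 * not exactly the running total) means reassembly is not done yet,
	 * so the fragments simply stay queued.
	 */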
	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_srcx.v4;
	ip->ip_dst = (*frag)->fr_dstx.v4;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag,
    int mff, int drop, int *nomem)
{
	struct mbuf *m = *m0;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t off = ntohs(h->ip_off) << 3;
	u_int16_t fr_max = ip_len + off;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = h->ip_src;
		(*frag)->fr_dstx.v4 = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
		    fr_max));

		goto pass;
	}
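
	/*
	 * From here on the cache holds at least one byte range.  frp will
	 * point at the last range that starts at or before the new
	 * fragment, fra at the first range after it; the new fragment is
	 * then trimmed against both neighbors, and adjacent ranges are
	 * merged so the list stays minimal and sorted.
	 */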
	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}
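
	/*
	 * Now handle overlap with the following cached range (fra): an
	 * exactly adjacent fragment extends fra downward, a tail overlap is
	 * chopped off this fragment with m_adj(m, -aftercut), and a
	 * fragment touching neither neighbor becomes a fresh range inserted
	 * before fra.
	 */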
	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall "
			    "fragment\n", h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

#define FR_IP6_OFF(fr) \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr)	(ntohs((fr)->fr_ip6->ip6_plen))
static struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m, *m2;
	struct pf_frent *frea, *frep, *next;
	struct ip6_hdr *ip6;
	int plen, off, fr_max;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
	m = *m0;
	frep = NULL;
	ip6 = frent->fr_ip6;
	off = FR_IP6_OFF(frent);
	plen = FR_IP6_PLEN(frent);
	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof (*ip6));

	DPFPRINTF(("%p IPv6 frag plen %u off %u fr_ip6f_hlen %u "
	    "fr_max %u m_len %u\n", m, plen, off, frent->fr_ip6f_hlen,
	    fr_max, m->m_len));

	/* strip off headers up to the fragment payload */
	m->m_data += frent->fr_ip6f_hlen;
	m->m_len -= frent->fr_ip6f_hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
		(*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}
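
	/*
	 * IPv6 offsets, unlike IPv4's ip_off, live in the fragment
	 * extension header: FR_IP6_OFF() extracts the byte offset from
	 * ip6f_offlg (already a multiple of 8), and FR_IP6_PLEN() is the
	 * payload length from the IPv6 header.  The per-fragment payload
	 * size is therefore FR_IP6_PLEN() minus the extension headers that
	 * precede the data (fr_ip6f_hlen beyond the fixed IPv6 header).
	 */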
	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP6_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
		u_int16_t precut;

		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
		    frep->fr_ip6f_hlen - off;
		if (precut >= plen)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		frent->fr_ip6f_opt.ip6f_offlg =
		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
		    (precut >> 3));
		off = FR_IP6_OFF(frent);
		plen -= precut;
		ip6->ip6_plen = htons(plen);
	}

	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
		u_int16_t aftercut;

		aftercut = plen + off - FR_IP6_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
			    aftercut);
			frea->fr_ip6f_opt.ip6f_offlg =
			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);
	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);
		off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof (*ip6));
		DPFPRINTF(("frep at %d, next %d, max %d\n",
		    off, next == NULL ? -1 : FR_IP6_OFF(next),
		    (*frag)->fr_max));
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP6_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP6_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}

	ip6 = frent->fr_ip6;
	ip6->ip6_nxt = (*frag)->fr_p;
	ip6->ip6_plen = htons(off);
	ip6->ip6_src = (*frag)->fr_srcx.v6;
	ip6->ip6_dst = (*frag)->fr_dstx.v6;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	m = frent->fr_m;
	m->m_len += sizeof (struct ip6_hdr);
	m->m_data -= sizeof (struct ip6_hdr);
	memmove(m->m_data, ip6, sizeof (struct ip6_hdr));

	next = LIST_NEXT(frent, fr_next);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; next != NULL; frent = next) {
		m2 = frent->fr_m;

		m_cat(m, m2);
		next = LIST_NEXT(frent, fr_next);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}

	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int pktlen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			pktlen += m2->m_len;
		m->m_pkthdr.len = pktlen;
	}

	DPFPRINTF(("complete: %p ip6_plen %d m_pkthdr.len %d\n",
	    m, ntohs(ip6->ip6_plen), m->m_pkthdr.len));

	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

static struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
	struct mbuf *m = *m0;
	u_int16_t plen, off, fr_max;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
	m = *m0;
	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	plen = ntohs(h->ip6_plen) - (hlen - sizeof (*h));

	/*
	 * Apple Modification: dimambro@apple.com.  The hlen being passed
	 * into this function includes all the headers associated with
	 * the packet, and may include routing headers, so to get to
	 * the data payload as stored in the original IPv6 header we need
	 * to subtract all those headers and the IP header.
	 *
	 * The 'fr_max' local variable should also contain the offset from
	 * the start of the reassembled packet to the octet just past the
	 * end of the octets in the current fragment where:
	 *  - 'off' is the offset from the start of the reassembled packet
	 *    to the first octet in the fragment,
	 *  - 'plen' is the length of the "payload data length", excluding
	 *    all the IPv6 headers of the fragment.
	 *  - 'hlen' is computed in pf_normalize_ip6() as the offset from
	 *    the start of the IPv6 packet to the beginning of the data.
	 */
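	/*
	 * Worked example (illustrative numbers): for a middle fragment
	 * carrying bytes 1232-2463 of the reassembled datagram with no
	 * extra extension headers, hlen is sizeof(struct ip6_hdr) +
	 * sizeof(struct ip6_frag) = 48, ip6_plen is 1240, so
	 * plen = 1240 - (48 - 40) = 1232, off = 1232, and
	 * fr_max = 1232 + 1232 = 2464, one past the last octet.
	 */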
	fr_max = off + plen;

	DPFPRINTF(("%p plen %u off %u fr_max %u\n", m,
	    plen, off, fr_max));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = h->ip6_src;
		(*frag)->fr_dstx.v6 = h->ip6_dst;
		(*frag)->fr_p = fh->ip6f_nxt;
		(*frag)->fr_id6 = fh->ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
		    off, fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= plen) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + hlen);
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}

				h = mtod(m, struct ip6_hdr *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - precut);
				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
				fh->ip6f_offlg |=
				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
				    + (precut >> 3));
				h->ip6_plen = htons(ntohs(h->ip6_plen) -
				    precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}
				h = mtod(m, struct ip6_hdr *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - aftercut);
				h->ip6_plen =
				    htons(ntohs(h->ip6_plen) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), cur->fr_off,
				    cur->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), frp->fr_off,
				    frp->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("frag6cache[%u]: dropping overall "
			    "fragment\n", ntohl(fh->ip6f_ident)));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct pf_frent *frent;
	struct pf_fragment *frag = NULL;
	struct ip *h = mtod(m, struct ip *);
	int mff = (ntohs(h->ip_off) & IP_MF);
	int hlen = h->ip_hl << 2;
	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t fr_max;
	int ip_len;
	int ip_off;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else {
			if (r->anchor == NULL)
				break;
			else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}
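
	/*
	 * A note on the matching loop above: the r->skip[] pointers are the
	 * classic pf "skip step" optimization.  When a criterion does not
	 * match, evaluation jumps directly to the next rule that differs in
	 * that field, rather than stepping through every intervening rule.
	 */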
	/* Check for illegal packets */
	if (hlen < (int)sizeof (struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/*
	 * We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	fr_max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
		    fragoff, fr_max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		/* use mtag from concatenated mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#ifdef DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				*m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int nomem = 0;

		if (dir == PF_OUT &&
		    (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}
		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* use mtag from copied and trimmed mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#ifdef DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				*m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (dir == PF_IN)
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
#if RANDOM_IP_ID
	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
#endif /* RANDOM_IP_ID */
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

bad:
	DPFPRINTF(("dropping bad IPv4 fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL,
		    pd);

	return (PF_DROP);
}
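
/*
 * A note on pf_normalize_ip()'s contract with its caller: on PF_PASS, *m0
 * is either the original (possibly trimmed) fragment in cache mode, or a
 * fully reassembled datagram in buffering mode with PFDESC_IP_REAS set.
 * A PF_DROP return does not always mean the data is gone: an incomplete
 * fragment in buffering mode stays queued for later reassembly, with *m0
 * set to NULL.
 */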

#if INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
	int off;
	struct ip6_ext ext;
/* adi XXX */
#if 0
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;
	int optend;
	int ooff;
#endif
	struct ip6_frag frag;
	u_int32_t jumbolen = 0, plen;
	u_int16_t fragoff = 0;
	u_int8_t proto;
	int terminal;
	struct pf_frent *frent;
	struct pf_fragment *pff = NULL;
	int mff = 0, rh_cnt = 0;
	u_int16_t fr_max;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else {
			if (r->anchor == NULL)
				break;
			else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
		goto drop;

	off = sizeof (struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		pd->proto = proto;
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			/*
			 * <jhw@apple.com>
			 * Multiple routing headers not allowed.
			 * Routing header type zero considered harmful.
			 */
			if (proto == IPPROTO_ROUTING) {
				const struct ip6_rthdr *rh =
				    (const struct ip6_rthdr *)&ext;
				if (rh_cnt++)
					goto drop;
				if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
					goto drop;
			}
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
/* adi XXX */
#if 0
			if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof (ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof (opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof (opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof (jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof (jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof (struct ip6_hdr) +
					    jumbolen != m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof (opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
#endif
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);
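
	/*
	 * A note on the chain walk above: extension header length encodings
	 * differ.  For AH, ip6e_len counts 32-bit words minus two (RFC
	 * 4302), hence (ext.ip6e_len + 2) * 4 bytes; for routing and
	 * destination options headers it counts 8-octet units not
	 * including the first (RFC 2460), hence (ext.ip6e_len + 1) * 8.
	 */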
	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	pd->proto = frag.ip6f_nxt;
	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
	off += sizeof (frag);
	if (fragoff + (plen - off) > IPV6_MAXPACKET)
		goto badfrag;

	fr_max = fragoff + plen - (off - sizeof (struct ip6_hdr));
	DPFPRINTF(("%p IPv6 frag plen %u mff %d off %u fragoff %u "
	    "fr_max %u\n", m, plen, mff, off, fragoff, fr_max));

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */
		pd->flags |= PFDESC_IP_REAS;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max)
			goto badfrag;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip6 = h;
		frent->fr_m = m;
		frent->fr_ip6f_opt = frag;
		frent->fr_ip6f_hlen = off;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
		    ntohl(frag.ip6f_ident), fragoff, fr_max));
		*m0 = m = pf_reassemble6(m0, &pff, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip6_hdr *);
	}
	else if (dir == PF_IN ||
	    !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
		/* non-buffering fragment cache (overlaps: see RFC 5722) */
		int nomem = 0;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				pff->fr_flags |= PFFRAG_DROP;
			goto badfrag;
		}

fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	pd->proto = frag.ip6f_nxt;
	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
	off += sizeof (frag);
	if (fragoff + (plen - off) > IPV6_MAXPACKET)
		goto badfrag;

	fr_max = fragoff + plen - (off - sizeof (struct ip6_hdr));
	DPFPRINTF(("%p IPv6 frag plen %u mff %d off %u fragoff %u "
	    "fr_max %u\n", m, plen, mff, off, fragoff, fr_max));

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */
		pd->flags |= PFDESC_IP_REAS;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max)
			goto badfrag;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip6 = h;
		frent->fr_m = m;
		frent->fr_ip6f_opt = frag;
		frent->fr_ip6f_hlen = off;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
		    ntohl(frag.ip6f_ident), fragoff, fr_max));
		*m0 = m = pf_reassemble6(m0, &pff, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip6_hdr *);
	} else if (dir == PF_IN ||
	    !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
		/* non-buffering fragment cache (overlaps: see RFC 5722) */
		int nomem = 0;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				pff->fr_flags |= PFFRAG_DROP;
			goto badfrag;
		}

		*m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		if (dir == PF_IN)
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
			goto drop;
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	goto dropout;

 shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	goto dropout;

 drop:
	REASON_SET(reason, PFRES_NORM);
	goto dropout;

 badfrag:
	DPFPRINTF(("dropping bad IPv6 fragment\n"));
	REASON_SET(reason, PFRES_FRAG);
	goto dropout;

 dropout:
	if (pff != NULL)
		pf_free_fragment(pff);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);
}
#endif /* INET6 */
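/*
 * For orientation (OpenBSD-derived pf.conf syntax, shown only as an
 * illustration and not part of this file): the PFRULE_FRAGCROP and
 * PFRULE_FRAGDROP flags tested above are set by scrub rules such as
 *
 *	scrub in all fragment reassemble	# buffer and reassemble
 *	scrub in all fragment crop		# non-buffering cache
 *	scrub in all fragment drop-ovl		# cache, drop overlaps
 */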

int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
#pragma unused(ipoff, h)
	struct pf_rule	*r, *rm = NULL;
	struct tcphdr	*th = pd->hdr.tcp;
	int		 rewrite = 0;
	int		 asd = 0;
	u_short		 reason;
	u_int8_t	 flags;
	sa_family_t	 af = pd->af;
	struct pf_ruleset *ruleset = NULL;
	union pf_state_xport sxport, dxport;

	sxport.port = th->th_sport;
	dxport.port = th->th_dport;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.xport.range.op &&
		    !pf_match_xport(r->src.xport.range.op, r->proto_variant,
		    &r->src.xport, &sxport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.xport.range.op &&
		    !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
		    &r->dst.xport, &dxport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY &&
		    !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				rm = r;
				break;
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (rm == NULL || rm->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof (struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	/* Process options */
	if (r->max_mss) {
		int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
		    &rewrite);
		if (rv == PF_DROP)
			return (rv);
		m = pd->mp;
	}

	if (rewrite) {
		struct mbuf *mw = pf_lazy_makewritable(pd, m,
		    off + sizeof (*th));
		if (!mw) {
			REASON_SET(&reason, PFRES_MEMORY);
			if (r->log)
				PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
				    r, 0, 0, pd);
			return (PF_DROP);
		}

		m_copyback(mw, off, sizeof (*th), th);
	}

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL,
		    NULL, pd);
	return (PF_DROP);
}
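/*
 * Note: the pf_cksum_fixup() calls above adjust the TCP checksum
 * incrementally, in the spirit of RFC 1624 (HC' = ~(~HC + ~m + m')),
 * rather than recomputing it over the whole segment; that is why each
 * rewrite reads the affected 16-bit word before (ov) and after (nv) the
 * change.  A minimal sketch of that update rule follows; it is
 * illustrative only, mirrors what pf_cksum_fixup() does with its inputs,
 * and is not used by this file.
 */
#if 0
static u_int16_t
cksum_fixup_sketch(u_int16_t cksum, u_int16_t ov, u_int16_t nv)
{
	u_int32_t l = cksum + ov - nv;	/* may borrow into the high word */

	l = (l >> 16) + (l & 65535);	/* fold the carry/borrow back in */
	return (l & 65535);
}
#endif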

int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
#pragma unused(dst)
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	VERIFY(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof (*src->scrub));

	switch (pd->af) {
#if INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#if INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof (struct tcphdr);
		hlen = (th->th_off << 2) - sizeof (struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof (u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof (u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}
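/*
 * Note: the option walkers in pf_normalize_tcp_init() above and
 * pf_normalize_tcp_stateful() below index the timestamp payload as
 * opt[2] (TSval) and opt[6] (TSecr) because an RFC 1323 timestamp
 * option is laid out as:
 *
 *	opt[0]		kind   = TCPOPT_TIMESTAMP (8)
 *	opt[1]		length = TCPOLEN_TIMESTAMP (10)
 *	opt[2..5]	TSval, 32 bits, network byte order
 *	opt[6..9]	TSecr, 32 bits, network byte order
 */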

int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;
	u_int tsval_from_last;
	u_int8_t hdr[60];
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	VERIFY(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#if INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#if INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof (struct tcphdr);
		hlen = (th->th_off << 2) - sizeof (struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/*
				 * Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof (u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof (u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copies back header */
			int optoff = off + sizeof (*th);
			int optlen = (th->th_off << 2) - sizeof (*th);
			m = pf_lazy_makewritable(pd, m, optoff + optlen);
			if (!m) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
			*writeback = optoff + optlen;
			m_copyback(m, optoff, optlen, hdr + sizeof (*th));
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space.
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    pf_time_second() - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
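	/*
	 * Note: with the fastest legal timestamp clock of 1 ms, a 32-bit
	 * timestamp advances 86,400,000 ticks per day, so the half of the
	 * space that the SEQ_ comparisons below can order unambiguously
	 * (2^31 ticks) lasts 2147483648 / 86400000, roughly 24.8 days;
	 * hence the 24-day TS_MAX_IDLE cutoff above.
	 */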

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4GB).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
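		/*
		 * A worked instance of the bound computed below (the
		 * numbers are illustrative only): with 35.5 s since the
		 * last packet and a 10 s ts_fudge,
		 *
		 *	tsval_from_last = (35 + 10) * 1100
		 *	    + 500000 / (1000000 / 1100)
		 *	    = 49500 + 550 = 50050 ticks
		 *
		 * i.e. the peer's clock may have advanced at most ~50k
		 * ticks at the RFC ceiling of 1 kHz plus 10% skew.
		 */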

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1 kHz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */

		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
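		/*
		 * The four clauses of the check below correspond, in order,
		 * to the '0'..'3' markers in the "Timestamp failed" debug
		 * output: tsval below the peer's last echo, tsval beyond
		 * pfss_tsval plus tsval_from_last, tsecr above anything the
		 * peer has sent, and tsecr below the peer's original tsval0.
		 */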
		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/*
			 * Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0) ?
			    '3' : ' '));
			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
			    "idle: %lus %ums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u\n",
			    dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
			    dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets
	 * be timestamped.  But I think there are middle-man devices that
	 * hijack TCP streams immediately after the 3whs and don't timestamp
	 * their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet.  Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}
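	/*
	 * For orientation: pfss_flags acts as a small per-peer state
	 * machine.  PFSS_TIMESTAMP is latched at SYN time by
	 * pf_normalize_tcp_init(), PFSS_DATA_TS/PFSS_DATA_NOTS record the
	 * first data packet's behavior above, and PFSS_PAWS is only set
	 * below, once the peer has echoed a timestamp.
	 */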
	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}

static int
pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
    struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
    int *rewrptr)
{
#pragma unused(dir, kif)
	sa_family_t af = pd->af;
	u_int16_t *mss;
	int thoff;
	int opt, cnt, optlen = 0;
	int rewrite = 0;
	u_char opts[MAX_TCPOPTLEN];
	u_char *optp = opts;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof (struct tcphdr);

	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
	    NULL, NULL, af))
		return (PF_DROP);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(void *)(optp + 2);
			if ((ntohs(*mss)) > r->max_mss) {
				/*
				 * <jhw@apple.com>
				 * Only do the TCP checksum fixup if delayed
				 * checksum calculation will not be performed.
				 */
				if (m->m_pkthdr.rcvif ||
				    !(m->m_pkthdr.csum_flags & CSUM_TCP))
					th->th_sum = pf_cksum_fixup(th->th_sum,
					    *mss, htons(r->max_mss), 0);
				*mss = htons(r->max_mss);
				rewrite = 1;
			}
			break;
		default:
			break;
		}
	}

	if (rewrite) {
		struct mbuf *mw;
		u_short reason;

		mw = pf_lazy_makewritable(pd, pd->mp,
		    off + sizeof (*th) + thoff);
		if (!mw) {
			REASON_SET(&reason, PFRES_MEMORY);
			if (r->log)
				PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
				    r, 0, 0, pd);
			return (PF_DROP);
		}

		*rewrptr = 1;
		m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts);
	}

	return (PF_PASS);
}
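/*
 * For orientation (illustrative pf.conf syntax, not part of this file):
 * pf_normalize_tcpopt() above implements MSS clamping, which a rule such
 * as
 *
 *	scrub out on en0 max-mss 1440
 *
 * would enable; any SYN advertising a larger MSS is rewritten to 1440 and
 * the TCP checksum is patched incrementally unless a delayed checksum
 * calculation will recompute it anyway (the CSUM_TCP test above).
 */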