altq_subr.c revision 298133
1/* $FreeBSD: stable/10/sys/contrib/altq/altq/altq_subr.c 298133 2016-04-16 22:02:32Z loos $ */ 2/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */ 3 4/* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#if defined(__FreeBSD__) || defined(__NetBSD__) 31#include "opt_altq.h" 32#include "opt_inet.h" 33#ifdef __FreeBSD__ 34#include "opt_inet6.h" 35#endif 36#endif /* __FreeBSD__ || __NetBSD__ */ 37 38#include <sys/param.h> 39#include <sys/malloc.h> 40#include <sys/mbuf.h> 41#include <sys/systm.h> 42#include <sys/proc.h> 43#include <sys/socket.h> 44#include <sys/socketvar.h> 45#include <sys/kernel.h> 46#include <sys/errno.h> 47#include <sys/syslog.h> 48#include <sys/sysctl.h> 49#include <sys/queue.h> 50 51#include <net/if.h> 52#include <net/if_var.h> 53#include <net/if_dl.h> 54#include <net/if_types.h> 55#ifdef __FreeBSD__ 56#include <net/vnet.h> 57#endif 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/ip.h> 62#ifdef INET6 63#include <netinet/ip6.h> 64#endif 65#include <netinet/tcp.h> 66#include <netinet/udp.h> 67 68#include <netpfil/pf/pf.h> 69#include <netpfil/pf/pf_altq.h> 70#include <altq/altq.h> 71#ifdef ALTQ3_COMPAT 72#include <altq/altq_conf.h> 73#endif 74 75/* machine dependent clock related includes */ 76#ifdef __FreeBSD__ 77#include <sys/bus.h> 78#include <sys/cpu.h> 79#include <sys/eventhandler.h> 80#include <machine/clock.h> 81#endif 82#if defined(__amd64__) || defined(__i386__) 83#include <machine/cpufunc.h> /* for pentium tsc */ 84#include <machine/specialreg.h> /* for CPUID_TSC */ 85#ifdef __FreeBSD__ 86#include <machine/md_var.h> /* for cpu_feature */ 87#elif defined(__NetBSD__) || defined(__OpenBSD__) 88#include <machine/cpu.h> /* for cpu_feature */ 89#endif 90#endif /* __amd64 || __i386__ */ 91 92/* 93 * internal function prototypes 94 */ 95static void tbr_timeout(void *); 96int (*altq_input)(struct mbuf *, int) = NULL; 97static struct mbuf *tbr_dequeue(struct ifaltq *, int); 98static int tbr_timer = 0; /* token bucket regulator timer */ 99#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) 100static struct callout tbr_callout = CALLOUT_INITIALIZER; 101#else 102static struct callout tbr_callout; 
103#endif 104 105#ifdef ALTQ3_CLFIER_COMPAT 106static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); 107#ifdef INET6 108static int extract_ports6(struct mbuf *, struct ip6_hdr *, 109 struct flowinfo_in6 *); 110#endif 111static int apply_filter4(u_int32_t, struct flow_filter *, 112 struct flowinfo_in *); 113static int apply_ppfilter4(u_int32_t, struct flow_filter *, 114 struct flowinfo_in *); 115#ifdef INET6 116static int apply_filter6(u_int32_t, struct flow_filter6 *, 117 struct flowinfo_in6 *); 118#endif 119static int apply_tosfilter4(u_int32_t, struct flow_filter *, 120 struct flowinfo_in *); 121static u_long get_filt_handle(struct acc_classifier *, int); 122static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); 123static u_int32_t filt2fibmask(struct flow_filter *); 124 125static void ip4f_cache(struct ip *, struct flowinfo_in *); 126static int ip4f_lookup(struct ip *, struct flowinfo_in *); 127static int ip4f_init(void); 128static struct ip4_frag *ip4f_alloc(void); 129static void ip4f_free(struct ip4_frag *); 130#endif /* ALTQ3_CLFIER_COMPAT */ 131 132/* 133 * alternate queueing support routines 134 */ 135 136/* look up the queue state by the interface name and the queueing type. 
*/ 137void * 138altq_lookup(name, type) 139 char *name; 140 int type; 141{ 142 struct ifnet *ifp; 143 144 if ((ifp = ifunit(name)) != NULL) { 145 /* read if_snd unlocked */ 146 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) 147 return (ifp->if_snd.altq_disc); 148 } 149 150 return NULL; 151} 152 153int 154altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) 155 struct ifaltq *ifq; 156 int type; 157 void *discipline; 158 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 159 struct mbuf *(*dequeue)(struct ifaltq *, int); 160 int (*request)(struct ifaltq *, int, void *); 161 void *clfier; 162 void *(*classify)(void *, struct mbuf *, int); 163{ 164 IFQ_LOCK(ifq); 165 if (!ALTQ_IS_READY(ifq)) { 166 IFQ_UNLOCK(ifq); 167 return ENXIO; 168 } 169 170#ifdef ALTQ3_COMPAT 171 /* 172 * pfaltq can override the existing discipline, but altq3 cannot. 173 * check these if clfier is not NULL (which implies altq3). 174 */ 175 if (clfier != NULL) { 176 if (ALTQ_IS_ENABLED(ifq)) { 177 IFQ_UNLOCK(ifq); 178 return EBUSY; 179 } 180 if (ALTQ_IS_ATTACHED(ifq)) { 181 IFQ_UNLOCK(ifq); 182 return EEXIST; 183 } 184 } 185#endif 186 ifq->altq_type = type; 187 ifq->altq_disc = discipline; 188 ifq->altq_enqueue = enqueue; 189 ifq->altq_dequeue = dequeue; 190 ifq->altq_request = request; 191 ifq->altq_clfier = clfier; 192 ifq->altq_classify = classify; 193 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); 194#ifdef ALTQ3_COMPAT 195#ifdef ALTQ_KLD 196 altq_module_incref(type); 197#endif 198#endif 199 IFQ_UNLOCK(ifq); 200 return 0; 201} 202 203int 204altq_detach(ifq) 205 struct ifaltq *ifq; 206{ 207 IFQ_LOCK(ifq); 208 209 if (!ALTQ_IS_READY(ifq)) { 210 IFQ_UNLOCK(ifq); 211 return ENXIO; 212 } 213 if (ALTQ_IS_ENABLED(ifq)) { 214 IFQ_UNLOCK(ifq); 215 return EBUSY; 216 } 217 if (!ALTQ_IS_ATTACHED(ifq)) { 218 IFQ_UNLOCK(ifq); 219 return (0); 220 } 221#ifdef ALTQ3_COMPAT 222#ifdef ALTQ_KLD 223 altq_module_declref(ifq->altq_type); 224#endif 
225#endif 226 227 ifq->altq_type = ALTQT_NONE; 228 ifq->altq_disc = NULL; 229 ifq->altq_enqueue = NULL; 230 ifq->altq_dequeue = NULL; 231 ifq->altq_request = NULL; 232 ifq->altq_clfier = NULL; 233 ifq->altq_classify = NULL; 234 ifq->altq_flags &= ALTQF_CANTCHANGE; 235 236 IFQ_UNLOCK(ifq); 237 return 0; 238} 239 240int 241altq_enable(ifq) 242 struct ifaltq *ifq; 243{ 244 int s; 245 246 IFQ_LOCK(ifq); 247 248 if (!ALTQ_IS_READY(ifq)) { 249 IFQ_UNLOCK(ifq); 250 return ENXIO; 251 } 252 if (ALTQ_IS_ENABLED(ifq)) { 253 IFQ_UNLOCK(ifq); 254 return 0; 255 } 256 257#ifdef __NetBSD__ 258 s = splnet(); 259#else 260 s = splimp(); 261#endif 262 IFQ_PURGE_NOLOCK(ifq); 263 ASSERT(ifq->ifq_len == 0); 264 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ 265 ifq->altq_flags |= ALTQF_ENABLED; 266 if (ifq->altq_clfier != NULL) 267 ifq->altq_flags |= ALTQF_CLASSIFY; 268 splx(s); 269 270 IFQ_UNLOCK(ifq); 271 return 0; 272} 273 274int 275altq_disable(ifq) 276 struct ifaltq *ifq; 277{ 278 int s; 279 280 IFQ_LOCK(ifq); 281 if (!ALTQ_IS_ENABLED(ifq)) { 282 IFQ_UNLOCK(ifq); 283 return 0; 284 } 285 286#ifdef __NetBSD__ 287 s = splnet(); 288#else 289 s = splimp(); 290#endif 291 IFQ_PURGE_NOLOCK(ifq); 292 ASSERT(ifq->ifq_len == 0); 293 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); 294 splx(s); 295 296 IFQ_UNLOCK(ifq); 297 return 0; 298} 299 300#ifdef ALTQ_DEBUG 301void 302altq_assert(file, line, failedexpr) 303 const char *file, *failedexpr; 304 int line; 305{ 306 (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", 307 failedexpr, file, line); 308 panic("altq assertion"); 309 /* NOTREACHED */ 310} 311#endif 312 313/* 314 * internal representation of token bucket parameters 315 * rate: byte_per_unittime << 32 316 * (((bits_per_sec) / 8) << 32) / machclk_freq 317 * depth: byte << 32 318 * 319 */ 320#define TBR_SHIFT 32 321#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) 322#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) 323 324static struct mbuf * 325tbr_dequeue(ifq, op) 326 
struct ifaltq *ifq; 327 int op; 328{ 329 struct tb_regulator *tbr; 330 struct mbuf *m; 331 int64_t interval; 332 u_int64_t now; 333 334 IFQ_LOCK_ASSERT(ifq); 335 tbr = ifq->altq_tbr; 336 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { 337 /* if this is a remove after poll, bypass tbr check */ 338 } else { 339 /* update token only when it is negative */ 340 if (tbr->tbr_token <= 0) { 341 now = read_machclk(); 342 interval = now - tbr->tbr_last; 343 if (interval >= tbr->tbr_filluptime) 344 tbr->tbr_token = tbr->tbr_depth; 345 else { 346 tbr->tbr_token += interval * tbr->tbr_rate; 347 if (tbr->tbr_token > tbr->tbr_depth) 348 tbr->tbr_token = tbr->tbr_depth; 349 } 350 tbr->tbr_last = now; 351 } 352 /* if token is still negative, don't allow dequeue */ 353 if (tbr->tbr_token <= 0) 354 return (NULL); 355 } 356 357 if (ALTQ_IS_ENABLED(ifq)) 358 m = (*ifq->altq_dequeue)(ifq, op); 359 else { 360 if (op == ALTDQ_POLL) 361 _IF_POLL(ifq, m); 362 else 363 _IF_DEQUEUE(ifq, m); 364 } 365 366 if (m != NULL && op == ALTDQ_REMOVE) 367 tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); 368 tbr->tbr_lastop = op; 369 return (m); 370} 371 372/* 373 * set a token bucket regulator. 374 * if the specified rate is zero, the token bucket regulator is deleted. 
375 */ 376int 377tbr_set(ifq, profile) 378 struct ifaltq *ifq; 379 struct tb_profile *profile; 380{ 381 struct tb_regulator *tbr, *otbr; 382 383 if (tbr_dequeue_ptr == NULL) 384 tbr_dequeue_ptr = tbr_dequeue; 385 386 if (machclk_freq == 0) 387 init_machclk(); 388 if (machclk_freq == 0) { 389 printf("tbr_set: no cpu clock available!\n"); 390 return (ENXIO); 391 } 392 393 IFQ_LOCK(ifq); 394 if (profile->rate == 0) { 395 /* delete this tbr */ 396 if ((tbr = ifq->altq_tbr) == NULL) { 397 IFQ_UNLOCK(ifq); 398 return (ENOENT); 399 } 400 ifq->altq_tbr = NULL; 401 free(tbr, M_DEVBUF); 402 IFQ_UNLOCK(ifq); 403 return (0); 404 } 405 406 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); 407 if (tbr == NULL) { 408 IFQ_UNLOCK(ifq); 409 return (ENOMEM); 410 } 411 412 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; 413 tbr->tbr_depth = TBR_SCALE(profile->depth); 414 if (tbr->tbr_rate > 0) 415 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; 416 else 417 tbr->tbr_filluptime = 0xffffffffffffffffLL; 418 tbr->tbr_token = tbr->tbr_depth; 419 tbr->tbr_last = read_machclk(); 420 tbr->tbr_lastop = ALTDQ_REMOVE; 421 422 otbr = ifq->altq_tbr; 423 ifq->altq_tbr = tbr; /* set the new tbr */ 424 425 if (otbr != NULL) 426 free(otbr, M_DEVBUF); 427 else { 428 if (tbr_timer == 0) { 429 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 430 tbr_timer = 1; 431 } 432 } 433 IFQ_UNLOCK(ifq); 434 return (0); 435} 436 437/* 438 * tbr_timeout goes through the interface list, and kicks the drivers 439 * if necessary. 
440 * 441 * MPSAFE 442 */ 443static void 444tbr_timeout(arg) 445 void *arg; 446{ 447#ifdef __FreeBSD__ 448 VNET_ITERATOR_DECL(vnet_iter); 449#endif 450 struct ifnet *ifp; 451 int active, s; 452 453 active = 0; 454#ifdef __NetBSD__ 455 s = splnet(); 456#else 457 s = splimp(); 458#endif 459#ifdef __FreeBSD__ 460 IFNET_RLOCK_NOSLEEP(); 461 VNET_LIST_RLOCK_NOSLEEP(); 462 VNET_FOREACH(vnet_iter) { 463 CURVNET_SET(vnet_iter); 464#endif 465 for (ifp = TAILQ_FIRST(&V_ifnet); ifp; 466 ifp = TAILQ_NEXT(ifp, if_list)) { 467 /* read from if_snd unlocked */ 468 if (!TBR_IS_ENABLED(&ifp->if_snd)) 469 continue; 470 active++; 471 if (!IFQ_IS_EMPTY(&ifp->if_snd) && 472 ifp->if_start != NULL) 473 (*ifp->if_start)(ifp); 474 } 475#ifdef __FreeBSD__ 476 CURVNET_RESTORE(); 477 } 478 VNET_LIST_RUNLOCK_NOSLEEP(); 479 IFNET_RUNLOCK_NOSLEEP(); 480#endif 481 splx(s); 482 if (active > 0) 483 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 484 else 485 tbr_timer = 0; /* don't need tbr_timer anymore */ 486} 487 488/* 489 * get token bucket regulator profile 490 */ 491int 492tbr_get(ifq, profile) 493 struct ifaltq *ifq; 494 struct tb_profile *profile; 495{ 496 struct tb_regulator *tbr; 497 498 IFQ_LOCK(ifq); 499 if ((tbr = ifq->altq_tbr) == NULL) { 500 profile->rate = 0; 501 profile->depth = 0; 502 } else { 503 profile->rate = 504 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); 505 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); 506 } 507 IFQ_UNLOCK(ifq); 508 return (0); 509} 510 511/* 512 * attach a discipline to the interface. if one already exists, it is 513 * overridden. 514 * Locking is done in the discipline specific attach functions. Basically 515 * they call back to altq_attach which takes care of the attach and locking. 
516 */ 517int 518altq_pfattach(struct pf_altq *a) 519{ 520 int error = 0; 521 522 switch (a->scheduler) { 523 case ALTQT_NONE: 524 break; 525#ifdef ALTQ_CBQ 526 case ALTQT_CBQ: 527 error = cbq_pfattach(a); 528 break; 529#endif 530#ifdef ALTQ_PRIQ 531 case ALTQT_PRIQ: 532 error = priq_pfattach(a); 533 break; 534#endif 535#ifdef ALTQ_HFSC 536 case ALTQT_HFSC: 537 error = hfsc_pfattach(a); 538 break; 539#endif 540#ifdef ALTQ_FAIRQ 541 case ALTQT_FAIRQ: 542 error = fairq_pfattach(a); 543 break; 544#endif 545#ifdef ALTQ_CODEL 546 case ALTQT_CODEL: 547 error = codel_pfattach(a); 548 break; 549#endif 550 default: 551 error = ENXIO; 552 } 553 554 return (error); 555} 556 557/* 558 * detach a discipline from the interface. 559 * it is possible that the discipline was already overridden by another 560 * discipline. 561 */ 562int 563altq_pfdetach(struct pf_altq *a) 564{ 565 struct ifnet *ifp; 566 int s, error = 0; 567 568 if ((ifp = ifunit(a->ifname)) == NULL) 569 return (EINVAL); 570 571 /* if this discipline is no longer referenced, just return */ 572 /* read unlocked from if_snd */ 573 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) 574 return (0); 575 576#ifdef __NetBSD__ 577 s = splnet(); 578#else 579 s = splimp(); 580#endif 581 /* read unlocked from if_snd, _disable and _detach take care */ 582 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 583 error = altq_disable(&ifp->if_snd); 584 if (error == 0) 585 error = altq_detach(&ifp->if_snd); 586 splx(s); 587 588 return (error); 589} 590 591/* 592 * add a discipline or a queue 593 * Locking is done in the discipline specific functions with regards to 594 * malloc with WAITOK, also it is not yet clear which lock to use. 
595 */ 596int 597altq_add(struct pf_altq *a) 598{ 599 int error = 0; 600 601 if (a->qname[0] != 0) 602 return (altq_add_queue(a)); 603 604 if (machclk_freq == 0) 605 init_machclk(); 606 if (machclk_freq == 0) 607 panic("altq_add: no cpu clock"); 608 609 switch (a->scheduler) { 610#ifdef ALTQ_CBQ 611 case ALTQT_CBQ: 612 error = cbq_add_altq(a); 613 break; 614#endif 615#ifdef ALTQ_PRIQ 616 case ALTQT_PRIQ: 617 error = priq_add_altq(a); 618 break; 619#endif 620#ifdef ALTQ_HFSC 621 case ALTQT_HFSC: 622 error = hfsc_add_altq(a); 623 break; 624#endif 625#ifdef ALTQ_FAIRQ 626 case ALTQT_FAIRQ: 627 error = fairq_add_altq(a); 628 break; 629#endif 630#ifdef ALTQ_CODEL 631 case ALTQT_CODEL: 632 error = codel_add_altq(a); 633 break; 634#endif 635 default: 636 error = ENXIO; 637 } 638 639 return (error); 640} 641 642/* 643 * remove a discipline or a queue 644 * It is yet unclear what lock to use to protect this operation, the 645 * discipline specific functions will determine and grab it 646 */ 647int 648altq_remove(struct pf_altq *a) 649{ 650 int error = 0; 651 652 if (a->qname[0] != 0) 653 return (altq_remove_queue(a)); 654 655 switch (a->scheduler) { 656#ifdef ALTQ_CBQ 657 case ALTQT_CBQ: 658 error = cbq_remove_altq(a); 659 break; 660#endif 661#ifdef ALTQ_PRIQ 662 case ALTQT_PRIQ: 663 error = priq_remove_altq(a); 664 break; 665#endif 666#ifdef ALTQ_HFSC 667 case ALTQT_HFSC: 668 error = hfsc_remove_altq(a); 669 break; 670#endif 671#ifdef ALTQ_FAIRQ 672 case ALTQT_FAIRQ: 673 error = fairq_remove_altq(a); 674 break; 675#endif 676#ifdef ALTQ_CODEL 677 case ALTQT_CODEL: 678 error = codel_remove_altq(a); 679 break; 680#endif 681 default: 682 error = ENXIO; 683 } 684 685 return (error); 686} 687 688/* 689 * add a queue to the discipline 690 * It is yet unclear what lock to use to protect this operation, the 691 * discipline specific functions will determine and grab it 692 */ 693int 694altq_add_queue(struct pf_altq *a) 695{ 696 int error = 0; 697 698 switch (a->scheduler) { 
699#ifdef ALTQ_CBQ 700 case ALTQT_CBQ: 701 error = cbq_add_queue(a); 702 break; 703#endif 704#ifdef ALTQ_PRIQ 705 case ALTQT_PRIQ: 706 error = priq_add_queue(a); 707 break; 708#endif 709#ifdef ALTQ_HFSC 710 case ALTQT_HFSC: 711 error = hfsc_add_queue(a); 712 break; 713#endif 714#ifdef ALTQ_FAIRQ 715 case ALTQT_FAIRQ: 716 error = fairq_add_queue(a); 717 break; 718#endif 719 default: 720 error = ENXIO; 721 } 722 723 return (error); 724} 725 726/* 727 * remove a queue from the discipline 728 * It is yet unclear what lock to use to protect this operation, the 729 * discipline specific functions will determine and grab it 730 */ 731int 732altq_remove_queue(struct pf_altq *a) 733{ 734 int error = 0; 735 736 switch (a->scheduler) { 737#ifdef ALTQ_CBQ 738 case ALTQT_CBQ: 739 error = cbq_remove_queue(a); 740 break; 741#endif 742#ifdef ALTQ_PRIQ 743 case ALTQT_PRIQ: 744 error = priq_remove_queue(a); 745 break; 746#endif 747#ifdef ALTQ_HFSC 748 case ALTQT_HFSC: 749 error = hfsc_remove_queue(a); 750 break; 751#endif 752#ifdef ALTQ_FAIRQ 753 case ALTQT_FAIRQ: 754 error = fairq_remove_queue(a); 755 break; 756#endif 757 default: 758 error = ENXIO; 759 } 760 761 return (error); 762} 763 764/* 765 * get queue statistics 766 * Locking is done in the discipline specific functions with regards to 767 * copyout operations, also it is not yet clear which lock to use. 
768 */ 769int 770altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) 771{ 772 int error = 0; 773 774 switch (a->scheduler) { 775#ifdef ALTQ_CBQ 776 case ALTQT_CBQ: 777 error = cbq_getqstats(a, ubuf, nbytes); 778 break; 779#endif 780#ifdef ALTQ_PRIQ 781 case ALTQT_PRIQ: 782 error = priq_getqstats(a, ubuf, nbytes); 783 break; 784#endif 785#ifdef ALTQ_HFSC 786 case ALTQT_HFSC: 787 error = hfsc_getqstats(a, ubuf, nbytes); 788 break; 789#endif 790#ifdef ALTQ_FAIRQ 791 case ALTQT_FAIRQ: 792 error = fairq_getqstats(a, ubuf, nbytes); 793 break; 794#endif 795#ifdef ALTQ_CODEL 796 case ALTQT_CODEL: 797 error = codel_getqstats(a, ubuf, nbytes); 798 break; 799#endif 800 default: 801 error = ENXIO; 802 } 803 804 return (error); 805} 806 807/* 808 * read and write diffserv field in IPv4 or IPv6 header 809 */ 810u_int8_t 811read_dsfield(m, pktattr) 812 struct mbuf *m; 813 struct altq_pktattr *pktattr; 814{ 815 struct mbuf *m0; 816 u_int8_t ds_field = 0; 817 818 if (pktattr == NULL || 819 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 820 return ((u_int8_t)0); 821 822 /* verify that pattr_hdr is within the mbuf data */ 823 for (m0 = m; m0 != NULL; m0 = m0->m_next) 824 if ((pktattr->pattr_hdr >= m0->m_data) && 825 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 826 break; 827 if (m0 == NULL) { 828 /* ick, pattr_hdr is stale */ 829 pktattr->pattr_af = AF_UNSPEC; 830#ifdef ALTQ_DEBUG 831 printf("read_dsfield: can't locate header!\n"); 832#endif 833 return ((u_int8_t)0); 834 } 835 836 if (pktattr->pattr_af == AF_INET) { 837 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 838 839 if (ip->ip_v != 4) 840 return ((u_int8_t)0); /* version mismatch! */ 841 ds_field = ip->ip_tos; 842 } 843#ifdef INET6 844 else if (pktattr->pattr_af == AF_INET6) { 845 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 846 u_int32_t flowlabel; 847 848 flowlabel = ntohl(ip6->ip6_flow); 849 if ((flowlabel >> 28) != 6) 850 return ((u_int8_t)0); /* version mismatch! 
*/ 851 ds_field = (flowlabel >> 20) & 0xff; 852 } 853#endif 854 return (ds_field); 855} 856 857void 858write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) 859{ 860 struct mbuf *m0; 861 862 if (pktattr == NULL || 863 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 864 return; 865 866 /* verify that pattr_hdr is within the mbuf data */ 867 for (m0 = m; m0 != NULL; m0 = m0->m_next) 868 if ((pktattr->pattr_hdr >= m0->m_data) && 869 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 870 break; 871 if (m0 == NULL) { 872 /* ick, pattr_hdr is stale */ 873 pktattr->pattr_af = AF_UNSPEC; 874#ifdef ALTQ_DEBUG 875 printf("write_dsfield: can't locate header!\n"); 876#endif 877 return; 878 } 879 880 if (pktattr->pattr_af == AF_INET) { 881 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 882 u_int8_t old; 883 int32_t sum; 884 885 if (ip->ip_v != 4) 886 return; /* version mismatch! */ 887 old = ip->ip_tos; 888 dsfield |= old & 3; /* leave CU bits */ 889 if (old == dsfield) 890 return; 891 ip->ip_tos = dsfield; 892 /* 893 * update checksum (from RFC1624) 894 * HC' = ~(~HC + ~m + m') 895 */ 896 sum = ~ntohs(ip->ip_sum) & 0xffff; 897 sum += 0xff00 + (~old & 0xff) + dsfield; 898 sum = (sum >> 16) + (sum & 0xffff); 899 sum += (sum >> 16); /* add carry */ 900 901 ip->ip_sum = htons(~sum & 0xffff); 902 } 903#ifdef INET6 904 else if (pktattr->pattr_af == AF_INET6) { 905 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 906 u_int32_t flowlabel; 907 908 flowlabel = ntohl(ip6->ip6_flow); 909 if ((flowlabel >> 28) != 6) 910 return; /* version mismatch! */ 911 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); 912 ip6->ip6_flow = htonl(flowlabel); 913 } 914#endif 915 return; 916} 917 918 919/* 920 * high resolution clock support taking advantage of a machine dependent 921 * high resolution time counter (e.g., timestamp counter of intel pentium). 
922 * we assume 923 * - 64-bit-long monotonically-increasing counter 924 * - frequency range is 100M-4GHz (CPU speed) 925 */ 926/* if pcc is not available or disabled, emulate 256MHz using microtime() */ 927#define MACHCLK_SHIFT 8 928 929int machclk_usepcc; 930u_int32_t machclk_freq; 931u_int32_t machclk_per_tick; 932 933#if defined(__i386__) && defined(__NetBSD__) 934extern u_int64_t cpu_tsc_freq; 935#endif 936 937#if (__FreeBSD_version >= 700035) 938/* Update TSC freq with the value indicated by the caller. */ 939static void 940tsc_freq_changed(void *arg, const struct cf_level *level, int status) 941{ 942 /* If there was an error during the transition, don't do anything. */ 943 if (status != 0) 944 return; 945 946#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) 947 /* If TSC is P-state invariant, don't do anything. */ 948 if (tsc_is_invariant) 949 return; 950#endif 951 952 /* Total setting for this level gives the new frequency in MHz. */ 953 init_machclk(); 954} 955EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 956 EVENTHANDLER_PRI_LAST); 957#endif /* __FreeBSD_version >= 700035 */ 958 959static void 960init_machclk_setup(void) 961{ 962#if (__FreeBSD_version >= 600000) 963 callout_init(&tbr_callout, 0); 964#endif 965 966 machclk_usepcc = 1; 967 968#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) 969 machclk_usepcc = 0; 970#endif 971#if defined(__FreeBSD__) && defined(SMP) 972 machclk_usepcc = 0; 973#endif 974#if defined(__NetBSD__) && defined(MULTIPROCESSOR) 975 machclk_usepcc = 0; 976#endif 977#if defined(__amd64__) || defined(__i386__) 978 /* check if TSC is available */ 979#ifdef __FreeBSD__ 980 if ((cpu_feature & CPUID_TSC) == 0 || 981 atomic_load_acq_64(&tsc_freq) == 0) 982#else 983 if ((cpu_feature & CPUID_TSC) == 0) 984#endif 985 machclk_usepcc = 0; 986#endif 987} 988 989void 990init_machclk(void) 991{ 992 static int called; 993 994 /* Call one-time initialization function. 
*/ 995 if (!called) { 996 init_machclk_setup(); 997 called = 1; 998 } 999 1000 if (machclk_usepcc == 0) { 1001 /* emulate 256MHz using microtime() */ 1002 machclk_freq = 1000000 << MACHCLK_SHIFT; 1003 machclk_per_tick = machclk_freq / hz; 1004#ifdef ALTQ_DEBUG 1005 printf("altq: emulate %uHz cpu clock\n", machclk_freq); 1006#endif 1007 return; 1008 } 1009 1010 /* 1011 * if the clock frequency (of Pentium TSC or Alpha PCC) is 1012 * accessible, just use it. 1013 */ 1014#if defined(__amd64__) || defined(__i386__) 1015#ifdef __FreeBSD__ 1016 machclk_freq = atomic_load_acq_64(&tsc_freq); 1017#elif defined(__NetBSD__) 1018 machclk_freq = (u_int32_t)cpu_tsc_freq; 1019#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU)) 1020 machclk_freq = pentium_mhz * 1000000; 1021#endif 1022#endif 1023 1024 /* 1025 * if we don't know the clock frequency, measure it. 1026 */ 1027 if (machclk_freq == 0) { 1028 static int wait; 1029 struct timeval tv_start, tv_end; 1030 u_int64_t start, end, diff; 1031 int timo; 1032 1033 microtime(&tv_start); 1034 start = read_machclk(); 1035 timo = hz; /* 1 sec */ 1036 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 1037 microtime(&tv_end); 1038 end = read_machclk(); 1039 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 1040 + tv_end.tv_usec - tv_start.tv_usec; 1041 if (diff != 0) 1042 machclk_freq = (u_int)((end - start) * 1000000 / diff); 1043 } 1044 1045 machclk_per_tick = machclk_freq / hz; 1046 1047#ifdef ALTQ_DEBUG 1048 printf("altq: CPU clock: %uHz\n", machclk_freq); 1049#endif 1050} 1051 1052#if defined(__OpenBSD__) && defined(__i386__) 1053static __inline u_int64_t 1054rdtsc(void) 1055{ 1056 u_int64_t rv; 1057 __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); 1058 return (rv); 1059} 1060#endif /* __OpenBSD__ && __i386__ */ 1061 1062u_int64_t 1063read_machclk(void) 1064{ 1065 u_int64_t val; 1066 1067 if (machclk_usepcc) { 1068#if defined(__amd64__) || defined(__i386__) 1069 val = rdtsc(); 1070#else 1071 
panic("read_machclk"); 1072#endif 1073 } else { 1074 struct timeval tv; 1075 1076 microtime(&tv); 1077 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 1078 + tv.tv_usec) << MACHCLK_SHIFT); 1079 } 1080 return (val); 1081} 1082 1083#ifdef ALTQ3_CLFIER_COMPAT 1084 1085#ifndef IPPROTO_ESP 1086#define IPPROTO_ESP 50 /* encapsulating security payload */ 1087#endif 1088#ifndef IPPROTO_AH 1089#define IPPROTO_AH 51 /* authentication header */ 1090#endif 1091 1092/* 1093 * extract flow information from a given packet. 1094 * filt_mask shows flowinfo fields required. 1095 * we assume the ip header is in one mbuf, and addresses and ports are 1096 * in network byte order. 1097 */ 1098int 1099altq_extractflow(m, af, flow, filt_bmask) 1100 struct mbuf *m; 1101 int af; 1102 struct flowinfo *flow; 1103 u_int32_t filt_bmask; 1104{ 1105 1106 switch (af) { 1107 case PF_INET: { 1108 struct flowinfo_in *fin; 1109 struct ip *ip; 1110 1111 ip = mtod(m, struct ip *); 1112 1113 if (ip->ip_v != 4) 1114 break; 1115 1116 fin = (struct flowinfo_in *)flow; 1117 fin->fi_len = sizeof(struct flowinfo_in); 1118 fin->fi_family = AF_INET; 1119 1120 fin->fi_proto = ip->ip_p; 1121 fin->fi_tos = ip->ip_tos; 1122 1123 fin->fi_src.s_addr = ip->ip_src.s_addr; 1124 fin->fi_dst.s_addr = ip->ip_dst.s_addr; 1125 1126 if (filt_bmask & FIMB4_PORTS) 1127 /* if port info is required, extract port numbers */ 1128 extract_ports4(m, ip, fin); 1129 else { 1130 fin->fi_sport = 0; 1131 fin->fi_dport = 0; 1132 fin->fi_gpi = 0; 1133 } 1134 return (1); 1135 } 1136 1137#ifdef INET6 1138 case PF_INET6: { 1139 struct flowinfo_in6 *fin6; 1140 struct ip6_hdr *ip6; 1141 1142 ip6 = mtod(m, struct ip6_hdr *); 1143 /* should we check the ip version? 
*/ 1144 1145 fin6 = (struct flowinfo_in6 *)flow; 1146 fin6->fi6_len = sizeof(struct flowinfo_in6); 1147 fin6->fi6_family = AF_INET6; 1148 1149 fin6->fi6_proto = ip6->ip6_nxt; 1150 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 1151 1152 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); 1153 fin6->fi6_src = ip6->ip6_src; 1154 fin6->fi6_dst = ip6->ip6_dst; 1155 1156 if ((filt_bmask & FIMB6_PORTS) || 1157 ((filt_bmask & FIMB6_PROTO) 1158 && ip6->ip6_nxt > IPPROTO_IPV6)) 1159 /* 1160 * if port info is required, or proto is required 1161 * but there are option headers, extract port 1162 * and protocol numbers. 1163 */ 1164 extract_ports6(m, ip6, fin6); 1165 else { 1166 fin6->fi6_sport = 0; 1167 fin6->fi6_dport = 0; 1168 fin6->fi6_gpi = 0; 1169 } 1170 return (1); 1171 } 1172#endif /* INET6 */ 1173 1174 default: 1175 break; 1176 } 1177 1178 /* failed */ 1179 flow->fi_len = sizeof(struct flowinfo); 1180 flow->fi_family = AF_UNSPEC; 1181 return (0); 1182} 1183 1184/* 1185 * helper routine to extract port numbers 1186 */ 1187/* structure for ipsec and ipv6 option header template */ 1188struct _opt6 { 1189 u_int8_t opt6_nxt; /* next header */ 1190 u_int8_t opt6_hlen; /* header extension length */ 1191 u_int16_t _pad; 1192 u_int32_t ah_spi; /* security parameter index 1193 for authentication header */ 1194}; 1195 1196/* 1197 * extract port numbers from a ipv4 packet. 
1198 */ 1199static int 1200extract_ports4(m, ip, fin) 1201 struct mbuf *m; 1202 struct ip *ip; 1203 struct flowinfo_in *fin; 1204{ 1205 struct mbuf *m0; 1206 u_short ip_off; 1207 u_int8_t proto; 1208 int off; 1209 1210 fin->fi_sport = 0; 1211 fin->fi_dport = 0; 1212 fin->fi_gpi = 0; 1213 1214 ip_off = ntohs(ip->ip_off); 1215 /* if it is a fragment, try cached fragment info */ 1216 if (ip_off & IP_OFFMASK) { 1217 ip4f_lookup(ip, fin); 1218 return (1); 1219 } 1220 1221 /* locate the mbuf containing the protocol header */ 1222 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1223 if (((caddr_t)ip >= m0->m_data) && 1224 ((caddr_t)ip < m0->m_data + m0->m_len)) 1225 break; 1226 if (m0 == NULL) { 1227#ifdef ALTQ_DEBUG 1228 printf("extract_ports4: can't locate header! ip=%p\n", ip); 1229#endif 1230 return (0); 1231 } 1232 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 1233 proto = ip->ip_p; 1234 1235#ifdef ALTQ_IPSEC 1236 again: 1237#endif 1238 while (off >= m0->m_len) { 1239 off -= m0->m_len; 1240 m0 = m0->m_next; 1241 if (m0 == NULL) 1242 return (0); /* bogus ip_hl! 
 */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		/*
		 * TCP and UDP both keep the 16-bit source/destination
		 * port pair in the first 4 bytes of the header, so a
		 * udphdr overlay works for either protocol.
		 */
		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		/* the SPI is the first 32-bit word of the ESP header */
		if (fin->fi_gpi == 0){
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* AH length field counts 32-bit words beyond 2 */
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* goto the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
/*
 * extract TCP/UDP port numbers (and, when present, the IPsec SPI) from
 * an IPv6 packet into fin6, walking the extension header chain.
 * returns 1 when the flow info was filled in, 0 when the headers could
 * not be located inside the mbuf chain.
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance m0/off to the mbuf holding the current offset */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/*
		 * give up rather than follow a header that is split
		 * across mbufs (only in-mbuf headers are parsed).
		 */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* port pair layout is shared by TCP and UDP */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			/* SPI is the first 32-bit word of the ESP header */
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length field counts 32-bit words beyond 2 */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* option header length is in units of 8 bytes */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */

/*
 * install a flow filter into the classifier.  the filter is copied, its
 * address masks are normalized (wildcard vs. full mask), and it is
 * inserted into the hash bucket selected by the destination address
 * (IPv4) or flow label (IPv6), ordered by descending rule number.
 * on success the opaque filter handle is returned through *phandle.
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	/*
	 * NOTE(review): with M_WAITOK, FreeBSD malloc(9) sleeps instead of
	 * returning NULL, so the NULL check below looks defensive —
	 * presumably kept for the NetBSD/OpenBSD builds; confirm.
	 */
	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses  */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		/* unspecified address acts as a wildcard, as for IPv4 */
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses  */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		/* IPv6 buckets hash on the flow label, not the address */
		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

/*
 * remove the filter identified by handle from the classifier.
 * returns EINVAL when the handle does not resolve to a filter.
 * note: acc_fbmask is deliberately left stale (see todo below).
 */
int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing to the specified class.
 * if the all flag is not 0, delete all the filters.
1553 */ 1554int 1555acc_discard_filters(classifier, class, all) 1556 struct acc_classifier *classifier; 1557 void *class; 1558 int all; 1559{ 1560 struct acc_filter *afp; 1561 int i, s; 1562 1563#ifdef __NetBSD__ 1564 s = splnet(); 1565#else 1566 s = splimp(); 1567#endif 1568 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 1569 do { 1570 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1571 if (all || afp->f_class == class) { 1572 LIST_REMOVE(afp, f_chain); 1573 free(afp, M_DEVBUF); 1574 /* start again from the head */ 1575 break; 1576 } 1577 } while (afp != NULL); 1578 } 1579 splx(s); 1580 1581 if (all) 1582 classifier->acc_fbmask = 0; 1583 1584 return (0); 1585} 1586 1587void * 1588acc_classify(clfier, m, af) 1589 void *clfier; 1590 struct mbuf *m; 1591 int af; 1592{ 1593 struct acc_classifier *classifier; 1594 struct flowinfo flow; 1595 struct acc_filter *afp; 1596 int i; 1597 1598 classifier = (struct acc_classifier *)clfier; 1599 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 1600 1601 if (flow.fi_family == AF_INET) { 1602 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 1603 1604 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 1605 /* only tos is used */ 1606 LIST_FOREACH(afp, 1607 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1608 f_chain) 1609 if (apply_tosfilter4(afp->f_fbmask, 1610 &afp->f_filter, fp)) 1611 /* filter matched */ 1612 return (afp->f_class); 1613 } else if ((classifier->acc_fbmask & 1614 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 1615 == 0) { 1616 /* only proto and ports are used */ 1617 LIST_FOREACH(afp, 1618 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1619 f_chain) 1620 if (apply_ppfilter4(afp->f_fbmask, 1621 &afp->f_filter, fp)) 1622 /* filter matched */ 1623 return (afp->f_class); 1624 } else { 1625 /* get the filter hash entry from its dest address */ 1626 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 1627 do { 1628 /* 1629 * go through this loop twice. 
first for dst 1630 * hash, second for wildcards. 1631 */ 1632 LIST_FOREACH(afp, &classifier->acc_filters[i], 1633 f_chain) 1634 if (apply_filter4(afp->f_fbmask, 1635 &afp->f_filter, fp)) 1636 /* filter matched */ 1637 return (afp->f_class); 1638 1639 /* 1640 * check again for filters with a dst addr 1641 * wildcard. 1642 * (daddr == 0 || dmask != 0xffffffff). 1643 */ 1644 if (i != ACC_WILDCARD_INDEX) 1645 i = ACC_WILDCARD_INDEX; 1646 else 1647 break; 1648 } while (1); 1649 } 1650 } 1651#ifdef INET6 1652 else if (flow.fi_family == AF_INET6) { 1653 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1654 1655 /* get the filter hash entry from its flow ID */ 1656 if (fp6->fi6_flowlabel != 0) 1657 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1658 else 1659 /* flowlable can be zero */ 1660 i = ACC_WILDCARD_INDEX; 1661 1662 /* go through this loop twice. first for flow hash, second 1663 for wildcards. */ 1664 do { 1665 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1666 if (apply_filter6(afp->f_fbmask, 1667 (struct flow_filter6 *)&afp->f_filter, 1668 fp6)) 1669 /* filter matched */ 1670 return (afp->f_class); 1671 1672 /* 1673 * check again for filters with a wildcard. 
1674 */ 1675 if (i != ACC_WILDCARD_INDEX) 1676 i = ACC_WILDCARD_INDEX; 1677 else 1678 break; 1679 } while (1); 1680 } 1681#endif /* INET6 */ 1682 1683 /* no filter matched */ 1684 return (NULL); 1685} 1686 1687static int 1688apply_filter4(fbmask, filt, pkt) 1689 u_int32_t fbmask; 1690 struct flow_filter *filt; 1691 struct flowinfo_in *pkt; 1692{ 1693 if (filt->ff_flow.fi_family != AF_INET) 1694 return (0); 1695 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1696 return (0); 1697 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1698 return (0); 1699 if ((fbmask & FIMB4_DADDR) && 1700 filt->ff_flow.fi_dst.s_addr != 1701 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1702 return (0); 1703 if ((fbmask & FIMB4_SADDR) && 1704 filt->ff_flow.fi_src.s_addr != 1705 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1706 return (0); 1707 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1708 return (0); 1709 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1710 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1711 return (0); 1712 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1713 return (0); 1714 /* match */ 1715 return (1); 1716} 1717 1718/* 1719 * filter matching function optimized for a common case that checks 1720 * only protocol and port numbers 1721 */ 1722static int 1723apply_ppfilter4(fbmask, filt, pkt) 1724 u_int32_t fbmask; 1725 struct flow_filter *filt; 1726 struct flowinfo_in *pkt; 1727{ 1728 if (filt->ff_flow.fi_family != AF_INET) 1729 return (0); 1730 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1731 return (0); 1732 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1733 return (0); 1734 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1735 return (0); 1736 /* match */ 1737 return (1); 1738} 1739 1740/* 1741 * filter matching function only for tos field. 
1742 */ 1743static int 1744apply_tosfilter4(fbmask, filt, pkt) 1745 u_int32_t fbmask; 1746 struct flow_filter *filt; 1747 struct flowinfo_in *pkt; 1748{ 1749 if (filt->ff_flow.fi_family != AF_INET) 1750 return (0); 1751 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1752 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1753 return (0); 1754 /* match */ 1755 return (1); 1756} 1757 1758#ifdef INET6 1759static int 1760apply_filter6(fbmask, filt, pkt) 1761 u_int32_t fbmask; 1762 struct flow_filter6 *filt; 1763 struct flowinfo_in6 *pkt; 1764{ 1765 int i; 1766 1767 if (filt->ff_flow6.fi6_family != AF_INET6) 1768 return (0); 1769 if ((fbmask & FIMB6_FLABEL) && 1770 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1771 return (0); 1772 if ((fbmask & FIMB6_PROTO) && 1773 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1774 return (0); 1775 if ((fbmask & FIMB6_SPORT) && 1776 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1777 return (0); 1778 if ((fbmask & FIMB6_DPORT) && 1779 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1780 return (0); 1781 if (fbmask & FIMB6_SADDR) { 1782 for (i = 0; i < 4; i++) 1783 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1784 (pkt->fi6_src.s6_addr32[i] & 1785 filt->ff_mask6.mask6_src.s6_addr32[i])) 1786 return (0); 1787 } 1788 if (fbmask & FIMB6_DADDR) { 1789 for (i = 0; i < 4; i++) 1790 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1791 (pkt->fi6_dst.s6_addr32[i] & 1792 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1793 return (0); 1794 } 1795 if ((fbmask & FIMB6_TCLASS) && 1796 filt->ff_flow6.fi6_tclass != 1797 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1798 return (0); 1799 if ((fbmask & FIMB6_GPI) && 1800 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1801 return (0); 1802 /* match */ 1803 return (1); 1804} 1805#endif /* INET6 */ 1806 1807/* 1808 * filter handle: 1809 * bit 20-28: index to the filter hash table 1810 * bit 0-19: unique id in the hash bucket. 
1811 */ 1812static u_long 1813get_filt_handle(classifier, i) 1814 struct acc_classifier *classifier; 1815 int i; 1816{ 1817 static u_long handle_number = 1; 1818 u_long handle; 1819 struct acc_filter *afp; 1820 1821 while (1) { 1822 handle = handle_number++ & 0x000fffff; 1823 1824 if (LIST_EMPTY(&classifier->acc_filters[i])) 1825 break; 1826 1827 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1828 if ((afp->f_handle & 0x000fffff) == handle) 1829 break; 1830 if (afp == NULL) 1831 break; 1832 /* this handle is already used, try again */ 1833 } 1834 1835 return ((i << 20) | handle); 1836} 1837 1838/* convert filter handle to filter pointer */ 1839static struct acc_filter * 1840filth_to_filtp(classifier, handle) 1841 struct acc_classifier *classifier; 1842 u_long handle; 1843{ 1844 struct acc_filter *afp; 1845 int i; 1846 1847 i = ACC_GET_HINDEX(handle); 1848 1849 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1850 if (afp->f_handle == handle) 1851 return (afp); 1852 1853 return (NULL); 1854} 1855 1856/* create flowinfo bitmask */ 1857static u_int32_t 1858filt2fibmask(filt) 1859 struct flow_filter *filt; 1860{ 1861 u_int32_t mask = 0; 1862#ifdef INET6 1863 struct flow_filter6 *filt6; 1864#endif 1865 1866 switch (filt->ff_flow.fi_family) { 1867 case AF_INET: 1868 if (filt->ff_flow.fi_proto != 0) 1869 mask |= FIMB4_PROTO; 1870 if (filt->ff_flow.fi_tos != 0) 1871 mask |= FIMB4_TOS; 1872 if (filt->ff_flow.fi_dst.s_addr != 0) 1873 mask |= FIMB4_DADDR; 1874 if (filt->ff_flow.fi_src.s_addr != 0) 1875 mask |= FIMB4_SADDR; 1876 if (filt->ff_flow.fi_sport != 0) 1877 mask |= FIMB4_SPORT; 1878 if (filt->ff_flow.fi_dport != 0) 1879 mask |= FIMB4_DPORT; 1880 if (filt->ff_flow.fi_gpi != 0) 1881 mask |= FIMB4_GPI; 1882 break; 1883#ifdef INET6 1884 case AF_INET6: 1885 filt6 = (struct flow_filter6 *)filt; 1886 1887 if (filt6->ff_flow6.fi6_proto != 0) 1888 mask |= FIMB6_PROTO; 1889 if (filt6->ff_flow6.fi6_tclass != 0) 1890 mask |= FIMB6_TCLASS; 1891 if 
(!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) 1892 mask |= FIMB6_DADDR; 1893 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) 1894 mask |= FIMB6_SADDR; 1895 if (filt6->ff_flow6.fi6_sport != 0) 1896 mask |= FIMB6_SPORT; 1897 if (filt6->ff_flow6.fi6_dport != 0) 1898 mask |= FIMB6_DPORT; 1899 if (filt6->ff_flow6.fi6_gpi != 0) 1900 mask |= FIMB6_GPI; 1901 if (filt6->ff_flow6.fi6_flowlabel != 0) 1902 mask |= FIMB6_FLABEL; 1903 break; 1904#endif /* INET6 */ 1905 } 1906 return (mask); 1907} 1908 1909 1910/* 1911 * helper functions to handle IPv4 fragments. 1912 * currently only in-sequence fragments are handled. 1913 * - fragment info is cached in a LRU list. 1914 * - when a first fragment is found, cache its flow info. 1915 * - when a non-first fragment is found, lookup the cache. 1916 */ 1917 1918struct ip4_frag { 1919 TAILQ_ENTRY(ip4_frag) ip4f_chain; 1920 char ip4f_valid; 1921 u_short ip4f_id; 1922 struct flowinfo_in ip4f_info; 1923}; 1924 1925static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ 1926 1927#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ 1928 1929 1930static void 1931ip4f_cache(ip, fin) 1932 struct ip *ip; 1933 struct flowinfo_in *fin; 1934{ 1935 struct ip4_frag *fp; 1936 1937 if (TAILQ_EMPTY(&ip4f_list)) { 1938 /* first time call, allocate fragment cache entries. */ 1939 if (ip4f_init() < 0) 1940 /* allocation failed! 
*/ 1941 return; 1942 } 1943 1944 fp = ip4f_alloc(); 1945 fp->ip4f_id = ip->ip_id; 1946 fp->ip4f_info.fi_proto = ip->ip_p; 1947 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; 1948 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; 1949 1950 /* save port numbers */ 1951 fp->ip4f_info.fi_sport = fin->fi_sport; 1952 fp->ip4f_info.fi_dport = fin->fi_dport; 1953 fp->ip4f_info.fi_gpi = fin->fi_gpi; 1954} 1955 1956static int 1957ip4f_lookup(ip, fin) 1958 struct ip *ip; 1959 struct flowinfo_in *fin; 1960{ 1961 struct ip4_frag *fp; 1962 1963 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; 1964 fp = TAILQ_NEXT(fp, ip4f_chain)) 1965 if (ip->ip_id == fp->ip4f_id && 1966 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && 1967 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && 1968 ip->ip_p == fp->ip4f_info.fi_proto) { 1969 1970 /* found the matching entry */ 1971 fin->fi_sport = fp->ip4f_info.fi_sport; 1972 fin->fi_dport = fp->ip4f_info.fi_dport; 1973 fin->fi_gpi = fp->ip4f_info.fi_gpi; 1974 1975 if ((ntohs(ip->ip_off) & IP_MF) == 0) 1976 /* this is the last fragment, 1977 release the entry. 
*/ 1978 ip4f_free(fp); 1979 1980 return (1); 1981 } 1982 1983 /* no matching entry found */ 1984 return (0); 1985} 1986 1987static int 1988ip4f_init(void) 1989{ 1990 struct ip4_frag *fp; 1991 int i; 1992 1993 TAILQ_INIT(&ip4f_list); 1994 for (i=0; i<IP4F_TABSIZE; i++) { 1995 fp = malloc(sizeof(struct ip4_frag), 1996 M_DEVBUF, M_NOWAIT); 1997 if (fp == NULL) { 1998 printf("ip4f_init: can't alloc %dth entry!\n", i); 1999 if (i == 0) 2000 return (-1); 2001 return (0); 2002 } 2003 fp->ip4f_valid = 0; 2004 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 2005 } 2006 return (0); 2007} 2008 2009static struct ip4_frag * 2010ip4f_alloc(void) 2011{ 2012 struct ip4_frag *fp; 2013 2014 /* reclaim an entry at the tail, put it at the head */ 2015 fp = TAILQ_LAST(&ip4f_list, ip4f_list); 2016 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 2017 fp->ip4f_valid = 1; 2018 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); 2019 return (fp); 2020} 2021 2022static void 2023ip4f_free(fp) 2024 struct ip4_frag *fp; 2025{ 2026 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 2027 fp->ip4f_valid = 0; 2028 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 2029} 2030 2031#endif /* ALTQ3_CLFIER_COMPAT */ 2032