altq_subr.c revision 298091
1/* $FreeBSD: stable/10/sys/contrib/altq/altq/altq_subr.c 298091 2016-04-16 02:11:04Z loos $ */ 2/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */ 3 4/* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#if defined(__FreeBSD__) || defined(__NetBSD__) 31#include "opt_altq.h" 32#include "opt_inet.h" 33#ifdef __FreeBSD__ 34#include "opt_inet6.h" 35#endif 36#endif /* __FreeBSD__ || __NetBSD__ */ 37 38#include <sys/param.h> 39#include <sys/malloc.h> 40#include <sys/mbuf.h> 41#include <sys/systm.h> 42#include <sys/proc.h> 43#include <sys/socket.h> 44#include <sys/socketvar.h> 45#include <sys/kernel.h> 46#include <sys/errno.h> 47#include <sys/syslog.h> 48#include <sys/sysctl.h> 49#include <sys/queue.h> 50 51#include <net/if.h> 52#include <net/if_var.h> 53#include <net/if_dl.h> 54#include <net/if_types.h> 55#ifdef __FreeBSD__ 56#include <net/vnet.h> 57#endif 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/ip.h> 62#ifdef INET6 63#include <netinet/ip6.h> 64#endif 65#include <netinet/tcp.h> 66#include <netinet/udp.h> 67 68#include <netpfil/pf/pf.h> 69#include <netpfil/pf/pf_altq.h> 70#include <altq/altq.h> 71#ifdef ALTQ3_COMPAT 72#include <altq/altq_conf.h> 73#endif 74 75/* machine dependent clock related includes */ 76#ifdef __FreeBSD__ 77#include <sys/bus.h> 78#include <sys/cpu.h> 79#include <sys/eventhandler.h> 80#include <machine/clock.h> 81#endif 82#if defined(__amd64__) || defined(__i386__) 83#include <machine/cpufunc.h> /* for pentium tsc */ 84#include <machine/specialreg.h> /* for CPUID_TSC */ 85#ifdef __FreeBSD__ 86#include <machine/md_var.h> /* for cpu_feature */ 87#elif defined(__NetBSD__) || defined(__OpenBSD__) 88#include <machine/cpu.h> /* for cpu_feature */ 89#endif 90#endif /* __amd64 || __i386__ */ 91 92/* 93 * internal function prototypes 94 */ 95static void tbr_timeout(void *); 96int (*altq_input)(struct mbuf *, int) = NULL; 97static struct mbuf *tbr_dequeue(struct ifaltq *, int); 98static int tbr_timer = 0; /* token bucket regulator timer */ 99#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) 100static struct callout tbr_callout = CALLOUT_INITIALIZER; 101#else 102static struct callout tbr_callout; 
103#endif 104 105#ifdef ALTQ3_CLFIER_COMPAT 106static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); 107#ifdef INET6 108static int extract_ports6(struct mbuf *, struct ip6_hdr *, 109 struct flowinfo_in6 *); 110#endif 111static int apply_filter4(u_int32_t, struct flow_filter *, 112 struct flowinfo_in *); 113static int apply_ppfilter4(u_int32_t, struct flow_filter *, 114 struct flowinfo_in *); 115#ifdef INET6 116static int apply_filter6(u_int32_t, struct flow_filter6 *, 117 struct flowinfo_in6 *); 118#endif 119static int apply_tosfilter4(u_int32_t, struct flow_filter *, 120 struct flowinfo_in *); 121static u_long get_filt_handle(struct acc_classifier *, int); 122static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); 123static u_int32_t filt2fibmask(struct flow_filter *); 124 125static void ip4f_cache(struct ip *, struct flowinfo_in *); 126static int ip4f_lookup(struct ip *, struct flowinfo_in *); 127static int ip4f_init(void); 128static struct ip4_frag *ip4f_alloc(void); 129static void ip4f_free(struct ip4_frag *); 130#endif /* ALTQ3_CLFIER_COMPAT */ 131 132/* 133 * alternate queueing support routines 134 */ 135 136/* look up the queue state by the interface name and the queueing type. 
 */
/*
 * altq_lookup: return the discipline state attached to the named
 * interface's send queue, or NULL if the interface does not exist or
 * a different (or no) discipline type is attached.
 */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

/*
 * altq_attach: install a queueing discipline on ifq.  The discipline's
 * enqueue/dequeue/request hooks and (for altq3 compatibility) classifier
 * hooks are recorded in the ifaltq.  Returns ENXIO if the queue is not
 * ready, EBUSY/EEXIST for altq3 callers racing an existing discipline.
 */
int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EBUSY;
		}
		if (ALTQ_IS_ATTACHED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EEXIST;
		}
	}
#endif
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	/* keep only CANTCHANGE/ENABLED; clear any stale discipline flags */
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
#endif
	IFQ_UNLOCK(ifq);
	return 0;
}

/*
 * altq_detach: remove the currently attached discipline from ifq and
 * reset all hooks.  The discipline must be disabled first (EBUSY
 * otherwise); detaching an unattached queue is a no-op returning 0.
 */
int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
#endif

	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

/*
 * altq_enable: start running the attached discipline.  The queue is
 * purged first so the discipline starts from an empty queue.
 */
int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;	/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

/*
 * altq_disable: stop running the discipline; pending packets are purged.
 * Disabling an already-disabled queue is a no-op.
 */
int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

#ifdef ALTQ_DEBUG
/* backend for the ASSERT() macro: report the failed expression and panic */
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)

/*
 * tbr_dequeue: token-bucket-regulated dequeue.  Refills the token count
 * from the machine clock, refuses to dequeue while tokens are negative,
 * and charges the packet length against the bucket on ALTDQ_REMOVE.
 */
static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	/* the TBR needs the machine clock; try to initialize it lazily */
	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	/* M_NOWAIT: the IFQ lock is held, so we must not sleep here */
	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	/* convert bits/sec and bytes to the fixed-point TBR representation */
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		/* first tbr on this queue: make sure the kick timer runs */
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}

/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
#ifdef __FreeBSD__
	VNET_ITERATOR_DECL(vnet_iter);
#endif
	struct ifnet *ifp;
	int active, s;

	active = 0;
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
#ifdef __FreeBSD__
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
#endif
		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
		    ifp = TAILQ_NEXT(ifp, if_list)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			/* restart output if the regulator left packets queued */
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
#ifdef __FreeBSD__
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
#endif
	splx(s);
	/* rearm only while at least one interface still has a tbr enabled */
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr;

	IFQ_LOCK(ifq);
	if ((tbr = ifq->altq_tbr) == NULL) {
		/* no regulator installed: report a zero profile */
		profile->rate = 0;
		profile->depth = 0;
	} else {
		/* convert the fixed-point internal values back to bits/sec */
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	IFQ_UNLOCK(ifq);
	return (0);
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions. Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	/* dispatch to the scheduler compiled into the kernel, if any */
	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 * Locking is done in the discipline specific functions with regards to
 * malloc with WAITOK, also it is not yet clear which lock to use.
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	/* a non-empty qname means "add a queue", not a discipline */
	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 * It is yet unclear what lock to use to protect this operation, the
 * discipline specific functions will determine and grab it
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	/* a non-empty qname means "remove a queue", not a discipline */
	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is yet unclear what lock to use to protect this operation, the
 * discipline specific functions will determine and grab it
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is yet unclear what lock to use to protect this operation, the
 * discipline specific functions will determine and grab it
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline specific functions with regards to
 * copyout operations, also it is not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		/* traffic class occupies bits 20-27 of the v6 flow word */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

/*
 * write_dsfield: store dsfield into the packet's TOS (v4) or traffic
 * class (v6) field, preserving the two low "currently unused" bits and
 * incrementally updating the IPv4 header checksum (RFC 1624).
 */
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}


/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;		/* nonzero: use the CPU cycle counter (TSC) */
u_int32_t machclk_freq;		/* machine clock frequency in Hz */
u_int32_t machclk_per_tick;	/* machine clock ticks per hz tick */

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

/*
 * init_machclk_setup: one-time decision whether the TSC can be used.
 * The TSC is rejected on SMP/MULTIPROCESSOR kernels (per-CPU counters
 * may be unsynchronized) and when the CPU lacks the TSC feature.
 */
static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
#ifdef __FreeBSD__
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
#else
	if ((cpu_feature & CPUID_TSC) == 0)
#endif
		machclk_usepcc = 0;
#endif
}

/*
 * init_machclk: determine machclk_freq, either from the platform's TSC
 * frequency or, as a last resort, by measuring the counter against
 * microtime() for about one second.
 */
void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int wait;
		struct timeval tv_start, tv_end;
		u_int64_t start, end, diff;
		int timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
/* raw RDTSC (emitted as opcode bytes for old assemblers) */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

/*
 * read_machclk: return the current machine clock value; either the raw
 * TSC or a 256MHz value synthesized from microtime() since boot.
 */
u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};

/*
 * extract port numbers from a ipv4 packet.
1178 */ 1179static int 1180extract_ports4(m, ip, fin) 1181 struct mbuf *m; 1182 struct ip *ip; 1183 struct flowinfo_in *fin; 1184{ 1185 struct mbuf *m0; 1186 u_short ip_off; 1187 u_int8_t proto; 1188 int off; 1189 1190 fin->fi_sport = 0; 1191 fin->fi_dport = 0; 1192 fin->fi_gpi = 0; 1193 1194 ip_off = ntohs(ip->ip_off); 1195 /* if it is a fragment, try cached fragment info */ 1196 if (ip_off & IP_OFFMASK) { 1197 ip4f_lookup(ip, fin); 1198 return (1); 1199 } 1200 1201 /* locate the mbuf containing the protocol header */ 1202 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1203 if (((caddr_t)ip >= m0->m_data) && 1204 ((caddr_t)ip < m0->m_data + m0->m_len)) 1205 break; 1206 if (m0 == NULL) { 1207#ifdef ALTQ_DEBUG 1208 printf("extract_ports4: can't locate header! ip=%p\n", ip); 1209#endif 1210 return (0); 1211 } 1212 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 1213 proto = ip->ip_p; 1214 1215#ifdef ALTQ_IPSEC 1216 again: 1217#endif 1218 while (off >= m0->m_len) { 1219 off -= m0->m_len; 1220 m0 = m0->m_next; 1221 if (m0 == NULL) 1222 return (0); /* bogus ip_hl! 
 */
	}
	/* need at least 4 bytes of the transport header in this mbuf */
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		/*
		 * TCP and UDP both start with the 16-bit source and
		 * destination ports, so a udphdr overlay works for both.
		 */
		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		/* the SPI is the first 32-bit word of the ESP header */
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* AH length field counts 32-bit words minus 2 */
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* goto the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		/* unknown upper-layer protocol: record it, no ports */
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
/*
 * Extract the transport protocol, port numbers and (for IPsec) the SPI
 * from an IPv6 packet into *fin6.  Walks the extension-header chain
 * starting right after the fixed IPv6 header pointed to by ip6.
 * Returns 1 when the flow info was filled in, 0 when the packet could
 * not be parsed (header not in the chain, truncated, fragment, or an
 * unsupported protocol).
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	/* offset of the first extension header within m0 */
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf that holds offset 'off' */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* need at least 4 contiguous bytes of this header */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* ports are at the same offsets for TCP and UDP */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			/* SPI is the first 32-bit word of the ESP header */
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length field counts 32-bit words minus 2 */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* generic ext header: length in 8-byte units minus 1 */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */

/*
 * Install a flow filter and associate it with 'class'.  The filter is
 * copied, normalized (wildcard/full masks applied, extra address bits
 * cleared), hashed by destination address (IPv4) or flow label (IPv6),
 * and inserted into the per-bucket list ordered by decreasing rule
 * number.  On success, stores the new filter handle in *phandle and
 * returns 0; returns EINVAL on an unsupported address family and
 * ENOMEM on allocation failure.
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	/*
	 * NOTE(review): with M_WAITOK the FreeBSD kernel malloc never
	 * returns NULL, so this check is effectively dead there; kept
	 * for the other BSDs this KAME code also targets.
	 */
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		/* unspecified address = wildcard; unset mask = full mask */
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		/* IPv6 filters hash on the flow label, not the dst address */
		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

/*
 * Remove and free the filter identified by 'handle'.
 * Returns 0 on success, EINVAL when the handle maps to no filter.
 */
int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing to the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		/*
		 * removing an entry invalidates the LIST_FOREACH cursor,
		 * so after each removal the scan restarts from the head;
		 * the loop ends when a full pass removes nothing
		 * (afp == NULL after LIST_FOREACH completes).
		 */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

/*
 * Classify a packet: extract its flow info and return the class of the
 * first matching filter, or NULL when no filter matches.  For IPv4,
 * fast paths exist when the classifier only uses TOS or only
 * proto/ports; otherwise the dst-address hash bucket is searched first
 * and the wildcard bucket second.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}

/*
 * Full IPv4 filter match: every field enabled in fbmask must match
 * (addresses and TOS are compared after applying the filter's masks).
 * Returns 1 on match, 0 otherwise.
 */
static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function only for tos field.
1722 */ 1723static int 1724apply_tosfilter4(fbmask, filt, pkt) 1725 u_int32_t fbmask; 1726 struct flow_filter *filt; 1727 struct flowinfo_in *pkt; 1728{ 1729 if (filt->ff_flow.fi_family != AF_INET) 1730 return (0); 1731 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1732 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1733 return (0); 1734 /* match */ 1735 return (1); 1736} 1737 1738#ifdef INET6 1739static int 1740apply_filter6(fbmask, filt, pkt) 1741 u_int32_t fbmask; 1742 struct flow_filter6 *filt; 1743 struct flowinfo_in6 *pkt; 1744{ 1745 int i; 1746 1747 if (filt->ff_flow6.fi6_family != AF_INET6) 1748 return (0); 1749 if ((fbmask & FIMB6_FLABEL) && 1750 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1751 return (0); 1752 if ((fbmask & FIMB6_PROTO) && 1753 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1754 return (0); 1755 if ((fbmask & FIMB6_SPORT) && 1756 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1757 return (0); 1758 if ((fbmask & FIMB6_DPORT) && 1759 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1760 return (0); 1761 if (fbmask & FIMB6_SADDR) { 1762 for (i = 0; i < 4; i++) 1763 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1764 (pkt->fi6_src.s6_addr32[i] & 1765 filt->ff_mask6.mask6_src.s6_addr32[i])) 1766 return (0); 1767 } 1768 if (fbmask & FIMB6_DADDR) { 1769 for (i = 0; i < 4; i++) 1770 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1771 (pkt->fi6_dst.s6_addr32[i] & 1772 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1773 return (0); 1774 } 1775 if ((fbmask & FIMB6_TCLASS) && 1776 filt->ff_flow6.fi6_tclass != 1777 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1778 return (0); 1779 if ((fbmask & FIMB6_GPI) && 1780 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1781 return (0); 1782 /* match */ 1783 return (1); 1784} 1785#endif /* INET6 */ 1786 1787/* 1788 * filter handle: 1789 * bit 20-28: index to the filter hash table 1790 * bit 0-19: unique id in the hash bucket. 
 */

/*
 * Allocate a handle for a new filter in hash bucket 'i': the bucket
 * index goes in the high bits, a 20-bit id unique within the bucket in
 * the low bits.  Loops until an unused low-bit id is found.
 */
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int	i;
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		/* afp is NULL after the loop iff no entry has this id */
		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	i;

	/* the hash bucket index is encoded in the handle's high bits */
	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	/* a zero/unspecified field means "wildcard": leave its bit clear */
	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}


/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in a LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* LRU list linkage */
    char    ip4f_valid;			/* non-zero: entry is in use */
    u_short ip4f_id;			/* IP identification field */
    struct flowinfo_in ip4f_info;	/* cached flow info */
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */


/*
 * Cache the flow info of a first fragment so later fragments of the
 * same datagram (matched by id/src/dst/proto) inherit its ports/SPI.
 */
static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
}

/*
 * Look up the fragment cache for a non-first fragment and copy the
 * cached ports/SPI into *fin.  Returns 1 on a cache hit, 0 otherwise.
 * Valid entries are kept at the head of the LRU list, so the scan can
 * stop at the first invalid entry.
 */
static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

/*
 * One-time allocation of the IP4F_TABSIZE cache entries.
 * Returns -1 when nothing could be allocated, 0 otherwise (a partially
 * filled cache is still usable).
 */
static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i=0; i<IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		      M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

/* take an entry from the LRU tail (evicting it if in use) for reuse */
static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

/* invalidate an entry and move it to the LRU tail for reuse */
static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */