altq_subr.c revision 304843
1/*- 2 * Copyright (C) 1997-2003 3 * Sony Computer Science Laboratories Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ 27 * $FreeBSD: stable/11/sys/net/altq/altq_subr.c 304843 2016-08-26 10:04:10Z kib $ 28 */ 29 30#include "opt_altq.h" 31#include "opt_inet.h" 32#include "opt_inet6.h" 33 34#include <sys/param.h> 35#include <sys/malloc.h> 36#include <sys/mbuf.h> 37#include <sys/systm.h> 38#include <sys/proc.h> 39#include <sys/socket.h> 40#include <sys/socketvar.h> 41#include <sys/kernel.h> 42#include <sys/errno.h> 43#include <sys/syslog.h> 44#include <sys/sysctl.h> 45#include <sys/queue.h> 46 47#include <net/if.h> 48#include <net/if_var.h> 49#include <net/if_dl.h> 50#include <net/if_types.h> 51#include <net/vnet.h> 52 53#include <netinet/in.h> 54#include <netinet/in_systm.h> 55#include <netinet/ip.h> 56#ifdef INET6 57#include <netinet/ip6.h> 58#endif 59#include <netinet/tcp.h> 60#include <netinet/udp.h> 61 62#include <netpfil/pf/pf.h> 63#include <netpfil/pf/pf_altq.h> 64#include <net/altq/altq.h> 65#ifdef ALTQ3_COMPAT 66#include <net/altq/altq_conf.h> 67#endif 68 69/* machine dependent clock related includes */ 70#include <sys/bus.h> 71#include <sys/cpu.h> 72#include <sys/eventhandler.h> 73#include <machine/clock.h> 74#if defined(__amd64__) || defined(__i386__) 75#include <machine/cpufunc.h> /* for pentium tsc */ 76#include <machine/specialreg.h> /* for CPUID_TSC */ 77#include <machine/md_var.h> /* for cpu_feature */ 78#endif /* __amd64 || __i386__ */ 79 80/* 81 * internal function prototypes 82 */ 83static void tbr_timeout(void *); 84int (*altq_input)(struct mbuf *, int) = NULL; 85static struct mbuf *tbr_dequeue(struct ifaltq *, int); 86static int tbr_timer = 0; /* token bucket regulator timer */ 87#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) 88static struct callout tbr_callout = CALLOUT_INITIALIZER; 89#else 90static struct callout tbr_callout; 91#endif 92 93#ifdef ALTQ3_CLFIER_COMPAT 94static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); 95#ifdef INET6 96static int extract_ports6(struct mbuf *, struct ip6_hdr *, 97 struct flowinfo_in6 *); 98#endif 99static int apply_filter4(u_int32_t, struct flow_filter *, 100 struct flowinfo_in *); 101static int apply_ppfilter4(u_int32_t, struct flow_filter *, 102 struct flowinfo_in *); 103#ifdef INET6 104static int apply_filter6(u_int32_t, struct flow_filter6 *, 105 struct flowinfo_in6 *); 106#endif 107static int apply_tosfilter4(u_int32_t, struct flow_filter *, 108 struct flowinfo_in *); 109static u_long get_filt_handle(struct acc_classifier *, int); 110static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); 111static u_int32_t filt2fibmask(struct flow_filter *); 112 113static void ip4f_cache(struct ip *, struct flowinfo_in *); 114static int ip4f_lookup(struct ip *, struct flowinfo_in *); 115static int ip4f_init(void); 116static struct ip4_frag *ip4f_alloc(void); 117static void ip4f_free(struct ip4_frag *); 118#endif /* ALTQ3_CLFIER_COMPAT */ 119 120/* 121 * alternate queueing support routines 122 */ 123 124/* look up the queue state by the interface name and the queueing type. */ 125void * 126altq_lookup(name, type) 127 char *name; 128 int type; 129{ 130 struct ifnet *ifp; 131 132 if ((ifp = ifunit(name)) != NULL) { 133 /* read if_snd unlocked */ 134 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) 135 return (ifp->if_snd.altq_disc); 136 } 137 138 return NULL; 139} 140 141int 142altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) 143 struct ifaltq *ifq; 144 int type; 145 void *discipline; 146 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 147 struct mbuf *(*dequeue)(struct ifaltq *, int); 148 int (*request)(struct ifaltq *, int, void *); 149 void *clfier; 150 void *(*classify)(void *, struct mbuf *, int); 151{ 152 IFQ_LOCK(ifq); 153 if (!ALTQ_IS_READY(ifq)) { 154 IFQ_UNLOCK(ifq); 155 return ENXIO; 156 } 157 158#ifdef ALTQ3_COMPAT 159 /* 160 * pfaltq can override the existing discipline, but altq3 cannot. 161 * check these if clfier is not NULL (which implies altq3). 162 */ 163 if (clfier != NULL) { 164 if (ALTQ_IS_ENABLED(ifq)) { 165 IFQ_UNLOCK(ifq); 166 return EBUSY; 167 } 168 if (ALTQ_IS_ATTACHED(ifq)) { 169 IFQ_UNLOCK(ifq); 170 return EEXIST; 171 } 172 } 173#endif 174 ifq->altq_type = type; 175 ifq->altq_disc = discipline; 176 ifq->altq_enqueue = enqueue; 177 ifq->altq_dequeue = dequeue; 178 ifq->altq_request = request; 179 ifq->altq_clfier = clfier; 180 ifq->altq_classify = classify; 181 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); 182#ifdef ALTQ3_COMPAT 183#ifdef ALTQ_KLD 184 altq_module_incref(type); 185#endif 186#endif 187 IFQ_UNLOCK(ifq); 188 return 0; 189} 190 191int 192altq_detach(ifq) 193 struct ifaltq *ifq; 194{ 195 IFQ_LOCK(ifq); 196 197 if (!ALTQ_IS_READY(ifq)) { 198 IFQ_UNLOCK(ifq); 199 return ENXIO; 200 } 201 if (ALTQ_IS_ENABLED(ifq)) { 202 IFQ_UNLOCK(ifq); 203 return EBUSY; 204 } 205 if (!ALTQ_IS_ATTACHED(ifq)) { 206 IFQ_UNLOCK(ifq); 207 return (0); 208 } 209#ifdef ALTQ3_COMPAT 210#ifdef ALTQ_KLD 211 altq_module_declref(ifq->altq_type); 212#endif 213#endif 214 215 ifq->altq_type = ALTQT_NONE; 216 ifq->altq_disc = NULL; 217 ifq->altq_enqueue = NULL; 218 ifq->altq_dequeue = NULL; 219 ifq->altq_request = NULL; 220 ifq->altq_clfier = NULL; 221 ifq->altq_classify = NULL; 222 ifq->altq_flags &= ALTQF_CANTCHANGE; 223 224 IFQ_UNLOCK(ifq); 225 return 0; 226} 227 228int 229altq_enable(ifq) 230 struct ifaltq *ifq; 231{ 232 int s; 233 234 IFQ_LOCK(ifq); 235 236 if (!ALTQ_IS_READY(ifq)) { 237 IFQ_UNLOCK(ifq); 238 return ENXIO; 239 } 240 if (ALTQ_IS_ENABLED(ifq)) { 241 IFQ_UNLOCK(ifq); 242 return 0; 243 } 244 245 s = splnet(); 246 IFQ_PURGE_NOLOCK(ifq); 247 ASSERT(ifq->ifq_len == 0); 248 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ 249 ifq->altq_flags |= ALTQF_ENABLED; 250 if (ifq->altq_clfier != NULL) 251 ifq->altq_flags |= ALTQF_CLASSIFY; 252 splx(s); 253 254 IFQ_UNLOCK(ifq); 255 return 0; 256} 257 258int 259altq_disable(ifq) 260 struct ifaltq *ifq; 261{ 262 int s; 263 264 IFQ_LOCK(ifq); 265 if (!ALTQ_IS_ENABLED(ifq)) { 266 IFQ_UNLOCK(ifq); 267 return 0; 268 } 269 270 s = splnet(); 271 IFQ_PURGE_NOLOCK(ifq); 272 ASSERT(ifq->ifq_len == 0); 273 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); 274 splx(s); 275 276 IFQ_UNLOCK(ifq); 277 return 0; 278} 279 280#ifdef ALTQ_DEBUG 281void 282altq_assert(file, line, failedexpr) 283 const char *file, *failedexpr; 284 int line; 285{ 286 (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", 287 failedexpr, file, line); 288 panic("altq assertion"); 289 /* NOTREACHED */ 290} 291#endif 292 293/* 294 * internal representation of token bucket parameters 295 * rate: byte_per_unittime << 32 296 * (((bits_per_sec) / 8) << 32) / machclk_freq 297 * depth: byte << 32 298 * 299 */ 300#define TBR_SHIFT 32 301#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) 302#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) 303 304static struct mbuf * 305tbr_dequeue(ifq, op) 306 struct ifaltq *ifq; 307 int op; 308{ 309 struct tb_regulator *tbr; 310 struct mbuf *m; 311 int64_t interval; 312 u_int64_t now; 313 314 IFQ_LOCK_ASSERT(ifq); 315 tbr = ifq->altq_tbr; 316 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { 317 /* if this is a remove after poll, bypass tbr check */ 318 } else { 319 /* update token only when it is negative */ 320 if (tbr->tbr_token <= 0) { 321 now = read_machclk(); 322 interval = now - tbr->tbr_last; 323 if (interval >= tbr->tbr_filluptime) 324 tbr->tbr_token = tbr->tbr_depth; 325 else { 326 tbr->tbr_token += interval * tbr->tbr_rate; 327 if (tbr->tbr_token > tbr->tbr_depth) 328 tbr->tbr_token = tbr->tbr_depth; 329 } 330 tbr->tbr_last = now; 331 } 332 /* if token is still negative, don't allow dequeue */ 333 if (tbr->tbr_token <= 0) 334 return (NULL); 335 } 336 337 if (ALTQ_IS_ENABLED(ifq)) 338 m = (*ifq->altq_dequeue)(ifq, op); 339 else { 340 if (op == ALTDQ_POLL) 341 _IF_POLL(ifq, m); 342 else 343 _IF_DEQUEUE(ifq, m); 344 } 345 346 if (m != NULL && op == ALTDQ_REMOVE) 347 tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); 348 tbr->tbr_lastop = op; 349 return (m); 350} 351 352/* 353 * set a token bucket regulator. 354 * if the specified rate is zero, the token bucket regulator is deleted. 355 */ 356int 357tbr_set(ifq, profile) 358 struct ifaltq *ifq; 359 struct tb_profile *profile; 360{ 361 struct tb_regulator *tbr, *otbr; 362 363 if (tbr_dequeue_ptr == NULL) 364 tbr_dequeue_ptr = tbr_dequeue; 365 366 if (machclk_freq == 0) 367 init_machclk(); 368 if (machclk_freq == 0) { 369 printf("tbr_set: no cpu clock available!\n"); 370 return (ENXIO); 371 } 372 373 IFQ_LOCK(ifq); 374 if (profile->rate == 0) { 375 /* delete this tbr */ 376 if ((tbr = ifq->altq_tbr) == NULL) { 377 IFQ_UNLOCK(ifq); 378 return (ENOENT); 379 } 380 ifq->altq_tbr = NULL; 381 free(tbr, M_DEVBUF); 382 IFQ_UNLOCK(ifq); 383 return (0); 384 } 385 386 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); 387 if (tbr == NULL) { 388 IFQ_UNLOCK(ifq); 389 return (ENOMEM); 390 } 391 392 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; 393 tbr->tbr_depth = TBR_SCALE(profile->depth); 394 if (tbr->tbr_rate > 0) 395 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; 396 else 397 tbr->tbr_filluptime = 0xffffffffffffffffLL; 398 tbr->tbr_token = tbr->tbr_depth; 399 tbr->tbr_last = read_machclk(); 400 tbr->tbr_lastop = ALTDQ_REMOVE; 401 402 otbr = ifq->altq_tbr; 403 ifq->altq_tbr = tbr; /* set the new tbr */ 404 405 if (otbr != NULL) 406 free(otbr, M_DEVBUF); 407 else { 408 if (tbr_timer == 0) { 409 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 410 tbr_timer = 1; 411 } 412 } 413 IFQ_UNLOCK(ifq); 414 return (0); 415} 416 417/* 418 * tbr_timeout goes through the interface list, and kicks the drivers 419 * if necessary. 420 * 421 * MPSAFE 422 */ 423static void 424tbr_timeout(arg) 425 void *arg; 426{ 427 VNET_ITERATOR_DECL(vnet_iter); 428 struct ifnet *ifp; 429 int active, s; 430 431 active = 0; 432 s = splnet(); 433 IFNET_RLOCK_NOSLEEP(); 434 VNET_LIST_RLOCK_NOSLEEP(); 435 VNET_FOREACH(vnet_iter) { 436 CURVNET_SET(vnet_iter); 437 for (ifp = TAILQ_FIRST(&V_ifnet); ifp; 438 ifp = TAILQ_NEXT(ifp, if_list)) { 439 /* read from if_snd unlocked */ 440 if (!TBR_IS_ENABLED(&ifp->if_snd)) 441 continue; 442 active++; 443 if (!IFQ_IS_EMPTY(&ifp->if_snd) && 444 ifp->if_start != NULL) 445 (*ifp->if_start)(ifp); 446 } 447 CURVNET_RESTORE(); 448 } 449 VNET_LIST_RUNLOCK_NOSLEEP(); 450 IFNET_RUNLOCK_NOSLEEP(); 451 splx(s); 452 if (active > 0) 453 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 454 else 455 tbr_timer = 0; /* don't need tbr_timer anymore */ 456} 457 458/* 459 * get token bucket regulator profile 460 */ 461int 462tbr_get(ifq, profile) 463 struct ifaltq *ifq; 464 struct tb_profile *profile; 465{ 466 struct tb_regulator *tbr; 467 468 IFQ_LOCK(ifq); 469 if ((tbr = ifq->altq_tbr) == NULL) { 470 profile->rate = 0; 471 profile->depth = 0; 472 } else { 473 profile->rate = 474 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); 475 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); 476 } 477 IFQ_UNLOCK(ifq); 478 return (0); 479} 480 481/* 482 * attach a discipline to the interface. if one already exists, it is 483 * overridden. 484 * Locking is done in the discipline specific attach functions. Basically 485 * they call back to altq_attach which takes care of the attach and locking. 486 */ 487int 488altq_pfattach(struct pf_altq *a) 489{ 490 int error = 0; 491 492 switch (a->scheduler) { 493 case ALTQT_NONE: 494 break; 495#ifdef ALTQ_CBQ 496 case ALTQT_CBQ: 497 error = cbq_pfattach(a); 498 break; 499#endif 500#ifdef ALTQ_PRIQ 501 case ALTQT_PRIQ: 502 error = priq_pfattach(a); 503 break; 504#endif 505#ifdef ALTQ_HFSC 506 case ALTQT_HFSC: 507 error = hfsc_pfattach(a); 508 break; 509#endif 510#ifdef ALTQ_FAIRQ 511 case ALTQT_FAIRQ: 512 error = fairq_pfattach(a); 513 break; 514#endif 515#ifdef ALTQ_CODEL 516 case ALTQT_CODEL: 517 error = codel_pfattach(a); 518 break; 519#endif 520 default: 521 error = ENXIO; 522 } 523 524 return (error); 525} 526 527/* 528 * detach a discipline from the interface. 529 * it is possible that the discipline was already overridden by another 530 * discipline. 531 */ 532int 533altq_pfdetach(struct pf_altq *a) 534{ 535 struct ifnet *ifp; 536 int s, error = 0; 537 538 if ((ifp = ifunit(a->ifname)) == NULL) 539 return (EINVAL); 540 541 /* if this discipline is no longer referenced, just return */ 542 /* read unlocked from if_snd */ 543 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) 544 return (0); 545 546 s = splnet(); 547 /* read unlocked from if_snd, _disable and _detach take care */ 548 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 549 error = altq_disable(&ifp->if_snd); 550 if (error == 0) 551 error = altq_detach(&ifp->if_snd); 552 splx(s); 553 554 return (error); 555} 556 557/* 558 * add a discipline or a queue 559 * Locking is done in the discipline specific functions with regards to 560 * malloc with WAITOK, also it is not yet clear which lock to use. 561 */ 562int 563altq_add(struct pf_altq *a) 564{ 565 int error = 0; 566 567 if (a->qname[0] != 0) 568 return (altq_add_queue(a)); 569 570 if (machclk_freq == 0) 571 init_machclk(); 572 if (machclk_freq == 0) 573 panic("altq_add: no cpu clock"); 574 575 switch (a->scheduler) { 576#ifdef ALTQ_CBQ 577 case ALTQT_CBQ: 578 error = cbq_add_altq(a); 579 break; 580#endif 581#ifdef ALTQ_PRIQ 582 case ALTQT_PRIQ: 583 error = priq_add_altq(a); 584 break; 585#endif 586#ifdef ALTQ_HFSC 587 case ALTQT_HFSC: 588 error = hfsc_add_altq(a); 589 break; 590#endif 591#ifdef ALTQ_FAIRQ 592 case ALTQT_FAIRQ: 593 error = fairq_add_altq(a); 594 break; 595#endif 596#ifdef ALTQ_CODEL 597 case ALTQT_CODEL: 598 error = codel_add_altq(a); 599 break; 600#endif 601 default: 602 error = ENXIO; 603 } 604 605 return (error); 606} 607 608/* 609 * remove a discipline or a queue 610 * It is yet unclear what lock to use to protect this operation, the 611 * discipline specific functions will determine and grab it 612 */ 613int 614altq_remove(struct pf_altq *a) 615{ 616 int error = 0; 617 618 if (a->qname[0] != 0) 619 return (altq_remove_queue(a)); 620 621 switch (a->scheduler) { 622#ifdef ALTQ_CBQ 623 case ALTQT_CBQ: 624 error = cbq_remove_altq(a); 625 break; 626#endif 627#ifdef ALTQ_PRIQ 628 case ALTQT_PRIQ: 629 error = priq_remove_altq(a); 630 break; 631#endif 632#ifdef ALTQ_HFSC 633 case ALTQT_HFSC: 634 error = hfsc_remove_altq(a); 635 break; 636#endif 637#ifdef ALTQ_FAIRQ 638 case ALTQT_FAIRQ: 639 error = fairq_remove_altq(a); 640 break; 641#endif 642#ifdef ALTQ_CODEL 643 case ALTQT_CODEL: 644 error = codel_remove_altq(a); 645 break; 646#endif 647 default: 648 error = ENXIO; 649 } 650 651 return (error); 652} 653 654/* 655 * add a queue to the discipline 656 * It is yet unclear what lock to use to protect this operation, the 657 * discipline specific functions will determine and grab it 658 */ 659int 660altq_add_queue(struct pf_altq *a) 661{ 662 int error = 0; 663 664 switch (a->scheduler) { 665#ifdef ALTQ_CBQ 666 case ALTQT_CBQ: 667 error = cbq_add_queue(a); 668 break; 669#endif 670#ifdef ALTQ_PRIQ 671 case ALTQT_PRIQ: 672 error = priq_add_queue(a); 673 break; 674#endif 675#ifdef ALTQ_HFSC 676 case ALTQT_HFSC: 677 error = hfsc_add_queue(a); 678 break; 679#endif 680#ifdef ALTQ_FAIRQ 681 case ALTQT_FAIRQ: 682 error = fairq_add_queue(a); 683 break; 684#endif 685 default: 686 error = ENXIO; 687 } 688 689 return (error); 690} 691 692/* 693 * remove a queue from the discipline 694 * It is yet unclear what lock to use to protect this operation, the 695 * discipline specific functions will determine and grab it 696 */ 697int 698altq_remove_queue(struct pf_altq *a) 699{ 700 int error = 0; 701 702 switch (a->scheduler) { 703#ifdef ALTQ_CBQ 704 case ALTQT_CBQ: 705 error = cbq_remove_queue(a); 706 break; 707#endif 708#ifdef ALTQ_PRIQ 709 case ALTQT_PRIQ: 710 error = priq_remove_queue(a); 711 break; 712#endif 713#ifdef ALTQ_HFSC 714 case ALTQT_HFSC: 715 error = hfsc_remove_queue(a); 716 break; 717#endif 718#ifdef ALTQ_FAIRQ 719 case ALTQT_FAIRQ: 720 error = fairq_remove_queue(a); 721 break; 722#endif 723 default: 724 error = ENXIO; 725 } 726 727 return (error); 728} 729 730/* 731 * get queue statistics 732 * Locking is done in the discipline specific functions with regards to 733 * copyout operations, also it is not yet clear which lock to use. 734 */ 735int 736altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) 737{ 738 int error = 0; 739 740 switch (a->scheduler) { 741#ifdef ALTQ_CBQ 742 case ALTQT_CBQ: 743 error = cbq_getqstats(a, ubuf, nbytes); 744 break; 745#endif 746#ifdef ALTQ_PRIQ 747 case ALTQT_PRIQ: 748 error = priq_getqstats(a, ubuf, nbytes); 749 break; 750#endif 751#ifdef ALTQ_HFSC 752 case ALTQT_HFSC: 753 error = hfsc_getqstats(a, ubuf, nbytes); 754 break; 755#endif 756#ifdef ALTQ_FAIRQ 757 case ALTQT_FAIRQ: 758 error = fairq_getqstats(a, ubuf, nbytes); 759 break; 760#endif 761#ifdef ALTQ_CODEL 762 case ALTQT_CODEL: 763 error = codel_getqstats(a, ubuf, nbytes); 764 break; 765#endif 766 default: 767 error = ENXIO; 768 } 769 770 return (error); 771} 772 773/* 774 * read and write diffserv field in IPv4 or IPv6 header 775 */ 776u_int8_t 777read_dsfield(m, pktattr) 778 struct mbuf *m; 779 struct altq_pktattr *pktattr; 780{ 781 struct mbuf *m0; 782 u_int8_t ds_field = 0; 783 784 if (pktattr == NULL || 785 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 786 return ((u_int8_t)0); 787 788 /* verify that pattr_hdr is within the mbuf data */ 789 for (m0 = m; m0 != NULL; m0 = m0->m_next) 790 if ((pktattr->pattr_hdr >= m0->m_data) && 791 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 792 break; 793 if (m0 == NULL) { 794 /* ick, pattr_hdr is stale */ 795 pktattr->pattr_af = AF_UNSPEC; 796#ifdef ALTQ_DEBUG 797 printf("read_dsfield: can't locate header!\n"); 798#endif 799 return ((u_int8_t)0); 800 } 801 802 if (pktattr->pattr_af == AF_INET) { 803 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 804 805 if (ip->ip_v != 4) 806 return ((u_int8_t)0); /* version mismatch! */ 807 ds_field = ip->ip_tos; 808 } 809#ifdef INET6 810 else if (pktattr->pattr_af == AF_INET6) { 811 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 812 u_int32_t flowlabel; 813 814 flowlabel = ntohl(ip6->ip6_flow); 815 if ((flowlabel >> 28) != 6) 816 return ((u_int8_t)0); /* version mismatch! */ 817 ds_field = (flowlabel >> 20) & 0xff; 818 } 819#endif 820 return (ds_field); 821} 822 823void 824write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) 825{ 826 struct mbuf *m0; 827 828 if (pktattr == NULL || 829 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 830 return; 831 832 /* verify that pattr_hdr is within the mbuf data */ 833 for (m0 = m; m0 != NULL; m0 = m0->m_next) 834 if ((pktattr->pattr_hdr >= m0->m_data) && 835 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 836 break; 837 if (m0 == NULL) { 838 /* ick, pattr_hdr is stale */ 839 pktattr->pattr_af = AF_UNSPEC; 840#ifdef ALTQ_DEBUG 841 printf("write_dsfield: can't locate header!\n"); 842#endif 843 return; 844 } 845 846 if (pktattr->pattr_af == AF_INET) { 847 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 848 u_int8_t old; 849 int32_t sum; 850 851 if (ip->ip_v != 4) 852 return; /* version mismatch! */ 853 old = ip->ip_tos; 854 dsfield |= old & 3; /* leave CU bits */ 855 if (old == dsfield) 856 return; 857 ip->ip_tos = dsfield; 858 /* 859 * update checksum (from RFC1624) 860 * HC' = ~(~HC + ~m + m') 861 */ 862 sum = ~ntohs(ip->ip_sum) & 0xffff; 863 sum += 0xff00 + (~old & 0xff) + dsfield; 864 sum = (sum >> 16) + (sum & 0xffff); 865 sum += (sum >> 16); /* add carry */ 866 867 ip->ip_sum = htons(~sum & 0xffff); 868 } 869#ifdef INET6 870 else if (pktattr->pattr_af == AF_INET6) { 871 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 872 u_int32_t flowlabel; 873 874 flowlabel = ntohl(ip6->ip6_flow); 875 if ((flowlabel >> 28) != 6) 876 return; /* version mismatch! */ 877 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); 878 ip6->ip6_flow = htonl(flowlabel); 879 } 880#endif 881 return; 882} 883 884 885/* 886 * high resolution clock support taking advantage of a machine dependent 887 * high resolution time counter (e.g., timestamp counter of intel pentium). 888 * we assume 889 * - 64-bit-long monotonically-increasing counter 890 * - frequency range is 100M-4GHz (CPU speed) 891 */ 892/* if pcc is not available or disabled, emulate 256MHz using microtime() */ 893#define MACHCLK_SHIFT 8 894 895int machclk_usepcc; 896u_int32_t machclk_freq; 897u_int32_t machclk_per_tick; 898 899#if defined(__i386__) && defined(__NetBSD__) 900extern u_int64_t cpu_tsc_freq; 901#endif 902 903#if (__FreeBSD_version >= 700035) 904/* Update TSC freq with the value indicated by the caller. */ 905static void 906tsc_freq_changed(void *arg, const struct cf_level *level, int status) 907{ 908 /* If there was an error during the transition, don't do anything. */ 909 if (status != 0) 910 return; 911 912#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) 913 /* If TSC is P-state invariant, don't do anything. */ 914 if (tsc_is_invariant) 915 return; 916#endif 917 918 /* Total setting for this level gives the new frequency in MHz. */ 919 init_machclk(); 920} 921EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 922 EVENTHANDLER_PRI_LAST); 923#endif /* __FreeBSD_version >= 700035 */ 924 925static void 926init_machclk_setup(void) 927{ 928#if (__FreeBSD_version >= 600000) 929 callout_init(&tbr_callout, 0); 930#endif 931 932 machclk_usepcc = 1; 933 934#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) 935 machclk_usepcc = 0; 936#endif 937#if defined(__FreeBSD__) && defined(SMP) 938 machclk_usepcc = 0; 939#endif 940#if defined(__NetBSD__) && defined(MULTIPROCESSOR) 941 machclk_usepcc = 0; 942#endif 943#if defined(__amd64__) || defined(__i386__) 944 /* check if TSC is available */ 945 if ((cpu_feature & CPUID_TSC) == 0 || 946 atomic_load_acq_64(&tsc_freq) == 0) 947 machclk_usepcc = 0; 948#endif 949} 950 951void 952init_machclk(void) 953{ 954 static int called; 955 956 /* Call one-time initialization function. */ 957 if (!called) { 958 init_machclk_setup(); 959 called = 1; 960 } 961 962 if (machclk_usepcc == 0) { 963 /* emulate 256MHz using microtime() */ 964 machclk_freq = 1000000 << MACHCLK_SHIFT; 965 machclk_per_tick = machclk_freq / hz; 966#ifdef ALTQ_DEBUG 967 printf("altq: emulate %uHz cpu clock\n", machclk_freq); 968#endif 969 return; 970 } 971 972 /* 973 * if the clock frequency (of Pentium TSC or Alpha PCC) is 974 * accessible, just use it. 975 */ 976#if defined(__amd64__) || defined(__i386__) 977 machclk_freq = atomic_load_acq_64(&tsc_freq); 978#endif 979 980 /* 981 * if we don't know the clock frequency, measure it. 982 */ 983 if (machclk_freq == 0) { 984 static int wait; 985 struct timeval tv_start, tv_end; 986 u_int64_t start, end, diff; 987 int timo; 988 989 microtime(&tv_start); 990 start = read_machclk(); 991 timo = hz; /* 1 sec */ 992 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 993 microtime(&tv_end); 994 end = read_machclk(); 995 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 996 + tv_end.tv_usec - tv_start.tv_usec; 997 if (diff != 0) 998 machclk_freq = (u_int)((end - start) * 1000000 / diff); 999 } 1000 1001 machclk_per_tick = machclk_freq / hz; 1002 1003#ifdef ALTQ_DEBUG 1004 printf("altq: CPU clock: %uHz\n", machclk_freq); 1005#endif 1006} 1007 1008#if defined(__OpenBSD__) && defined(__i386__) 1009static __inline u_int64_t 1010rdtsc(void) 1011{ 1012 u_int64_t rv; 1013 __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); 1014 return (rv); 1015} 1016#endif /* __OpenBSD__ && __i386__ */ 1017 1018u_int64_t 1019read_machclk(void) 1020{ 1021 u_int64_t val; 1022 1023 if (machclk_usepcc) { 1024#if defined(__amd64__) || defined(__i386__) 1025 val = rdtsc(); 1026#else 1027 panic("read_machclk"); 1028#endif 1029 } else { 1030 struct timeval tv, boottime; 1031 1032 microtime(&tv); 1033 getboottime(&boottime); 1034 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 1035 + tv.tv_usec) << MACHCLK_SHIFT); 1036 } 1037 return (val); 1038} 1039 1040#ifdef ALTQ3_CLFIER_COMPAT 1041 1042#ifndef IPPROTO_ESP 1043#define IPPROTO_ESP 50 /* encapsulating security payload */ 1044#endif 1045#ifndef IPPROTO_AH 1046#define IPPROTO_AH 51 /* authentication header */ 1047#endif 1048 1049/* 1050 * extract flow information from a given packet. 1051 * filt_mask shows flowinfo fields required. 1052 * we assume the ip header is in one mbuf, and addresses and ports are 1053 * in network byte order. 1054 */ 1055int 1056altq_extractflow(m, af, flow, filt_bmask) 1057 struct mbuf *m; 1058 int af; 1059 struct flowinfo *flow; 1060 u_int32_t filt_bmask; 1061{ 1062 1063 switch (af) { 1064 case PF_INET: { 1065 struct flowinfo_in *fin; 1066 struct ip *ip; 1067 1068 ip = mtod(m, struct ip *); 1069 1070 if (ip->ip_v != 4) 1071 break; 1072 1073 fin = (struct flowinfo_in *)flow; 1074 fin->fi_len = sizeof(struct flowinfo_in); 1075 fin->fi_family = AF_INET; 1076 1077 fin->fi_proto = ip->ip_p; 1078 fin->fi_tos = ip->ip_tos; 1079 1080 fin->fi_src.s_addr = ip->ip_src.s_addr; 1081 fin->fi_dst.s_addr = ip->ip_dst.s_addr; 1082 1083 if (filt_bmask & FIMB4_PORTS) 1084 /* if port info is required, extract port numbers */ 1085 extract_ports4(m, ip, fin); 1086 else { 1087 fin->fi_sport = 0; 1088 fin->fi_dport = 0; 1089 fin->fi_gpi = 0; 1090 } 1091 return (1); 1092 } 1093 1094#ifdef INET6 1095 case PF_INET6: { 1096 struct flowinfo_in6 *fin6; 1097 struct ip6_hdr *ip6; 1098 1099 ip6 = mtod(m, struct ip6_hdr *); 1100 /* should we check the ip version? */ 1101 1102 fin6 = (struct flowinfo_in6 *)flow; 1103 fin6->fi6_len = sizeof(struct flowinfo_in6); 1104 fin6->fi6_family = AF_INET6; 1105 1106 fin6->fi6_proto = ip6->ip6_nxt; 1107 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 1108 1109 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); 1110 fin6->fi6_src = ip6->ip6_src; 1111 fin6->fi6_dst = ip6->ip6_dst; 1112 1113 if ((filt_bmask & FIMB6_PORTS) || 1114 ((filt_bmask & FIMB6_PROTO) 1115 && ip6->ip6_nxt > IPPROTO_IPV6)) 1116 /* 1117 * if port info is required, or proto is required 1118 * but there are option headers, extract port 1119 * and protocol numbers. 1120 */ 1121 extract_ports6(m, ip6, fin6); 1122 else { 1123 fin6->fi6_sport = 0; 1124 fin6->fi6_dport = 0; 1125 fin6->fi6_gpi = 0; 1126 } 1127 return (1); 1128 } 1129#endif /* INET6 */ 1130 1131 default: 1132 break; 1133 } 1134 1135 /* failed */ 1136 flow->fi_len = sizeof(struct flowinfo); 1137 flow->fi_family = AF_UNSPEC; 1138 return (0); 1139} 1140 1141/* 1142 * helper routine to extract port numbers 1143 */ 1144/* structure for ipsec and ipv6 option header template */ 1145struct _opt6 { 1146 u_int8_t opt6_nxt; /* next header */ 1147 u_int8_t opt6_hlen; /* header extension length */ 1148 u_int16_t _pad; 1149 u_int32_t ah_spi; /* security parameter index 1150 for authentication header */ 1151}; 1152 1153/* 1154 * extract port numbers from a ipv4 packet. 1155 */ 1156static int 1157extract_ports4(m, ip, fin) 1158 struct mbuf *m; 1159 struct ip *ip; 1160 struct flowinfo_in *fin; 1161{ 1162 struct mbuf *m0; 1163 u_short ip_off; 1164 u_int8_t proto; 1165 int off; 1166 1167 fin->fi_sport = 0; 1168 fin->fi_dport = 0; 1169 fin->fi_gpi = 0; 1170 1171 ip_off = ntohs(ip->ip_off); 1172 /* if it is a fragment, try cached fragment info */ 1173 if (ip_off & IP_OFFMASK) { 1174 ip4f_lookup(ip, fin); 1175 return (1); 1176 } 1177 1178 /* locate the mbuf containing the protocol header */ 1179 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1180 if (((caddr_t)ip >= m0->m_data) && 1181 ((caddr_t)ip < m0->m_data + m0->m_len)) 1182 break; 1183 if (m0 == NULL) { 1184#ifdef ALTQ_DEBUG 1185 printf("extract_ports4: can't locate header! ip=%p\n", ip); 1186#endif 1187 return (0); 1188 } 1189 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 1190 proto = ip->ip_p; 1191 1192#ifdef ALTQ_IPSEC 1193 again: 1194#endif 1195 while (off >= m0->m_len) { 1196 off -= m0->m_len; 1197 m0 = m0->m_next; 1198 if (m0 == NULL) 1199 return (0); /* bogus ip_hl! */ 1200 } 1201 if (m0->m_len < off + 4) 1202 return (0); 1203 1204 switch (proto) { 1205 case IPPROTO_TCP: 1206 case IPPROTO_UDP: { 1207 struct udphdr *udp; 1208 1209 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1210 fin->fi_sport = udp->uh_sport; 1211 fin->fi_dport = udp->uh_dport; 1212 fin->fi_proto = proto; 1213 } 1214 break; 1215 1216#ifdef ALTQ_IPSEC 1217 case IPPROTO_ESP: 1218 if (fin->fi_gpi == 0){ 1219 u_int32_t *gpi; 1220 1221 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1222 fin->fi_gpi = *gpi; 1223 } 1224 fin->fi_proto = proto; 1225 break; 1226 1227 case IPPROTO_AH: { 1228 /* get next header and header length */ 1229 struct _opt6 *opt6; 1230 1231 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1232 proto = opt6->opt6_nxt; 1233 off += 8 + (opt6->opt6_hlen * 4); 1234 if (fin->fi_gpi == 0 && m0->m_len >= off + 8) 1235 fin->fi_gpi = opt6->ah_spi; 1236 } 1237 /* goto the next header */ 1238 goto again; 1239#endif /* ALTQ_IPSEC */ 1240 1241 default: 1242 fin->fi_proto = proto; 1243 return (0); 1244 } 1245 1246 /* if this is a first fragment, cache it. */ 1247 if (ip_off & IP_MF) 1248 ip4f_cache(ip, fin); 1249 1250 return (1); 1251} 1252 1253#ifdef INET6 1254static int 1255extract_ports6(m, ip6, fin6) 1256 struct mbuf *m; 1257 struct ip6_hdr *ip6; 1258 struct flowinfo_in6 *fin6; 1259{ 1260 struct mbuf *m0; 1261 int off; 1262 u_int8_t proto; 1263 1264 fin6->fi6_gpi = 0; 1265 fin6->fi6_sport = 0; 1266 fin6->fi6_dport = 0; 1267 1268 /* locate the mbuf containing the protocol header */ 1269 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1270 if (((caddr_t)ip6 >= m0->m_data) && 1271 ((caddr_t)ip6 < m0->m_data + m0->m_len)) 1272 break; 1273 if (m0 == NULL) { 1274#ifdef ALTQ_DEBUG 1275 printf("extract_ports6: can't locate header! ip6=%p\n", ip6); 1276#endif 1277 return (0); 1278 } 1279 off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); 1280 1281 proto = ip6->ip6_nxt; 1282 do { 1283 while (off >= m0->m_len) { 1284 off -= m0->m_len; 1285 m0 = m0->m_next; 1286 if (m0 == NULL) 1287 return (0); 1288 } 1289 if (m0->m_len < off + 4) 1290 return (0); 1291 1292 switch (proto) { 1293 case IPPROTO_TCP: 1294 case IPPROTO_UDP: { 1295 struct udphdr *udp; 1296 1297 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1298 fin6->fi6_sport = udp->uh_sport; 1299 fin6->fi6_dport = udp->uh_dport; 1300 fin6->fi6_proto = proto; 1301 } 1302 return (1); 1303 1304 case IPPROTO_ESP: 1305 if (fin6->fi6_gpi == 0) { 1306 u_int32_t *gpi; 1307 1308 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1309 fin6->fi6_gpi = *gpi; 1310 } 1311 fin6->fi6_proto = proto; 1312 return (1); 1313 1314 case IPPROTO_AH: { 1315 /* get next header and header length */ 1316 struct _opt6 *opt6; 1317 1318 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1319 if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) 1320 fin6->fi6_gpi = opt6->ah_spi; 1321 proto = opt6->opt6_nxt; 1322 off += 8 + (opt6->opt6_hlen * 4); 1323 /* goto the next header */ 1324 break; 1325 } 1326 1327 case IPPROTO_HOPOPTS: 1328 case IPPROTO_ROUTING: 1329 case IPPROTO_DSTOPTS: { 1330 /* get next header and header length */ 1331 struct _opt6 *opt6; 1332 1333 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1334 proto = opt6->opt6_nxt; 1335 off += (opt6->opt6_hlen + 1) * 8; 1336 /* goto the next header */ 1337 break; 1338 } 1339 1340 case IPPROTO_FRAGMENT: 1341 /* ipv6 fragmentations are not supported yet */ 1342 default: 1343 fin6->fi6_proto = proto; 1344 return (0); 1345 } 1346 } while (1); 1347 /*NOTREACHED*/ 1348} 1349#endif /* INET6 */ 1350 1351/* 1352 * altq common classifier 1353 */ 1354int 1355acc_add_filter(classifier, filter, class, phandle) 1356 struct acc_classifier *classifier; 1357 struct flow_filter *filter; 1358 void *class; 1359 u_long *phandle; 1360{ 1361 struct acc_filter *afp, *prev, *tmp; 1362 int i, s; 1363 1364#ifdef INET6 1365 if (filter->ff_flow.fi_family != AF_INET && 1366 filter->ff_flow.fi_family != AF_INET6) 1367 return (EINVAL); 1368#else 1369 if (filter->ff_flow.fi_family != AF_INET) 1370 return (EINVAL); 1371#endif 1372 1373 afp = malloc(sizeof(struct acc_filter), 1374 M_DEVBUF, M_WAITOK); 1375 if (afp == NULL) 1376 return (ENOMEM); 1377 bzero(afp, sizeof(struct acc_filter)); 1378 1379 afp->f_filter = *filter; 1380 afp->f_class = class; 1381 1382 i = ACC_WILDCARD_INDEX; 1383 if (filter->ff_flow.fi_family == AF_INET) { 1384 struct flow_filter *filter4 = &afp->f_filter; 1385 1386 /* 1387 * if address is 0, it's a wildcard. if address mask 1388 * isn't set, use full mask. 1389 */ 1390 if (filter4->ff_flow.fi_dst.s_addr == 0) 1391 filter4->ff_mask.mask_dst.s_addr = 0; 1392 else if (filter4->ff_mask.mask_dst.s_addr == 0) 1393 filter4->ff_mask.mask_dst.s_addr = 0xffffffff; 1394 if (filter4->ff_flow.fi_src.s_addr == 0) 1395 filter4->ff_mask.mask_src.s_addr = 0; 1396 else if (filter4->ff_mask.mask_src.s_addr == 0) 1397 filter4->ff_mask.mask_src.s_addr = 0xffffffff; 1398 1399 /* clear extra bits in addresses */ 1400 filter4->ff_flow.fi_dst.s_addr &= 1401 filter4->ff_mask.mask_dst.s_addr; 1402 filter4->ff_flow.fi_src.s_addr &= 1403 filter4->ff_mask.mask_src.s_addr; 1404 1405 /* 1406 * if dst address is a wildcard, use hash-entry 1407 * ACC_WILDCARD_INDEX. 1408 */ 1409 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) 1410 i = ACC_WILDCARD_INDEX; 1411 else 1412 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); 1413 } 1414#ifdef INET6 1415 else if (filter->ff_flow.fi_family == AF_INET6) { 1416 struct flow_filter6 *filter6 = 1417 (struct flow_filter6 *)&afp->f_filter; 1418#ifndef IN6MASK0 /* taken from kame ipv6 */ 1419#define IN6MASK0 {{{ 0, 0, 0, 0 }}} 1420#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} 1421 const struct in6_addr in6mask0 = IN6MASK0; 1422 const struct in6_addr in6mask128 = IN6MASK128; 1423#endif 1424 1425 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) 1426 filter6->ff_mask6.mask6_dst = in6mask0; 1427 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) 1428 filter6->ff_mask6.mask6_dst = in6mask128; 1429 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) 1430 filter6->ff_mask6.mask6_src = in6mask0; 1431 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) 1432 filter6->ff_mask6.mask6_src = in6mask128; 1433 1434 /* clear extra bits in addresses */ 1435 for (i = 0; i < 16; i++) 1436 filter6->ff_flow6.fi6_dst.s6_addr[i] &= 1437 filter6->ff_mask6.mask6_dst.s6_addr[i]; 1438 for (i = 0; i < 16; i++) 1439 filter6->ff_flow6.fi6_src.s6_addr[i] &= 1440 filter6->ff_mask6.mask6_src.s6_addr[i]; 1441 1442 if (filter6->ff_flow6.fi6_flowlabel == 0) 1443 i = ACC_WILDCARD_INDEX; 1444 else 1445 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); 1446 } 1447#endif /* INET6 */ 1448 1449 afp->f_handle = get_filt_handle(classifier, i); 1450 1451 /* update filter bitmask */ 1452 afp->f_fbmask = filt2fibmask(filter); 1453 classifier->acc_fbmask |= afp->f_fbmask; 1454 1455 /* 1456 * add this filter to the filter list. 1457 * filters are ordered from the highest rule number. 1458 */ 1459 s = splnet(); 1460 prev = NULL; 1461 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { 1462 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) 1463 prev = tmp; 1464 else 1465 break; 1466 } 1467 if (prev == NULL) 1468 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); 1469 else 1470 LIST_INSERT_AFTER(prev, afp, f_chain); 1471 splx(s); 1472 1473 *phandle = afp->f_handle; 1474 return (0); 1475} 1476 1477int 1478acc_delete_filter(classifier, handle) 1479 struct acc_classifier *classifier; 1480 u_long handle; 1481{ 1482 struct acc_filter *afp; 1483 int s; 1484 1485 if ((afp = filth_to_filtp(classifier, handle)) == NULL) 1486 return (EINVAL); 1487 1488 s = splnet(); 1489 LIST_REMOVE(afp, f_chain); 1490 splx(s); 1491 1492 free(afp, M_DEVBUF); 1493 1494 /* todo: update filt_bmask */ 1495 1496 return (0); 1497} 1498 1499/* 1500 * delete filters referencing to the specified class. 1501 * if the all flag is not 0, delete all the filters. 1502 */ 1503int 1504acc_discard_filters(classifier, class, all) 1505 struct acc_classifier *classifier; 1506 void *class; 1507 int all; 1508{ 1509 struct acc_filter *afp; 1510 int i, s; 1511 1512 s = splnet(); 1513 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 1514 do { 1515 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1516 if (all || afp->f_class == class) { 1517 LIST_REMOVE(afp, f_chain); 1518 free(afp, M_DEVBUF); 1519 /* start again from the head */ 1520 break; 1521 } 1522 } while (afp != NULL); 1523 } 1524 splx(s); 1525 1526 if (all) 1527 classifier->acc_fbmask = 0; 1528 1529 return (0); 1530} 1531 1532void * 1533acc_classify(clfier, m, af) 1534 void *clfier; 1535 struct mbuf *m; 1536 int af; 1537{ 1538 struct acc_classifier *classifier; 1539 struct flowinfo flow; 1540 struct acc_filter *afp; 1541 int i; 1542 1543 classifier = (struct acc_classifier *)clfier; 1544 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 1545 1546 if (flow.fi_family == AF_INET) { 1547 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 1548 1549 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 1550 /* only tos is used */ 1551 LIST_FOREACH(afp, 1552 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1553 f_chain) 1554 if (apply_tosfilter4(afp->f_fbmask, 1555 &afp->f_filter, fp)) 1556 /* filter matched */ 1557 return (afp->f_class); 1558 } else if ((classifier->acc_fbmask & 1559 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 1560 == 0) { 1561 /* only proto and ports are used */ 1562 LIST_FOREACH(afp, 1563 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1564 f_chain) 1565 if (apply_ppfilter4(afp->f_fbmask, 1566 &afp->f_filter, fp)) 1567 /* filter matched */ 1568 return (afp->f_class); 1569 } else { 1570 /* get the filter hash entry from its dest address */ 1571 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 1572 do { 1573 /* 1574 * go through this loop twice. first for dst 1575 * hash, second for wildcards. 1576 */ 1577 LIST_FOREACH(afp, &classifier->acc_filters[i], 1578 f_chain) 1579 if (apply_filter4(afp->f_fbmask, 1580 &afp->f_filter, fp)) 1581 /* filter matched */ 1582 return (afp->f_class); 1583 1584 /* 1585 * check again for filters with a dst addr 1586 * wildcard. 1587 * (daddr == 0 || dmask != 0xffffffff). 1588 */ 1589 if (i != ACC_WILDCARD_INDEX) 1590 i = ACC_WILDCARD_INDEX; 1591 else 1592 break; 1593 } while (1); 1594 } 1595 } 1596#ifdef INET6 1597 else if (flow.fi_family == AF_INET6) { 1598 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1599 1600 /* get the filter hash entry from its flow ID */ 1601 if (fp6->fi6_flowlabel != 0) 1602 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1603 else 1604 /* flowlable can be zero */ 1605 i = ACC_WILDCARD_INDEX; 1606 1607 /* go through this loop twice. first for flow hash, second 1608 for wildcards. */ 1609 do { 1610 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1611 if (apply_filter6(afp->f_fbmask, 1612 (struct flow_filter6 *)&afp->f_filter, 1613 fp6)) 1614 /* filter matched */ 1615 return (afp->f_class); 1616 1617 /* 1618 * check again for filters with a wildcard. 1619 */ 1620 if (i != ACC_WILDCARD_INDEX) 1621 i = ACC_WILDCARD_INDEX; 1622 else 1623 break; 1624 } while (1); 1625 } 1626#endif /* INET6 */ 1627 1628 /* no filter matched */ 1629 return (NULL); 1630} 1631 1632static int 1633apply_filter4(fbmask, filt, pkt) 1634 u_int32_t fbmask; 1635 struct flow_filter *filt; 1636 struct flowinfo_in *pkt; 1637{ 1638 if (filt->ff_flow.fi_family != AF_INET) 1639 return (0); 1640 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1641 return (0); 1642 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1643 return (0); 1644 if ((fbmask & FIMB4_DADDR) && 1645 filt->ff_flow.fi_dst.s_addr != 1646 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1647 return (0); 1648 if ((fbmask & FIMB4_SADDR) && 1649 filt->ff_flow.fi_src.s_addr != 1650 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1651 return (0); 1652 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1653 return (0); 1654 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1655 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1656 return (0); 1657 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1658 return (0); 1659 /* match */ 1660 return (1); 1661} 1662 1663/* 1664 * filter matching function optimized for a common case that checks 1665 * only protocol and port numbers 1666 */ 1667static int 1668apply_ppfilter4(fbmask, filt, pkt) 1669 u_int32_t fbmask; 1670 struct flow_filter *filt; 1671 struct flowinfo_in *pkt; 1672{ 1673 if (filt->ff_flow.fi_family != AF_INET) 1674 return (0); 1675 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1676 return (0); 1677 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1678 return (0); 1679 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1680 return (0); 1681 /* match */ 1682 return (1); 1683} 1684 1685/* 1686 * filter matching function only for tos field. 1687 */ 1688static int 1689apply_tosfilter4(fbmask, filt, pkt) 1690 u_int32_t fbmask; 1691 struct flow_filter *filt; 1692 struct flowinfo_in *pkt; 1693{ 1694 if (filt->ff_flow.fi_family != AF_INET) 1695 return (0); 1696 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1697 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1698 return (0); 1699 /* match */ 1700 return (1); 1701} 1702 1703#ifdef INET6 1704static int 1705apply_filter6(fbmask, filt, pkt) 1706 u_int32_t fbmask; 1707 struct flow_filter6 *filt; 1708 struct flowinfo_in6 *pkt; 1709{ 1710 int i; 1711 1712 if (filt->ff_flow6.fi6_family != AF_INET6) 1713 return (0); 1714 if ((fbmask & FIMB6_FLABEL) && 1715 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1716 return (0); 1717 if ((fbmask & FIMB6_PROTO) && 1718 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1719 return (0); 1720 if ((fbmask & FIMB6_SPORT) && 1721 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1722 return (0); 1723 if ((fbmask & FIMB6_DPORT) && 1724 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1725 return (0); 1726 if (fbmask & FIMB6_SADDR) { 1727 for (i = 0; i < 4; i++) 1728 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1729 (pkt->fi6_src.s6_addr32[i] & 1730 filt->ff_mask6.mask6_src.s6_addr32[i])) 1731 return (0); 1732 } 1733 if (fbmask & FIMB6_DADDR) { 1734 for (i = 0; i < 4; i++) 1735 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1736 (pkt->fi6_dst.s6_addr32[i] & 1737 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1738 return (0); 1739 } 1740 if ((fbmask & FIMB6_TCLASS) && 1741 filt->ff_flow6.fi6_tclass != 1742 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1743 return (0); 1744 if ((fbmask & FIMB6_GPI) && 1745 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1746 return (0); 1747 /* match */ 1748 return (1); 1749} 1750#endif /* INET6 */ 1751 1752/* 1753 * filter handle: 1754 * bit 20-28: index to the filter hash table 1755 * bit 0-19: unique id in the hash bucket. 1756 */ 1757static u_long 1758get_filt_handle(classifier, i) 1759 struct acc_classifier *classifier; 1760 int i; 1761{ 1762 static u_long handle_number = 1; 1763 u_long handle; 1764 struct acc_filter *afp; 1765 1766 while (1) { 1767 handle = handle_number++ & 0x000fffff; 1768 1769 if (LIST_EMPTY(&classifier->acc_filters[i])) 1770 break; 1771 1772 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1773 if ((afp->f_handle & 0x000fffff) == handle) 1774 break; 1775 if (afp == NULL) 1776 break; 1777 /* this handle is already used, try again */ 1778 } 1779 1780 return ((i << 20) | handle); 1781} 1782 1783/* convert filter handle to filter pointer */ 1784static struct acc_filter * 1785filth_to_filtp(classifier, handle) 1786 struct acc_classifier *classifier; 1787 u_long handle; 1788{ 1789 struct acc_filter *afp; 1790 int i; 1791 1792 i = ACC_GET_HINDEX(handle); 1793 1794 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1795 if (afp->f_handle == handle) 1796 return (afp); 1797 1798 return (NULL); 1799} 1800 1801/* create flowinfo bitmask */ 1802static u_int32_t 1803filt2fibmask(filt) 1804 struct flow_filter *filt; 1805{ 1806 u_int32_t mask = 0; 1807#ifdef INET6 1808 struct flow_filter6 *filt6; 1809#endif 1810 1811 switch (filt->ff_flow.fi_family) { 1812 case AF_INET: 1813 if (filt->ff_flow.fi_proto != 0) 1814 mask |= FIMB4_PROTO; 1815 if (filt->ff_flow.fi_tos != 0) 1816 mask |= FIMB4_TOS; 1817 if (filt->ff_flow.fi_dst.s_addr != 0) 1818 mask |= FIMB4_DADDR; 1819 if (filt->ff_flow.fi_src.s_addr != 0) 1820 mask |= FIMB4_SADDR; 1821 if (filt->ff_flow.fi_sport != 0) 1822 mask |= FIMB4_SPORT; 1823 if (filt->ff_flow.fi_dport != 0) 1824 mask |= FIMB4_DPORT; 1825 if (filt->ff_flow.fi_gpi != 0) 1826 mask |= FIMB4_GPI; 1827 break; 1828#ifdef INET6 1829 case AF_INET6: 1830 filt6 = (struct flow_filter6 *)filt; 1831 1832 if (filt6->ff_flow6.fi6_proto != 0) 1833 mask |= FIMB6_PROTO; 1834 if (filt6->ff_flow6.fi6_tclass != 0) 1835 mask |= FIMB6_TCLASS; 1836 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) 1837 mask |= FIMB6_DADDR; 1838 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) 1839 mask |= FIMB6_SADDR; 1840 if (filt6->ff_flow6.fi6_sport != 0) 1841 mask |= FIMB6_SPORT; 1842 if (filt6->ff_flow6.fi6_dport != 0) 1843 mask |= FIMB6_DPORT; 1844 if (filt6->ff_flow6.fi6_gpi != 0) 1845 mask |= FIMB6_GPI; 1846 if (filt6->ff_flow6.fi6_flowlabel != 0) 1847 mask |= FIMB6_FLABEL; 1848 break; 1849#endif /* INET6 */ 1850 } 1851 return (mask); 1852} 1853 1854 1855/* 1856 * helper functions to handle IPv4 fragments. 1857 * currently only in-sequence fragments are handled. 1858 * - fragment info is cached in a LRU list. 1859 * - when a first fragment is found, cache its flow info. 1860 * - when a non-first fragment is found, lookup the cache. 1861 */ 1862 1863struct ip4_frag { 1864 TAILQ_ENTRY(ip4_frag) ip4f_chain; 1865 char ip4f_valid; 1866 u_short ip4f_id; 1867 struct flowinfo_in ip4f_info; 1868}; 1869 1870static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ 1871 1872#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ 1873 1874 1875static void 1876ip4f_cache(ip, fin) 1877 struct ip *ip; 1878 struct flowinfo_in *fin; 1879{ 1880 struct ip4_frag *fp; 1881 1882 if (TAILQ_EMPTY(&ip4f_list)) { 1883 /* first time call, allocate fragment cache entries. */ 1884 if (ip4f_init() < 0) 1885 /* allocation failed! */ 1886 return; 1887 } 1888 1889 fp = ip4f_alloc(); 1890 fp->ip4f_id = ip->ip_id; 1891 fp->ip4f_info.fi_proto = ip->ip_p; 1892 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; 1893 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; 1894 1895 /* save port numbers */ 1896 fp->ip4f_info.fi_sport = fin->fi_sport; 1897 fp->ip4f_info.fi_dport = fin->fi_dport; 1898 fp->ip4f_info.fi_gpi = fin->fi_gpi; 1899} 1900 1901static int 1902ip4f_lookup(ip, fin) 1903 struct ip *ip; 1904 struct flowinfo_in *fin; 1905{ 1906 struct ip4_frag *fp; 1907 1908 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; 1909 fp = TAILQ_NEXT(fp, ip4f_chain)) 1910 if (ip->ip_id == fp->ip4f_id && 1911 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && 1912 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && 1913 ip->ip_p == fp->ip4f_info.fi_proto) { 1914 1915 /* found the matching entry */ 1916 fin->fi_sport = fp->ip4f_info.fi_sport; 1917 fin->fi_dport = fp->ip4f_info.fi_dport; 1918 fin->fi_gpi = fp->ip4f_info.fi_gpi; 1919 1920 if ((ntohs(ip->ip_off) & IP_MF) == 0) 1921 /* this is the last fragment, 1922 release the entry. */ 1923 ip4f_free(fp); 1924 1925 return (1); 1926 } 1927 1928 /* no matching entry found */ 1929 return (0); 1930} 1931 1932static int 1933ip4f_init(void) 1934{ 1935 struct ip4_frag *fp; 1936 int i; 1937 1938 TAILQ_INIT(&ip4f_list); 1939 for (i=0; i<IP4F_TABSIZE; i++) { 1940 fp = malloc(sizeof(struct ip4_frag), 1941 M_DEVBUF, M_NOWAIT); 1942 if (fp == NULL) { 1943 printf("ip4f_init: can't alloc %dth entry!\n", i); 1944 if (i == 0) 1945 return (-1); 1946 return (0); 1947 } 1948 fp->ip4f_valid = 0; 1949 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1950 } 1951 return (0); 1952} 1953 1954static struct ip4_frag * 1955ip4f_alloc(void) 1956{ 1957 struct ip4_frag *fp; 1958 1959 /* reclaim an entry at the tail, put it at the head */ 1960 fp = TAILQ_LAST(&ip4f_list, ip4f_list); 1961 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1962 fp->ip4f_valid = 1; 1963 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); 1964 return (fp); 1965} 1966 1967static void 1968ip4f_free(fp) 1969 struct ip4_frag *fp; 1970{ 1971 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1972 fp->ip4f_valid = 0; 1973 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1974} 1975 1976#endif /* ALTQ3_CLFIER_COMPAT */ 1977