1/* $FreeBSD$ */ 2/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */ 3 4/* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30/* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63#if defined(__FreeBSD__) || defined(__NetBSD__) 64#include "opt_altq.h" 65#include "opt_inet.h" 66#ifdef __FreeBSD__ 67#include "opt_inet6.h" 68#endif 69#endif /* __FreeBSD__ || __NetBSD__ */ 70#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 71 72#include <sys/param.h> 73#include <sys/malloc.h> 74#include <sys/mbuf.h> 75#include <sys/socket.h> 76#include <sys/systm.h> 77#include <sys/errno.h> 78#if 1 /* ALTQ3_COMPAT */ 79#include <sys/sockio.h> 80#include <sys/proc.h> 81#include <sys/kernel.h> 82#ifdef ALTQ_FLOWVALVE 83#include <sys/queue.h> 84#include <sys/time.h> 85#endif 86#endif /* ALTQ3_COMPAT */ 87 88#include <net/if.h> 89 90#include <netinet/in.h> 91#include <netinet/in_systm.h> 92#include <netinet/ip.h> 93#ifdef INET6 94#include <netinet/ip6.h> 95#endif 96 97#include <net/pfvar.h> 98#include <altq/altq.h> 99#include <altq/altq_red.h> 100#ifdef ALTQ3_COMPAT 101#include <altq/altq_conf.h> 102#ifdef ALTQ_FLOWVALVE 103#include <altq/altq_flowvalve.h> 104#endif 105#endif 106 107/* 108 * ALTQ/RED (Random Early Detection) implementation using 32-bit 109 * fixed-point calculation. 110 * 111 * written by kjc using the ns code as a reference. 112 * you can learn more about red and ns from Sally's home page at 113 * http://www-nrg.ee.lbl.gov/floyd/ 114 * 115 * most of the red parameter values are fixed in this implementation 116 * to prevent fixed-point overflow/underflow. 117 * if you change the parameters, watch out for overflow/underflow! 118 * 119 * the parameters used are recommended values by Sally. 120 * the corresponding ns config looks: 121 * q_weight=0.00195 122 * minthresh=5 maxthresh=15 queue-size=60 123 * linterm=30 124 * dropmech=drop-tail 125 * bytes=false (can't be handled by 32-bit fixed-point) 126 * doubleq=false dqthresh=false 127 * wait=true 128 */ 129/* 130 * alternative red parameters for a slow link. 
131 * 132 * assume the queue length becomes from zero to L and keeps L, it takes 133 * N packets for q_avg to reach 63% of L. 134 * when q_weight is 0.002, N is about 500 packets. 135 * for a slow link like dial-up, 500 packets takes more than 1 minute! 136 * when q_weight is 0.008, N is about 127 packets. 137 * when q_weight is 0.016, N is about 63 packets. 138 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 139 * are allowed for 0.016. 140 * see Sally's paper for more details. 141 */ 142/* normal red parameters */ 143#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 144 /* q_weight = 0.00195 */ 145 146/* red parameters for a slow link */ 147#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 148 /* q_weight = 0.0078125 */ 149 150/* red parameters for a very slow link (e.g., dialup) */ 151#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 152 /* q_weight = 0.015625 */ 153 154/* fixed-point uses 12-bit decimal places */ 155#define FP_SHIFT 12 /* fixed-point shift */ 156 157/* red parameters for drop probability */ 158#define INV_P_MAX 10 /* inverse of max drop probability */ 159#define TH_MIN 5 /* min threshold */ 160#define TH_MAX 15 /* max threshold */ 161 162#define RED_LIMIT 60 /* default max queue lenght */ 163#define RED_STATS /* collect statistics */ 164 165/* 166 * our default policy for forced-drop is drop-tail. 167 * (in altq-1.1.2 or earlier, the default was random-drop. 168 * but it makes more sense to punish the cause of the surge.) 169 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 170 */ 171 172#ifdef ALTQ3_COMPAT 173#ifdef ALTQ_FLOWVALVE 174/* 175 * flow-valve is an extention to protect red from unresponsive flows 176 * and to promote end-to-end congestion control. 177 * flow-valve observes the average drop rates of the flows that have 178 * experienced packet drops in the recent past. 
179 * when the average drop rate exceeds the threshold, the flow is 180 * blocked by the flow-valve. the trapped flow should back off 181 * exponentially to escape from the flow-valve. 182 */ 183#ifdef RED_RANDOM_DROP 184#error "random-drop can't be used with flow-valve!" 185#endif 186#endif /* ALTQ_FLOWVALVE */ 187 188/* red_list keeps all red_queue_t's allocated. */ 189static red_queue_t *red_list = NULL; 190 191#endif /* ALTQ3_COMPAT */ 192 193/* default red parameter values */ 194static int default_th_min = TH_MIN; 195static int default_th_max = TH_MAX; 196static int default_inv_pmax = INV_P_MAX; 197 198#ifdef ALTQ3_COMPAT 199/* internal function prototypes */ 200static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 201static struct mbuf *red_dequeue(struct ifaltq *, int); 202static int red_request(struct ifaltq *, int, void *); 203static void red_purgeq(red_queue_t *); 204static int red_detach(red_queue_t *); 205#ifdef ALTQ_FLOWVALVE 206static __inline struct fve *flowlist_lookup(struct flowvalve *, 207 struct altq_pktattr *, struct timeval *); 208static __inline struct fve *flowlist_reclaim(struct flowvalve *, 209 struct altq_pktattr *); 210static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 211static __inline int fv_p2f(struct flowvalve *, int); 212#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 213static struct flowvalve *fv_alloc(struct red *); 214#endif 215static void fv_destroy(struct flowvalve *); 216static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 217 struct fve **); 218static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 219 struct fve *); 220#endif 221#endif /* ALTQ3_COMPAT */ 222 223/* 224 * red support routines 225 */ 226red_t * 227red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, 228 int pkttime) 229{ 230 red_t *rp; 231 int w, i; 232 int npkts_per_sec; 233 234 rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO); 235 if (rp == 
NULL) 236 return (NULL); 237 238 if (weight == 0) 239 rp->red_weight = W_WEIGHT; 240 else 241 rp->red_weight = weight; 242 243 /* allocate weight table */ 244 rp->red_wtab = wtab_alloc(rp->red_weight); 245 if (rp->red_wtab == NULL) { 246 free(rp, M_DEVBUF); 247 return (NULL); 248 } 249 250 rp->red_avg = 0; 251 rp->red_idle = 1; 252 253 if (inv_pmax == 0) 254 rp->red_inv_pmax = default_inv_pmax; 255 else 256 rp->red_inv_pmax = inv_pmax; 257 if (th_min == 0) 258 rp->red_thmin = default_th_min; 259 else 260 rp->red_thmin = th_min; 261 if (th_max == 0) 262 rp->red_thmax = default_th_max; 263 else 264 rp->red_thmax = th_max; 265 266 rp->red_flags = flags; 267 268 if (pkttime == 0) 269 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 270 rp->red_pkttime = 800; 271 else 272 rp->red_pkttime = pkttime; 273 274 if (weight == 0) { 275 /* when the link is very slow, adjust red parameters */ 276 npkts_per_sec = 1000000 / rp->red_pkttime; 277 if (npkts_per_sec < 50) { 278 /* up to about 400Kbps */ 279 rp->red_weight = W_WEIGHT_2; 280 } else if (npkts_per_sec < 300) { 281 /* up to about 2.4Mbps */ 282 rp->red_weight = W_WEIGHT_1; 283 } 284 } 285 286 /* calculate wshift. weight must be power of 2 */ 287 w = rp->red_weight; 288 for (i = 0; w > 1; i++) 289 w = w >> 1; 290 rp->red_wshift = i; 291 w = 1 << rp->red_wshift; 292 if (w != rp->red_weight) { 293 printf("invalid weight value %d for red! use %d\n", 294 rp->red_weight, w); 295 rp->red_weight = w; 296 } 297 298 /* 299 * thmin_s and thmax_s are scaled versions of th_min and th_max 300 * to be compared with avg. 
301 */ 302 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 303 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 304 305 /* 306 * precompute probability denominator 307 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 308 */ 309 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 310 * rp->red_inv_pmax) << FP_SHIFT; 311 312 microtime(&rp->red_last); 313 return (rp); 314} 315 316void 317red_destroy(red_t *rp) 318{ 319#ifdef ALTQ3_COMPAT 320#ifdef ALTQ_FLOWVALVE 321 if (rp->red_flowvalve != NULL) 322 fv_destroy(rp->red_flowvalve); 323#endif 324#endif /* ALTQ3_COMPAT */ 325 wtab_destroy(rp->red_wtab); 326 free(rp, M_DEVBUF); 327} 328 329void 330red_getstats(red_t *rp, struct redstats *sp) 331{ 332 sp->q_avg = rp->red_avg >> rp->red_wshift; 333 sp->xmit_cnt = rp->red_stats.xmit_cnt; 334 sp->drop_cnt = rp->red_stats.drop_cnt; 335 sp->drop_forced = rp->red_stats.drop_forced; 336 sp->drop_unforced = rp->red_stats.drop_unforced; 337 sp->marked_packets = rp->red_stats.marked_packets; 338} 339 340int 341red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, 342 struct altq_pktattr *pktattr) 343{ 344 int avg, droptype; 345 int n; 346#ifdef ALTQ3_COMPAT 347#ifdef ALTQ_FLOWVALVE 348 struct fve *fve = NULL; 349 350 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 351 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 352 m_freem(m); 353 return (-1); 354 } 355#endif 356#endif /* ALTQ3_COMPAT */ 357 358 avg = rp->red_avg; 359 360 /* 361 * if we were idle, we pretend that n packets arrived during 362 * the idle period. 363 */ 364 if (rp->red_idle) { 365 struct timeval now; 366 int t; 367 368 rp->red_idle = 0; 369 microtime(&now); 370 t = (now.tv_sec - rp->red_last.tv_sec); 371 if (t > 60) { 372 /* 373 * being idle for more than 1 minute, set avg to zero. 374 * this prevents t from overflow. 
375 */ 376 avg = 0; 377 } else { 378 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 379 n = t / rp->red_pkttime - 1; 380 381 /* the following line does (avg = (1 - Wq)^n * avg) */ 382 if (n > 0) 383 avg = (avg >> FP_SHIFT) * 384 pow_w(rp->red_wtab, n); 385 } 386 } 387 388 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 389 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 390 rp->red_avg = avg; /* save the new value */ 391 392 /* 393 * red_count keeps a tally of arriving traffic that has not 394 * been dropped. 395 */ 396 rp->red_count++; 397 398 /* see if we drop early */ 399 droptype = DTYPE_NODROP; 400 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 401 if (avg >= rp->red_thmax_s) { 402 /* avg >= th_max: forced drop */ 403 droptype = DTYPE_FORCED; 404 } else if (rp->red_old == 0) { 405 /* first exceeds th_min */ 406 rp->red_count = 1; 407 rp->red_old = 1; 408 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 409 rp->red_probd, rp->red_count)) { 410 /* mark or drop by red */ 411 if ((rp->red_flags & REDF_ECN) && 412 mark_ecn(m, pktattr, rp->red_flags)) { 413 /* successfully marked. do not drop. */ 414 rp->red_count = 0; 415#ifdef RED_STATS 416 rp->red_stats.marked_packets++; 417#endif 418 } else { 419 /* unforced drop by red */ 420 droptype = DTYPE_EARLY; 421 } 422 } 423 } else { 424 /* avg < th_min */ 425 rp->red_old = 0; 426 } 427 428 /* 429 * if the queue length hits the hard limit, it's a forced drop. 430 */ 431 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 432 droptype = DTYPE_FORCED; 433 434#ifdef RED_RANDOM_DROP 435 /* if successful or forced drop, enqueue this packet. */ 436 if (droptype != DTYPE_EARLY) 437 _addq(q, m); 438#else 439 /* if successful, enqueue this packet. 
*/ 440 if (droptype == DTYPE_NODROP) 441 _addq(q, m); 442#endif 443 if (droptype != DTYPE_NODROP) { 444 if (droptype == DTYPE_EARLY) { 445 /* drop the incoming packet */ 446#ifdef RED_STATS 447 rp->red_stats.drop_unforced++; 448#endif 449 } else { 450 /* forced drop, select a victim packet in the queue. */ 451#ifdef RED_RANDOM_DROP 452 m = _getq_random(q); 453#endif 454#ifdef RED_STATS 455 rp->red_stats.drop_forced++; 456#endif 457 } 458#ifdef RED_STATS 459 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 460#endif 461 rp->red_count = 0; 462#ifdef ALTQ3_COMPAT 463#ifdef ALTQ_FLOWVALVE 464 if (rp->red_flowvalve != NULL) 465 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 466#endif 467#endif /* ALTQ3_COMPAT */ 468 m_freem(m); 469 return (-1); 470 } 471 /* successfully queued */ 472#ifdef RED_STATS 473 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 474#endif 475 return (0); 476} 477 478/* 479 * early-drop probability is calculated as follows: 480 * prob = p_max * (avg - th_min) / (th_max - th_min) 481 * prob_a = prob / (2 - count*prob) 482 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 483 * here prob_a increases as successive undrop count increases. 484 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 485 * becomes 1 when (count >= (2 / prob))). 486 */ 487int 488drop_early(int fp_len, int fp_probd, int count) 489{ 490 int d; /* denominator of drop-probability */ 491 492 d = fp_probd - count * fp_len; 493 if (d <= 0) 494 /* count exceeds the hard limit: drop or mark */ 495 return (1); 496 497 /* 498 * now the range of d is [1..600] in fixed-point. (when 499 * th_max-th_min=10 and p_max=1/30) 500 * drop probability = (avg - TH_MIN) / d 501 */ 502 503 if ((arc4random() % d) < fp_len) { 504 /* drop or mark */ 505 return (1); 506 } 507 /* no drop/mark */ 508 return (0); 509} 510 511/* 512 * try to mark CE bit to the packet. 513 * returns 1 if successfully marked, 0 otherwise. 
514 */ 515int 516mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 517{ 518 struct mbuf *m0; 519 struct pf_mtag *at; 520 void *hdr; 521 522 at = pf_find_mtag(m); 523 if (at != NULL) { 524 hdr = at->hdr; 525#ifdef ALTQ3_COMPAT 526 } else if (pktattr != NULL) { 527 af = pktattr->pattr_af; 528 hdr = pktattr->pattr_hdr; 529#endif /* ALTQ3_COMPAT */ 530 } else 531 return (0); 532 533 /* verify that pattr_hdr is within the mbuf data */ 534 for (m0 = m; m0 != NULL; m0 = m0->m_next) 535 if (((caddr_t)hdr >= m0->m_data) && 536 ((caddr_t)hdr < m0->m_data + m0->m_len)) 537 break; 538 if (m0 == NULL) { 539 /* ick, tag info is stale */ 540 return (0); 541 } 542 543 switch (((struct ip *)hdr)->ip_v) { 544 case IPVERSION: 545 if (flags & REDF_ECN4) { 546 struct ip *ip = hdr; 547 u_int8_t otos; 548 int sum; 549 550 if (ip->ip_v != 4) 551 return (0); /* version mismatch! */ 552 553 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 554 return (0); /* not-ECT */ 555 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 556 return (1); /* already marked */ 557 558 /* 559 * ecn-capable but not marked, 560 * mark CE and update checksum 561 */ 562 otos = ip->ip_tos; 563 ip->ip_tos |= IPTOS_ECN_CE; 564 /* 565 * update checksum (from RFC1624) 566 * HC' = ~(~HC + ~m + m') 567 */ 568 sum = ~ntohs(ip->ip_sum) & 0xffff; 569 sum += (~otos & 0xffff) + ip->ip_tos; 570 sum = (sum >> 16) + (sum & 0xffff); 571 sum += (sum >> 16); /* add carry */ 572 ip->ip_sum = htons(~sum & 0xffff); 573 return (1); 574 } 575 break; 576#ifdef INET6 577 case (IPV6_VERSION >> 4): 578 if (flags & REDF_ECN6) { 579 struct ip6_hdr *ip6 = hdr; 580 u_int32_t flowlabel; 581 582 flowlabel = ntohl(ip6->ip6_flow); 583 if ((flowlabel >> 28) != 6) 584 return (0); /* version mismatch! 
*/ 585 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 586 (IPTOS_ECN_NOTECT << 20)) 587 return (0); /* not-ECT */ 588 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 589 (IPTOS_ECN_CE << 20)) 590 return (1); /* already marked */ 591 /* 592 * ecn-capable but not marked, mark CE 593 */ 594 flowlabel |= (IPTOS_ECN_CE << 20); 595 ip6->ip6_flow = htonl(flowlabel); 596 return (1); 597 } 598 break; 599#endif /* INET6 */ 600 } 601 602 /* not marked */ 603 return (0); 604} 605 606struct mbuf * 607red_getq(rp, q) 608 red_t *rp; 609 class_queue_t *q; 610{ 611 struct mbuf *m; 612 613 if ((m = _getq(q)) == NULL) { 614 if (rp->red_idle == 0) { 615 rp->red_idle = 1; 616 microtime(&rp->red_last); 617 } 618 return NULL; 619 } 620 621 rp->red_idle = 0; 622 return (m); 623} 624 625/* 626 * helper routine to calibrate avg during idle. 627 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 628 * here Wq = 1/weight and the code assumes Wq is close to zero. 629 * 630 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 
631 */ 632static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 633 634struct wtab * 635wtab_alloc(int weight) 636{ 637 struct wtab *w; 638 int i; 639 640 for (w = wtab_list; w != NULL; w = w->w_next) 641 if (w->w_weight == weight) { 642 w->w_refcount++; 643 return (w); 644 } 645 646 w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); 647 if (w == NULL) 648 return (NULL); 649 w->w_weight = weight; 650 w->w_refcount = 1; 651 w->w_next = wtab_list; 652 wtab_list = w; 653 654 /* initialize the weight table */ 655 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 656 for (i = 1; i < 32; i++) { 657 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 658 if (w->w_tab[i] == 0 && w->w_param_max == 0) 659 w->w_param_max = 1 << i; 660 } 661 662 return (w); 663} 664 665int 666wtab_destroy(struct wtab *w) 667{ 668 struct wtab *prev; 669 670 if (--w->w_refcount > 0) 671 return (0); 672 673 if (wtab_list == w) 674 wtab_list = w->w_next; 675 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 676 if (prev->w_next == w) { 677 prev->w_next = w->w_next; 678 break; 679 } 680 681 free(w, M_DEVBUF); 682 return (0); 683} 684 685int32_t 686pow_w(struct wtab *w, int n) 687{ 688 int i, bit; 689 int32_t val; 690 691 if (n >= w->w_param_max) 692 return (0); 693 694 val = 1 << FP_SHIFT; 695 if (n <= 0) 696 return (val); 697 698 bit = 1; 699 i = 0; 700 while (n) { 701 if (n & bit) { 702 val = (val * w->w_tab[i]) >> FP_SHIFT; 703 n &= ~bit; 704 } 705 i++; 706 bit <<= 1; 707 } 708 return (val); 709} 710 711#ifdef ALTQ3_COMPAT 712/* 713 * red device interface 714 */ 715altqdev_decl(red); 716 717int 718redopen(dev, flag, fmt, p) 719 dev_t dev; 720 int flag, fmt; 721#if (__FreeBSD_version > 500000) 722 struct thread *p; 723#else 724 struct proc *p; 725#endif 726{ 727 /* everything will be done when the queueing scheme is attached. 
*/ 728 return 0; 729} 730 731int 732redclose(dev, flag, fmt, p) 733 dev_t dev; 734 int flag, fmt; 735#if (__FreeBSD_version > 500000) 736 struct thread *p; 737#else 738 struct proc *p; 739#endif 740{ 741 red_queue_t *rqp; 742 int err, error = 0; 743 744 while ((rqp = red_list) != NULL) { 745 /* destroy all */ 746 err = red_detach(rqp); 747 if (err != 0 && error == 0) 748 error = err; 749 } 750 751 return error; 752} 753 754int 755redioctl(dev, cmd, addr, flag, p) 756 dev_t dev; 757 ioctlcmd_t cmd; 758 caddr_t addr; 759 int flag; 760#if (__FreeBSD_version > 500000) 761 struct thread *p; 762#else 763 struct proc *p; 764#endif 765{ 766 red_queue_t *rqp; 767 struct red_interface *ifacep; 768 struct ifnet *ifp; 769 int error = 0; 770 771 /* check super-user privilege */ 772 switch (cmd) { 773 case RED_GETSTATS: 774 break; 775 default: 776#if (__FreeBSD_version > 700000) 777 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 778#elsif (__FreeBSD_version > 400000) 779 if ((error = suser(p)) != 0) 780#else 781 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 782#endif 783 return (error); 784 break; 785 } 786 787 switch (cmd) { 788 789 case RED_ENABLE: 790 ifacep = (struct red_interface *)addr; 791 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 792 error = EBADF; 793 break; 794 } 795 error = altq_enable(rqp->rq_ifq); 796 break; 797 798 case RED_DISABLE: 799 ifacep = (struct red_interface *)addr; 800 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 801 error = EBADF; 802 break; 803 } 804 error = altq_disable(rqp->rq_ifq); 805 break; 806 807 case RED_IF_ATTACH: 808 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 809 if (ifp == NULL) { 810 error = ENXIO; 811 break; 812 } 813 814 /* allocate and initialize red_queue_t */ 815 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 816 if (rqp == NULL) { 817 error = ENOMEM; 818 break; 819 } 820 bzero(rqp, sizeof(red_queue_t)); 821 822 rqp->rq_q = malloc(sizeof(class_queue_t), 823 
M_DEVBUF, M_WAITOK); 824 if (rqp->rq_q == NULL) { 825 free(rqp, M_DEVBUF); 826 error = ENOMEM; 827 break; 828 } 829 bzero(rqp->rq_q, sizeof(class_queue_t)); 830 831 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 832 if (rqp->rq_red == NULL) { 833 free(rqp->rq_q, M_DEVBUF); 834 free(rqp, M_DEVBUF); 835 error = ENOMEM; 836 break; 837 } 838 839 rqp->rq_ifq = &ifp->if_snd; 840 qtail(rqp->rq_q) = NULL; 841 qlen(rqp->rq_q) = 0; 842 qlimit(rqp->rq_q) = RED_LIMIT; 843 qtype(rqp->rq_q) = Q_RED; 844 845 /* 846 * set RED to this ifnet structure. 847 */ 848 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 849 red_enqueue, red_dequeue, red_request, 850 NULL, NULL); 851 if (error) { 852 red_destroy(rqp->rq_red); 853 free(rqp->rq_q, M_DEVBUF); 854 free(rqp, M_DEVBUF); 855 break; 856 } 857 858 /* add this state to the red list */ 859 rqp->rq_next = red_list; 860 red_list = rqp; 861 break; 862 863 case RED_IF_DETACH: 864 ifacep = (struct red_interface *)addr; 865 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 866 error = EBADF; 867 break; 868 } 869 error = red_detach(rqp); 870 break; 871 872 case RED_GETSTATS: 873 do { 874 struct red_stats *q_stats; 875 red_t *rp; 876 877 q_stats = (struct red_stats *)addr; 878 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 879 ALTQT_RED)) == NULL) { 880 error = EBADF; 881 break; 882 } 883 884 q_stats->q_len = qlen(rqp->rq_q); 885 q_stats->q_limit = qlimit(rqp->rq_q); 886 887 rp = rqp->rq_red; 888 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 889 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 890 q_stats->drop_cnt = rp->red_stats.drop_cnt; 891 q_stats->drop_forced = rp->red_stats.drop_forced; 892 q_stats->drop_unforced = rp->red_stats.drop_unforced; 893 q_stats->marked_packets = rp->red_stats.marked_packets; 894 895 q_stats->weight = rp->red_weight; 896 q_stats->inv_pmax = rp->red_inv_pmax; 897 q_stats->th_min = rp->red_thmin; 898 q_stats->th_max = rp->red_thmax; 899 900#ifdef ALTQ_FLOWVALVE 901 if (rp->red_flowvalve != NULL) { 902 
struct flowvalve *fv = rp->red_flowvalve; 903 q_stats->fv_flows = fv->fv_flows; 904 q_stats->fv_pass = fv->fv_stats.pass; 905 q_stats->fv_predrop = fv->fv_stats.predrop; 906 q_stats->fv_alloc = fv->fv_stats.alloc; 907 q_stats->fv_escape = fv->fv_stats.escape; 908 } else { 909#endif /* ALTQ_FLOWVALVE */ 910 q_stats->fv_flows = 0; 911 q_stats->fv_pass = 0; 912 q_stats->fv_predrop = 0; 913 q_stats->fv_alloc = 0; 914 q_stats->fv_escape = 0; 915#ifdef ALTQ_FLOWVALVE 916 } 917#endif /* ALTQ_FLOWVALVE */ 918 } while (/*CONSTCOND*/ 0); 919 break; 920 921 case RED_CONFIG: 922 do { 923 struct red_conf *fc; 924 red_t *new; 925 int s, limit; 926 927 fc = (struct red_conf *)addr; 928 if ((rqp = altq_lookup(fc->iface.red_ifname, 929 ALTQT_RED)) == NULL) { 930 error = EBADF; 931 break; 932 } 933 new = red_alloc(fc->red_weight, 934 fc->red_inv_pmax, 935 fc->red_thmin, 936 fc->red_thmax, 937 fc->red_flags, 938 fc->red_pkttime); 939 if (new == NULL) { 940 error = ENOMEM; 941 break; 942 } 943 944#ifdef __NetBSD__ 945 s = splnet(); 946#else 947 s = splimp(); 948#endif 949 red_purgeq(rqp); 950 limit = fc->red_limit; 951 if (limit < fc->red_thmax) 952 limit = fc->red_thmax; 953 qlimit(rqp->rq_q) = limit; 954 fc->red_limit = limit; /* write back the new value */ 955 956 red_destroy(rqp->rq_red); 957 rqp->rq_red = new; 958 959 splx(s); 960 961 /* write back new values */ 962 fc->red_limit = limit; 963 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 964 fc->red_thmin = rqp->rq_red->red_thmin; 965 fc->red_thmax = rqp->rq_red->red_thmax; 966 967 } while (/*CONSTCOND*/ 0); 968 break; 969 970 case RED_SETDEFAULTS: 971 do { 972 struct redparams *rp; 973 974 rp = (struct redparams *)addr; 975 976 default_th_min = rp->th_min; 977 default_th_max = rp->th_max; 978 default_inv_pmax = rp->inv_pmax; 979 } while (/*CONSTCOND*/ 0); 980 break; 981 982 default: 983 error = EINVAL; 984 break; 985 } 986 return error; 987} 988 989static int 990red_detach(rqp) 991 red_queue_t *rqp; 992{ 993 red_queue_t *tmp; 
994 int error = 0; 995 996 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 997 altq_disable(rqp->rq_ifq); 998 999 if ((error = altq_detach(rqp->rq_ifq))) 1000 return (error); 1001 1002 if (red_list == rqp) 1003 red_list = rqp->rq_next; 1004 else { 1005 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 1006 if (tmp->rq_next == rqp) { 1007 tmp->rq_next = rqp->rq_next; 1008 break; 1009 } 1010 if (tmp == NULL) 1011 printf("red_detach: no state found in red_list!\n"); 1012 } 1013 1014 red_destroy(rqp->rq_red); 1015 free(rqp->rq_q, M_DEVBUF); 1016 free(rqp, M_DEVBUF); 1017 return (error); 1018} 1019 1020/* 1021 * enqueue routine: 1022 * 1023 * returns: 0 when successfully queued. 1024 * ENOBUFS when drop occurs. 1025 */ 1026static int 1027red_enqueue(ifq, m, pktattr) 1028 struct ifaltq *ifq; 1029 struct mbuf *m; 1030 struct altq_pktattr *pktattr; 1031{ 1032 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1033 1034 IFQ_LOCK_ASSERT(ifq); 1035 1036 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 1037 return ENOBUFS; 1038 ifq->ifq_len++; 1039 return 0; 1040} 1041 1042/* 1043 * dequeue routine: 1044 * must be called in splimp. 1045 * 1046 * returns: mbuf dequeued. 1047 * NULL when no packet is available in the queue. 
 */

/*
 * Dequeue interface for the RED discipline.
 * ALTDQ_POLL returns the head packet without removing it;
 * ALTDQ_REMOVE dequeues via red_getq() and decrements the interface
 * queue length.  Caller must hold the ifq lock (asserted below).
 */
static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	IFQ_LOCK_ASSERT(ifq);

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

/*
 * Generic request handler for the RED discipline.
 * Only ALTRQ_PURGE is handled (flush all queued packets); any other
 * request is silently accepted and returns 0.
 */
static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	IFQ_LOCK_ASSERT(ifq);

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

/*
 * Discard all packets queued on this RED instance and, if the
 * discipline is currently enabled, reset the interface queue length.
 */
static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}

#ifdef ALTQ_FLOWVALVE

#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

#define	FV_STATS

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

/*
 * Look up the flowvalve entry (fve) matching the packet's flow
 * (IPv4 src/dst pair, or IPv6 src/dst pair when INET6).
 * The flowlist is kept in LRU order, with unused entries
 * (fve_lastdrop.tv_sec == 0) at the tail, so the scan stops at the
 * first unused or expired entry.  An entry older than FV_TTHRESH
 * seconds is lazily invalidated here.  Returns NULL when there is no
 * match (also updates fv_flows, the active-entry count, in that case)
 * or when the address family is unknown.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

/*
 * Recycle the least-recently-used flowvalve entry for the packet's
 * flow: record the flow's addresses, reset the valve state to Green
 * and clear the averages/counters.  The caller is expected to move
 * the entry to the head of the LRU list afterwards.
 */
static __inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	/*
	 * NOTE(review): 0.0 is a floating-point constant assigned to what
	 * the rest of this file treats as fixed-point integers (fve_p,
	 * fve_f); plain 0 would avoid FP use in kernel context -- confirm
	 * the field types before changing.
	 */
	fve->fve_p = 0.0;
	fve->fve_f = 0.0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

/*
 * Move an entry to the head of the LRU flowlist (no-op if it is
 * already at the head).
 */
static __inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif

/*
 * Release the flowvalve: the p2f table, the fve array, and the
 * flowvalve structure itself.
 */
static void fv_destroy(fv)
	struct flowvalve *fv;
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}

/*
 * Convert a scaled drop rate p into the corresponding bandwidth
 * fraction f via the precomputed p2f table.  Values at or above
 * BRTT_PMAX saturate to the last table entry; values below the
 * table's resolution use entry 1 (entry 0 is unused).
 */
static __inline int
fv_p2f(fv, p)
	struct flowvalve *fv;
	int p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splimp.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so fv_dropbyred() can skip a second lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a flow whose average drop rate exceeds the
	 * threshold AND whose bandwidth fraction exceeds the fair share
	 * derived from that drop rate is marked Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splimp.
 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
#endif /* ALTQ3_COMPAT */

#endif /* ALTQ_RED */