1130368Smlaier/* $FreeBSD$ */ 2130365Smlaier/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */ 3130365Smlaier 4130365Smlaier/* 5130365Smlaier * Copyright (C) 1998-2003 6130365Smlaier * Sony Computer Science Laboratories Inc. All rights reserved. 7130365Smlaier * 8130365Smlaier * Redistribution and use in source and binary forms, with or without 9130365Smlaier * modification, are permitted provided that the following conditions 10130365Smlaier * are met: 11130365Smlaier * 1. Redistributions of source code must retain the above copyright 12130365Smlaier * notice, this list of conditions and the following disclaimer. 13130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 14130365Smlaier * notice, this list of conditions and the following disclaimer in the 15130365Smlaier * documentation and/or other materials provided with the distribution. 16130365Smlaier * 17130365Smlaier * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20130365Smlaier * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27130365Smlaier * SUCH DAMAGE. 28130365Smlaier */ 29130365Smlaier/* 30130365Smlaier * Copyright (c) 1990-1994 Regents of the University of California. 31130365Smlaier * All rights reserved. 32130365Smlaier * 33130365Smlaier * Redistribution and use in source and binary forms, with or without 34130365Smlaier * modification, are permitted provided that the following conditions 35130365Smlaier * are met: 36130365Smlaier * 1. Redistributions of source code must retain the above copyright 37130365Smlaier * notice, this list of conditions and the following disclaimer. 38130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 39130365Smlaier * notice, this list of conditions and the following disclaimer in the 40130365Smlaier * documentation and/or other materials provided with the distribution. 41130365Smlaier * 3. All advertising materials mentioning features or use of this software 42130365Smlaier * must display the following acknowledgement: 43130365Smlaier * This product includes software developed by the Computer Systems 44130365Smlaier * Engineering Group at Lawrence Berkeley Laboratory. 45130365Smlaier * 4. Neither the name of the University nor of the Laboratory may be used 46130365Smlaier * to endorse or promote products derived from this software without 47130365Smlaier * specific prior written permission. 48130365Smlaier * 49130365Smlaier * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52130365Smlaier * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59130365Smlaier * SUCH DAMAGE. 60130365Smlaier */ 61130365Smlaier 62130365Smlaier#if defined(__FreeBSD__) || defined(__NetBSD__) 63130365Smlaier#include "opt_altq.h" 64130365Smlaier#include "opt_inet.h" 65130365Smlaier#ifdef __FreeBSD__ 66130365Smlaier#include "opt_inet6.h" 67130365Smlaier#endif 68130365Smlaier#endif /* __FreeBSD__ || __NetBSD__ */ 69130365Smlaier#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ 70130365Smlaier 71130365Smlaier#include <sys/param.h> 72130365Smlaier#include <sys/malloc.h> 73130365Smlaier#include <sys/mbuf.h> 74130365Smlaier#include <sys/socket.h> 75130365Smlaier#include <sys/systm.h> 76130365Smlaier#include <sys/errno.h> 77130365Smlaier#if 1 /* ALTQ3_COMPAT */ 78130365Smlaier#include <sys/proc.h> 79130365Smlaier#include <sys/sockio.h> 80130365Smlaier#include <sys/kernel.h> 81130365Smlaier#endif 82130365Smlaier 83130365Smlaier#include <net/if.h> 84130365Smlaier 85130365Smlaier#include <netinet/in.h> 86130365Smlaier#include <netinet/in_systm.h> 87130365Smlaier#include <netinet/ip.h> 88130365Smlaier#ifdef INET6 89130365Smlaier#include <netinet/ip6.h> 90130365Smlaier#endif 91130365Smlaier 92130365Smlaier#include <net/pfvar.h> 93130365Smlaier#include <altq/altq.h> 94130365Smlaier#include <altq/altq_cdnr.h> 95130365Smlaier#include <altq/altq_red.h> 96130365Smlaier#include <altq/altq_rio.h> 97130365Smlaier#ifdef ALTQ3_COMPAT 98130365Smlaier#include <altq/altq_conf.h> 99130365Smlaier#endif 100130365Smlaier 101130365Smlaier/* 102130365Smlaier * RIO: RED with IN/OUT bit 103130365Smlaier * described in 104130365Smlaier * "Explicit Allocation of Best Effort Packet Delivery Service" 105130365Smlaier * David D. Clark and Wenjia Fang, MIT Lab for Computer Science 106130365Smlaier * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} 107130365Smlaier * 108130365Smlaier * this implementation is extended to support more than 2 drop precedence 109130365Smlaier * values as described in RFC2597 (Assured Forwarding PHB Group). 110130365Smlaier * 111130365Smlaier */ 112130365Smlaier/* 113130365Smlaier * AF DS (differentiated service) codepoints. 114130365Smlaier * (classes can be mapped to CBQ or H-FSC classes.) 115130365Smlaier * 116130365Smlaier * 0 1 2 3 4 5 6 7 117130365Smlaier * +---+---+---+---+---+---+---+---+ 118130365Smlaier * | CLASS |DropPre| 0 | CU | 119130365Smlaier * +---+---+---+---+---+---+---+---+ 120130365Smlaier * 121130365Smlaier * class 1: 001 122130365Smlaier * class 2: 010 123130365Smlaier * class 3: 011 124130365Smlaier * class 4: 100 125130365Smlaier * 126130365Smlaier * low drop prec: 01 127130365Smlaier * medium drop prec: 10 128130365Smlaier * high drop prec: 01 129130365Smlaier */ 130130365Smlaier 131130365Smlaier/* normal red parameters */ 132130365Smlaier#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 133130365Smlaier /* q_weight = 0.00195 */ 134130365Smlaier 135130365Smlaier/* red parameters for a slow link */ 136130365Smlaier#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 137130365Smlaier /* q_weight = 0.0078125 */ 138130365Smlaier 139130365Smlaier/* red parameters for a very slow link (e.g., dialup) */ 140130365Smlaier#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 141130365Smlaier /* q_weight = 0.015625 */ 142130365Smlaier 143130365Smlaier/* fixed-point uses 12-bit decimal places */ 144130365Smlaier#define FP_SHIFT 12 /* fixed-point shift */ 145130365Smlaier 146130365Smlaier/* red parameters for drop probability */ 147130365Smlaier#define INV_P_MAX 10 /* inverse of max drop probability */ 148130365Smlaier#define TH_MIN 5 /* min threshold */ 149130365Smlaier#define TH_MAX 15 /* max threshold */ 150130365Smlaier 151130365Smlaier#define RIO_LIMIT 60 /* default max queue lenght */ 152130365Smlaier#define RIO_STATS /* collect statistics */ 153130365Smlaier 154130365Smlaier#define TV_DELTA(a, b, delta) { \ 155130365Smlaier register int xxs; \ 156130365Smlaier \ 157130365Smlaier delta = (a)->tv_usec - (b)->tv_usec; \ 158130365Smlaier if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ 159130365Smlaier if (xxs < 0) { \ 160130365Smlaier delta = 60000000; \ 161130365Smlaier } else if (xxs > 4) { \ 162130365Smlaier if (xxs > 60) \ 163130365Smlaier delta = 60000000; \ 164130365Smlaier else \ 165130365Smlaier delta += xxs * 1000000; \ 166130365Smlaier } else while (xxs > 0) { \ 167130365Smlaier delta += 1000000; \ 168130365Smlaier xxs--; \ 169130365Smlaier } \ 170130365Smlaier } \ 171130365Smlaier} 172130365Smlaier 173130365Smlaier#ifdef ALTQ3_COMPAT 174130365Smlaier/* rio_list keeps all rio_queue_t's allocated. */ 175130365Smlaierstatic rio_queue_t *rio_list = NULL; 176130365Smlaier#endif 177130365Smlaier/* default rio parameter values */ 178130365Smlaierstatic struct redparams default_rio_params[RIO_NDROPPREC] = { 179130365Smlaier /* th_min, th_max, inv_pmax */ 180130365Smlaier { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ 181130365Smlaier { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ 182130365Smlaier { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ 183130365Smlaier}; 184130365Smlaier 185130365Smlaier/* internal function prototypes */ 186130365Smlaierstatic int dscp2index(u_int8_t); 187130365Smlaier#ifdef ALTQ3_COMPAT 188130365Smlaierstatic int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 189130365Smlaierstatic struct mbuf *rio_dequeue(struct ifaltq *, int); 190130365Smlaierstatic int rio_request(struct ifaltq *, int, void *); 191130365Smlaierstatic int rio_detach(rio_queue_t *); 192130365Smlaier 193130365Smlaier/* 194130365Smlaier * rio device interface 195130365Smlaier */ 196130365Smlaieraltqdev_decl(rio); 197130365Smlaier 198130365Smlaier#endif /* ALTQ3_COMPAT */ 199130365Smlaier 200130365Smlaierrio_t * 201130365Smlaierrio_alloc(int weight, struct redparams *params, int flags, int pkttime) 202130365Smlaier{ 203130365Smlaier rio_t *rp; 204130365Smlaier int w, i; 205130365Smlaier int npkts_per_sec; 206130365Smlaier 207240824Sglebius rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); 208130365Smlaier if (rp == NULL) 209130365Smlaier return (NULL); 210130365Smlaier 211130365Smlaier rp->rio_flags = flags; 212130365Smlaier if (pkttime == 0) 213130365Smlaier /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 214130365Smlaier rp->rio_pkttime = 800; 215130365Smlaier else 216130365Smlaier rp->rio_pkttime = pkttime; 217130365Smlaier 218130365Smlaier if (weight != 0) 219130365Smlaier rp->rio_weight = weight; 220130365Smlaier else { 221130365Smlaier /* use default */ 222130365Smlaier rp->rio_weight = W_WEIGHT; 223130365Smlaier 224130365Smlaier /* when the link is very slow, adjust red parameters */ 225130365Smlaier npkts_per_sec = 1000000 / rp->rio_pkttime; 226130365Smlaier if (npkts_per_sec < 50) { 227130365Smlaier /* up to about 400Kbps */ 228130365Smlaier rp->rio_weight = W_WEIGHT_2; 229130365Smlaier } else if (npkts_per_sec < 300) { 230130365Smlaier /* up to about 2.4Mbps */ 231130365Smlaier rp->rio_weight = W_WEIGHT_1; 232130365Smlaier } 233130365Smlaier } 234130365Smlaier 235130365Smlaier /* calculate wshift. weight must be power of 2 */ 236130365Smlaier w = rp->rio_weight; 237130365Smlaier for (i = 0; w > 1; i++) 238130365Smlaier w = w >> 1; 239130365Smlaier rp->rio_wshift = i; 240130365Smlaier w = 1 << rp->rio_wshift; 241130365Smlaier if (w != rp->rio_weight) { 242130365Smlaier printf("invalid weight value %d for red! use %d\n", 243130365Smlaier rp->rio_weight, w); 244130365Smlaier rp->rio_weight = w; 245130365Smlaier } 246130365Smlaier 247130365Smlaier /* allocate weight table */ 248130365Smlaier rp->rio_wtab = wtab_alloc(rp->rio_weight); 249130365Smlaier 250130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 251130365Smlaier struct dropprec_state *prec = &rp->rio_precstate[i]; 252130365Smlaier 253130365Smlaier prec->avg = 0; 254130365Smlaier prec->idle = 1; 255130365Smlaier 256130365Smlaier if (params == NULL || params[i].inv_pmax == 0) 257130365Smlaier prec->inv_pmax = default_rio_params[i].inv_pmax; 258130365Smlaier else 259130365Smlaier prec->inv_pmax = params[i].inv_pmax; 260130365Smlaier if (params == NULL || params[i].th_min == 0) 261130365Smlaier prec->th_min = default_rio_params[i].th_min; 262130365Smlaier else 263130365Smlaier prec->th_min = params[i].th_min; 264130365Smlaier if (params == NULL || params[i].th_max == 0) 265130365Smlaier prec->th_max = default_rio_params[i].th_max; 266130365Smlaier else 267130365Smlaier prec->th_max = params[i].th_max; 268130365Smlaier 269130365Smlaier /* 270130365Smlaier * th_min_s and th_max_s are scaled versions of th_min 271130365Smlaier * and th_max to be compared with avg. 272130365Smlaier */ 273130365Smlaier prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); 274130365Smlaier prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); 275130365Smlaier 276130365Smlaier /* 277130365Smlaier * precompute probability denominator 278130365Smlaier * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 279130365Smlaier */ 280130365Smlaier prec->probd = (2 * (prec->th_max - prec->th_min) 281130365Smlaier * prec->inv_pmax) << FP_SHIFT; 282130365Smlaier 283130365Smlaier microtime(&prec->last); 284130365Smlaier } 285130365Smlaier 286130365Smlaier return (rp); 287130365Smlaier} 288130365Smlaier 289130365Smlaiervoid 290130365Smlaierrio_destroy(rio_t *rp) 291130365Smlaier{ 292130365Smlaier wtab_destroy(rp->rio_wtab); 293184205Sdes free(rp, M_DEVBUF); 294130365Smlaier} 295130365Smlaier 296130365Smlaiervoid 297130365Smlaierrio_getstats(rio_t *rp, struct redstats *sp) 298130365Smlaier{ 299130365Smlaier int i; 300130365Smlaier 301130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 302130365Smlaier bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); 303130365Smlaier sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; 304130365Smlaier sp++; 305130365Smlaier } 306130365Smlaier} 307130365Smlaier 308130365Smlaier#if (RIO_NDROPPREC == 3) 309130365Smlaier/* 310130365Smlaier * internally, a drop precedence value is converted to an index 311130365Smlaier * starting from 0. 312130365Smlaier */ 313130365Smlaierstatic int 314130365Smlaierdscp2index(u_int8_t dscp) 315130365Smlaier{ 316130365Smlaier int dpindex = dscp & AF_DROPPRECMASK; 317130365Smlaier 318130365Smlaier if (dpindex == 0) 319130365Smlaier return (0); 320130365Smlaier return ((dpindex >> 3) - 1); 321130365Smlaier} 322130365Smlaier#endif 323130365Smlaier 324130365Smlaier#if 1 325130365Smlaier/* 326130365Smlaier * kludge: when a packet is dequeued, we need to know its drop precedence 327130365Smlaier * in order to keep the queue length of each drop precedence. 328130365Smlaier * use m_pkthdr.rcvif to pass this info. 329130365Smlaier */ 330130365Smlaier#define RIOM_SET_PRECINDEX(m, idx) \ 331147256Sbrooks do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) 332130365Smlaier#define RIOM_GET_PRECINDEX(m) \ 333130365Smlaier ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ 334130365Smlaier (m)->m_pkthdr.rcvif = NULL; idx; }) 335130365Smlaier#endif 336130365Smlaier 337130365Smlaierint 338130365Smlaierrio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, 339130365Smlaier struct altq_pktattr *pktattr) 340130365Smlaier{ 341130365Smlaier int avg, droptype; 342130365Smlaier u_int8_t dsfield, odsfield; 343130365Smlaier int dpindex, i, n, t; 344130365Smlaier struct timeval now; 345130365Smlaier struct dropprec_state *prec; 346130365Smlaier 347130365Smlaier dsfield = odsfield = read_dsfield(m, pktattr); 348130365Smlaier dpindex = dscp2index(dsfield); 349130365Smlaier 350130365Smlaier /* 351130365Smlaier * update avg of the precedence states whose drop precedence 352130365Smlaier * is larger than or equal to the drop precedence of the packet 353130365Smlaier */ 354130365Smlaier now.tv_sec = 0; 355130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) { 356130365Smlaier prec = &rp->rio_precstate[i]; 357130365Smlaier avg = prec->avg; 358130365Smlaier if (prec->idle) { 359130365Smlaier prec->idle = 0; 360130365Smlaier if (now.tv_sec == 0) 361130365Smlaier microtime(&now); 362130365Smlaier t = (now.tv_sec - prec->last.tv_sec); 363130365Smlaier if (t > 60) 364130365Smlaier avg = 0; 365130365Smlaier else { 366130365Smlaier t = t * 1000000 + 367130365Smlaier (now.tv_usec - prec->last.tv_usec); 368130365Smlaier n = t / rp->rio_pkttime; 369130365Smlaier /* calculate (avg = (1 - Wq)^n * avg) */ 370130365Smlaier if (n > 0) 371130365Smlaier avg = (avg >> FP_SHIFT) * 372130365Smlaier pow_w(rp->rio_wtab, n); 373130365Smlaier } 374130365Smlaier } 375130365Smlaier 376130365Smlaier /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ 377130365Smlaier avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); 378130365Smlaier prec->avg = avg; /* save the new value */ 379130365Smlaier /* 380130365Smlaier * count keeps a tally of arriving traffic that has not 381130365Smlaier * been dropped. 382130365Smlaier */ 383130365Smlaier prec->count++; 384130365Smlaier } 385130365Smlaier 386130365Smlaier prec = &rp->rio_precstate[dpindex]; 387130365Smlaier avg = prec->avg; 388130365Smlaier 389130365Smlaier /* see if we drop early */ 390130365Smlaier droptype = DTYPE_NODROP; 391130365Smlaier if (avg >= prec->th_min_s && prec->qlen > 1) { 392130365Smlaier if (avg >= prec->th_max_s) { 393130365Smlaier /* avg >= th_max: forced drop */ 394130365Smlaier droptype = DTYPE_FORCED; 395130365Smlaier } else if (prec->old == 0) { 396130365Smlaier /* first exceeds th_min */ 397130365Smlaier prec->count = 1; 398130365Smlaier prec->old = 1; 399130365Smlaier } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, 400130365Smlaier prec->probd, prec->count)) { 401130365Smlaier /* unforced drop by red */ 402130365Smlaier droptype = DTYPE_EARLY; 403130365Smlaier } 404130365Smlaier } else { 405130365Smlaier /* avg < th_min */ 406130365Smlaier prec->old = 0; 407130365Smlaier } 408130365Smlaier 409130365Smlaier /* 410130365Smlaier * if the queue length hits the hard limit, it's a forced drop. 411130365Smlaier */ 412130365Smlaier if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 413130365Smlaier droptype = DTYPE_FORCED; 414130365Smlaier 415130365Smlaier if (droptype != DTYPE_NODROP) { 416130365Smlaier /* always drop incoming packet (as opposed to randomdrop) */ 417130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) 418130365Smlaier rp->rio_precstate[i].count = 0; 419130365Smlaier#ifdef RIO_STATS 420130365Smlaier if (droptype == DTYPE_EARLY) 421130365Smlaier rp->q_stats[dpindex].drop_unforced++; 422130365Smlaier else 423130365Smlaier rp->q_stats[dpindex].drop_forced++; 424130365Smlaier PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); 425130365Smlaier#endif 426130365Smlaier m_freem(m); 427130365Smlaier return (-1); 428130365Smlaier } 429130365Smlaier 430130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) 431130365Smlaier rp->rio_precstate[i].qlen++; 432130365Smlaier 433130365Smlaier /* save drop precedence index in mbuf hdr */ 434130365Smlaier RIOM_SET_PRECINDEX(m, dpindex); 435130365Smlaier 436130365Smlaier if (rp->rio_flags & RIOF_CLEARDSCP) 437130365Smlaier dsfield &= ~DSCP_MASK; 438130365Smlaier 439130365Smlaier if (dsfield != odsfield) 440130365Smlaier write_dsfield(m, pktattr, dsfield); 441130365Smlaier 442130365Smlaier _addq(q, m); 443130365Smlaier 444130365Smlaier#ifdef RIO_STATS 445130365Smlaier PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); 446130365Smlaier#endif 447130365Smlaier return (0); 448130365Smlaier} 449130365Smlaier 450130365Smlaierstruct mbuf * 451130365Smlaierrio_getq(rio_t *rp, class_queue_t *q) 452130365Smlaier{ 453130365Smlaier struct mbuf *m; 454130365Smlaier int dpindex, i; 455130365Smlaier 456130365Smlaier if ((m = _getq(q)) == NULL) 457130365Smlaier return NULL; 458130365Smlaier 459130365Smlaier dpindex = RIOM_GET_PRECINDEX(m); 460130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) { 461130365Smlaier if (--rp->rio_precstate[i].qlen == 0) { 462130365Smlaier if (rp->rio_precstate[i].idle == 0) { 463130365Smlaier rp->rio_precstate[i].idle = 1; 464130365Smlaier microtime(&rp->rio_precstate[i].last); 465130365Smlaier } 466130365Smlaier } 467130365Smlaier } 468130365Smlaier return (m); 469130365Smlaier} 470130365Smlaier 471130365Smlaier#ifdef ALTQ3_COMPAT 472130365Smlaierint 473130365Smlaierrioopen(dev, flag, fmt, p) 474130365Smlaier dev_t dev; 475130365Smlaier int flag, fmt; 476130365Smlaier#if (__FreeBSD_version > 500000) 477130365Smlaier struct thread *p; 478130365Smlaier#else 479130365Smlaier struct proc *p; 480130365Smlaier#endif 481130365Smlaier{ 482130365Smlaier /* everything will be done when the queueing scheme is attached. */ 483130365Smlaier return 0; 484130365Smlaier} 485130365Smlaier 486130365Smlaierint 487130365Smlaierrioclose(dev, flag, fmt, p) 488130365Smlaier dev_t dev; 489130365Smlaier int flag, fmt; 490130365Smlaier#if (__FreeBSD_version > 500000) 491130365Smlaier struct thread *p; 492130365Smlaier#else 493130365Smlaier struct proc *p; 494130365Smlaier#endif 495130365Smlaier{ 496130365Smlaier rio_queue_t *rqp; 497130365Smlaier int err, error = 0; 498130365Smlaier 499130365Smlaier while ((rqp = rio_list) != NULL) { 500130365Smlaier /* destroy all */ 501130365Smlaier err = rio_detach(rqp); 502130365Smlaier if (err != 0 && error == 0) 503130365Smlaier error = err; 504130365Smlaier } 505130365Smlaier 506130365Smlaier return error; 507130365Smlaier} 508130365Smlaier 509130365Smlaierint 510130365Smlaierrioioctl(dev, cmd, addr, flag, p) 511130365Smlaier dev_t dev; 512130365Smlaier ioctlcmd_t cmd; 513130365Smlaier caddr_t addr; 514130365Smlaier int flag; 515130365Smlaier#if (__FreeBSD_version > 500000) 516130365Smlaier struct thread *p; 517130365Smlaier#else 518130365Smlaier struct proc *p; 519130365Smlaier#endif 520130365Smlaier{ 521130365Smlaier rio_queue_t *rqp; 522130365Smlaier struct rio_interface *ifacep; 523130365Smlaier struct ifnet *ifp; 524130365Smlaier int error = 0; 525130365Smlaier 526130365Smlaier /* check super-user privilege */ 527130365Smlaier switch (cmd) { 528130365Smlaier case RIO_GETSTATS: 529130365Smlaier break; 530130365Smlaier default: 531164033Srwatson#if (__FreeBSD_version > 700000) 532164033Srwatson if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 533164033Srwatson return (error); 534164033Srwatson#elsif (__FreeBSD_version > 400000) 535130365Smlaier if ((error = suser(p)) != 0) 536130365Smlaier return (error); 537130365Smlaier#else 538130365Smlaier if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 539130365Smlaier return (error); 540130365Smlaier#endif 541130365Smlaier break; 542130365Smlaier } 543130365Smlaier 544130365Smlaier switch (cmd) { 545130365Smlaier 546130365Smlaier case RIO_ENABLE: 547130365Smlaier ifacep = (struct rio_interface *)addr; 548130365Smlaier if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 549130365Smlaier error = EBADF; 550130365Smlaier break; 551130365Smlaier } 552130365Smlaier error = altq_enable(rqp->rq_ifq); 553130365Smlaier break; 554130365Smlaier 555130365Smlaier case RIO_DISABLE: 556130365Smlaier ifacep = (struct rio_interface *)addr; 557130365Smlaier if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 558130365Smlaier error = EBADF; 559130365Smlaier break; 560130365Smlaier } 561130365Smlaier error = altq_disable(rqp->rq_ifq); 562130365Smlaier break; 563130365Smlaier 564130365Smlaier case RIO_IF_ATTACH: 565130365Smlaier ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); 566130365Smlaier if (ifp == NULL) { 567130365Smlaier error = ENXIO; 568130365Smlaier break; 569130365Smlaier } 570130365Smlaier 571130365Smlaier /* allocate and initialize rio_queue_t */ 572184205Sdes rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); 573130365Smlaier if (rqp == NULL) { 574130365Smlaier error = ENOMEM; 575130365Smlaier break; 576130365Smlaier } 577130365Smlaier bzero(rqp, sizeof(rio_queue_t)); 578130365Smlaier 579184205Sdes rqp->rq_q = malloc(sizeof(class_queue_t), 580130365Smlaier M_DEVBUF, M_WAITOK); 581130365Smlaier if (rqp->rq_q == NULL) { 582184205Sdes free(rqp, M_DEVBUF); 583130365Smlaier error = ENOMEM; 584130365Smlaier break; 585130365Smlaier } 586130365Smlaier bzero(rqp->rq_q, sizeof(class_queue_t)); 587130365Smlaier 588130365Smlaier rqp->rq_rio = rio_alloc(0, NULL, 0, 0); 589130365Smlaier if (rqp->rq_rio == NULL) { 590184205Sdes free(rqp->rq_q, M_DEVBUF); 591184205Sdes free(rqp, M_DEVBUF); 592130365Smlaier error = ENOMEM; 593130365Smlaier break; 594130365Smlaier } 595130365Smlaier 596130365Smlaier rqp->rq_ifq = &ifp->if_snd; 597130365Smlaier qtail(rqp->rq_q) = NULL; 598130365Smlaier qlen(rqp->rq_q) = 0; 599130365Smlaier qlimit(rqp->rq_q) = RIO_LIMIT; 600130365Smlaier qtype(rqp->rq_q) = Q_RIO; 601130365Smlaier 602130365Smlaier /* 603130365Smlaier * set RIO to this ifnet structure. 604130365Smlaier */ 605130365Smlaier error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, 606130365Smlaier rio_enqueue, rio_dequeue, rio_request, 607130365Smlaier NULL, NULL); 608130365Smlaier if (error) { 609130365Smlaier rio_destroy(rqp->rq_rio); 610184205Sdes free(rqp->rq_q, M_DEVBUF); 611184205Sdes free(rqp, M_DEVBUF); 612130365Smlaier break; 613130365Smlaier } 614130365Smlaier 615130365Smlaier /* add this state to the rio list */ 616130365Smlaier rqp->rq_next = rio_list; 617130365Smlaier rio_list = rqp; 618130365Smlaier break; 619130365Smlaier 620130365Smlaier case RIO_IF_DETACH: 621130365Smlaier ifacep = (struct rio_interface *)addr; 622130365Smlaier if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 623130365Smlaier error = EBADF; 624130365Smlaier break; 625130365Smlaier } 626130365Smlaier error = rio_detach(rqp); 627130365Smlaier break; 628130365Smlaier 629130365Smlaier case RIO_GETSTATS: 630130365Smlaier do { 631130365Smlaier struct rio_stats *q_stats; 632130365Smlaier rio_t *rp; 633130365Smlaier int i; 634130365Smlaier 635130365Smlaier q_stats = (struct rio_stats *)addr; 636130365Smlaier if ((rqp = altq_lookup(q_stats->iface.rio_ifname, 637130365Smlaier ALTQT_RIO)) == NULL) { 638130365Smlaier error = EBADF; 639130365Smlaier break; 640130365Smlaier } 641130365Smlaier 642130365Smlaier rp = rqp->rq_rio; 643130365Smlaier 644130365Smlaier q_stats->q_limit = qlimit(rqp->rq_q); 645130365Smlaier q_stats->weight = rp->rio_weight; 646130365Smlaier q_stats->flags = rp->rio_flags; 647130365Smlaier 648130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 649130365Smlaier q_stats->q_len[i] = rp->rio_precstate[i].qlen; 650130365Smlaier bcopy(&rp->q_stats[i], &q_stats->q_stats[i], 651130365Smlaier sizeof(struct redstats)); 652130365Smlaier q_stats->q_stats[i].q_avg = 653130365Smlaier rp->rio_precstate[i].avg >> rp->rio_wshift; 654130365Smlaier 655130365Smlaier q_stats->q_params[i].inv_pmax 656130365Smlaier = rp->rio_precstate[i].inv_pmax; 657130365Smlaier q_stats->q_params[i].th_min 658130365Smlaier = rp->rio_precstate[i].th_min; 659130365Smlaier q_stats->q_params[i].th_max 660130365Smlaier = rp->rio_precstate[i].th_max; 661130365Smlaier } 662130365Smlaier } while (/*CONSTCOND*/ 0); 663130365Smlaier break; 664130365Smlaier 665130365Smlaier case RIO_CONFIG: 666130365Smlaier do { 667130365Smlaier struct rio_conf *fc; 668130365Smlaier rio_t *new; 669130365Smlaier int s, limit, i; 670130365Smlaier 671130365Smlaier fc = (struct rio_conf *)addr; 672130365Smlaier if ((rqp = altq_lookup(fc->iface.rio_ifname, 673130365Smlaier ALTQT_RIO)) == NULL) { 674130365Smlaier error = EBADF; 675130365Smlaier break; 676130365Smlaier } 677130365Smlaier 678130365Smlaier new = rio_alloc(fc->rio_weight, &fc->q_params[0], 679130365Smlaier fc->rio_flags, fc->rio_pkttime); 680130365Smlaier if (new == NULL) { 681130365Smlaier error = ENOMEM; 682130365Smlaier break; 683130365Smlaier } 684130365Smlaier 685130365Smlaier#ifdef __NetBSD__ 686130365Smlaier s = splnet(); 687130365Smlaier#else 688130365Smlaier s = splimp(); 689130365Smlaier#endif 690130365Smlaier _flushq(rqp->rq_q); 691130365Smlaier limit = fc->rio_limit; 692130365Smlaier if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) 693130365Smlaier limit = fc->q_params[RIO_NDROPPREC-1].th_max; 694130365Smlaier qlimit(rqp->rq_q) = limit; 695130365Smlaier 696130365Smlaier rio_destroy(rqp->rq_rio); 697130365Smlaier rqp->rq_rio = new; 698130365Smlaier 699130365Smlaier splx(s); 700130365Smlaier 701130365Smlaier /* write back new values */ 702130365Smlaier fc->rio_limit = limit; 703130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 704130365Smlaier fc->q_params[i].inv_pmax = 705130365Smlaier rqp->rq_rio->rio_precstate[i].inv_pmax; 706130365Smlaier fc->q_params[i].th_min = 707130365Smlaier rqp->rq_rio->rio_precstate[i].th_min; 708130365Smlaier fc->q_params[i].th_max = 709130365Smlaier rqp->rq_rio->rio_precstate[i].th_max; 710130365Smlaier } 711130365Smlaier } while (/*CONSTCOND*/ 0); 712130365Smlaier break; 713130365Smlaier 714130365Smlaier case RIO_SETDEFAULTS: 715130365Smlaier do { 716130365Smlaier struct redparams *rp; 717130365Smlaier int i; 718130365Smlaier 719130365Smlaier rp = (struct redparams *)addr; 720130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) 721130365Smlaier default_rio_params[i] = rp[i]; 722130365Smlaier } while (/*CONSTCOND*/ 0); 723130365Smlaier break; 724130365Smlaier 725130365Smlaier default: 726130365Smlaier error = EINVAL; 727130365Smlaier break; 728130365Smlaier } 729130365Smlaier 730130365Smlaier return error; 731130365Smlaier} 732130365Smlaier 733130365Smlaierstatic int 734130365Smlaierrio_detach(rqp) 735130365Smlaier rio_queue_t *rqp; 736130365Smlaier{ 737130365Smlaier rio_queue_t *tmp; 738130365Smlaier int error = 0; 739130365Smlaier 740130365Smlaier if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 741130365Smlaier altq_disable(rqp->rq_ifq); 742130365Smlaier 743130365Smlaier if ((error = altq_detach(rqp->rq_ifq))) 744130365Smlaier return (error); 745130365Smlaier 746130365Smlaier if (rio_list == rqp) 747130365Smlaier rio_list = rqp->rq_next; 748130365Smlaier else { 749130365Smlaier for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) 750130365Smlaier if (tmp->rq_next == rqp) { 751130365Smlaier tmp->rq_next = rqp->rq_next; 752130365Smlaier break; 753130365Smlaier } 754130365Smlaier if (tmp == NULL) 755130365Smlaier printf("rio_detach: no state found in rio_list!\n"); 756130365Smlaier } 757130365Smlaier 758130365Smlaier rio_destroy(rqp->rq_rio); 759184205Sdes free(rqp->rq_q, M_DEVBUF); 760184205Sdes free(rqp, M_DEVBUF); 761130365Smlaier return (error); 762130365Smlaier} 763130365Smlaier 764130365Smlaier/* 765130365Smlaier * rio support routines 766130365Smlaier */ 767130365Smlaierstatic int 768130365Smlaierrio_request(ifq, req, arg) 769130365Smlaier struct ifaltq *ifq; 770130365Smlaier int req; 771130365Smlaier void *arg; 772130365Smlaier{ 773130365Smlaier rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 774130365Smlaier 775130368Smlaier IFQ_LOCK_ASSERT(ifq); 776130368Smlaier 777130365Smlaier switch (req) { 778130365Smlaier case ALTRQ_PURGE: 779130365Smlaier _flushq(rqp->rq_q); 780130365Smlaier if (ALTQ_IS_ENABLED(ifq)) 781130365Smlaier ifq->ifq_len = 0; 782130365Smlaier break; 783130365Smlaier } 784130365Smlaier return (0); 785130365Smlaier} 786130365Smlaier 787130365Smlaier/* 788130365Smlaier * enqueue routine: 789130365Smlaier * 790130365Smlaier * returns: 0 when successfully queued. 791130365Smlaier * ENOBUFS when drop occurs. 792130365Smlaier */ 793130365Smlaierstatic int 794130365Smlaierrio_enqueue(ifq, m, pktattr) 795130365Smlaier struct ifaltq *ifq; 796130365Smlaier struct mbuf *m; 797130365Smlaier struct altq_pktattr *pktattr; 798130365Smlaier{ 799130365Smlaier rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 800130365Smlaier int error = 0; 801130365Smlaier 802130368Smlaier IFQ_LOCK_ASSERT(ifq); 803130368Smlaier 804130365Smlaier if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) 805130365Smlaier ifq->ifq_len++; 806130365Smlaier else 807130365Smlaier error = ENOBUFS; 808130365Smlaier return error; 809130365Smlaier} 810130365Smlaier 811130365Smlaier/* 812130365Smlaier * dequeue routine: 813130365Smlaier * must be called in splimp. 814130365Smlaier * 815130365Smlaier * returns: mbuf dequeued. 816130365Smlaier * NULL when no packet is available in the queue. 817130365Smlaier */ 818130365Smlaier 819130365Smlaierstatic struct mbuf * 820130365Smlaierrio_dequeue(ifq, op) 821130365Smlaier struct ifaltq *ifq; 822130365Smlaier int op; 823130365Smlaier{ 824130365Smlaier rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 825130365Smlaier struct mbuf *m = NULL; 826130365Smlaier 827130368Smlaier IFQ_LOCK_ASSERT(ifq); 828130368Smlaier 829130365Smlaier if (op == ALTDQ_POLL) 830130365Smlaier return qhead(rqp->rq_q); 831130365Smlaier 832130365Smlaier m = rio_getq(rqp->rq_rio, rqp->rq_q); 833130365Smlaier if (m != NULL) 834130365Smlaier ifq->ifq_len--; 835130365Smlaier return m; 836130365Smlaier} 837130365Smlaier 838130365Smlaier#ifdef KLD_MODULE 839130365Smlaier 840130365Smlaierstatic struct altqsw rio_sw = 841130365Smlaier {"rio", rioopen, rioclose, rioioctl}; 842130365Smlaier 843130365SmlaierALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); 844130365SmlaierMODULE_VERSION(altq_rio, 1); 845130365SmlaierMODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); 846130365Smlaier 847130365Smlaier#endif /* KLD_MODULE */ 848130365Smlaier#endif /* ALTQ3_COMPAT */ 849130365Smlaier 850130365Smlaier#endif /* ALTQ_RIO */ 851