1/*	$FreeBSD$	*/
2/*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1997-2003
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the Computer Systems
45 *	Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 *    to endorse or promote products derived from this software without
48 *    specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#include "opt_inet.h"
66#ifdef __FreeBSD__
67#include "opt_inet6.h"
68#endif
69#endif /* __FreeBSD__ || __NetBSD__ */
70#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
71
72#include <sys/param.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/systm.h>
77#include <sys/errno.h>
78#if 1 /* ALTQ3_COMPAT */
79#include <sys/sockio.h>
80#include <sys/proc.h>
81#include <sys/kernel.h>
82#ifdef ALTQ_FLOWVALVE
83#include <sys/queue.h>
84#include <sys/time.h>
85#endif
86#endif /* ALTQ3_COMPAT */
87
88#include <net/if.h>
89
90#include <netinet/in.h>
91#include <netinet/in_systm.h>
92#include <netinet/ip.h>
93#ifdef INET6
94#include <netinet/ip6.h>
95#endif
96
97#include <net/pfvar.h>
98#include <altq/altq.h>
99#include <altq/altq_red.h>
100#ifdef ALTQ3_COMPAT
101#include <altq/altq_conf.h>
102#ifdef ALTQ_FLOWVALVE
103#include <altq/altq_flowvalve.h>
104#endif
105#endif
106
107/*
108 * ALTQ/RED (Random Early Detection) implementation using 32-bit
109 * fixed-point calculation.
110 *
111 * written by kjc using the ns code as a reference.
112 * you can learn more about red and ns from Sally's home page at
113 * http://www-nrg.ee.lbl.gov/floyd/
114 *
115 * most of the red parameter values are fixed in this implementation
116 * to prevent fixed-point overflow/underflow.
117 * if you change the parameters, watch out for overflow/underflow!
118 *
119 * the parameters used are recommended values by Sally.
120 * the corresponding ns config looks:
121 *	q_weight=0.00195
122 *	minthresh=5 maxthresh=15 queue-size=60
123 *	linterm=30
124 *	dropmech=drop-tail
125 *	bytes=false (can't be handled by 32-bit fixed-point)
126 *	doubleq=false dqthresh=false
127 *	wait=true
128 */
129/*
130 * alternative red parameters for a slow link.
131 *
132 * assume the queue length becomes from zero to L and keeps L, it takes
133 * N packets for q_avg to reach 63% of L.
134 * when q_weight is 0.002, N is about 500 packets.
135 * for a slow link like dial-up, 500 packets takes more than 1 minute!
136 * when q_weight is 0.008, N is about 127 packets.
137 * when q_weight is 0.016, N is about 63 packets.
138 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
139 * are allowed for 0.016.
140 * see Sally's paper for more details.
141 */
142/* normal red parameters */
143#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
144				/* q_weight = 0.00195 */
145
146/* red parameters for a slow link */
147#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
148				/* q_weight = 0.0078125 */
149
150/* red parameters for a very slow link (e.g., dialup) */
151#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
152				/* q_weight = 0.015625 */
153
154/* fixed-point uses 12-bit decimal places */
155#define	FP_SHIFT	12	/* fixed-point shift */
156
157/* red parameters for drop probability */
158#define	INV_P_MAX	10	/* inverse of max drop probability */
159#define	TH_MIN		5	/* min threshold */
160#define	TH_MAX		15	/* max threshold */
161
#define	RED_LIMIT	60	/* default max queue length */
163#define	RED_STATS		/* collect statistics */
164
165/*
166 * our default policy for forced-drop is drop-tail.
167 * (in altq-1.1.2 or earlier, the default was random-drop.
168 * but it makes more sense to punish the cause of the surge.)
169 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
170 */
171
172#ifdef ALTQ3_COMPAT
173#ifdef ALTQ_FLOWVALVE
174/*
 * flow-valve is an extension to protect red from unresponsive flows
176 * and to promote end-to-end congestion control.
177 * flow-valve observes the average drop rates of the flows that have
178 * experienced packet drops in the recent past.
179 * when the average drop rate exceeds the threshold, the flow is
180 * blocked by the flow-valve.  the trapped flow should back off
181 * exponentially to escape from the flow-valve.
182 */
183#ifdef RED_RANDOM_DROP
184#error "random-drop can't be used with flow-valve!"
185#endif
186#endif /* ALTQ_FLOWVALVE */
187
188/* red_list keeps all red_queue_t's allocated. */
189static red_queue_t *red_list = NULL;
190
191#endif /* ALTQ3_COMPAT */
192
193/* default red parameter values */
194static int default_th_min = TH_MIN;
195static int default_th_max = TH_MAX;
196static int default_inv_pmax = INV_P_MAX;
197
198#ifdef ALTQ3_COMPAT
199/* internal function prototypes */
200static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
201static struct mbuf *red_dequeue(struct ifaltq *, int);
202static int red_request(struct ifaltq *, int, void *);
203static void red_purgeq(red_queue_t *);
204static int red_detach(red_queue_t *);
205#ifdef ALTQ_FLOWVALVE
206static __inline struct fve *flowlist_lookup(struct flowvalve *,
207			 struct altq_pktattr *, struct timeval *);
208static __inline struct fve *flowlist_reclaim(struct flowvalve *,
209					     struct altq_pktattr *);
210static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
211static __inline int fv_p2f(struct flowvalve *, int);
212#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
213static struct flowvalve *fv_alloc(struct red *);
214#endif
215static void fv_destroy(struct flowvalve *);
216static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
217			struct fve **);
218static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
219			 struct fve *);
220#endif
221#endif /* ALTQ3_COMPAT */
222
223/*
224 * red support routines
225 */
226red_t *
227red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
228   int pkttime)
229{
230	red_t	*rp;
231	int	 w, i;
232	int	 npkts_per_sec;
233
234	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
235	if (rp == NULL)
236		return (NULL);
237
238	if (weight == 0)
239		rp->red_weight = W_WEIGHT;
240	else
241		rp->red_weight = weight;
242
243	/* allocate weight table */
244	rp->red_wtab = wtab_alloc(rp->red_weight);
245	if (rp->red_wtab == NULL) {
246		free(rp, M_DEVBUF);
247		return (NULL);
248	}
249
250	rp->red_avg = 0;
251	rp->red_idle = 1;
252
253	if (inv_pmax == 0)
254		rp->red_inv_pmax = default_inv_pmax;
255	else
256		rp->red_inv_pmax = inv_pmax;
257	if (th_min == 0)
258		rp->red_thmin = default_th_min;
259	else
260		rp->red_thmin = th_min;
261	if (th_max == 0)
262		rp->red_thmax = default_th_max;
263	else
264		rp->red_thmax = th_max;
265
266	rp->red_flags = flags;
267
268	if (pkttime == 0)
269		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
270		rp->red_pkttime = 800;
271	else
272		rp->red_pkttime = pkttime;
273
274	if (weight == 0) {
275		/* when the link is very slow, adjust red parameters */
276		npkts_per_sec = 1000000 / rp->red_pkttime;
277		if (npkts_per_sec < 50) {
278			/* up to about 400Kbps */
279			rp->red_weight = W_WEIGHT_2;
280		} else if (npkts_per_sec < 300) {
281			/* up to about 2.4Mbps */
282			rp->red_weight = W_WEIGHT_1;
283		}
284	}
285
286	/* calculate wshift.  weight must be power of 2 */
287	w = rp->red_weight;
288	for (i = 0; w > 1; i++)
289		w = w >> 1;
290	rp->red_wshift = i;
291	w = 1 << rp->red_wshift;
292	if (w != rp->red_weight) {
293		printf("invalid weight value %d for red! use %d\n",
294		       rp->red_weight, w);
295		rp->red_weight = w;
296	}
297
298	/*
299	 * thmin_s and thmax_s are scaled versions of th_min and th_max
300	 * to be compared with avg.
301	 */
302	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
303	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
304
305	/*
306	 * precompute probability denominator
307	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
308	 */
309	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
310			 * rp->red_inv_pmax) << FP_SHIFT;
311
312	microtime(&rp->red_last);
313	return (rp);
314}
315
/*
 * red_destroy: release a RED state block allocated by red_alloc().
 * drops the reference on the shared weight table and frees rp itself.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	/* tear down the attached flow-valve state, if any */
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
328
/*
 * red_getstats: copy the current RED counters into *sp.
 * q_avg is converted from the internal fixed-point scaling
 * (avg is kept scaled by the weight) back to packets.
 */
void
red_getstats(red_t *rp, struct redstats *sp)
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}
339
/*
 * red_addq: enqueue mbuf m on queue q, subject to RED early dropping
 * or ECN marking.
 *
 * updates the average queue length estimator, then decides between
 * no-drop, unforced (early) drop, and forced drop.
 * returns 0 when the packet was queued, -1 when it was dropped
 * (the dropped mbuf is freed here with m_freem()).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* ask the flow-valve first; a trapped flow is dropped outright */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		/* tell the flow-valve that red dropped from this flow */
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
477
478/*
479 * early-drop probability is calculated as follows:
480 *   prob = p_max * (avg - th_min) / (th_max - th_min)
481 *   prob_a = prob / (2 - count*prob)
482 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
483 * here prob_a increases as successive undrop count increases.
484 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
485 * becomes 1 when (count >= (2 / prob))).
486 */
487int
488drop_early(int fp_len, int fp_probd, int count)
489{
490	int	d;		/* denominator of drop-probability */
491
492	d = fp_probd - count * fp_len;
493	if (d <= 0)
494		/* count exceeds the hard limit: drop or mark */
495		return (1);
496
497	/*
498	 * now the range of d is [1..600] in fixed-point. (when
499	 * th_max-th_min=10 and p_max=1/30)
500	 * drop probability = (avg - TH_MIN) / d
501	 */
502
503	if ((arc4random() % d) < fp_len) {
504		/* drop or mark */
505		return (1);
506	}
507	/* no drop/mark */
508	return (0);
509}
510
511/*
512 * try to mark CE bit to the packet.
513 *    returns 1 if successfully marked, 0 otherwise.
514 */
515int
516mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
517{
518	struct mbuf	*m0;
519	struct pf_mtag	*at;
520	void		*hdr;
521
522	at = pf_find_mtag(m);
523	if (at != NULL) {
524		hdr = at->hdr;
525#ifdef ALTQ3_COMPAT
526	} else if (pktattr != NULL) {
527		af = pktattr->pattr_af;
528		hdr = pktattr->pattr_hdr;
529#endif /* ALTQ3_COMPAT */
530	} else
531		return (0);
532
533	/* verify that pattr_hdr is within the mbuf data */
534	for (m0 = m; m0 != NULL; m0 = m0->m_next)
535		if (((caddr_t)hdr >= m0->m_data) &&
536		    ((caddr_t)hdr < m0->m_data + m0->m_len))
537			break;
538	if (m0 == NULL) {
539		/* ick, tag info is stale */
540		return (0);
541	}
542
543	switch (((struct ip *)hdr)->ip_v) {
544	case IPVERSION:
545		if (flags & REDF_ECN4) {
546			struct ip *ip = hdr;
547			u_int8_t otos;
548			int sum;
549
550			if (ip->ip_v != 4)
551				return (0);	/* version mismatch! */
552
553			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
554				return (0);	/* not-ECT */
555			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
556				return (1);	/* already marked */
557
558			/*
559			 * ecn-capable but not marked,
560			 * mark CE and update checksum
561			 */
562			otos = ip->ip_tos;
563			ip->ip_tos |= IPTOS_ECN_CE;
564			/*
565			 * update checksum (from RFC1624)
566			 *	   HC' = ~(~HC + ~m + m')
567			 */
568			sum = ~ntohs(ip->ip_sum) & 0xffff;
569			sum += (~otos & 0xffff) + ip->ip_tos;
570			sum = (sum >> 16) + (sum & 0xffff);
571			sum += (sum >> 16);  /* add carry */
572			ip->ip_sum = htons(~sum & 0xffff);
573			return (1);
574		}
575		break;
576#ifdef INET6
577	case (IPV6_VERSION >> 4):
578		if (flags & REDF_ECN6) {
579			struct ip6_hdr *ip6 = hdr;
580			u_int32_t flowlabel;
581
582			flowlabel = ntohl(ip6->ip6_flow);
583			if ((flowlabel >> 28) != 6)
584				return (0);	/* version mismatch! */
585			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
586			    (IPTOS_ECN_NOTECT << 20))
587				return (0);	/* not-ECT */
588			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
589			    (IPTOS_ECN_CE << 20))
590				return (1);	/* already marked */
591			/*
592			 * ecn-capable but not marked,  mark CE
593			 */
594			flowlabel |= (IPTOS_ECN_CE << 20);
595			ip6->ip6_flow = htonl(flowlabel);
596			return (1);
597		}
598		break;
599#endif  /* INET6 */
600	}
601
602	/* not marked */
603	return (0);
604}
605
606struct mbuf *
607red_getq(rp, q)
608	red_t *rp;
609	class_queue_t *q;
610{
611	struct mbuf *m;
612
613	if ((m = _getq(q)) == NULL) {
614		if (rp->red_idle == 0) {
615			rp->red_idle = 1;
616			microtime(&rp->red_last);
617		}
618		return NULL;
619	}
620
621	rp->red_idle = 0;
622	return (m);
623}
624
625/*
626 * helper routine to calibrate avg during idle.
627 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
628 * here Wq = 1/weight and the code assumes Wq is close to zero.
629 *
630 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
631 */
632static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
633
634struct wtab *
635wtab_alloc(int weight)
636{
637	struct wtab	*w;
638	int		 i;
639
640	for (w = wtab_list; w != NULL; w = w->w_next)
641		if (w->w_weight == weight) {
642			w->w_refcount++;
643			return (w);
644		}
645
646	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
647	if (w == NULL)
648		return (NULL);
649	w->w_weight = weight;
650	w->w_refcount = 1;
651	w->w_next = wtab_list;
652	wtab_list = w;
653
654	/* initialize the weight table */
655	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
656	for (i = 1; i < 32; i++) {
657		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
658		if (w->w_tab[i] == 0 && w->w_param_max == 0)
659			w->w_param_max = 1 << i;
660	}
661
662	return (w);
663}
664
665int
666wtab_destroy(struct wtab *w)
667{
668	struct wtab	*prev;
669
670	if (--w->w_refcount > 0)
671		return (0);
672
673	if (wtab_list == w)
674		wtab_list = w->w_next;
675	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
676		if (prev->w_next == w) {
677			prev->w_next = w->w_next;
678			break;
679		}
680
681	free(w, M_DEVBUF);
682	return (0);
683}
684
685int32_t
686pow_w(struct wtab *w, int n)
687{
688	int	i, bit;
689	int32_t	val;
690
691	if (n >= w->w_param_max)
692		return (0);
693
694	val = 1 << FP_SHIFT;
695	if (n <= 0)
696		return (val);
697
698	bit = 1;
699	i = 0;
700	while (n) {
701		if (n & bit) {
702			val = (val * w->w_tab[i]) >> FP_SHIFT;
703			n &= ~bit;
704		}
705		i++;
706		bit <<=  1;
707	}
708	return (val);
709}
710
711#ifdef ALTQ3_COMPAT
712/*
713 * red device interface
714 */
715altqdev_decl(red);
716
/*
 * red device open routine.
 * no per-open state is required; all queue state is created by the
 * RED_IF_ATTACH ioctl.  always succeeds.
 */
int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
730
/*
 * red device close routine.
 * detaches and destroys every red_queue_t remaining on red_list.
 * returns the first error reported by red_detach(), if any.
 */
int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;	/* remember the first failure */
	}

	return error;
}
753
754int
755redioctl(dev, cmd, addr, flag, p)
756	dev_t dev;
757	ioctlcmd_t cmd;
758	caddr_t addr;
759	int flag;
760#if (__FreeBSD_version > 500000)
761	struct thread *p;
762#else
763	struct proc *p;
764#endif
765{
766	red_queue_t *rqp;
767	struct red_interface *ifacep;
768	struct ifnet *ifp;
769	int	error = 0;
770
771	/* check super-user privilege */
772	switch (cmd) {
773	case RED_GETSTATS:
774		break;
775	default:
776#if (__FreeBSD_version > 700000)
777		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
778#elsif (__FreeBSD_version > 400000)
779		if ((error = suser(p)) != 0)
780#else
781		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
782#endif
783			return (error);
784		break;
785	}
786
787	switch (cmd) {
788
789	case RED_ENABLE:
790		ifacep = (struct red_interface *)addr;
791		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
792			error = EBADF;
793			break;
794		}
795		error = altq_enable(rqp->rq_ifq);
796		break;
797
798	case RED_DISABLE:
799		ifacep = (struct red_interface *)addr;
800		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
801			error = EBADF;
802			break;
803		}
804		error = altq_disable(rqp->rq_ifq);
805		break;
806
807	case RED_IF_ATTACH:
808		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
809		if (ifp == NULL) {
810			error = ENXIO;
811			break;
812		}
813
814		/* allocate and initialize red_queue_t */
815		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
816		if (rqp == NULL) {
817			error = ENOMEM;
818			break;
819		}
820		bzero(rqp, sizeof(red_queue_t));
821
822		rqp->rq_q = malloc(sizeof(class_queue_t),
823		       M_DEVBUF, M_WAITOK);
824		if (rqp->rq_q == NULL) {
825			free(rqp, M_DEVBUF);
826			error = ENOMEM;
827			break;
828		}
829		bzero(rqp->rq_q, sizeof(class_queue_t));
830
831		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
832		if (rqp->rq_red == NULL) {
833			free(rqp->rq_q, M_DEVBUF);
834			free(rqp, M_DEVBUF);
835			error = ENOMEM;
836			break;
837		}
838
839		rqp->rq_ifq = &ifp->if_snd;
840		qtail(rqp->rq_q) = NULL;
841		qlen(rqp->rq_q) = 0;
842		qlimit(rqp->rq_q) = RED_LIMIT;
843		qtype(rqp->rq_q) = Q_RED;
844
845		/*
846		 * set RED to this ifnet structure.
847		 */
848		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
849				    red_enqueue, red_dequeue, red_request,
850				    NULL, NULL);
851		if (error) {
852			red_destroy(rqp->rq_red);
853			free(rqp->rq_q, M_DEVBUF);
854			free(rqp, M_DEVBUF);
855			break;
856		}
857
858		/* add this state to the red list */
859		rqp->rq_next = red_list;
860		red_list = rqp;
861		break;
862
863	case RED_IF_DETACH:
864		ifacep = (struct red_interface *)addr;
865		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
866			error = EBADF;
867			break;
868		}
869		error = red_detach(rqp);
870		break;
871
872	case RED_GETSTATS:
873		do {
874			struct red_stats *q_stats;
875			red_t *rp;
876
877			q_stats = (struct red_stats *)addr;
878			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
879					     ALTQT_RED)) == NULL) {
880				error = EBADF;
881				break;
882			}
883
884			q_stats->q_len 	   = qlen(rqp->rq_q);
885			q_stats->q_limit   = qlimit(rqp->rq_q);
886
887			rp = rqp->rq_red;
888			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
889			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
890			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
891			q_stats->drop_forced   = rp->red_stats.drop_forced;
892			q_stats->drop_unforced = rp->red_stats.drop_unforced;
893			q_stats->marked_packets = rp->red_stats.marked_packets;
894
895			q_stats->weight		= rp->red_weight;
896			q_stats->inv_pmax	= rp->red_inv_pmax;
897			q_stats->th_min		= rp->red_thmin;
898			q_stats->th_max		= rp->red_thmax;
899
900#ifdef ALTQ_FLOWVALVE
901			if (rp->red_flowvalve != NULL) {
902				struct flowvalve *fv = rp->red_flowvalve;
903				q_stats->fv_flows    = fv->fv_flows;
904				q_stats->fv_pass     = fv->fv_stats.pass;
905				q_stats->fv_predrop  = fv->fv_stats.predrop;
906				q_stats->fv_alloc    = fv->fv_stats.alloc;
907				q_stats->fv_escape   = fv->fv_stats.escape;
908			} else {
909#endif /* ALTQ_FLOWVALVE */
910				q_stats->fv_flows    = 0;
911				q_stats->fv_pass     = 0;
912				q_stats->fv_predrop  = 0;
913				q_stats->fv_alloc    = 0;
914				q_stats->fv_escape   = 0;
915#ifdef ALTQ_FLOWVALVE
916			}
917#endif /* ALTQ_FLOWVALVE */
918		} while (/*CONSTCOND*/ 0);
919		break;
920
921	case RED_CONFIG:
922		do {
923			struct red_conf *fc;
924			red_t *new;
925			int s, limit;
926
927			fc = (struct red_conf *)addr;
928			if ((rqp = altq_lookup(fc->iface.red_ifname,
929					       ALTQT_RED)) == NULL) {
930				error = EBADF;
931				break;
932			}
933			new = red_alloc(fc->red_weight,
934					fc->red_inv_pmax,
935					fc->red_thmin,
936					fc->red_thmax,
937					fc->red_flags,
938					fc->red_pkttime);
939			if (new == NULL) {
940				error = ENOMEM;
941				break;
942			}
943
944#ifdef __NetBSD__
945			s = splnet();
946#else
947			s = splimp();
948#endif
949			red_purgeq(rqp);
950			limit = fc->red_limit;
951			if (limit < fc->red_thmax)
952				limit = fc->red_thmax;
953			qlimit(rqp->rq_q) = limit;
954			fc->red_limit = limit;	/* write back the new value */
955
956			red_destroy(rqp->rq_red);
957			rqp->rq_red = new;
958
959			splx(s);
960
961			/* write back new values */
962			fc->red_limit = limit;
963			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
964			fc->red_thmin = rqp->rq_red->red_thmin;
965			fc->red_thmax = rqp->rq_red->red_thmax;
966
967		} while (/*CONSTCOND*/ 0);
968		break;
969
970	case RED_SETDEFAULTS:
971		do {
972			struct redparams *rp;
973
974			rp = (struct redparams *)addr;
975
976			default_th_min = rp->th_min;
977			default_th_max = rp->th_max;
978			default_inv_pmax = rp->inv_pmax;
979		} while (/*CONSTCOND*/ 0);
980		break;
981
982	default:
983		error = EINVAL;
984		break;
985	}
986	return error;
987}
988
989static int
990red_detach(rqp)
991	red_queue_t *rqp;
992{
993	red_queue_t *tmp;
994	int error = 0;
995
996	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
997		altq_disable(rqp->rq_ifq);
998
999	if ((error = altq_detach(rqp->rq_ifq)))
1000		return (error);
1001
1002	if (red_list == rqp)
1003		red_list = rqp->rq_next;
1004	else {
1005		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1006			if (tmp->rq_next == rqp) {
1007				tmp->rq_next = rqp->rq_next;
1008				break;
1009			}
1010		if (tmp == NULL)
1011			printf("red_detach: no state found in red_list!\n");
1012	}
1013
1014	red_destroy(rqp->rq_red);
1015	free(rqp->rq_q, M_DEVBUF);
1016	free(rqp, M_DEVBUF);
1017	return (error);
1018}
1019
1020/*
1021 * enqueue routine:
1022 *
1023 *	returns: 0 when successfully queued.
1024 *		 ENOBUFS when drop occurs.
1025 */
1026static int
1027red_enqueue(ifq, m, pktattr)
1028	struct ifaltq *ifq;
1029	struct mbuf *m;
1030	struct altq_pktattr *pktattr;
1031{
1032	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1033
1034	IFQ_LOCK_ASSERT(ifq);
1035
1036	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1037		return ENOBUFS;
1038	ifq->ifq_len++;
1039	return 0;
1040}
1041
1042/*
1043 * dequeue routine:
1044 *	must be called in splimp.
1045 *
1046 *	returns: mbuf dequeued.
1047 *		 NULL when no packet is available in the queue.
1048 */
1049
1050static struct mbuf *
1051red_dequeue(ifq, op)
1052	struct ifaltq *ifq;
1053	int op;
1054{
1055	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1056	struct mbuf *m;
1057
1058	IFQ_LOCK_ASSERT(ifq);
1059
1060	if (op == ALTDQ_POLL)
1061		return qhead(rqp->rq_q);
1062
1063	/* op == ALTDQ_REMOVE */
1064	m =  red_getq(rqp->rq_red, rqp->rq_q);
1065	if (m != NULL)
1066		ifq->ifq_len--;
1067	return (m);
1068}
1069
1070static int
1071red_request(ifq, req, arg)
1072	struct ifaltq *ifq;
1073	int req;
1074	void *arg;
1075{
1076	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1077
1078	IFQ_LOCK_ASSERT(ifq);
1079
1080	switch (req) {
1081	case ALTRQ_PURGE:
1082		red_purgeq(rqp);
1083		break;
1084	}
1085	return (0);
1086}
1087
1088static void
1089red_purgeq(rqp)
1090	red_queue_t *rqp;
1091{
1092	_flushq(rqp->rq_q);
1093	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1094		rqp->rq_ifq->ifq_len = 0;
1095}
1096
1097#ifdef ALTQ_FLOWVALVE
1098
1099#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
1100#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
1101#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
1102#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
1103#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
1104#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
1105
1106#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
1107#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
1108
1109#define	FV_N			10	/* update fve_f every FV_N packets */
1110
1111#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
1112#define	FV_TTHRESH		3  /* time threshold to delete fve */
1113#define	FV_ALPHA		5  /* extra packet count */
1114
1115#define	FV_STATS
1116
1117#if (__FreeBSD_version > 300000)
1118#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
1119#else
1120#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
1121#endif
1122
1123/*
1124 * Brtt table: 127 entry table to convert drop rate (p) to
1125 * the corresponding bandwidth fraction (f)
1126 * the following equation is implemented to use scaled values,
1127 * fve_p and fve_f, in the fixed point format.
1128 *
1129 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1130 *   f = Brtt(p) / (max_th + alpha)
1131 */
1132#define	BRTT_SIZE	128
1133#define	BRTT_SHIFT	12
1134#define	BRTT_MASK	0x0007f000
1135#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
1136
/* precomputed Brtt(p)/(max_th+alpha) values, indexed by scaled drop rate */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1155
1156static __inline struct fve *
1157flowlist_lookup(fv, pktattr, now)
1158	struct flowvalve *fv;
1159	struct altq_pktattr *pktattr;
1160	struct timeval *now;
1161{
1162	struct fve *fve;
1163	int flows;
1164	struct ip *ip;
1165#ifdef INET6
1166	struct ip6_hdr *ip6;
1167#endif
1168	struct timeval tthresh;
1169
1170	if (pktattr == NULL)
1171		return (NULL);
1172
1173	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1174	flows = 0;
1175	/*
1176	 * search the flow list
1177	 */
1178	switch (pktattr->pattr_af) {
1179	case AF_INET:
1180		ip = (struct ip *)pktattr->pattr_hdr;
1181		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1182			if (fve->fve_lastdrop.tv_sec == 0)
1183				break;
1184			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1185				fve->fve_lastdrop.tv_sec = 0;
1186				break;
1187			}
1188			if (fve->fve_flow.flow_af == AF_INET &&
1189			    fve->fve_flow.flow_ip.ip_src.s_addr ==
1190			    ip->ip_src.s_addr &&
1191			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
1192			    ip->ip_dst.s_addr)
1193				return (fve);
1194			flows++;
1195		}
1196		break;
1197#ifdef INET6
1198	case AF_INET6:
1199		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1200		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1201			if (fve->fve_lastdrop.tv_sec == 0)
1202				break;
1203			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1204				fve->fve_lastdrop.tv_sec = 0;
1205				break;
1206			}
1207			if (fve->fve_flow.flow_af == AF_INET6 &&
1208			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1209					       &ip6->ip6_src) &&
1210			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1211					       &ip6->ip6_dst))
1212				return (fve);
1213			flows++;
1214		}
1215		break;
1216#endif /* INET6 */
1217
1218	default:
1219		/* unknown protocol.  no drop. */
1220		return (NULL);
1221	}
1222	fv->fv_flows = flows;	/* save the number of active fve's */
1223	return (NULL);
1224}
1225
1226static __inline struct fve *
1227flowlist_reclaim(fv, pktattr)
1228	struct flowvalve *fv;
1229	struct altq_pktattr *pktattr;
1230{
1231	struct fve *fve;
1232	struct ip *ip;
1233#ifdef INET6
1234	struct ip6_hdr *ip6;
1235#endif
1236
1237	/*
1238	 * get an entry from the tail of the LRU list.
1239	 */
1240	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1241
1242	switch (pktattr->pattr_af) {
1243	case AF_INET:
1244		ip = (struct ip *)pktattr->pattr_hdr;
1245		fve->fve_flow.flow_af = AF_INET;
1246		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1247		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1248		break;
1249#ifdef INET6
1250	case AF_INET6:
1251		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1252		fve->fve_flow.flow_af = AF_INET6;
1253		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1254		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1255		break;
1256#endif
1257	}
1258
1259	fve->fve_state = Green;
1260	fve->fve_p = 0.0;
1261	fve->fve_f = 0.0;
1262	fve->fve_ifseq = fv->fv_ifseq - 1;
1263	fve->fve_count = 0;
1264
1265	fv->fv_flows++;
1266#ifdef FV_STATS
1267	fv->fv_stats.alloc++;
1268#endif
1269	return (fve);
1270}
1271
1272static __inline void
1273flowlist_move_to_head(fv, fve)
1274	struct flowvalve *fv;
1275	struct fve *fve;
1276{
1277	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1278		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1279		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1280	}
1281}
1282
#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 *
 * Allocates the flowvalve, its fixed-size array of FV_FLOWLISTSIZE
 * flow entries (threaded onto the LRU list), and the precomputed
 * drop-rate-to-fraction (p2f) table derived from brtt_tab.
 * Returns NULL on allocation failure.
 *
 * NOTE(review): malloc(9) with M_WAITOK does not return NULL on
 * FreeBSD, so the NULL checks below are dead code; if this function is
 * ever re-enabled, M_ZERO could also replace the bzero() calls.
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		/* tv_sec == 0 marks an unused entry (see flowlist_lookup) */
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * entry 0 is intentionally left unset: fv_p2f() never reads it.
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif
1343
1344static void fv_destroy(fv)
1345	struct flowvalve *fv;
1346{
1347	free(fv->fv_p2ftab, M_DEVBUF);
1348	free(fv->fv_fves, M_DEVBUF);
1349	free(fv, M_DEVBUF);
1350}
1351
1352static __inline int
1353fv_p2f(fv, p)
1354	struct flowvalve	*fv;
1355	int	p;
1356{
1357	int val, f;
1358
1359	if (p >= BRTT_PMAX)
1360		f = fv->fv_p2ftab[BRTT_SIZE-1];
1361	else if ((val = (p & BRTT_MASK)))
1362		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1363	else
1364		f = fv->fv_p2ftab[1];
1365	return (f);
1366}
1367
1368/*
1369 * check if an arriving packet should be pre-dropped.
1370 * called from red_addq() when a packet arrives.
1371 * returns 1 when the packet should be pre-dropped.
1372 * should be called in splimp.
1373 */
1374static int
1375fv_checkflow(fv, pktattr, fcache)
1376	struct flowvalve *fv;
1377	struct altq_pktattr *pktattr;
1378	struct fve **fcache;
1379{
1380	struct fve *fve;
1381	struct timeval now;
1382
1383	fv->fv_ifseq++;
1384	FV_TIMESTAMP(&now);
1385
1386	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1387		/* no matching entry in the flowlist */
1388		return (0);
1389
1390	*fcache = fve;
1391
1392	/* update fraction f for every FV_N packets */
1393	if (++fve->fve_count == FV_N) {
1394		/*
1395		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1396		 */
1397		fve->fve_f =
1398			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1399			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
1400		fve->fve_ifseq = fv->fv_ifseq;
1401		fve->fve_count = 0;
1402	}
1403
1404	/*
1405	 * overpumping test
1406	 */
1407	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1408		int fthresh;
1409
1410		/* calculate a threshold */
1411		fthresh = fv_p2f(fv, fve->fve_p);
1412		if (fve->fve_f > fthresh)
1413			fve->fve_state = Red;
1414	}
1415
1416	if (fve->fve_state == Red) {
1417		/*
1418		 * backoff test
1419		 */
1420		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1421			/* no drop for at least FV_BACKOFFTHRESH sec */
1422			fve->fve_p = 0;
1423			fve->fve_state = Green;
1424#ifdef FV_STATS
1425			fv->fv_stats.escape++;
1426#endif
1427		} else {
1428			/* block this flow */
1429			flowlist_move_to_head(fv, fve);
1430			fve->fve_lastdrop = now;
1431#ifdef FV_STATS
1432			fv->fv_stats.predrop++;
1433#endif
1434			return (1);
1435		}
1436	}
1437
1438	/*
1439	 * p = (1 - Wp) * p
1440	 */
1441	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1442	if (fve->fve_p < 0)
1443		fve->fve_p = 0;
1444#ifdef FV_STATS
1445	fv->fv_stats.pass++;
1446#endif
1447	return (0);
1448}
1449
1450/*
1451 * called from red_addq when a packet is dropped by red.
1452 * should be called in splimp.
1453 */
1454static void fv_dropbyred(fv, pktattr, fcache)
1455	struct flowvalve *fv;
1456	struct altq_pktattr *pktattr;
1457	struct fve *fcache;
1458{
1459	struct fve *fve;
1460	struct timeval now;
1461
1462	if (pktattr == NULL)
1463		return;
1464	FV_TIMESTAMP(&now);
1465
1466	if (fcache != NULL)
1467		/* the fve of this packet is already cached */
1468		fve = fcache;
1469	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1470		fve = flowlist_reclaim(fv, pktattr);
1471
1472	flowlist_move_to_head(fv, fve);
1473
1474	/*
1475	 * update p:  the following line cancels the update
1476	 *	      in fv_checkflow() and calculate
1477	 *	p = Wp + (1 - Wp) * p
1478	 */
1479	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1480
1481	fve->fve_lastdrop = now;
1482}
1483
1484#endif /* ALTQ_FLOWVALVE */
1485
1486#ifdef KLD_MODULE
1487
1488static struct altqsw red_sw =
1489	{"red", redopen, redclose, redioctl};
1490
1491ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1492MODULE_VERSION(altq_red, 1);
1493
1494#endif /* KLD_MODULE */
1495#endif /* ALTQ3_COMPAT */
1496
1497#endif /* ALTQ_RED */
1498