altq_cdnr.c revision 263086
1/*	$FreeBSD: stable/10/sys/contrib/altq/altq/altq_cdnr.c 263086 2014-03-12 10:45:58Z glebius $	*/
2/*	$KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $	*/
3
4/*
5 * Copyright (C) 1999-2002
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#if defined(__FreeBSD__) || defined(__NetBSD__)
31#include "opt_altq.h"
32#include "opt_inet.h"
33#ifdef __FreeBSD__
34#include "opt_inet6.h"
35#endif
36#endif /* __FreeBSD__ || __NetBSD__ */
37
38#include <sys/param.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/socket.h>
42#include <sys/sockio.h>
43#include <sys/systm.h>
44#include <sys/proc.h>
45#include <sys/errno.h>
46#include <sys/kernel.h>
47#include <sys/queue.h>
48
49#include <net/if.h>
50#include <net/if_types.h>
51#include <netinet/in.h>
52#include <netinet/in_systm.h>
53#include <netinet/ip.h>
54#ifdef INET6
55#include <netinet/ip6.h>
56#endif
57
58#include <altq/if_altq.h>
59#include <altq/altq.h>
60#ifdef ALTQ3_COMPAT
61#include <altq/altq_conf.h>
62#endif
63#include <altq/altq_cdnr.h>
64
65#ifdef ALTQ3_COMPAT
66/*
67 * diffserv traffic conditioning module
68 */
69
/*
 * NOTE(review): altq_cdnr_enabled is not referenced in the visible part of
 * this file; presumably a global on/off indicator for the conditioner --
 * confirm against its users.
 */
int altq_cdnr_enabled = 0;

/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
#ifdef ALTQ_CDNR

/*
 * tcb_list links every top-level conditioner (struct top_cdnr) through
 * tc_next; top_create() inserts entries and top_destroy() removes them.
 */
static LIST_HEAD(, top_cdnr) tcb_list;

/* per-packet entry point */
static int altq_cdnr_input(struct mbuf *, int);
/* lookup, handle translation, block allocation and action helpers */
static struct top_cdnr *tcb_lookup(char *ifname);
static struct cdnr_block *cdnr_handle2cb(u_long);
static u_long cdnr_cb2handle(struct cdnr_block *);
static void *cdnr_cballoc(struct top_cdnr *, int,
       struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
static void cdnr_cbdestroy(void *);
static int tca_verify_action(struct tc_action *);
static void tca_import_action(struct tc_action *, struct tc_action *);
static void tca_invalidate_action(struct tc_action *);

/* constructors, destructors and input functions of the conditioner elements */
static int generic_element_destroy(struct cdnr_block *);
static struct top_cdnr *top_create(struct ifaltq *);
static int top_destroy(struct top_cdnr *);
static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
static int element_destroy(struct cdnr_block *);
static void tb_import_profile(struct tbe *, struct tb_profile *);
static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
				  struct tc_action *, struct tc_action *);
static int tbm_destroy(struct tbmeter *);
static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
static struct trtcm *trtcm_create(struct top_cdnr *,
		  struct tb_profile *, struct tb_profile *,
		  struct tc_action *, struct tc_action *, struct tc_action *,
		  int);
static int trtcm_destroy(struct trtcm *);
static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
static struct tswtcm *tswtcm_create(struct top_cdnr *,
		  u_int32_t, u_int32_t, u_int32_t,
		  struct tc_action *, struct tc_action *, struct tc_action *);
static int tswtcm_destroy(struct tswtcm *);
static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);

/* handlers for the individual ioctl requests */
static int cdnrcmd_if_attach(char *);
static int cdnrcmd_if_detach(char *);
static int cdnrcmd_add_element(struct cdnr_add_element *);
static int cdnrcmd_delete_element(struct cdnr_delete_element *);
static int cdnrcmd_add_filter(struct cdnr_add_filter *);
static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
static int cdnrcmd_get_stats(struct cdnr_get_stats *);

/*
 * NOTE(review): altqdev_decl() is defined outside this file; presumably it
 * declares the cdnropen/cdnrclose/cdnrioctl device entry points -- confirm.
 */
altqdev_decl(cdnr);
128
129/*
130 * top level input function called from ip_input.
131 * should be called before converting header fields to host-byte-order.
132 */
133int
134altq_cdnr_input(m, af)
135	struct mbuf	*m;
136	int		af;	/* address family */
137{
138	struct ifnet		*ifp;
139	struct ip		*ip;
140	struct top_cdnr		*top;
141	struct tc_action	*tca;
142	struct cdnr_block	*cb;
143	struct cdnr_pktinfo	pktinfo;
144
145	ifp = m->m_pkthdr.rcvif;
146	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
147		/* traffic conditioner is not enabled on this interface */
148		return (1);
149
150	top = ifp->if_snd.altq_cdnr;
151
152	ip = mtod(m, struct ip *);
153#ifdef INET6
154	if (af == AF_INET6) {
155		u_int32_t flowlabel;
156
157		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
158		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
159	} else
160#endif
161		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
162	pktinfo.pkt_len = m_pktlen(m);
163
164	tca = NULL;
165
166	cb = acc_classify(&top->tc_classifier, m, af);
167	if (cb != NULL)
168		tca = &cb->cb_action;
169
170	if (tca == NULL)
171		tca = &top->tc_block.cb_action;
172
173	while (1) {
174		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
175
176		switch (tca->tca_code) {
177		case TCACODE_PASS:
178			return (1);
179		case TCACODE_DROP:
180			m_freem(m);
181			return (0);
182		case TCACODE_RETURN:
183			return (0);
184		case TCACODE_MARK:
185#ifdef INET6
186			if (af == AF_INET6) {
187				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
188				u_int32_t flowlabel;
189
190				flowlabel = ntohl(ip6->ip6_flow);
191				flowlabel = (tca->tca_dscp << 20) |
192					(flowlabel & ~(DSCP_MASK << 20));
193				ip6->ip6_flow = htonl(flowlabel);
194			} else
195#endif
196				ip->ip_tos = tca->tca_dscp |
197					(ip->ip_tos & DSCP_CUMASK);
198			return (1);
199		case TCACODE_NEXT:
200			cb = tca->tca_next;
201			tca = (*cb->cb_input)(cb, &pktinfo);
202			break;
203		case TCACODE_NONE:
204		default:
205			return (1);
206		}
207	}
208}
209
210static struct top_cdnr *
211tcb_lookup(ifname)
212	char *ifname;
213{
214	struct top_cdnr *top;
215	struct ifnet *ifp;
216
217	if ((ifp = ifunit(ifname)) != NULL)
218		LIST_FOREACH(top, &tcb_list, tc_next)
219			if (top->tc_ifq->altq_ifp == ifp)
220				return (top);
221	return (NULL);
222}
223
224static struct cdnr_block *
225cdnr_handle2cb(handle)
226	u_long handle;
227{
228	struct cdnr_block *cb;
229
230	cb = (struct cdnr_block *)handle;
231	if (handle != ALIGN(cb))
232		return (NULL);
233
234	if (cb == NULL || cb->cb_handle != handle)
235		return (NULL);
236	return (cb);
237}
238
239static u_long
240cdnr_cb2handle(cb)
241	struct cdnr_block *cb;
242{
243	return (cb->cb_handle);
244}
245
246static void *
247cdnr_cballoc(top, type, input_func)
248	struct top_cdnr *top;
249	int type;
250	struct tc_action *(*input_func)(struct cdnr_block *,
251					struct cdnr_pktinfo *);
252{
253	struct cdnr_block *cb;
254	int size;
255
256	switch (type) {
257	case TCETYPE_TOP:
258		size = sizeof(struct top_cdnr);
259		break;
260	case TCETYPE_ELEMENT:
261		size = sizeof(struct cdnr_block);
262		break;
263	case TCETYPE_TBMETER:
264		size = sizeof(struct tbmeter);
265		break;
266	case TCETYPE_TRTCM:
267		size = sizeof(struct trtcm);
268		break;
269	case TCETYPE_TSWTCM:
270		size = sizeof(struct tswtcm);
271		break;
272	default:
273		return (NULL);
274	}
275
276	cb = malloc(size, M_DEVBUF, M_WAITOK);
277	if (cb == NULL)
278		return (NULL);
279	bzero(cb, size);
280
281	cb->cb_len = size;
282	cb->cb_type = type;
283	cb->cb_ref = 0;
284	cb->cb_handle = (u_long)cb;
285	if (top == NULL)
286		cb->cb_top = (struct top_cdnr *)cb;
287	else
288		cb->cb_top = top;
289
290	if (input_func != NULL) {
291		/*
292		 * if this cdnr has an action function,
293		 * make tc_action to call itself.
294		 */
295		cb->cb_action.tca_code = TCACODE_NEXT;
296		cb->cb_action.tca_next = cb;
297		cb->cb_input = input_func;
298	} else
299		cb->cb_action.tca_code = TCACODE_NONE;
300
301	/* if this isn't top, register the element to the top level cdnr */
302	if (top != NULL)
303		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
304
305	return ((void *)cb);
306}
307
308static void
309cdnr_cbdestroy(cblock)
310	void *cblock;
311{
312	struct cdnr_block *cb = cblock;
313
314	/* delete filters belonging to this cdnr */
315	acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
316
317	/* remove from the top level cdnr */
318	if (cb->cb_top != cblock)
319		LIST_REMOVE(cb, cb_next);
320
321	free(cb, M_DEVBUF);
322}
323
324/*
325 * conditioner common destroy routine
326 */
327static int
328generic_element_destroy(cb)
329	struct cdnr_block *cb;
330{
331	int error = 0;
332
333	switch (cb->cb_type) {
334	case TCETYPE_TOP:
335		error = top_destroy((struct top_cdnr *)cb);
336		break;
337	case TCETYPE_ELEMENT:
338		error = element_destroy(cb);
339		break;
340	case TCETYPE_TBMETER:
341		error = tbm_destroy((struct tbmeter *)cb);
342		break;
343	case TCETYPE_TRTCM:
344		error = trtcm_destroy((struct trtcm *)cb);
345		break;
346	case TCETYPE_TSWTCM:
347		error = tswtcm_destroy((struct tswtcm *)cb);
348		break;
349	default:
350		error = EINVAL;
351	}
352	return (error);
353}
354
355static int
356tca_verify_action(utca)
357	struct tc_action *utca;
358{
359	switch (utca->tca_code) {
360	case TCACODE_PASS:
361	case TCACODE_DROP:
362	case TCACODE_MARK:
363		/* these are ok */
364		break;
365
366	case TCACODE_HANDLE:
367		/* verify handle value */
368		if (cdnr_handle2cb(utca->tca_handle) == NULL)
369			return (-1);
370		break;
371
372	case TCACODE_NONE:
373	case TCACODE_RETURN:
374	case TCACODE_NEXT:
375	default:
376		/* should not be passed from a user */
377		return (-1);
378	}
379	return (0);
380}
381
382static void
383tca_import_action(ktca, utca)
384	struct tc_action *ktca, *utca;
385{
386	struct cdnr_block *cb;
387
388	*ktca = *utca;
389	if (ktca->tca_code == TCACODE_HANDLE) {
390		cb = cdnr_handle2cb(ktca->tca_handle);
391		if (cb == NULL) {
392			ktca->tca_code = TCACODE_NONE;
393			return;
394		}
395		ktca->tca_code = TCACODE_NEXT;
396		ktca->tca_next = cb;
397		cb->cb_ref++;
398	} else if (ktca->tca_code == TCACODE_MARK) {
399		ktca->tca_dscp &= DSCP_MASK;
400	}
401	return;
402}
403
404static void
405tca_invalidate_action(tca)
406	struct tc_action *tca;
407{
408	struct cdnr_block *cb;
409
410	if (tca->tca_code == TCACODE_NEXT) {
411		cb = tca->tca_next;
412		if (cb == NULL)
413			return;
414		cb->cb_ref--;
415	}
416	tca->tca_code = TCACODE_NONE;
417}
418
419/*
420 * top level traffic conditioner
421 */
422static struct top_cdnr *
423top_create(ifq)
424	struct ifaltq *ifq;
425{
426	struct top_cdnr *top;
427
428	if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
429		return (NULL);
430
431	top->tc_ifq = ifq;
432	/* set default action for the top level conditioner */
433	top->tc_block.cb_action.tca_code = TCACODE_PASS;
434
435	LIST_INSERT_HEAD(&tcb_list, top, tc_next);
436
437	ifq->altq_cdnr = top;
438
439	return (top);
440}
441
442static int
443top_destroy(top)
444	struct top_cdnr *top;
445{
446	struct cdnr_block *cb;
447
448	if (ALTQ_IS_CNDTNING(top->tc_ifq))
449		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
450	top->tc_ifq->altq_cdnr = NULL;
451
452	/*
453	 * destroy all the conditioner elements belonging to this interface
454	 */
455	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
456		while (cb != NULL && cb->cb_ref > 0)
457			cb = LIST_NEXT(cb, cb_next);
458		if (cb != NULL)
459			generic_element_destroy(cb);
460	}
461
462	LIST_REMOVE(top, tc_next);
463
464	cdnr_cbdestroy(top);
465
466	/* if there is no active conditioner, remove the input hook */
467	if (altq_input != NULL) {
468		LIST_FOREACH(top, &tcb_list, tc_next)
469			if (ALTQ_IS_CNDTNING(top->tc_ifq))
470				break;
471		if (top == NULL)
472			altq_input = NULL;
473	}
474
475	return (0);
476}
477
478/*
 * simple tc elements without input function (e.g., droppers and markers).
480 */
481static struct cdnr_block *
482element_create(top, action)
483	struct top_cdnr *top;
484	struct tc_action *action;
485{
486	struct cdnr_block *cb;
487
488	if (tca_verify_action(action) < 0)
489		return (NULL);
490
491	if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
492		return (NULL);
493
494	tca_import_action(&cb->cb_action, action);
495
496	return (cb);
497}
498
499static int
500element_destroy(cb)
501	struct cdnr_block *cb;
502{
503	if (cb->cb_ref > 0)
504		return (EBUSY);
505
506	tca_invalidate_action(&cb->cb_action);
507
508	cdnr_cbdestroy(cb);
509	return (0);
510}
511
512/*
513 * internal representation of token bucket parameters
514 *	rate: 	byte_per_unittime << 32
515 *		(((bits_per_sec) / 8) << 32) / machclk_freq
516 *	depth:	byte << 32
517 *
518 */
519#define	TB_SHIFT	32
520#define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
521#define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
522
523static void
524tb_import_profile(tb, profile)
525	struct tbe *tb;
526	struct tb_profile *profile;
527{
528	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
529	tb->depth = TB_SCALE(profile->depth);
530	if (tb->rate > 0)
531		tb->filluptime = tb->depth / tb->rate;
532	else
533		tb->filluptime = 0xffffffffffffffffLL;
534	tb->token = tb->depth;
535	tb->last = read_machclk();
536}
537
538/*
539 * simple token bucket meter
540 */
541static struct tbmeter *
542tbm_create(top, profile, in_action, out_action)
543	struct top_cdnr *top;
544	struct tb_profile *profile;
545	struct tc_action *in_action, *out_action;
546{
547	struct tbmeter *tbm = NULL;
548
549	if (tca_verify_action(in_action) < 0
550	    || tca_verify_action(out_action) < 0)
551		return (NULL);
552
553	if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
554				tbm_input)) == NULL)
555		return (NULL);
556
557	tb_import_profile(&tbm->tb, profile);
558
559	tca_import_action(&tbm->in_action, in_action);
560	tca_import_action(&tbm->out_action, out_action);
561
562	return (tbm);
563}
564
565static int
566tbm_destroy(tbm)
567	struct tbmeter *tbm;
568{
569	if (tbm->cdnrblk.cb_ref > 0)
570		return (EBUSY);
571
572	tca_invalidate_action(&tbm->in_action);
573	tca_invalidate_action(&tbm->out_action);
574
575	cdnr_cbdestroy(tbm);
576	return (0);
577}
578
579static struct tc_action *
580tbm_input(cb, pktinfo)
581	struct cdnr_block *cb;
582	struct cdnr_pktinfo *pktinfo;
583{
584	struct tbmeter *tbm = (struct tbmeter *)cb;
585	u_int64_t	len;
586	u_int64_t	interval, now;
587
588	len = TB_SCALE(pktinfo->pkt_len);
589
590	if (tbm->tb.token < len) {
591		now = read_machclk();
592		interval = now - tbm->tb.last;
593		if (interval >= tbm->tb.filluptime)
594			tbm->tb.token = tbm->tb.depth;
595		else {
596			tbm->tb.token += interval * tbm->tb.rate;
597			if (tbm->tb.token > tbm->tb.depth)
598				tbm->tb.token = tbm->tb.depth;
599		}
600		tbm->tb.last = now;
601	}
602
603	if (tbm->tb.token < len) {
604		PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
605		return (&tbm->out_action);
606	}
607
608	tbm->tb.token -= len;
609	PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
610	return (&tbm->in_action);
611}
612
613/*
614 * two rate three color marker
615 * as described in draft-heinanen-diffserv-trtcm-01.txt
616 */
617static struct trtcm *
618trtcm_create(top, cmtd_profile, peak_profile,
619	     green_action, yellow_action, red_action, coloraware)
620	struct top_cdnr *top;
621	struct tb_profile *cmtd_profile, *peak_profile;
622	struct tc_action *green_action, *yellow_action, *red_action;
623	int	coloraware;
624{
625	struct trtcm *tcm = NULL;
626
627	if (tca_verify_action(green_action) < 0
628	    || tca_verify_action(yellow_action) < 0
629	    || tca_verify_action(red_action) < 0)
630		return (NULL);
631
632	if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
633				trtcm_input)) == NULL)
634		return (NULL);
635
636	tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
637	tb_import_profile(&tcm->peak_tb, peak_profile);
638
639	tca_import_action(&tcm->green_action, green_action);
640	tca_import_action(&tcm->yellow_action, yellow_action);
641	tca_import_action(&tcm->red_action, red_action);
642
643	/* set dscps to use */
644	if (tcm->green_action.tca_code == TCACODE_MARK)
645		tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
646	else
647		tcm->green_dscp = DSCP_AF11;
648	if (tcm->yellow_action.tca_code == TCACODE_MARK)
649		tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
650	else
651		tcm->yellow_dscp = DSCP_AF12;
652	if (tcm->red_action.tca_code == TCACODE_MARK)
653		tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
654	else
655		tcm->red_dscp = DSCP_AF13;
656
657	tcm->coloraware = coloraware;
658
659	return (tcm);
660}
661
662static int
663trtcm_destroy(tcm)
664	struct trtcm *tcm;
665{
666	if (tcm->cdnrblk.cb_ref > 0)
667		return (EBUSY);
668
669	tca_invalidate_action(&tcm->green_action);
670	tca_invalidate_action(&tcm->yellow_action);
671	tca_invalidate_action(&tcm->red_action);
672
673	cdnr_cbdestroy(tcm);
674	return (0);
675}
676
677static struct tc_action *
678trtcm_input(cb, pktinfo)
679	struct cdnr_block *cb;
680	struct cdnr_pktinfo *pktinfo;
681{
682	struct trtcm *tcm = (struct trtcm *)cb;
683	u_int64_t	len;
684	u_int64_t	interval, now;
685	u_int8_t	color;
686
687	len = TB_SCALE(pktinfo->pkt_len);
688	if (tcm->coloraware) {
689		color = pktinfo->pkt_dscp;
690		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
691			color = tcm->green_dscp;
692	} else {
693		/* if color-blind, precolor it as green */
694		color = tcm->green_dscp;
695	}
696
697	now = read_machclk();
698	if (tcm->cmtd_tb.token < len) {
699		interval = now - tcm->cmtd_tb.last;
700		if (interval >= tcm->cmtd_tb.filluptime)
701			tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
702		else {
703			tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
704			if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
705				tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
706		}
707		tcm->cmtd_tb.last = now;
708	}
709	if (tcm->peak_tb.token < len) {
710		interval = now - tcm->peak_tb.last;
711		if (interval >= tcm->peak_tb.filluptime)
712			tcm->peak_tb.token = tcm->peak_tb.depth;
713		else {
714			tcm->peak_tb.token += interval * tcm->peak_tb.rate;
715			if (tcm->peak_tb.token > tcm->peak_tb.depth)
716				tcm->peak_tb.token = tcm->peak_tb.depth;
717		}
718		tcm->peak_tb.last = now;
719	}
720
721	if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
722		pktinfo->pkt_dscp = tcm->red_dscp;
723		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
724		return (&tcm->red_action);
725	}
726
727	if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
728		pktinfo->pkt_dscp = tcm->yellow_dscp;
729		tcm->peak_tb.token -= len;
730		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
731		return (&tcm->yellow_action);
732	}
733
734	pktinfo->pkt_dscp = tcm->green_dscp;
735	tcm->cmtd_tb.token -= len;
736	tcm->peak_tb.token -= len;
737	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
738	return (&tcm->green_action);
739}
740
741/*
742 * time sliding window three color marker
743 * as described in draft-fang-diffserv-tc-tswtcm-00.txt
744 */
745static struct tswtcm *
746tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
747	      green_action, yellow_action, red_action)
748	struct top_cdnr *top;
749	u_int32_t	cmtd_rate, peak_rate, avg_interval;
750	struct tc_action *green_action, *yellow_action, *red_action;
751{
752	struct tswtcm *tsw;
753
754	if (tca_verify_action(green_action) < 0
755	    || tca_verify_action(yellow_action) < 0
756	    || tca_verify_action(red_action) < 0)
757		return (NULL);
758
759	if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
760				tswtcm_input)) == NULL)
761		return (NULL);
762
763	tca_import_action(&tsw->green_action, green_action);
764	tca_import_action(&tsw->yellow_action, yellow_action);
765	tca_import_action(&tsw->red_action, red_action);
766
767	/* set dscps to use */
768	if (tsw->green_action.tca_code == TCACODE_MARK)
769		tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
770	else
771		tsw->green_dscp = DSCP_AF11;
772	if (tsw->yellow_action.tca_code == TCACODE_MARK)
773		tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
774	else
775		tsw->yellow_dscp = DSCP_AF12;
776	if (tsw->red_action.tca_code == TCACODE_MARK)
777		tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
778	else
779		tsw->red_dscp = DSCP_AF13;
780
781	/* convert rates from bits/sec to bytes/sec */
782	tsw->cmtd_rate = cmtd_rate / 8;
783	tsw->peak_rate = peak_rate / 8;
784	tsw->avg_rate = 0;
785
786	/* timewin is converted from msec to machine clock unit */
787	tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
788
789	return (tsw);
790}
791
792static int
793tswtcm_destroy(tsw)
794	struct tswtcm *tsw;
795{
796	if (tsw->cdnrblk.cb_ref > 0)
797		return (EBUSY);
798
799	tca_invalidate_action(&tsw->green_action);
800	tca_invalidate_action(&tsw->yellow_action);
801	tca_invalidate_action(&tsw->red_action);
802
803	cdnr_cbdestroy(tsw);
804	return (0);
805}
806
807static struct tc_action *
808tswtcm_input(cb, pktinfo)
809	struct cdnr_block *cb;
810	struct cdnr_pktinfo *pktinfo;
811{
812	struct tswtcm	*tsw = (struct tswtcm *)cb;
813	int		len;
814	u_int32_t	avg_rate;
815	u_int64_t	interval, now, tmp;
816
817	/*
818	 * rate estimator
819	 */
820	len = pktinfo->pkt_len;
821	now = read_machclk();
822
823	interval = now - tsw->t_front;
824	/*
825	 * calculate average rate:
826	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
827	 * pkt_len needs to be multiplied by machclk_freq in order to
828	 * get (bytes/sec).
829	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
830	 * less than 32 bits, the following 64-bit operation has enough
831	 * precision.
832	 */
833	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
834	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
835	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
836	tsw->t_front = now;
837
838	/*
839	 * marker
840	 */
841	if (avg_rate > tsw->cmtd_rate) {
842		u_int32_t randval = arc4random() % avg_rate;
843
844		if (avg_rate > tsw->peak_rate) {
845			if (randval < avg_rate - tsw->peak_rate) {
846				/* mark red */
847				pktinfo->pkt_dscp = tsw->red_dscp;
848				PKTCNTR_ADD(&tsw->red_cnt, len);
849				return (&tsw->red_action);
850			} else if (randval < avg_rate - tsw->cmtd_rate)
851				goto mark_yellow;
852		} else {
853			/* peak_rate >= avg_rate > cmtd_rate */
854			if (randval < avg_rate - tsw->cmtd_rate) {
855			mark_yellow:
856				pktinfo->pkt_dscp = tsw->yellow_dscp;
857				PKTCNTR_ADD(&tsw->yellow_cnt, len);
858				return (&tsw->yellow_action);
859			}
860		}
861	}
862
863	/* mark green */
864	pktinfo->pkt_dscp = tsw->green_dscp;
865	PKTCNTR_ADD(&tsw->green_cnt, len);
866	return (&tsw->green_action);
867}
868
869/*
870 * ioctl requests
871 */
872static int
873cdnrcmd_if_attach(ifname)
874	char *ifname;
875{
876	struct ifnet *ifp;
877	struct top_cdnr *top;
878
879	if ((ifp = ifunit(ifname)) == NULL)
880		return (EBADF);
881
882	if (ifp->if_snd.altq_cdnr != NULL)
883		return (EBUSY);
884
885	if ((top = top_create(&ifp->if_snd)) == NULL)
886		return (ENOMEM);
887	return (0);
888}
889
890static int
891cdnrcmd_if_detach(ifname)
892	char *ifname;
893{
894	struct top_cdnr *top;
895
896	if ((top = tcb_lookup(ifname)) == NULL)
897		return (EBADF);
898
899	return top_destroy(top);
900}
901
902static int
903cdnrcmd_add_element(ap)
904	struct cdnr_add_element *ap;
905{
906	struct top_cdnr *top;
907	struct cdnr_block *cb;
908
909	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
910		return (EBADF);
911
912	cb = element_create(top, &ap->action);
913	if (cb == NULL)
914		return (EINVAL);
915	/* return a class handle to the user */
916	ap->cdnr_handle = cdnr_cb2handle(cb);
917	return (0);
918}
919
920static int
921cdnrcmd_delete_element(ap)
922	struct cdnr_delete_element *ap;
923{
924	struct top_cdnr *top;
925	struct cdnr_block *cb;
926
927	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
928		return (EBADF);
929
930	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
931		return (EINVAL);
932
933	if (cb->cb_type != TCETYPE_ELEMENT)
934		return generic_element_destroy(cb);
935
936	return element_destroy(cb);
937}
938
939static int
940cdnrcmd_add_filter(ap)
941	struct cdnr_add_filter *ap;
942{
943	struct top_cdnr *top;
944	struct cdnr_block *cb;
945
946	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
947		return (EBADF);
948
949	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
950		return (EINVAL);
951
952	return acc_add_filter(&top->tc_classifier, &ap->filter,
953			      cb, &ap->filter_handle);
954}
955
956static int
957cdnrcmd_delete_filter(ap)
958	struct cdnr_delete_filter *ap;
959{
960	struct top_cdnr *top;
961
962	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
963		return (EBADF);
964
965	return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
966}
967
968static int
969cdnrcmd_add_tbm(ap)
970	struct cdnr_add_tbmeter *ap;
971{
972	struct top_cdnr *top;
973	struct tbmeter *tbm;
974
975	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
976		return (EBADF);
977
978	tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
979	if (tbm == NULL)
980		return (EINVAL);
981	/* return a class handle to the user */
982	ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
983	return (0);
984}
985
986static int
987cdnrcmd_modify_tbm(ap)
988	struct cdnr_modify_tbmeter *ap;
989{
990	struct tbmeter *tbm;
991
992	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
993		return (EINVAL);
994
995	tb_import_profile(&tbm->tb, &ap->profile);
996
997	return (0);
998}
999
1000static int
1001cdnrcmd_tbm_stats(ap)
1002	struct cdnr_tbmeter_stats *ap;
1003{
1004	struct tbmeter *tbm;
1005
1006	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1007		return (EINVAL);
1008
1009	ap->in_cnt = tbm->in_cnt;
1010	ap->out_cnt = tbm->out_cnt;
1011
1012	return (0);
1013}
1014
1015static int
1016cdnrcmd_add_trtcm(ap)
1017	struct cdnr_add_trtcm *ap;
1018{
1019	struct top_cdnr *top;
1020	struct trtcm *tcm;
1021
1022	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1023		return (EBADF);
1024
1025	tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
1026			   &ap->green_action, &ap->yellow_action,
1027			   &ap->red_action, ap->coloraware);
1028	if (tcm == NULL)
1029		return (EINVAL);
1030
1031	/* return a class handle to the user */
1032	ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
1033	return (0);
1034}
1035
1036static int
1037cdnrcmd_modify_trtcm(ap)
1038	struct cdnr_modify_trtcm *ap;
1039{
1040	struct trtcm *tcm;
1041
1042	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1043		return (EINVAL);
1044
1045	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
1046	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
1047
1048	return (0);
1049}
1050
1051static int
1052cdnrcmd_tcm_stats(ap)
1053	struct cdnr_tcm_stats *ap;
1054{
1055	struct cdnr_block *cb;
1056
1057	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1058		return (EINVAL);
1059
1060	if (cb->cb_type == TCETYPE_TRTCM) {
1061	    struct trtcm *tcm = (struct trtcm *)cb;
1062
1063	    ap->green_cnt = tcm->green_cnt;
1064	    ap->yellow_cnt = tcm->yellow_cnt;
1065	    ap->red_cnt = tcm->red_cnt;
1066	} else if (cb->cb_type == TCETYPE_TSWTCM) {
1067	    struct tswtcm *tsw = (struct tswtcm *)cb;
1068
1069	    ap->green_cnt = tsw->green_cnt;
1070	    ap->yellow_cnt = tsw->yellow_cnt;
1071	    ap->red_cnt = tsw->red_cnt;
1072	} else
1073	    return (EINVAL);
1074
1075	return (0);
1076}
1077
1078static int
1079cdnrcmd_add_tswtcm(ap)
1080	struct cdnr_add_tswtcm *ap;
1081{
1082	struct top_cdnr *top;
1083	struct tswtcm *tsw;
1084
1085	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1086		return (EBADF);
1087
1088	if (ap->cmtd_rate > ap->peak_rate)
1089		return (EINVAL);
1090
1091	tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
1092			    ap->avg_interval, &ap->green_action,
1093			    &ap->yellow_action, &ap->red_action);
1094	if (tsw == NULL)
1095	    return (EINVAL);
1096
1097	/* return a class handle to the user */
1098	ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
1099	return (0);
1100}
1101
1102static int
1103cdnrcmd_modify_tswtcm(ap)
1104	struct cdnr_modify_tswtcm *ap;
1105{
1106	struct tswtcm *tsw;
1107
1108	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1109		return (EINVAL);
1110
1111	if (ap->cmtd_rate > ap->peak_rate)
1112		return (EINVAL);
1113
1114	/* convert rates from bits/sec to bytes/sec */
1115	tsw->cmtd_rate = ap->cmtd_rate / 8;
1116	tsw->peak_rate = ap->peak_rate / 8;
1117	tsw->avg_rate = 0;
1118
1119	/* timewin is converted from msec to machine clock unit */
1120	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
1121
1122	return (0);
1123}
1124
1125static int
1126cdnrcmd_get_stats(ap)
1127	struct cdnr_get_stats *ap;
1128{
1129	struct top_cdnr *top;
1130	struct cdnr_block *cb;
1131	struct tbmeter *tbm;
1132	struct trtcm *tcm;
1133	struct tswtcm *tsw;
1134	struct tce_stats tce, *usp;
1135	int error, n, nskip, nelements;
1136
1137	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1138		return (EBADF);
1139
1140	/* copy action stats */
1141	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
1142
1143	/* stats for each element */
1144	nelements = ap->nelements;
1145	usp = ap->tce_stats;
1146	if (nelements <= 0 || usp == NULL)
1147		return (0);
1148
1149	nskip = ap->nskip;
1150	n = 0;
1151	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
1152		if (nskip > 0) {
1153			nskip--;
1154			continue;
1155		}
1156
1157		bzero(&tce, sizeof(tce));
1158		tce.tce_handle = cb->cb_handle;
1159		tce.tce_type = cb->cb_type;
1160		switch (cb->cb_type) {
1161		case TCETYPE_TBMETER:
1162			tbm = (struct tbmeter *)cb;
1163			tce.tce_cnts[0] = tbm->in_cnt;
1164			tce.tce_cnts[1] = tbm->out_cnt;
1165			break;
1166		case TCETYPE_TRTCM:
1167			tcm = (struct trtcm *)cb;
1168			tce.tce_cnts[0] = tcm->green_cnt;
1169			tce.tce_cnts[1] = tcm->yellow_cnt;
1170			tce.tce_cnts[2] = tcm->red_cnt;
1171			break;
1172		case TCETYPE_TSWTCM:
1173			tsw = (struct tswtcm *)cb;
1174			tce.tce_cnts[0] = tsw->green_cnt;
1175			tce.tce_cnts[1] = tsw->yellow_cnt;
1176			tce.tce_cnts[2] = tsw->red_cnt;
1177			break;
1178		default:
1179			continue;
1180		}
1181
1182		if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
1183				     sizeof(tce))) != 0)
1184			return (error);
1185
1186		if (++n == nelements)
1187			break;
1188	}
1189	ap->nelements = n;
1190
1191	return (0);
1192}
1193
1194/*
1195 * conditioner device interface
1196 */
1197int
1198cdnropen(dev, flag, fmt, p)
1199	dev_t dev;
1200	int flag, fmt;
1201#if (__FreeBSD_version > 500000)
1202	struct thread *p;
1203#else
1204	struct proc *p;
1205#endif
1206{
1207	if (machclk_freq == 0)
1208		init_machclk();
1209
1210	if (machclk_freq == 0) {
1211		printf("cdnr: no cpu clock available!\n");
1212		return (ENXIO);
1213	}
1214
1215	/* everything will be done when the queueing scheme is attached. */
1216	return 0;
1217}
1218
1219int
1220cdnrclose(dev, flag, fmt, p)
1221	dev_t dev;
1222	int flag, fmt;
1223#if (__FreeBSD_version > 500000)
1224	struct thread *p;
1225#else
1226	struct proc *p;
1227#endif
1228{
1229	struct top_cdnr *top;
1230	int err, error = 0;
1231
1232	while ((top = LIST_FIRST(&tcb_list)) != NULL) {
1233		/* destroy all */
1234		err = top_destroy(top);
1235		if (err != 0 && error == 0)
1236			error = err;
1237	}
1238	altq_input = NULL;
1239
1240	return (error);
1241}
1242
1243int
1244cdnrioctl(dev, cmd, addr, flag, p)
1245	dev_t dev;
1246	ioctlcmd_t cmd;
1247	caddr_t addr;
1248	int flag;
1249#if (__FreeBSD_version > 500000)
1250	struct thread *p;
1251#else
1252	struct proc *p;
1253#endif
1254{
1255	struct top_cdnr *top;
1256	struct cdnr_interface *ifacep;
1257	int	s, error = 0;
1258
1259	/* check super-user privilege */
1260	switch (cmd) {
1261	case CDNR_GETSTATS:
1262		break;
1263	default:
1264#if (__FreeBSD_version > 700000)
1265		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
1266#elsif (__FreeBSD_version > 400000)
1267		if ((error = suser(p)) != 0)
1268#else
1269		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1270#endif
1271			return (error);
1272		break;
1273	}
1274
1275#ifdef __NetBSD__
1276	s = splnet();
1277#else
1278	s = splimp();
1279#endif
1280	switch (cmd) {
1281
1282	case CDNR_IF_ATTACH:
1283		ifacep = (struct cdnr_interface *)addr;
1284		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
1285		break;
1286
1287	case CDNR_IF_DETACH:
1288		ifacep = (struct cdnr_interface *)addr;
1289		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
1290		break;
1291
1292	case CDNR_ENABLE:
1293	case CDNR_DISABLE:
1294		ifacep = (struct cdnr_interface *)addr;
1295		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
1296			error = EBADF;
1297			break;
1298		}
1299
1300		switch (cmd) {
1301
1302		case CDNR_ENABLE:
1303			ALTQ_SET_CNDTNING(top->tc_ifq);
1304			if (altq_input == NULL)
1305				altq_input = altq_cdnr_input;
1306			break;
1307
1308		case CDNR_DISABLE:
1309			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
1310			LIST_FOREACH(top, &tcb_list, tc_next)
1311				if (ALTQ_IS_CNDTNING(top->tc_ifq))
1312					break;
1313			if (top == NULL)
1314				altq_input = NULL;
1315			break;
1316		}
1317		break;
1318
1319	case CDNR_ADD_ELEM:
1320		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
1321		break;
1322
1323	case CDNR_DEL_ELEM:
1324		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
1325		break;
1326
1327	case CDNR_ADD_TBM:
1328		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
1329		break;
1330
1331	case CDNR_MOD_TBM:
1332		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
1333		break;
1334
1335	case CDNR_TBM_STATS:
1336		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
1337		break;
1338
1339	case CDNR_ADD_TCM:
1340		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
1341		break;
1342
1343	case CDNR_MOD_TCM:
1344		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
1345		break;
1346
1347	case CDNR_TCM_STATS:
1348		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
1349		break;
1350
1351	case CDNR_ADD_FILTER:
1352		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
1353		break;
1354
1355	case CDNR_DEL_FILTER:
1356		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
1357		break;
1358
1359	case CDNR_GETSTATS:
1360		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
1361		break;
1362
1363	case CDNR_ADD_TSW:
1364		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
1365		break;
1366
1367	case CDNR_MOD_TSW:
1368		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
1369		break;
1370
1371	default:
1372		error = EINVAL;
1373		break;
1374	}
1375	splx(s);
1376
1377	return error;
1378}
1379
#ifdef KLD_MODULE

/*
 * Module registration, built only when compiled as a KLD.  cdnr_sw
 * carries the name "cdnr" followed by this file's open, close and
 * ioctl entry points, and is handed to ALTQ_MODULE() together with
 * the ALTQT_CDNR type.
 */
static struct altqsw cdnr_sw = {
	"cdnr",
	cdnropen,
	cdnrclose,
	cdnrioctl
};

ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);

#endif /* KLD_MODULE */
1388
1389#endif /* ALTQ3_COMPAT */
1390#endif /* ALTQ_CDNR */
1391