/*	$FreeBSD: stable/10/sys/contrib/altq/altq/altq_subr.c 298091 2016-04-16 02:11:04Z loos $	*/
/*	$KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $	*/

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif /* __FreeBSD__ || __NetBSD__ */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#ifdef __FreeBSD__
#include <net/vnet.h>
#endif

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <altq/altq.h>
#ifdef ALTQ3_COMPAT
#include <altq/altq_conf.h>
#endif

/* machine dependent clock related includes */
#ifdef __FreeBSD__
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#endif
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#ifdef __FreeBSD__
#include <machine/md_var.h>		/* for cpu_feature */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <machine/cpu.h>		/* for cpu_feature */
#endif
#endif /* __amd64 || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
static struct callout tbr_callout = CALLOUT_INITIALIZER;
#else
static struct callout tbr_callout;
#endif

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
			       struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
			      struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
				struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
			      struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
				 struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag	*ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EBUSY;
		}
		if (ALTQ_IS_ATTACHED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EEXIST;
		}
	}
#endif
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
#endif
	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
#endif

	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

#ifdef ALTQ_DEBUG
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
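
/*
 * Worked example (illustrative numbers, not from this file): for a
 * 10Mbps profile with machclk_freq at 1GHz,
 *	tbr_rate = TBR_SCALE(10000000 / 8) / 10^9
 *		 = (1250000 << 32) / 10^9 ~= 5368709,
 * i.e. 0.00125 bytes per machine clock tick, scaled by 2^32.
 */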

static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
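
/*
 * Illustrative use (a sketch, not taken from this file): to install a
 * 10Mbps regulator with a 12KB bucket on an interface's send queue,
 * assuming "ifp" is a valid interface pointer.  The profile rate is in
 * bits per second, the depth in bytes:
 *
 *	struct tb_profile p = { 10000000, 12288 };	(rate, depth)
 *	error = tbr_set(&ifp->if_snd, &p);
 *
 * calling tbr_set() again with p.rate == 0 deletes the regulator.
 */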

/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
#ifdef __FreeBSD__
	VNET_ITERATOR_DECL(vnet_iter);
#endif
	struct ifnet *ifp;
	int active, s;

	active = 0;
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
#ifdef __FreeBSD__
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
#endif
		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
		    ifp = TAILQ_NEXT(ifp, if_list)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
#ifdef __FreeBSD__
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
#endif
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr;

	IFQ_LOCK(ifq);
	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	IFQ_UNLOCK(ifq);
	return (0);
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions. Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue.
 * Locking is done in the discipline specific functions with regard to
 * malloc with WAITOK; it is also not yet clear which lock to use.
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue.
 * It is not yet clear which lock to use to protect this operation; the
 * discipline specific functions will determine and grab it.
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline.
 * It is not yet clear which lock to use to protect this operation; the
 * discipline specific functions will determine and grab it.
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline.
 * It is not yet clear which lock to use to protect this operation; the
 * discipline specific functions will determine and grab it.
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics.
 * Locking is done in the discipline specific functions with regard to
 * copyout operations; it is also not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
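		/*
		 * here HC is the old checksum and m the 16-bit header
		 * word carrying the TOS octet in its low byte; since
		 * the high byte is unchanged, ~m + m' collapses to
		 * 0xff00 + (~old & 0xff) + dsfield below.
		 */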
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}


/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8
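/* i.e., the emulated clock runs at 1000000 << MACHCLK_SHIFT = 256MHz */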

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
#ifdef __FreeBSD__
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
#else
	if ((cpu_feature & CPUID_TSC) == 0)
#endif
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t	filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
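
/*
 * note: opt6_hlen is interpreted in different units by the parsers
 * below: for AH it counts 32-bit words (off advances by
 * 8 + opt6_hlen * 4 octets), while for hop-by-hop, routing and
 * destination options it counts 8-octet units beyond the first
 * (off advances by (opt6_hlen + 1) * 8).
 */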

/*
 * extract port numbers from an IPv4 packet.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi   = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* go to the next header */
		goto again;
#endif  /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}

static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 *  filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
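/* e.g., handle 0x00300005 names unique id 5 in hash bucket 3. */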
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int	i;
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}


/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, look up the cache.
 */

struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;
    char    ip4f_valid;
    u_short ip4f_id;
    struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */


static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		       M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */