/* altq_subr.c revision 298133 */
1/*	$FreeBSD: stable/10/sys/contrib/altq/altq/altq_subr.c 298133 2016-04-16 22:02:32Z loos $	*/
2/*	$KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $	*/
3
4/*
5 * Copyright (C) 1997-2003
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#if defined(__FreeBSD__) || defined(__NetBSD__)
31#include "opt_altq.h"
32#include "opt_inet.h"
33#ifdef __FreeBSD__
34#include "opt_inet6.h"
35#endif
36#endif /* __FreeBSD__ || __NetBSD__ */
37
38#include <sys/param.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/systm.h>
42#include <sys/proc.h>
43#include <sys/socket.h>
44#include <sys/socketvar.h>
45#include <sys/kernel.h>
46#include <sys/errno.h>
47#include <sys/syslog.h>
48#include <sys/sysctl.h>
49#include <sys/queue.h>
50
51#include <net/if.h>
52#include <net/if_var.h>
53#include <net/if_dl.h>
54#include <net/if_types.h>
55#ifdef __FreeBSD__
56#include <net/vnet.h>
57#endif
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/ip.h>
62#ifdef INET6
63#include <netinet/ip6.h>
64#endif
65#include <netinet/tcp.h>
66#include <netinet/udp.h>
67
68#include <netpfil/pf/pf.h>
69#include <netpfil/pf/pf_altq.h>
70#include <altq/altq.h>
71#ifdef ALTQ3_COMPAT
72#include <altq/altq_conf.h>
73#endif
74
75/* machine dependent clock related includes */
76#ifdef __FreeBSD__
77#include <sys/bus.h>
78#include <sys/cpu.h>
79#include <sys/eventhandler.h>
80#include <machine/clock.h>
81#endif
82#if defined(__amd64__) || defined(__i386__)
83#include <machine/cpufunc.h>		/* for pentium tsc */
84#include <machine/specialreg.h>		/* for CPUID_TSC */
85#ifdef __FreeBSD__
86#include <machine/md_var.h>		/* for cpu_feature */
87#elif defined(__NetBSD__) || defined(__OpenBSD__)
88#include <machine/cpu.h>		/* for cpu_feature */
89#endif
90#endif /* __amd64 || __i386__ */
91
92/*
93 * internal function prototypes
94 */
95static void	tbr_timeout(void *);
96int (*altq_input)(struct mbuf *, int) = NULL;
97static struct mbuf *tbr_dequeue(struct ifaltq *, int);
98static int tbr_timer = 0;	/* token bucket regulator timer */
99#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
100static struct callout tbr_callout = CALLOUT_INITIALIZER;
101#else
102static struct callout tbr_callout;
103#endif
104
105#ifdef ALTQ3_CLFIER_COMPAT
106static int 	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
107#ifdef INET6
108static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
109			       struct flowinfo_in6 *);
110#endif
111static int	apply_filter4(u_int32_t, struct flow_filter *,
112			      struct flowinfo_in *);
113static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
114				struct flowinfo_in *);
115#ifdef INET6
116static int	apply_filter6(u_int32_t, struct flow_filter6 *,
117			      struct flowinfo_in6 *);
118#endif
119static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
120				 struct flowinfo_in *);
121static u_long	get_filt_handle(struct acc_classifier *, int);
122static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
123static u_int32_t filt2fibmask(struct flow_filter *);
124
125static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
126static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
127static int 	ip4f_init(void);
128static struct ip4_frag	*ip4f_alloc(void);
129static void 	ip4f_free(struct ip4_frag *);
130#endif /* ALTQ3_CLFIER_COMPAT */
131
132/*
133 * alternate queueing support routines
134 */
135
136/* look up the queue state by the interface name and the queueing type. */
137void *
138altq_lookup(name, type)
139	char *name;
140	int type;
141{
142	struct ifnet *ifp;
143
144	if ((ifp = ifunit(name)) != NULL) {
145		/* read if_snd unlocked */
146		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
147			return (ifp->if_snd.altq_disc);
148	}
149
150	return NULL;
151}
152
/*
 * Attach a queueing discipline to ifq by installing its method table
 * (enqueue/dequeue/request) and, for altq3, an optional classifier.
 * Returns 0 on success, ENXIO if the ifq was never set up for ALTQ,
 * and (altq3 only) EBUSY/EEXIST when a discipline is already
 * enabled/attached.
 */
int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;		/* ALTQT_* discipline identifier */
	void *discipline;	/* discipline private state */
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;		/* classifier state (altq3 only) */
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EBUSY;
		}
		if (ALTQ_IS_ATTACHED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EEXIST;
		}
	}
#endif
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	/* clear all flags except CANTCHANGE and ENABLED */
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);	/* pin the discipline's KLD module */
#endif
#endif
	IFQ_UNLOCK(ifq);
	return 0;
}
202
/*
 * Detach the current discipline from ifq and reset the method table to
 * the default (NULL/ALTQT_NONE) state.  Returns 0 on success or when
 * nothing is attached, ENXIO if the ifq is not ALTQ-ready, and EBUSY
 * while the discipline is still enabled (callers must disable first).
 */
int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);	/* drop the KLD module ref */
#endif
#endif

	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	/* keep only the CANTCHANGE flag */
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}
239
/*
 * Enable the attached discipline on ifq.  The queue is purged first so
 * the discipline starts from an empty state.  Returns 0 on success
 * (idempotent when already enabled), ENXIO if the ifq is not ALTQ-ready.
 */
int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	/* legacy spl interrupt protection (kept for the non-FreeBSD ports) */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);	/* discipline must start with an empty queue */
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}
273
/*
 * Disable the attached discipline: purge queued packets and clear the
 * ALTQF_ENABLED/ALTQF_CLASSIFY flags so the default FIFO path is used
 * again.  Returns 0 (idempotent when already disabled).
 */
int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	/* legacy spl interrupt protection (kept for the non-FreeBSD ports) */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);	/* drop anything the discipline still holds */
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}
299
300#ifdef ALTQ_DEBUG
/*
 * Report a failed ASSERT() condition and panic.  Only reachable via the
 * ASSERT() macro when ALTQ_DEBUG is defined.
 */
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
311#endif
312
313/*
314 * internal representation of token bucket parameters
315 *	rate:	byte_per_unittime << 32
316 *		(((bits_per_sec) / 8) << 32) / machclk_freq
317 *	depth:	byte << 32
318 *
319 */
320#define	TBR_SHIFT	32
321#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
322#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
323
/*
 * Token-bucket-regulated dequeue.  Installed as tbr_dequeue_ptr and used
 * in place of a direct dequeue while a TBR is configured on the ifq.
 * op is ALTDQ_POLL (peek) or ALTDQ_REMOVE (dequeue).  Returns the next
 * mbuf, or NULL when the bucket holds no tokens.  Token/rate/depth use
 * the scaled fixed-point representation described above (TBR_SHIFT).
 */
static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				/* long idle period: the bucket is full again */
				tbr->tbr_token = tbr->tbr_depth;
			else {
				/* accumulate tokens for the elapsed time */
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		/* no discipline enabled: operate on the default FIFO queue */
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	/* charge the bucket for the packet actually removed */
	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}
371
372/*
373 * set a token bucket regulator.
374 * if the specified rate is zero, the token bucket regulator is deleted.
375 */
/*
 * Install (or, when profile->rate == 0, delete) a token bucket regulator
 * on ifq.  Returns 0 on success; ENXIO without a usable machine clock,
 * ENOENT when asked to delete a nonexistent TBR, ENOMEM on allocation
 * failure.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;	/* rate in bits/sec, depth in bytes */
{
	struct tb_regulator *tbr, *otbr;

	/* publish the TBR dequeue hook the first time a TBR is configured */
	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	/* convert to the scaled fixed-point representation (TBR_SHIFT) */
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;	/* "never" */
	tbr->tbr_token = tbr->tbr_depth;	/* start with a full bucket */
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		/* first TBR on this ifq: make sure the kick timer runs */
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
436
437/*
438 * tbr_timeout goes through the interface list, and kicks the drivers
439 * if necessary.
440 *
441 * MPSAFE
442 */
static void
tbr_timeout(arg)
	void *arg;	/* unused callout argument */
{
#ifdef __FreeBSD__
	VNET_ITERATOR_DECL(vnet_iter);
#endif
	struct ifnet *ifp;
	int active, s;

	active = 0;
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
#ifdef __FreeBSD__
	/* walk every interface in every vnet */
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
#endif
		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
		    ifp = TAILQ_NEXT(ifp, if_list)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			/* kick the driver when it has packets pending */
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
#ifdef __FreeBSD__
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
#endif
	splx(s);
	if (active > 0)
		/* at least one TBR is still enabled: re-arm for next tick */
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}
487
488/*
489 * get token bucket regulator profile
490 */
/*
 * Return the configured TBR profile for ifq, converting the scaled
 * internal rate/depth back to bits-per-second and bytes.  Reports a
 * zero profile when no TBR is installed.  Always returns 0.
 */
int
tbr_get(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr;

	IFQ_LOCK(ifq);
	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		/* inverse of the conversion done in tbr_set() */
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
510
511/*
512 * attach a discipline to the interface.  if one already exists, it is
513 * overridden.
514 * Locking is done in the discipline specific attach functions. Basically
515 * they call back to altq_attach which takes care of the attach and locking.
516 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	/* dispatch to the scheduler-specific pfattach routine */
	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;	/* scheduler unknown or not compiled in */
	}

	return (error);
}
556
557/*
558 * detach a discipline from the interface.
559 * it is possible that the discipline was already overridden by another
560 * discipline.
561 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* read unlocked from if_snd, _disable and _detach take care */
	/* a discipline must be disabled before it can be detached */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}
590
591/*
592 * add a discipline or a queue
593 * Locking is done in the discipline specific functions with regards to
594 * malloc with WAITOK, also it is not yet clear which lock to use.
595 */
596int
597altq_add(struct pf_altq *a)
598{
599	int error = 0;
600
601	if (a->qname[0] != 0)
602		return (altq_add_queue(a));
603
604	if (machclk_freq == 0)
605		init_machclk();
606	if (machclk_freq == 0)
607		panic("altq_add: no cpu clock");
608
609	switch (a->scheduler) {
610#ifdef ALTQ_CBQ
611	case ALTQT_CBQ:
612		error = cbq_add_altq(a);
613		break;
614#endif
615#ifdef ALTQ_PRIQ
616	case ALTQT_PRIQ:
617		error = priq_add_altq(a);
618		break;
619#endif
620#ifdef ALTQ_HFSC
621	case ALTQT_HFSC:
622		error = hfsc_add_altq(a);
623		break;
624#endif
625#ifdef ALTQ_FAIRQ
626        case ALTQT_FAIRQ:
627                error = fairq_add_altq(a);
628                break;
629#endif
630#ifdef ALTQ_CODEL
631	case ALTQT_CODEL:
632		error = codel_add_altq(a);
633		break;
634#endif
635	default:
636		error = ENXIO;
637	}
638
639	return (error);
640}
641
642/*
643 * remove a discipline or a queue
644 * It is yet unclear what lock to use to protect this operation, the
645 * discipline specific functions will determine and grab it
646 */
647int
648altq_remove(struct pf_altq *a)
649{
650	int error = 0;
651
652	if (a->qname[0] != 0)
653		return (altq_remove_queue(a));
654
655	switch (a->scheduler) {
656#ifdef ALTQ_CBQ
657	case ALTQT_CBQ:
658		error = cbq_remove_altq(a);
659		break;
660#endif
661#ifdef ALTQ_PRIQ
662	case ALTQT_PRIQ:
663		error = priq_remove_altq(a);
664		break;
665#endif
666#ifdef ALTQ_HFSC
667	case ALTQT_HFSC:
668		error = hfsc_remove_altq(a);
669		break;
670#endif
671#ifdef ALTQ_FAIRQ
672        case ALTQT_FAIRQ:
673                error = fairq_remove_altq(a);
674                break;
675#endif
676#ifdef ALTQ_CODEL
677	case ALTQT_CODEL:
678		error = codel_remove_altq(a);
679		break;
680#endif
681	default:
682		error = ENXIO;
683	}
684
685	return (error);
686}
687
688/*
689 * add a queue to the discipline
690 * It is yet unclear what lock to use to protect this operation, the
691 * discipline specific functions will determine and grab it
692 */
693int
694altq_add_queue(struct pf_altq *a)
695{
696	int error = 0;
697
698	switch (a->scheduler) {
699#ifdef ALTQ_CBQ
700	case ALTQT_CBQ:
701		error = cbq_add_queue(a);
702		break;
703#endif
704#ifdef ALTQ_PRIQ
705	case ALTQT_PRIQ:
706		error = priq_add_queue(a);
707		break;
708#endif
709#ifdef ALTQ_HFSC
710	case ALTQT_HFSC:
711		error = hfsc_add_queue(a);
712		break;
713#endif
714#ifdef ALTQ_FAIRQ
715        case ALTQT_FAIRQ:
716                error = fairq_add_queue(a);
717                break;
718#endif
719	default:
720		error = ENXIO;
721	}
722
723	return (error);
724}
725
726/*
727 * remove a queue from the discipline
728 * It is yet unclear what lock to use to protect this operation, the
729 * discipline specific functions will determine and grab it
730 */
731int
732altq_remove_queue(struct pf_altq *a)
733{
734	int error = 0;
735
736	switch (a->scheduler) {
737#ifdef ALTQ_CBQ
738	case ALTQT_CBQ:
739		error = cbq_remove_queue(a);
740		break;
741#endif
742#ifdef ALTQ_PRIQ
743	case ALTQT_PRIQ:
744		error = priq_remove_queue(a);
745		break;
746#endif
747#ifdef ALTQ_HFSC
748	case ALTQT_HFSC:
749		error = hfsc_remove_queue(a);
750		break;
751#endif
752#ifdef ALTQ_FAIRQ
753        case ALTQT_FAIRQ:
754                error = fairq_remove_queue(a);
755                break;
756#endif
757	default:
758		error = ENXIO;
759	}
760
761	return (error);
762}
763
764/*
765 * get queue statistics
766 * Locking is done in the discipline specific functions with regards to
767 * copyout operations, also it is not yet clear which lock to use.
768 */
769int
770altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
771{
772	int error = 0;
773
774	switch (a->scheduler) {
775#ifdef ALTQ_CBQ
776	case ALTQT_CBQ:
777		error = cbq_getqstats(a, ubuf, nbytes);
778		break;
779#endif
780#ifdef ALTQ_PRIQ
781	case ALTQT_PRIQ:
782		error = priq_getqstats(a, ubuf, nbytes);
783		break;
784#endif
785#ifdef ALTQ_HFSC
786	case ALTQT_HFSC:
787		error = hfsc_getqstats(a, ubuf, nbytes);
788		break;
789#endif
790#ifdef ALTQ_FAIRQ
791        case ALTQT_FAIRQ:
792                error = fairq_getqstats(a, ubuf, nbytes);
793                break;
794#endif
795#ifdef ALTQ_CODEL
796	case ALTQT_CODEL:
797		error = codel_getqstats(a, ubuf, nbytes);
798		break;
799#endif
800	default:
801		error = ENXIO;
802	}
803
804	return (error);
805}
806
807/*
808 * read and write diffserv field in IPv4 or IPv6 header
809 */
/*
 * Return the diffserv/TOS byte of the packet described by pktattr
 * (IPv4 TOS, or the IPv6 traffic class from the flow word).  Returns 0
 * when the address family is unsupported, the cached header pointer is
 * stale, or the IP version does not match.
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		/* traffic class occupies bits 20-27 of the flow word */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
856
/*
 * Write the diffserv field of the packet described by pktattr, keeping
 * the two low-order (CU) bits of the old IPv4 TOS and incrementally
 * updating the IPv4 header checksum.  Silently returns on stale header
 * pointers or version mismatch.
 */
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		/* replace the traffic-class bits (20-27) of the flow word */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
917
918
919/*
920 * high resolution clock support taking advantage of a machine dependent
921 * high resolution time counter (e.g., timestamp counter of intel pentium).
922 * we assume
923 *  - 64-bit-long monotonically-increasing counter
924 *  - frequency range is 100M-4GHz (CPU speed)
925 */
926/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;		/* nonzero: read the CPU cycle counter */
u_int32_t machclk_freq;		/* machine clock frequency, in Hz */
u_int32_t machclk_per_tick;	/* machine clock ticks per hz tick */

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif
936
937#if (__FreeBSD_version >= 700035)
938/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
/* re-derive machclk_freq after every cpufreq transition */
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
957#endif /* __FreeBSD_version >= 700035 */
958
/*
 * One-time setup for the machine clock: initialize the TBR callout and
 * decide whether the CPU cycle counter (TSC) can be used.  The TSC is
 * avoided on SMP systems (per-CPU counters may be unsynchronized) and
 * when the CPU does not advertise it.
 */
static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
#ifdef __FreeBSD__
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
#else
	if ((cpu_feature & CPUID_TSC) == 0)
#endif
		machclk_usepcc = 0;
#endif
}
988
/*
 * Initialize (or re-initialize after a cpufreq change) machclk_freq and
 * machclk_per_tick.  When the cycle counter is unusable, a 256MHz clock
 * is emulated via microtime(); otherwise the TSC frequency is taken
 * from the platform, or measured over ~1 second as a last resort.
 */
void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		/* sleep ~1s and compare cycle-counter vs. wall-clock delta */
		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}
1051
1052#if defined(__OpenBSD__) && defined(__i386__)
/* read the timestamp counter; the RDTSC opcode is emitted as raw bytes */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
1060#endif /* __OpenBSD__ && __i386__ */
1061
/*
 * Read the machine clock: the CPU cycle counter when machclk_usepcc is
 * set, otherwise a counter emulated from microtime() shifted left by
 * MACHCLK_SHIFT (i.e. microseconds since boot scaled to ~256MHz).
 */
u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
1082
1083#ifdef ALTQ3_CLFIER_COMPAT
1084
1085#ifndef IPPROTO_ESP
1086#define	IPPROTO_ESP	50		/* encapsulating security payload */
1087#endif
1088#ifndef IPPROTO_AH
1089#define	IPPROTO_AH	51		/* authentication header */
1090#endif
1091
1092/*
1093 * extract flow information from a given packet.
1094 * filt_mask shows flowinfo fields required.
1095 * we assume the ip header is in one mbuf, and addresses and ports are
1096 * in network byte order.
1097 */
/*
 * Fill *flow with the flow information of packet m (addresses, ports,
 * protocol, TOS/traffic class).  Returns 1 on success; on failure the
 * flow is marked AF_UNSPEC and 0 is returned.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;			/* PF_INET or PF_INET6 */
	struct flowinfo *flow;	/* out: extracted flow information */
	u_int32_t	filt_bmask;	/* FIMB* bits: fields the filters need */
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		/* traffic class occupies bits 20-27 of the flow word */
		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}
1183
1184/*
1185 * helper routine to extract port numbers
1186 */
1187/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;		/* unused; aligns ah_spi to offset 4 */
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
1195
1196/*
1197 * extract port numbers from a ipv4 packet.
1198 */
/*
 * Extract transport ports (and, with ALTQ_IPSEC, the SPI) from an IPv4
 * packet into fin.  Fragments fall back to the fragment cache; first
 * fragments are cached for later lookups.  Returns 1 on success, 0 when
 * the header cannot be located or the protocol carries no ports.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int 	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	/* offset of the transport header within m0 */
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	/* walk the mbuf chain until 'off' falls inside m0 */
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	/* need at least 4 bytes (ports / SPI / option header prefix) */
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		/* TCP and UDP both start with src/dst port */
		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0){
			u_int32_t *gpi;

			/* the ESP header begins with the SPI */
			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi   = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/*
			 * note: 'off' was already advanced above, so this
			 * bound is stricter than the 8 bytes ah_spi needs
			 * at the old offset.
			 */
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* goto the next header */
		goto again;
#endif  /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}
1295
1296#ifdef INET6
/*
 * Extract the transport-layer port numbers (or IPsec SPI) from an
 * IPv6 packet into *fin6, skipping hop-by-hop, routing, destination-
 * option and AH extension headers along the way.
 *
 * Returns 1 on success, 0 when the chain cannot be parsed (truncated
 * headers, or a fragment header, which is not supported).
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	/* offset of the first extension/transport header within m0 */
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf that holds offset 'off' */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* require at least 4 contiguous bytes of this header */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* TCP and UDP headers both start with src/dst port */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				/* SPI is the first word of the ESP header */
				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* NOTE(review): assumes opt6_hlen counts 32-bit
			   words past the fixed 8 bytes — confirm */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* option header lengths count 8-byte units,
			   excluding the first 8 bytes */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
1392#endif /* INET6 */
1393
1394/*
1395 * altq common classifier
1396 */
1397int
1398acc_add_filter(classifier, filter, class, phandle)
1399	struct acc_classifier *classifier;
1400	struct flow_filter *filter;
1401	void	*class;
1402	u_long	*phandle;
1403{
1404	struct acc_filter *afp, *prev, *tmp;
1405	int	i, s;
1406
1407#ifdef INET6
1408	if (filter->ff_flow.fi_family != AF_INET &&
1409	    filter->ff_flow.fi_family != AF_INET6)
1410		return (EINVAL);
1411#else
1412	if (filter->ff_flow.fi_family != AF_INET)
1413		return (EINVAL);
1414#endif
1415
1416	afp = malloc(sizeof(struct acc_filter),
1417	       M_DEVBUF, M_WAITOK);
1418	if (afp == NULL)
1419		return (ENOMEM);
1420	bzero(afp, sizeof(struct acc_filter));
1421
1422	afp->f_filter = *filter;
1423	afp->f_class = class;
1424
1425	i = ACC_WILDCARD_INDEX;
1426	if (filter->ff_flow.fi_family == AF_INET) {
1427		struct flow_filter *filter4 = &afp->f_filter;
1428
1429		/*
1430		 * if address is 0, it's a wildcard.  if address mask
1431		 * isn't set, use full mask.
1432		 */
1433		if (filter4->ff_flow.fi_dst.s_addr == 0)
1434			filter4->ff_mask.mask_dst.s_addr = 0;
1435		else if (filter4->ff_mask.mask_dst.s_addr == 0)
1436			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
1437		if (filter4->ff_flow.fi_src.s_addr == 0)
1438			filter4->ff_mask.mask_src.s_addr = 0;
1439		else if (filter4->ff_mask.mask_src.s_addr == 0)
1440			filter4->ff_mask.mask_src.s_addr = 0xffffffff;
1441
1442		/* clear extra bits in addresses  */
1443		   filter4->ff_flow.fi_dst.s_addr &=
1444		       filter4->ff_mask.mask_dst.s_addr;
1445		   filter4->ff_flow.fi_src.s_addr &=
1446		       filter4->ff_mask.mask_src.s_addr;
1447
1448		/*
1449		 * if dst address is a wildcard, use hash-entry
1450		 * ACC_WILDCARD_INDEX.
1451		 */
1452		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
1453			i = ACC_WILDCARD_INDEX;
1454		else
1455			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
1456	}
1457#ifdef INET6
1458	else if (filter->ff_flow.fi_family == AF_INET6) {
1459		struct flow_filter6 *filter6 =
1460			(struct flow_filter6 *)&afp->f_filter;
1461#ifndef IN6MASK0 /* taken from kame ipv6 */
1462#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
1463#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1464		const struct in6_addr in6mask0 = IN6MASK0;
1465		const struct in6_addr in6mask128 = IN6MASK128;
1466#endif
1467
1468		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
1469			filter6->ff_mask6.mask6_dst = in6mask0;
1470		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
1471			filter6->ff_mask6.mask6_dst = in6mask128;
1472		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
1473			filter6->ff_mask6.mask6_src = in6mask0;
1474		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
1475			filter6->ff_mask6.mask6_src = in6mask128;
1476
1477		/* clear extra bits in addresses  */
1478		for (i = 0; i < 16; i++)
1479			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
1480			    filter6->ff_mask6.mask6_dst.s6_addr[i];
1481		for (i = 0; i < 16; i++)
1482			filter6->ff_flow6.fi6_src.s6_addr[i] &=
1483			    filter6->ff_mask6.mask6_src.s6_addr[i];
1484
1485		if (filter6->ff_flow6.fi6_flowlabel == 0)
1486			i = ACC_WILDCARD_INDEX;
1487		else
1488			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
1489	}
1490#endif /* INET6 */
1491
1492	afp->f_handle = get_filt_handle(classifier, i);
1493
1494	/* update filter bitmask */
1495	afp->f_fbmask = filt2fibmask(filter);
1496	classifier->acc_fbmask |= afp->f_fbmask;
1497
1498	/*
1499	 * add this filter to the filter list.
1500	 * filters are ordered from the highest rule number.
1501	 */
1502#ifdef __NetBSD__
1503	s = splnet();
1504#else
1505	s = splimp();
1506#endif
1507	prev = NULL;
1508	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
1509		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
1510			prev = tmp;
1511		else
1512			break;
1513	}
1514	if (prev == NULL)
1515		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
1516	else
1517		LIST_INSERT_AFTER(prev, afp, f_chain);
1518	splx(s);
1519
1520	*phandle = afp->f_handle;
1521	return (0);
1522}
1523
1524int
1525acc_delete_filter(classifier, handle)
1526	struct acc_classifier *classifier;
1527	u_long handle;
1528{
1529	struct acc_filter *afp;
1530	int	s;
1531
1532	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1533		return (EINVAL);
1534
1535#ifdef __NetBSD__
1536	s = splnet();
1537#else
1538	s = splimp();
1539#endif
1540	LIST_REMOVE(afp, f_chain);
1541	splx(s);
1542
1543	free(afp, M_DEVBUF);
1544
1545	/* todo: update filt_bmask */
1546
1547	return (0);
1548}
1549
1550/*
1551 * delete filters referencing to the specified class.
1552 * if the all flag is not 0, delete all the filters.
1553 */
1554int
1555acc_discard_filters(classifier, class, all)
1556	struct acc_classifier *classifier;
1557	void	*class;
1558	int	all;
1559{
1560	struct acc_filter *afp;
1561	int	i, s;
1562
1563#ifdef __NetBSD__
1564	s = splnet();
1565#else
1566	s = splimp();
1567#endif
1568	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1569		do {
1570			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1571				if (all || afp->f_class == class) {
1572					LIST_REMOVE(afp, f_chain);
1573					free(afp, M_DEVBUF);
1574					/* start again from the head */
1575					break;
1576				}
1577		} while (afp != NULL);
1578	}
1579	splx(s);
1580
1581	if (all)
1582		classifier->acc_fbmask = 0;
1583
1584	return (0);
1585}
1586
/*
 * Classify a packet: extract its flow info and walk the filter lists
 * for a match.  Filters in a bucket are ordered by descending rule
 * number, so the first match wins.  Returns the matching filter's
 * class pointer, or NULL when no filter matches.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero; such flows are filed
			   under the wildcard bucket */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1686
1687static int
1688apply_filter4(fbmask, filt, pkt)
1689	u_int32_t	fbmask;
1690	struct flow_filter *filt;
1691	struct flowinfo_in *pkt;
1692{
1693	if (filt->ff_flow.fi_family != AF_INET)
1694		return (0);
1695	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1696		return (0);
1697	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1698		return (0);
1699	if ((fbmask & FIMB4_DADDR) &&
1700	    filt->ff_flow.fi_dst.s_addr !=
1701	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1702		return (0);
1703	if ((fbmask & FIMB4_SADDR) &&
1704	    filt->ff_flow.fi_src.s_addr !=
1705	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1706		return (0);
1707	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1708		return (0);
1709	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1710	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1711		return (0);
1712	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1713		return (0);
1714	/* match */
1715	return (1);
1716}
1717
1718/*
1719 * filter matching function optimized for a common case that checks
1720 * only protocol and port numbers
1721 */
1722static int
1723apply_ppfilter4(fbmask, filt, pkt)
1724	u_int32_t	fbmask;
1725	struct flow_filter *filt;
1726	struct flowinfo_in *pkt;
1727{
1728	if (filt->ff_flow.fi_family != AF_INET)
1729		return (0);
1730	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1731		return (0);
1732	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1733		return (0);
1734	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1735		return (0);
1736	/* match */
1737	return (1);
1738}
1739
1740/*
1741 * filter matching function only for tos field.
1742 */
1743static int
1744apply_tosfilter4(fbmask, filt, pkt)
1745	u_int32_t	fbmask;
1746	struct flow_filter *filt;
1747	struct flowinfo_in *pkt;
1748{
1749	if (filt->ff_flow.fi_family != AF_INET)
1750		return (0);
1751	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1752	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1753		return (0);
1754	/* match */
1755	return (1);
1756}
1757
1758#ifdef INET6
1759static int
1760apply_filter6(fbmask, filt, pkt)
1761	u_int32_t	fbmask;
1762	struct flow_filter6 *filt;
1763	struct flowinfo_in6 *pkt;
1764{
1765	int i;
1766
1767	if (filt->ff_flow6.fi6_family != AF_INET6)
1768		return (0);
1769	if ((fbmask & FIMB6_FLABEL) &&
1770	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
1771		return (0);
1772	if ((fbmask & FIMB6_PROTO) &&
1773	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
1774		return (0);
1775	if ((fbmask & FIMB6_SPORT) &&
1776	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
1777		return (0);
1778	if ((fbmask & FIMB6_DPORT) &&
1779	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
1780		return (0);
1781	if (fbmask & FIMB6_SADDR) {
1782		for (i = 0; i < 4; i++)
1783			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
1784			    (pkt->fi6_src.s6_addr32[i] &
1785			     filt->ff_mask6.mask6_src.s6_addr32[i]))
1786				return (0);
1787	}
1788	if (fbmask & FIMB6_DADDR) {
1789		for (i = 0; i < 4; i++)
1790			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
1791			    (pkt->fi6_dst.s6_addr32[i] &
1792			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
1793				return (0);
1794	}
1795	if ((fbmask & FIMB6_TCLASS) &&
1796	    filt->ff_flow6.fi6_tclass !=
1797	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
1798		return (0);
1799	if ((fbmask & FIMB6_GPI) &&
1800	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
1801		return (0);
1802	/* match */
1803	return (1);
1804}
1805#endif /* INET6 */
1806
1807/*
1808 *  filter handle:
1809 *	bit 20-28: index to the filter hash table
1810 *	bit  0-19: unique id in the hash bucket.
1811 */
1812static u_long
1813get_filt_handle(classifier, i)
1814	struct acc_classifier *classifier;
1815	int	i;
1816{
1817	static u_long handle_number = 1;
1818	u_long 	handle;
1819	struct acc_filter *afp;
1820
1821	while (1) {
1822		handle = handle_number++ & 0x000fffff;
1823
1824		if (LIST_EMPTY(&classifier->acc_filters[i]))
1825			break;
1826
1827		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1828			if ((afp->f_handle & 0x000fffff) == handle)
1829				break;
1830		if (afp == NULL)
1831			break;
1832		/* this handle is already used, try again */
1833	}
1834
1835	return ((i << 20) | handle);
1836}
1837
1838/* convert filter handle to filter pointer */
1839static struct acc_filter *
1840filth_to_filtp(classifier, handle)
1841	struct acc_classifier *classifier;
1842	u_long handle;
1843{
1844	struct acc_filter *afp;
1845	int	i;
1846
1847	i = ACC_GET_HINDEX(handle);
1848
1849	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1850		if (afp->f_handle == handle)
1851			return (afp);
1852
1853	return (NULL);
1854}
1855
1856/* create flowinfo bitmask */
1857static u_int32_t
1858filt2fibmask(filt)
1859	struct flow_filter *filt;
1860{
1861	u_int32_t mask = 0;
1862#ifdef INET6
1863	struct flow_filter6 *filt6;
1864#endif
1865
1866	switch (filt->ff_flow.fi_family) {
1867	case AF_INET:
1868		if (filt->ff_flow.fi_proto != 0)
1869			mask |= FIMB4_PROTO;
1870		if (filt->ff_flow.fi_tos != 0)
1871			mask |= FIMB4_TOS;
1872		if (filt->ff_flow.fi_dst.s_addr != 0)
1873			mask |= FIMB4_DADDR;
1874		if (filt->ff_flow.fi_src.s_addr != 0)
1875			mask |= FIMB4_SADDR;
1876		if (filt->ff_flow.fi_sport != 0)
1877			mask |= FIMB4_SPORT;
1878		if (filt->ff_flow.fi_dport != 0)
1879			mask |= FIMB4_DPORT;
1880		if (filt->ff_flow.fi_gpi != 0)
1881			mask |= FIMB4_GPI;
1882		break;
1883#ifdef INET6
1884	case AF_INET6:
1885		filt6 = (struct flow_filter6 *)filt;
1886
1887		if (filt6->ff_flow6.fi6_proto != 0)
1888			mask |= FIMB6_PROTO;
1889		if (filt6->ff_flow6.fi6_tclass != 0)
1890			mask |= FIMB6_TCLASS;
1891		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1892			mask |= FIMB6_DADDR;
1893		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1894			mask |= FIMB6_SADDR;
1895		if (filt6->ff_flow6.fi6_sport != 0)
1896			mask |= FIMB6_SPORT;
1897		if (filt6->ff_flow6.fi6_dport != 0)
1898			mask |= FIMB6_DPORT;
1899		if (filt6->ff_flow6.fi6_gpi != 0)
1900			mask |= FIMB6_GPI;
1901		if (filt6->ff_flow6.fi6_flowlabel != 0)
1902			mask |= FIMB6_FLABEL;
1903		break;
1904#endif /* INET6 */
1905	}
1906	return (mask);
1907}
1908
1909
1910/*
1911 * helper functions to handle IPv4 fragments.
1912 * currently only in-sequence fragments are handled.
1913 *	- fragment info is cached in a LRU list.
1914 *	- when a first fragment is found, cache its flow info.
1915 *	- when a non-first fragment is found, lookup the cache.
1916 */
1917
1918struct ip4_frag {
1919    TAILQ_ENTRY(ip4_frag) ip4f_chain;
1920    char    ip4f_valid;
1921    u_short ip4f_id;
1922    struct flowinfo_in ip4f_info;
1923};
1924
1925static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
1926
1927#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
1928
1929
1930static void
1931ip4f_cache(ip, fin)
1932	struct ip *ip;
1933	struct flowinfo_in *fin;
1934{
1935	struct ip4_frag *fp;
1936
1937	if (TAILQ_EMPTY(&ip4f_list)) {
1938		/* first time call, allocate fragment cache entries. */
1939		if (ip4f_init() < 0)
1940			/* allocation failed! */
1941			return;
1942	}
1943
1944	fp = ip4f_alloc();
1945	fp->ip4f_id = ip->ip_id;
1946	fp->ip4f_info.fi_proto = ip->ip_p;
1947	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1948	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1949
1950	/* save port numbers */
1951	fp->ip4f_info.fi_sport = fin->fi_sport;
1952	fp->ip4f_info.fi_dport = fin->fi_dport;
1953	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
1954}
1955
/*
 * Look up a cached fragment entry matching the given IPv4 header and
 * copy its port/SPI info into *fin.  The entry is released when the
 * last fragment (MF clear) is seen.  Returns 1 if found, 0 otherwise.
 */
static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	/*
	 * valid entries are kept at the head of the list (ip4f_alloc
	 * inserts at the head, ip4f_free moves entries to the tail),
	 * so the scan may stop at the first invalid entry.
	 */
	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}
1986
1987static int
1988ip4f_init(void)
1989{
1990	struct ip4_frag *fp;
1991	int i;
1992
1993	TAILQ_INIT(&ip4f_list);
1994	for (i=0; i<IP4F_TABSIZE; i++) {
1995		fp = malloc(sizeof(struct ip4_frag),
1996		       M_DEVBUF, M_NOWAIT);
1997		if (fp == NULL) {
1998			printf("ip4f_init: can't alloc %dth entry!\n", i);
1999			if (i == 0)
2000				return (-1);
2001			return (0);
2002		}
2003		fp->ip4f_valid = 0;
2004		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
2005	}
2006	return (0);
2007}
2008
/*
 * Grab a cache entry by reclaiming the least-recently-used one (the
 * tail) and move it to the head of the LRU list, marked valid.
 */
static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}
2021
/*
 * Invalidate a cache entry and move it to the tail so that it is
 * reused before any still-valid entry.
 */
static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}
2030
2031#endif /* ALTQ3_CLFIER_COMPAT */
2032