/*	$FreeBSD$	*/
/*	$KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $	*/

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif /* __FreeBSD__ || __NetBSD__ */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#ifdef __FreeBSD__
#include <net/vnet.h>
#endif

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <net/pfvar.h>
#include <altq/altq.h>
#ifdef ALTQ3_COMPAT
#include <altq/altq_conf.h>
#endif

/* machine dependent clock related includes */
#ifdef __FreeBSD__
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#endif
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#ifdef __FreeBSD__
#include <machine/md_var.h>		/* for cpu_feature */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <machine/cpu.h>		/* for cpu_feature */
#endif
#endif /* __amd64__ || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
static struct callout tbr_callout = CALLOUT_INITIALIZER;
#else
static struct callout tbr_callout;
#endif

#ifdef ALTQ3_CLFIER_COMPAT
static int 	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
			       struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
			      struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
				struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
			      struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
				 struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int 	ip4f_init(void);
static struct ip4_frag	*ip4f_alloc(void);
static void 	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EBUSY;
		}
		if (ALTQ_IS_ATTACHED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EEXIST;
		}
	}
#endif
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
#endif
	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
#endif

	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

#ifdef ALTQ_DEBUG
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
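
/*
 * Worked example of the scaling above (illustrative figures, not from
 * the original source): for a 1Mbps profile on a 1GHz machine clock,
 *	tbr_rate = TBR_SCALE(1000000 / 8) / 10^9
 *	         = (125000 << 32) / 10^9 ~= 536870,
 * i.e. about 0.000125 bytes of token credit per machine-clock tick,
 * which unscales back to 125000 bytes/sec.  tbr_filluptime, computed
 * in tbr_set() as depth / rate, is then the number of ticks needed to
 * refill an empty bucket completely.
 */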

static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	IFQ_UNLOCK(ifq);
	tbr = malloc(sizeof(struct tb_regulator),
	       M_DEVBUF, M_WAITOK);
	if (tbr == NULL) {	/* cannot happen with M_WAITOK */
		return (ENOMEM);
	}
	bzero(tbr, sizeof(struct tb_regulator));

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	IFQ_LOCK(ifq);
	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}

/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
#ifdef __FreeBSD__
	VNET_ITERATOR_DECL(vnet_iter);
#endif
	struct ifnet *ifp;
	int active, s;

	active = 0;
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
#ifdef __FreeBSD__
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
#endif
		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
		    ifp = TAILQ_NEXT(ifp, if_list)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
#ifdef __FreeBSD__
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
#endif
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr;

	IFQ_LOCK(ifq);
	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	IFQ_UNLOCK(ifq);
	return (0);
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions. Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 * Locking is done in the discipline-specific functions with regard to
 * malloc with WAITOK; it is also not yet clear which lock to use.
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 * It is yet unclear what lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is yet unclear what lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is yet unclear what lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline-specific functions with regard to
 * copyout operations; it is also not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
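		/*
		 * Worked example with assumed values (not from the
		 * original source): old = 0x00, dsfield = 0xb8,
		 * ~HC = 0x1234:
		 *	sum = 0x1234 + 0xff00 + 0xff + 0xb8 = 0x112eb
		 * folding the carries gives 0x12ec, so the new checksum
		 * is ~0x12ec.  The constant 0xff00 is the high-byte
		 * contribution of ~m + m': the ip_v/ip_hl byte is
		 * unchanged, and x + ~x always sums to 0xff there.
		 */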
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}


/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8
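
/*
 * Example of the emulation arithmetic (illustrative): with
 * MACHCLK_SHIFT = 8, machclk_freq becomes 1000000 << 8 = 256000000,
 * i.e. a 256MHz virtual clock, and read_machclk() returns
 * microseconds-since-boot shifted left by the same 8 bits, so the
 * counter and the advertised frequency stay consistent.
 */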

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
#ifdef __FreeBSD__
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
#else
	if ((cpu_feature & CPUID_TSC) == 0)
#endif
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
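/*
 * For example (illustrative), a caller that sets FIMB4_PORTS in
 * filt_bmask gets TCP/UDP port numbers filled in via extract_ports4()
 * below; a caller that leaves it clear gets the port fields zeroed and
 * only the cheap header fields (proto, tos, addresses) extracted.
 */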
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t	filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
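
/*
 * Note on the length arithmetic used below: for AH, opt6_hlen is the
 * RFC 4302 payload length (the header length in 32-bit words minus 2),
 * so the header occupies 8 + opt6_hlen * 4 bytes.  For hop-by-hop,
 * routing and destination options, opt6_hlen counts 8-octet units not
 * including the first 8 octets, hence (opt6_hlen + 1) * 8 bytes.
 */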

/*
 * extract port numbers from an IPv4 packet.
1150 */
1151static int
1152extract_ports4(m, ip, fin)
1153	struct mbuf *m;
1154	struct ip *ip;
1155	struct flowinfo_in *fin;
1156{
1157	struct mbuf *m0;
1158	u_short ip_off;
1159	u_int8_t proto;
1160	int 	off;
1161
1162	fin->fi_sport = 0;
1163	fin->fi_dport = 0;
1164	fin->fi_gpi = 0;
1165
1166	ip_off = ntohs(ip->ip_off);
1167	/* if it is a fragment, try cached fragment info */
1168	if (ip_off & IP_OFFMASK) {
1169		ip4f_lookup(ip, fin);
1170		return (1);
1171	}
1172
1173	/* locate the mbuf containing the protocol header */
1174	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1175		if (((caddr_t)ip >= m0->m_data) &&
1176		    ((caddr_t)ip < m0->m_data + m0->m_len))
1177			break;
1178	if (m0 == NULL) {
1179#ifdef ALTQ_DEBUG
1180		printf("extract_ports4: can't locate header! ip=%p\n", ip);
1181#endif
1182		return (0);
1183	}
1184	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
1185	proto = ip->ip_p;
1186
1187#ifdef ALTQ_IPSEC
1188 again:
1189#endif
1190	while (off >= m0->m_len) {
1191		off -= m0->m_len;
1192		m0 = m0->m_next;
1193		if (m0 == NULL)
1194			return (0);  /* bogus ip_hl! */
1195	}
1196	if (m0->m_len < off + 4)
1197		return (0);
1198
1199	switch (proto) {
1200	case IPPROTO_TCP:
1201	case IPPROTO_UDP: {
1202		struct udphdr *udp;
1203
1204		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1205		fin->fi_sport = udp->uh_sport;
1206		fin->fi_dport = udp->uh_dport;
1207		fin->fi_proto = proto;
1208		}
1209		break;
1210
1211#ifdef ALTQ_IPSEC
1212	case IPPROTO_ESP:
1213		if (fin->fi_gpi == 0){
1214			u_int32_t *gpi;
1215
1216			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1217			fin->fi_gpi   = *gpi;
1218		}
1219		fin->fi_proto = proto;
1220		break;
1221
1222	case IPPROTO_AH: {
1223			/* get next header and header length */
1224			struct _opt6 *opt6;
1225
1226			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1227			proto = opt6->opt6_nxt;
1228			off += 8 + (opt6->opt6_hlen * 4);
1229			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1230				fin->fi_gpi = opt6->ah_spi;
1231		}
1232		/* goto the next header */
1233		goto again;
1234#endif  /* ALTQ_IPSEC */
1235
1236	default:
1237		fin->fi_proto = proto;
1238		return (0);
1239	}
1240
1241	/* if this is a first fragment, cache it. */
1242	if (ip_off & IP_MF)
1243		ip4f_cache(ip, fin);
1244
1245	return (1);
1246}
1247
1248#ifdef INET6
1249static int
1250extract_ports6(m, ip6, fin6)
1251	struct mbuf *m;
1252	struct ip6_hdr *ip6;
1253	struct flowinfo_in6 *fin6;
1254{
1255	struct mbuf *m0;
1256	int	off;
1257	u_int8_t proto;
1258
1259	fin6->fi6_gpi   = 0;
1260	fin6->fi6_sport = 0;
1261	fin6->fi6_dport = 0;
1262
1263	/* locate the mbuf containing the protocol header */
1264	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1265		if (((caddr_t)ip6 >= m0->m_data) &&
1266		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
1267			break;
1268	if (m0 == NULL) {
1269#ifdef ALTQ_DEBUG
1270		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
1271#endif
1272		return (0);
1273	}
1274	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
1275
1276	proto = ip6->ip6_nxt;
1277	do {
1278		while (off >= m0->m_len) {
1279			off -= m0->m_len;
1280			m0 = m0->m_next;
1281			if (m0 == NULL)
1282				return (0);
1283		}
1284		if (m0->m_len < off + 4)
1285			return (0);
1286
1287		switch (proto) {
1288		case IPPROTO_TCP:
1289		case IPPROTO_UDP: {
1290			struct udphdr *udp;
1291
1292			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1293			fin6->fi6_sport = udp->uh_sport;
1294			fin6->fi6_dport = udp->uh_dport;
1295			fin6->fi6_proto = proto;
1296			}
1297			return (1);
1298
1299		case IPPROTO_ESP:
1300			if (fin6->fi6_gpi == 0) {
1301				u_int32_t *gpi;
1302
1303				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1304				fin6->fi6_gpi   = *gpi;
1305			}
1306			fin6->fi6_proto = proto;
1307			return (1);
1308
1309		case IPPROTO_AH: {
1310			/* get next header and header length */
1311			struct _opt6 *opt6;
1312
1313			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1314			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
1315				fin6->fi6_gpi = opt6->ah_spi;
1316			proto = opt6->opt6_nxt;
1317			off += 8 + (opt6->opt6_hlen * 4);
1318			/* goto the next header */
1319			break;
1320			}
1321
1322		case IPPROTO_HOPOPTS:
1323		case IPPROTO_ROUTING:
1324		case IPPROTO_DSTOPTS: {
1325			/* get next header and header length */
1326			struct _opt6 *opt6;
1327
1328			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1329			proto = opt6->opt6_nxt;
1330			off += (opt6->opt6_hlen + 1) * 8;
1331			/* goto the next header */
1332			break;
1333			}
1334
1335		case IPPROTO_FRAGMENT:
			/* IPv6 fragments are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses  */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses  */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}

static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 *  filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
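/*
 * For example (illustrative): a filter stored in hash bucket 5 whose
 * unique id is 3 gets handle (5 << 20) | 3 = 0x500003, and
 * filth_to_filtp() recovers the bucket index from the upper bits via
 * ACC_GET_HINDEX() before scanning the bucket for an exact match.
 */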
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int	i;
{
	static u_long handle_number = 1;
	u_long 	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}


/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;
    char    ip4f_valid;
    u_short ip4f_id;
    struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */


static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i=0; i<IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		       M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */