/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.2 (Berkeley) 3/28/94
 *
 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include "bpf.h"

#ifndef __GNUC__
#define inline
#else
#define inline __inline
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/filedesc.h>
#include <sys/uio_internal.h>
#include <sys/file_internal.h>
#include <sys/event.h>

#include <sys/poll.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/vnode.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <net/firewire.h>

#include <miscfs/devfs/devfs.h>
#include <net/dlil.h>

#include <kern/locks.h>
#include <kern/thread_call.h>

#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET */

extern int tvtohz(struct timeval *);

#define BPF_BUFSIZE 4096
#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)


#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
static unsigned int bpf_bufsize = BPF_BUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
	&bpf_bufsize, 0, "");
__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
	&bpf_maxbufsize, 0, "");
static unsigned int bpf_maxdevices = 256;
SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
	&bpf_maxdevices, 0, "");

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 *  bpf_dtab holds pointers to the descriptors, indexed by minor device #.
 */
static struct bpf_if	*bpf_iflist;
#ifdef __APPLE__
/*
 * BSD now stores the bpf_d in the dev_t, which is a struct
 * on their system. Our dev_t is an int, so we still store
 * the bpf_d in a separate table indexed by minor device #.
 *
 * The value stored in bpf_dtab[n] represents one of three states:
 *  0: device not opened
 *  1: device opening or closing
 *  other: device <n> opened, with a pointer to its storage
 */
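
/*
 * Editorial sketch of the resulting access pattern (restating the table
 * above, not new API): every entry point below validates the slot before
 * dereferencing it, e.g.
 *
 *	d = bpf_dtab[minor(dev)];
 *	if (d == NULL || d == (void *)1)
 *		return (ENXIO);	   -- free, or mid-open/close
 */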
static struct bpf_d	**bpf_dtab = NULL;
static unsigned int bpf_dtab_size = 0;
static unsigned int	nbpfilter = 0;

decl_lck_mtx_data(static, bpf_mlock_data);
static lck_mtx_t		*bpf_mlock = &bpf_mlock_data;
static lck_grp_t		*bpf_mlock_grp;
static lck_grp_attr_t	*bpf_mlock_grp_attr;
static lck_attr_t		*bpf_mlock_attr;

/*
 * A descriptor is marked free by storing NULL in its bpf_dtab slot; see
 * the state table above.  (The historical BSD trick of marking a free
 * descriptor by making it point to itself is not used here.)
 */
#endif /* __APPLE__ */

static int	bpf_allocbufs(struct bpf_d *);
static errno_t	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static void	bpf_detachd(struct bpf_d *d);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int,
		    struct mbuf **, struct sockaddr *, int *);
static int	bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
static void	bpf_timed_out(void *, void *);
static void	bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
		    u_int, int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, u_int bf_len, user_addr_t bf_insns);
static int	bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static int	bpf_set_traffic_class(struct bpf_d *, int);
static void	bpf_set_packet_service_class(struct mbuf *, int);

/*static  void *bpf_devfs_token[MAXBPFILTER];*/

static  int bpf_devsw_installed;

void bpf_init(void *unused);
static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);

/*
 * Darwin differs from BSD here: the following are static
 * on BSD but not static on Darwin.
 */
	d_open_t	    bpfopen;
	d_close_t	    bpfclose;
	d_read_t	    bpfread;
	d_write_t	    bpfwrite;
	ioctl_fcn_t	    bpfioctl;
	select_fcn_t	    bpfselect;


/* Darwin's cdevsw struct differs slightly from BSD's */
#define CDEV_MAJOR 23
static struct cdevsw bpf_cdevsw = {
	/* open */	    bpfopen,
	/* close */	    bpfclose,
	/* read */	    bpfread,
	/* write */	    bpfwrite,
	/* ioctl */	    bpfioctl,
	/* stop */	    eno_stop,
	/* reset */	    eno_reset,
	/* tty */	    NULL,
	/* select */	    bpfselect,
	/* mmap */	    eno_mmap,
	/* strategy*/	    eno_strat,
	/* getc */	    eno_getc,
	/* putc */	    eno_putc,
	/* type */	    0
};

#define SOCKADDR_HDR_LEN	   offsetof(struct sockaddr, sa_data)

static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;

	switch (linktype) {

#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
	#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
	#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
	#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * The "en" ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the VPI:VCI needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
	#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return (EIO);
	}

	// LP64todo - fix this!
	len = uio_resid(uio);
	*datlen = len - hlen;
	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return (EIO);
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0)
		return (ENOBUFS);
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for the link header.
	 */
	if (hlen != 0) {
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error)
			goto bad;
	}
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error)
		goto bad;

	/* Check for multicast destination */
	switch (linktype) {
		case DLT_EN10MB: {
			struct ether_header *eh = mtod(m, struct ether_header *);

			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
				if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
					m->m_flags |= M_BCAST;
				else
					m->m_flags |= M_MCAST;
			}
			break;
		}
	}

	return 0;
 bad:
	m_freem(m);
	return (error);
}
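
/*
 * Worked example of the length bookkeeping above (assumed numbers, for
 * illustration only): writing a 60-byte DLT_EN10MB frame gives len = 60
 * and hlen = sizeof(struct ether_header) = 14, so *datlen = 46.  The 14
 * header bytes are uiomove'd into sockp->sa_data, and the remaining 46
 * payload bytes land in the mbuf, whose m_data/m_len/m_pkthdr.len were
 * adjusted by hlen beforehand.
 */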

#ifdef __APPLE__

/*
 * The dynamic addition of a new device node must block all processes that
 * are opening the last device, so that no process will get an unexpected
 * ENOENT.
 */
static void
bpf_make_dev_t(int maj)
{
	static int		bpf_growing = 0;
	unsigned int	cur_size = nbpfilter, i;

	if (nbpfilter >= bpf_maxdevices)
		return;

	while (bpf_growing) {
		/* Wait until the new device has been created */
		(void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		int new_dtab_size;
		struct bpf_d **new_dtab = NULL;
		struct bpf_d **old_dtab = NULL;

		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
		if (new_dtab == 0) {
			printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
			goto done;
		}
		if (bpf_dtab) {
			bcopy(bpf_dtab, new_dtab,
				  sizeof(struct bpf_d *) * bpf_dtab_size);
		}
		bzero(new_dtab + bpf_dtab_size,
			  sizeof(struct bpf_d *) * NBPFILTER);
		old_dtab = bpf_dtab;
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
		if (old_dtab != NULL)
			_FREE(old_dtab, M_DEVBUF);
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
				DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
				"bpf%d", i);
done:
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
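
/*
 * Illustrative userland sketch of the open protocol this blocking
 * behavior supports (not part of this file; names and error handling
 * are abbreviated assumptions):
 *
 *	int fd = -1;
 *	char name[16];
 *	for (int n = 0; fd < 0; n++) {
 *		snprintf(name, sizeof (name), "/dev/bpf%d", n);
 *		fd = open(name, O_RDWR);
 *		if (fd < 0 && errno != EBUSY)
 *			break;	   -- ENOENT or another error ends the scan
 *	}
 */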

#endif

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;
	int	error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if	*primary;

			for (primary = bpf_iflist; primary && primary->bif_ifp != bp->bif_ifp;
				 primary = primary->bif_next)
				;

			bp->bif_ifp->if_bpf = primary;
		}

		/* Only call dlil_set_bpf_tap for the primary dlt */
		if (bp->bif_ifp->if_bpf == bp)
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);

		if (bp->bif_tap)
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
	}

	return error;
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet  *ifp;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("bpf_detachd: descriptor not in list");
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for the primary dlt */
		if (bp->bif_ifp->if_bpf == bp)
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		if (bp->bif_tap)
			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);

		for (bp = bpf_iflist; bp; bp = bp->bif_next)
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
				break;
		if (bp == NULL)
			ifp->if_bpf = NULL;
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		lck_mtx_unlock(bpf_mlock);
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			printf("bpf: ifnet_set_promiscuous failed\n");
		}
		lck_mtx_lock(bpf_mlock);
	}
}


/*
 * Start the asynchronous timer, if necessary.
 * Must be called with bpf_mlock held.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline((uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
				NSEC_PER_USEC,
				&deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}
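
/*
 * Worked example of the conversion above (assuming hz = 100, so one tick
 * is 10000 us): bd_rtout = 150 ticks yields tv = { 1, 500000 }, i.e.
 * 1500000 us, which clock_interval_to_deadline() scales by NSEC_PER_USEC
 * into an absolute deadline 1.5 seconds from now.
 */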

/*
 * Cancel the asynchronous timer.
 * Must be called with bpf_mlock held.
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return (thread_call_cancel(d->bd_thread_call));
}



/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the file is open by another process.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
	__unused struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1))
		bpf_make_dev_t(major(dev));

	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context, otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater
	 * than 1 when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == 0) {
		bpf_dtab[minor(dev)] = (void *)1;	/* Mark opening */
	} else {
		lck_mtx_unlock(bpf_mlock);
		return (EBUSY);
	}
	d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		printf("bpfopen: malloc bpf_d failed\n");
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}
	bzero(d, sizeof(struct bpf_d));

	/*
	 * It is not necessary to take the BPF lock here because no other
	 * thread can access the device until it is marked opened...
	 */

	/* Mark "in use" and do most initialization. */
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	d->bd_traffic_class = SO_TC_BE;

	if (d->bd_thread_call == NULL) {
		printf("bpfopen: malloc thread call failed\n");
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		_FREE(d, M_DEVBUF);
		return ENOMEM;
	}
#if CONFIG_MACF_NET
	mac_bpfdesc_label_init(d);
	mac_bpfdesc_label_associate(kauth_cred_get(), d);
#endif
	bpf_dtab[minor(dev)] = d; 				/* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
	 __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take the BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}
	bpf_dtab[minor(dev)] = (void *)1;		/* Mark closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
		case BPF_IDLE:
			/*
			 * Not waiting for a timeout, and no timeout happened.
			 */
			break;

		case BPF_WAITING:
			/*
			 * Waiting for a timeout.
			 * Cancel any timer that has yet to go off,
			 * and mark the state as "closing".
			 * Then drop the lock to allow any timers that
			 * *have* gone off to run to completion, and wait
			 * for them to finish.
			 */
			if (!bpf_stop_timer(d)) {
				/*
				 * There was no pending call, so the call must
				 * have been in progress. Wait for the call to
				 * complete; we have to drop the lock while
				 * waiting, to let the in-progress call complete.
				 */
				d->bd_state = BPF_DRAINING;
				while (d->bd_state == BPF_DRAINING)
					msleep((caddr_t)d, bpf_mlock, PRINET,
							"bpfdraining", NULL);
			}
			d->bd_state = BPF_IDLE;
			break;

		case BPF_TIMED_OUT:
			/*
			 * The timer went off, and the timeout routine finished.
			 */
			d->bd_state = BPF_IDLE;
			break;

		case BPF_DRAINING:
			/*
			 * Another thread is blocked on a close waiting for
			 * a timeout to finish.
			 * This "shouldn't happen", as the first thread to enter
			 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
			 * all subsequent threads should see that and fail with
			 * ENXIO.
			 */
			panic("Two threads blocked in a BPF close");
			break;
	}

	if (d->bd_bif)
		bpf_detachd(d);
	selthreadclear(&d->bd_sel);
#if CONFIG_MACF_NET
	mac_bpfdesc_label_destroy(d);
#endif
	thread_call_free(d->bd_thread_call);
	bpf_freed(d);

	/* Mark free in the same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;			/* Mark closed */
	lck_mtx_unlock(bpf_mlock);

	_FREE(d, M_DEVBUF);

	return (0);
}


#define BPF_SLEEP bpf_sleep

static int
bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
{
	u_int64_t abstime = 0;

	if (timo)
		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);

	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
}

static struct inpcb *
bpf_findinpcb(struct inpcbinfo *pcbinfo, uint32_t flowhash)
{
	struct inpcb *inp = NULL;

	if (!flowhash)
		return (NULL);

	lck_rw_lock_shared(pcbinfo->mtx);
	LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			if (inp->inp_flowhash == flowhash)
				break;
			in_pcb_checkstate(inp, WNT_RELEASE, 0);
		}
	}
	lck_rw_done(pcbinfo->mtx);

	return (inp);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
/*
 *  bpfread - read the next chunk of packets from the buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int timed_out;
	int error;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio_resid(uio) != d->bd_bufsize) {
		lck_mtx_unlock(bpf_mlock);
		return (EINVAL);
	}

	if (d->bd_state == BPF_WAITING)
		bpf_stop_timer(d);

	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
			&& d->bd_slen != 0) {
			/*
			 * We're in immediate mode, or are reading
			 * in non-blocking mode, or a timer was
			 * started before the read (e.g., by select()
			 * or poll()) and has expired, and one or more
			 * packets either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			lck_mtx_unlock(bpf_mlock);
			return (ENXIO);
		}
		if (ioflag & IO_NDELAY) {
			lck_mtx_unlock(bpf_mlock);
			return (EWOULDBLOCK);
		}
		error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
				  d->bd_rtout);
		/*
		 * Make sure the device is still opened.
		 */
		d = bpf_dtab[minor(dev)];
		if (d == 0 || d == (void *)1) {
			lck_mtx_unlock(bpf_mlock);
			return (ENXIO);
		}
		if (error == EINTR || error == ERESTART) {
			lck_mtx_unlock(bpf_mlock);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				lck_mtx_unlock(bpf_mlock);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */

	/*
	 * Before we move data to userland, we fill out the extended
	 * header fields.
	 */
	if (d->bd_extendedhdr) {
		char *p;

		p = d->bd_hbuf;
		while (p < d->bd_hbuf + d->bd_hlen) {
			struct bpf_hdr_ext *ehp;
			struct inpcb *inp;
			uint32_t flowhash;
			pid_t pid;

			ehp = (struct bpf_hdr_ext *)(void *)p;
			if ((flowhash = ehp->bh_flowhash)) {
				if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP)
					inp = bpf_findinpcb(&tcbinfo, flowhash);
				else
					inp = bpf_findinpcb(&udbinfo, flowhash);
				if (inp) {
					socket_lock(inp->inp_socket, 0);
					pid = inp->inp_socket->last_pid;
					in_pcb_checkstate(inp, WNT_RELEASE, 1);
					socket_unlock(inp->inp_socket, 0);
					ehp->bh_pid = pid;
					proc_name(pid, ehp->bh_comm, MAXCOMLEN);
				}
				ehp->bh_flowhash = 0;
			}
			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
		}
	}
	/*
	 * Move data from the hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);

	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	lck_mtx_unlock(bpf_mlock);
	return (error);
}
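
/*
 * Illustrative userland sketch of the read contract enforced above (an
 * editorial aside, not part of this file): the buffer must be exactly
 * the kernel buffer size, obtained with BIOCGBLEN, and a single read()
 * may return several word-aligned packets.
 *
 *	u_int blen;
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);
 *	for (char *p = buf; p < buf + n; ) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		-- packet bytes start at p + bh->bh_hdrlen
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */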


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static void
bpf_wakeup(struct bpf_d *d)
{
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
		d->bd_state = BPF_IDLE;
	}
	wakeup((caddr_t)d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(d->bd_sigio, d->bd_sig);

	selwakeup(&d->bd_sel);
	KNOTE(&d->bd_sel.si_note, 1);
#ifndef __APPLE__
	/* XXX */
	d->bd_sel.si_pid = 0;
#endif
}


static void
bpf_timed_out(void *arg, __unused void *dummy)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	lck_mtx_lock(bpf_mlock);
	if (d->bd_state == BPF_WAITING) {
		/*
		 * There's a select or kqueue waiting for this; if there's
		 * now stuff to read, wake it up.
		 */
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	} else if (d->bd_state == BPF_DRAINING) {
		/*
		 * A close is waiting for this to finish.
		 * Mark it as finished, and wake the close up.
		 */
		d->bd_state = BPF_IDLE;
		bpf_wakeup(d);
	}
	lck_mtx_unlock(bpf_mlock);
}




/* keep in sync with bpf_movein above: */
#define MAX_DATALINK_HDR_LEN	(sizeof(struct firewire_header))

int
bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m = NULL;
	int error;
	char		dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
	int datlen = 0;
	int bif_dlt;
	int bd_hdrcmplt;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}
	if (d->bd_bif == 0) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		lck_mtx_unlock(bpf_mlock);
		return (ENETDOWN);
	}
	if (uio_resid(uio) == 0) {
		lck_mtx_unlock(bpf_mlock);
		return (0);
	}
	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);

	/*
	 * fix for PR-6849527
	 * getting variables onto the stack before dropping the lock for
	 * bpf_movein()
	 */
	bif_dlt = (int)d->bd_bif->bif_dlt;
	bd_hdrcmplt = d->bd_hdrcmplt;

	/* bpf_movein allocates mbufs; drop the lock */
	lck_mtx_unlock(bpf_mlock);

	error = bpf_movein(uio, bif_dlt, &m,
	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
	    &datlen);

	if (error) {
		return (error);
	}

	/* take the lock again and verify that the device is still open */
	lck_mtx_lock(bpf_mlock);
	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return (ENXIO);
	}

	if (d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return (ENXIO);
	}

	if ((unsigned)datlen > ifp->if_mtu) {
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return (EMSGSIZE);
	}


#if CONFIG_MACF_NET
	mac_mbuf_label_associate_bpfdesc(d, m);
#endif

	bpf_set_packet_service_class(m, d->bd_traffic_class);

	lck_mtx_unlock(bpf_mlock);

	if (d->bd_hdrcmplt) {
		if (d->bd_bif->bif_send)
			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
		else
			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
	} else {
		error = dlil_output(ifp, PF_INET, m, NULL,
		    (struct sockaddr *)dst_buf, 0, NULL);
	}

	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
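
/*
 * Illustrative userland sketch of the two write modes above (not part
 * of this file; fd and frame are assumed): with BIOCSHDRCMPLT set, the
 * caller supplies the complete link-level frame; otherwise bpf_movein()
 * peels the link header off into a sockaddr for dlil_output().
 *
 *	u_int one = 1;
 *	ioctl(fd, BIOCSHDRCMPLT, &one);
 *	write(fd, frame, framelen);	   -- frame includes the link header
 */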

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSBLEN		Set buffer len.
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGSEESENT	Get "see packets sent" flag.
 *  BIOCSSEESENT	Set "see packets sent" flag.
 *  BIOCSETTC		Set traffic class.
 *  BIOCGETTC		Get traffic class.
 *  BIOCSEXTHDR		Set "extended header" flag.
 */
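/*
 * Illustrative userland setup sequence for the ioctls above (an
 * editorial sketch; "en0" and fd are assumptions, error checks omitted):
 *
 *	struct ifreq ifr;
 *	u_int on = 1;
 *
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	   -- bind to an interface
 *	ioctl(fd, BIOCIMMEDIATE, &on);	   -- deliver packets as they arrive
 *	ioctl(fd, BIOCPROMISC);		   -- optional: promiscuous mode
 */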
/* ARGSUSED */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
    struct proc *p)
{
	struct bpf_d *d;
	int error = 0, int_arg;
	struct ifreq ifr;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	if (d->bd_state == BPF_WAITING)
		bpf_stop_timer(d);
	d->bd_state = BPF_IDLE;

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:			/* int */
		{
			int n;

			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;

			bcopy(&n, addr, sizeof (n));
			break;
		}

	case SIOCGIFADDR:		/* struct ifreq */
		{
			struct ifnet *ifp;

			if (d->bd_bif == 0)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = ifnet_ioctl(ifp, 0, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:			/* u_int */
		bcopy(&d->bd_bufsize, addr, sizeof (u_int));
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:			/* u_int */
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			u_int size;

			bcopy(addr, &size, sizeof (size));

			if (size > bpf_maxbufsize)
				size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				size = BPF_MINBUFSIZE;
			bcopy(&size, addr, sizeof (size));
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF32: {		/* struct bpf_program32 */
		struct bpf_program32 prg32;

		bcopy(addr, &prg32, sizeof (prg32));
		error = bpf_setf(d, prg32.bf_len,
		    CAST_USER_ADDR_T(prg32.bf_insns));
		break;
	}

	case BIOCSETF64: {		/* struct bpf_program64 */
		struct bpf_program64 prg64;

		bcopy(addr, &prg64, sizeof (prg64));
		error = bpf_setf(d, prg64.bf_len, prg64.bf_insns);
		break;
	}

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		reset_d(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:			/* u_int */
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:		/* struct bpf_dltlist */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			error = bpf_getdltlist(d, addr, p);
		}
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:			/* u_int */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			u_int dlt;

			bcopy(addr, &dlt, sizeof (dlt));
			error = bpf_setdlt(d, dlt);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:			/* struct ifreq */
		if (d->bd_bif == 0)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;

			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
			    sizeof (ifr.ifr_name), "%s%d", ifp->if_name,
			    ifp->if_unit);
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF: {		/* struct ifreq */
		ifnet_t	ifp;

		bcopy(addr, &ifr, sizeof (ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL)
			error = ENXIO;
		else
			error = bpf_setif(d, ifp, 0);
		break;
	}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT32: {		/* struct user32_timeval */
		struct user32_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof (_tv));
		tv.tv_sec  = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0)
			d->bd_rtout = tvtohz(&tv) - 1;
		break;
	}

	case BIOCSRTIMEOUT64: {		/* struct user64_timeval */
		struct user64_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof (_tv));
		tv.tv_sec  = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0)
			d->bd_rtout = tvtohz(&tv) - 1;
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT32: {		/* struct user32_timeval */
		struct user32_timeval tv;

		bzero(&tv, sizeof (tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof (tv));
		break;
	}

	case BIOCGRTIMEOUT64: {		/* struct user64_timeval */
		struct user64_timeval tv;

		bzero(&tv, sizeof (tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof (tv));
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS: {		/* struct bpf_stat */
		struct bpf_stat bs;

		bzero(&bs, sizeof (bs));
		bs.bs_recv = d->bd_rcount;
		bs.bs_drop = d->bd_dcount;
		bcopy(&bs, addr, sizeof (bs));
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:		/* u_int */
		bcopy(addr, &d->bd_immediate, sizeof (u_int));
		break;

	case BIOCVERSION: {		/* struct bpf_version */
		struct bpf_version bv;

		bzero(&bv, sizeof (bv));
		bv.bv_major = BPF_MAJOR_VERSION;
		bv.bv_minor = BPF_MINOR_VERSION;
		bcopy(&bv, addr, sizeof (bv));
		break;
	}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:		/* u_int */
		bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:		/* u_int */
		bcopy(addr, &int_arg, sizeof (int_arg));
		d->bd_hdrcmplt = int_arg ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:		/* u_int */
		bcopy(&d->bd_seesent, addr, sizeof (u_int));
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:		/* u_int */
		bcopy(addr, &d->bd_seesent, sizeof (u_int));
		break;

	/*
	 * Set traffic service class
	 */
	case BIOCSETTC: {		/* int */
		int tc;

		bcopy(addr, &tc, sizeof (int));
		error = bpf_set_traffic_class(d, tc);
		break;
	}

	/*
	 * Get traffic service class
	 */
	case BIOCGETTC:			/* int */
		bcopy(&d->bd_traffic_class, addr, sizeof (int));
		break;

	case FIONBIO:		/* Non-blocking I/O; int */
		break;

	case FIOASYNC:		/* Send signal on receive packets; int */
		bcopy(addr, &d->bd_async, sizeof (int));
		break;
#ifndef __APPLE__
	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;
#endif
	case BIOCSRSIG: {	/* Set receive signal; u_int */
		u_int sig;

		bcopy(addr, &sig, sizeof (u_int));

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:			/* u_int */
		bcopy(&d->bd_sig, addr, sizeof (u_int));
		break;
	case BIOCSEXTHDR:
		bcopy(addr, &d->bd_extendedhdr, sizeof (u_int));
		break;
	}

	lck_mtx_unlock(bpf_mlock);

	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	old = d->bd_filter;
	if (bf_insns == USER_ADDR_NULL) {
		if (bf_len != 0)
			return (EINVAL);
		d->bd_filter = NULL;
		reset_d(d);
		if (old != 0)
			FREE((caddr_t)old, M_DEVBUF);
		return (0);
	}
	flen = bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	size = flen * sizeof(struct bpf_insn);
	fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
#ifdef __APPLE__
	if (fcode == NULL)
		return (ENOBUFS);
#endif
	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		d->bd_filter = fcode;
		reset_d(d);
		if (old != 0)
			FREE((caddr_t)old, M_DEVBUF);

		return (0);
	}
	FREE((caddr_t)fcode, M_DEVBUF);
	return (EINVAL);
}
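
/*
 * Illustrative example of a program a caller might install through the
 * BIOCSETF path above (an editorial sketch): this one-instruction filter
 * accepts every packet, capturing at most 96 bytes of each.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, 96),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 */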

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
{
	struct bpf_if *bp;
	int error;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant || (dlt != 0 && dlt != bp->bif_dlt))
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to the requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			if (bpf_attachd(d, bp) != 0) {
				return ENXIO;
			}
		}
		reset_d(d);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}



/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
{
	u_int		n;
	int		error;
	struct ifnet	*ifp;
	struct bpf_if	*bp;
	user_addr_t	dlist;
	struct bpf_dltlist bfl;

	bcopy(addr, &bfl, sizeof (bfl));
	if (proc_is64bit(p)) {
		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	} else {
		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	}

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (dlist != USER_ADDR_NULL) {
			if (n >= bfl.bfl_len) {
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt, dlist,
			    sizeof (bp->bif_dlt));
			if (error != 0)
				break;
			dlist += sizeof (bp->bif_dlt);
		}
		n++;
	}
	bfl.bfl_len = n;
	bcopy(&bfl, addr, sizeof (bfl));

	return (error);
}
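
/*
 * Illustrative userland use of the list protocol above (sketch only;
 * field spellings per net/bpf.h): pass a NULL list to learn the count,
 * then allocate and call again.
 *
 *	struct bpf_dltlist bfl = { 0, { NULL } };
 *	ioctl(fd, BIOCGDLTLIST, &bfl);	   -- bfl_len now holds the count
 *	bfl.bfl_list = malloc(bfl.bfl_len * sizeof (u_int32_t));
 *	ioctl(fd, BIOCGDLTLIST, &bfl);
 */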

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		error = bpf_attachd(d, bp);
		if (error) {
			printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
				ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
			return error;
		}
		reset_d(d);
		if (opromisc) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error)
				printf("bpf_setdlt: ifnet_set_promiscuous %s%d failed (%d)\n",
					   ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static int
bpf_set_traffic_class(struct bpf_d *d, int tc)
{
	int error = 0;

	if (!SO_VALID_TC(tc))
		error = EINVAL;
	else
		d->bd_traffic_class = tc;

	return (error);
}

static void
bpf_set_packet_service_class(struct mbuf *m, int tc)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	VERIFY(SO_VALID_TC(tc));
	(void) m_set_service_class(m, so_tc2msc(tc));
}

/*
 * Support for select()
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	if (d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	switch (which) {
		case FREAD:
			if (d->bd_hlen != 0 ||
					((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
					 d->bd_slen != 0))
				ret = 1; /* read has data to return */
			else {
				/*
				 * Read has no data to return.
				 * Make the select wait, and start a timer if
				 * necessary.
				 */
				selrecord(p, &d->bd_sel, wql);
				bpf_start_timer(d);
			}
			break;

		case FWRITE:
			ret = 1; /* can't determine whether a write would block */
			break;
	}

	lck_mtx_unlock(bpf_mlock);
	return (ret);
}


/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);

static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR) {
		return (EINVAL);
	}

	if (kn->kn_filter != EVFILT_READ) {
		return (EINVAL);
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	if (d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	kn->kn_hook = d;
	kn->kn_fop = &bpfread_filtops;
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	lck_mtx_unlock(bpf_mlock);
	return 0;
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	lck_mtx_lock(bpf_mlock);
	KNOTE_DETACH(&d->bd_sel.si_note, kn);
	lck_mtx_unlock(bpf_mlock);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready = 0;

	if (hint == 0)
		lck_mtx_lock(bpf_mlock);

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, so the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		kn->kn_data = (d->bd_hlen == 0 ? d->bd_slen : d->bd_hlen);
		int64_t lowwat = 1;
		if (kn->kn_sfflags & NOTE_LOWAT) {
			if (kn->kn_sdata > d->bd_bufsize)
				lowwat = d->bd_bufsize;
			else if (kn->kn_sdata > lowwat)
				lowwat = kn->kn_sdata;
		}
		ready = (kn->kn_data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		kn->kn_data = (d->bd_hlen == 0 && d->bd_state == BPF_TIMED_OUT ?
				d->bd_slen : d->bd_hlen);
		ready = (kn->kn_data > 0);
	}
	if (!ready)
		bpf_start_timer(d);

	if (hint == 0)
		lck_mtx_unlock(bpf_mlock);
	return (ready);
}
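
/*
 * Illustrative userland registration against the filter above (sketch
 * only; kq and fd are assumed descriptors): EVFILT_READ with NOTE_LOWAT
 * raises the byte threshold that filt_bpfread() clamps to the buffer
 * size.
 *
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 1024, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */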

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg);
	u_int count;
	u_char *dst;

	dst = dst_arg;
	while (len > 0) {
		if (m == 0)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mbuf_data(m), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

static inline void
bpf_tap_imp(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void*		hdr,
	size_t		hlen,
	int		outbound)
{
	struct bpf_if *bp;
	struct mbuf *savedm = m;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in IOKit), in order to avoid a deadlock
	 * when an ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	bp = ifp->if_bpf;
	for (bp = ifp->if_bpf; bp && bp->bif_ifp == ifp &&
		 (dlt != 0 && bp->bif_dlt != dlt); bp = bp->bif_next)
		;
	if (bp && bp->bif_ifp == ifp && bp->bif_dlist != NULL) {
		struct bpf_d	*d;
		struct m_hdr	hack_hdr;
		u_int	pktlen = 0;
		u_int	slen = 0;
		struct mbuf *m0;

		if (hdr) {
			/*
			 * This is gross. We mock up an mbuf that points to the
			 * header buffer. This means we don't have to copy the
			 * header. A number of interfaces prepended headers just
			 * for bpf by allocating an mbuf on the stack. We want to
			 * give developers an easy way to prepend a header for bpf.
			 * Since a developer allocating an mbuf on the stack is bad,
			 * we do even worse here, allocating only a header to point
			 * to a buffer the developer supplied. This makes assumptions
			 * that bpf_filter and catchpacket will not look at anything
			 * in the mbuf other than the header. This was true at the
			 * time this code was written.
			 */
			hack_hdr.mh_next = m;
			hack_hdr.mh_nextpkt = NULL;
			hack_hdr.mh_len = hlen;
			hack_hdr.mh_data = hdr;
			hack_hdr.mh_type = m->m_type;
			hack_hdr.mh_flags = 0;

			m = (mbuf_t)&hack_hdr;
		}

		for (m0 = m; m0 != 0; m0 = m0->m_next)
			pktlen += m0->m_len;

		for (d = bp->bif_dlist; d; d = d->bd_next) {
			if (outbound && !d->bd_seesent)
				continue;
			++d->bd_rcount;
			slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
			if (slen != 0) {
#if CONFIG_MACF_NET
				if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
					continue;
#endif
				catchpacket(d, (u_char *)m, savedm, pktlen,
				    slen, outbound, bpf_mcopy);
			}
		}
	}
	lck_mtx_unlock(bpf_mlock);
}
2056
2057void
2058bpf_tap_out(
2059	ifnet_t		ifp,
2060	u_int32_t	dlt,
2061	mbuf_t		m,
2062	void*		hdr,
2063	size_t		hlen)
2064{
2065	bpf_tap_imp(ifp, dlt, m, hdr, hlen, 1);
2066}
2067
2068void
2069bpf_tap_in(
2070	ifnet_t		ifp,
2071	u_int32_t	dlt,
2072	mbuf_t		m,
2073	void*		hdr,
2074	size_t		hlen)
2075{
2076	bpf_tap_imp(ifp, dlt, m, hdr, hlen, 0);
2077}
2078
2079/* Callback registered with Ethernet driver. */
2080static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2081{
2082	bpf_tap_imp(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2083
2084	return 0;
2085}
2086
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up pending reads if the buffers are rotated,
 * or if the descriptor is in immediate mode or its read timeout has
 * expired.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
2095static void
2096catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
2097	u_int snaplen, int outbound,
2098	void (*cpfn)(const void *, void *, size_t))
2099{
2100	struct bpf_hdr *hp;
2101	struct bpf_hdr_ext *ehp;
2102	int totlen, curlen;
2103	int hdrlen, caplen;
2104	int do_wakeup = 0;
2105	u_char *payload;
2106
2107	hdrlen = d->bd_extendedhdr ? d->bd_bif->bif_exthdrlen :
2108	    d->bd_bif->bif_hdrlen;
2109	/*
2110	 * Figure out how many bytes to move.  If the packet is
2111	 * greater or equal to the snapshot length, transfer that
2112	 * much.  Otherwise, transfer the whole packet (unless
2113	 * we hit the buffer size limit).
2114	 */
2115	totlen = hdrlen + min(snaplen, pktlen);
2116	if (totlen > d->bd_bufsize)
2117		totlen = d->bd_bufsize;
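	/*
	 * For example, with hdrlen 18 and snaplen 64, a 1500-byte packet
	 * moves 18 + 64 = 82 bytes, while a 40-byte packet moves only
	 * 18 + 40 = 58 bytes.
	 */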
2118
2119	/*
2120	 * Round up the end of the previous packet to the next longword.
2121	 */
2122	curlen = BPF_WORDALIGN(d->bd_slen);
2123	if (curlen + totlen > d->bd_bufsize) {
2124		/*
2125		 * This packet will overflow the storage buffer.
2126		 * Rotate the buffers if we can, then wakeup any
2127		 * pending reads.
2128		 */
2129		if (d->bd_fbuf == NULL) {
2130			/*
2131			 * We haven't completed the previous read yet,
2132			 * so drop the packet.
2133			 */
2134			++d->bd_dcount;
2135			return;
2136		}
2137		ROTATE_BUFFERS(d);
2138		do_wakeup = 1;
2139		curlen = 0;
2140	}
	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}
2148
2149	/*
2150	 * Append the bpf header.
2151	 */
2152	struct timeval tv;
2153	microtime(&tv);
	if (d->bd_extendedhdr) {
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;
		ehp->bh_datalen = pktlen;
		ehp->bh_hdrlen = hdrlen;
		ehp->bh_caplen = totlen - hdrlen;
		if (outbound) {
			if (m->m_pkthdr.m_fhflags & PF_TAG_FLOWHASH)
				ehp->bh_flowhash = m->m_pkthdr.m_flowhash;
			ehp->bh_svc = so_svc2tc(m->m_pkthdr.svc);
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
			if (m->m_pkthdr.m_fhflags & PF_TAG_TCP)
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}
		payload = (u_char *)ehp + hdrlen;
		caplen = ehp->bh_caplen;
	} else {
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
		caplen = hp->bh_caplen;
	}
2183	/*
2184	 * Copy the packet data into the store buffer and update its length.
2185	 */
2186	(*cpfn)(pkt, payload, caplen);
2187	d->bd_slen = curlen + totlen;
2188
2189	if (do_wakeup)
2190		bpf_wakeup(d);
2191}
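
/*
 * Resulting store-buffer layout (illustrative):
 *
 *	+---------+------+ pad +---------+------+
 *	| bpf_hdr | data | ... | bpf_hdr | data |
 *	+---------+------+     +---------+------+
 *	^ bd_sbuf              ^ BPF_WORDALIGN(end of previous packet)
 *
 * A reader steps from one packet to the next by advancing
 * BPF_WORDALIGN(bh_hdrlen + bh_caplen) bytes.
 */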
2192
/*
 * Allocate the initial store and free buffers of a descriptor
 * and reset its buffer lengths.
 */
2196static int
2197bpf_allocbufs(struct bpf_d *d)
2198{
2199	d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2200	if (d->bd_fbuf == 0)
2201		return (ENOBUFS);
2202
2203	d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2204	if (d->bd_sbuf == 0) {
2205		FREE(d->bd_fbuf, M_DEVBUF);
2206		return (ENOBUFS);
2207	}
2208	d->bd_slen = 0;
2209	d->bd_hlen = 0;
2210	return (0);
2211}
2212
2213/*
2214 * Free buffers currently in use by a descriptor.
2215 * Called on close.
2216 */
2217static void
2218bpf_freed(struct bpf_d *d)
2219{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and has not yet been marked
	 * free.
	 */
2225	if (d->bd_sbuf != 0) {
2226		FREE(d->bd_sbuf, M_DEVBUF);
2227		if (d->bd_hbuf != 0)
2228			FREE(d->bd_hbuf, M_DEVBUF);
2229		if (d->bd_fbuf != 0)
2230			FREE(d->bd_fbuf, M_DEVBUF);
2231	}
2232	if (d->bd_filter)
2233		FREE((caddr_t)d->bd_filter, M_DEVBUF);
2234}
2235
/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
2241void
2242bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2243{
2244	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2245}
2246
2247errno_t
2248bpf_attach(
2249	ifnet_t			ifp,
2250	u_int32_t		dlt,
2251	u_int32_t		hdrlen,
2252	bpf_send_func	send,
2253	bpf_tap_func	tap)
2254{
2255	struct bpf_if *bp_new;
2256	struct bpf_if *bp_temp;
2257	struct bpf_if *bp_first = NULL;
2258
2259	bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, M_WAIT);
2260	if (bp_new == 0)
2261		panic("bpfattach");
2262
2263	lck_mtx_lock(bpf_mlock);
2264
2265	/*
2266	 * Check if this interface/dlt is already attached, record first
2267	 * attachment for this interface.
2268	 */
	for (bp_temp = bpf_iflist; bp_temp && (bp_temp->bif_ifp != ifp ||
	    bp_temp->bif_dlt != dlt); bp_temp = bp_temp->bif_next) {
		if (bp_temp->bif_ifp == ifp && bp_first == NULL)
			bp_first = bp_temp;
	}
2274
	if (bp_temp != NULL) {
		printf("bpfattach - %s%d with dlt %u is already attached\n",
			ifp->if_name, ifp->if_unit, dlt);
		FREE(bp_new, M_DEVBUF);
		lck_mtx_unlock(bpf_mlock);
		return EEXIST;
	}
2282
2283	bzero(bp_new, sizeof(*bp_new));
2284	bp_new->bif_ifp = ifp;
2285	bp_new->bif_dlt = dlt;
2286	bp_new->bif_send = send;
2287	bp_new->bif_tap = tap;
2288
2289	if (bp_first == NULL) {
2290		/* No other entries for this ifp */
2291		bp_new->bif_next = bpf_iflist;
2292		bpf_iflist = bp_new;
2293	}
2294	else {
2295		/* Add this after the first entry for this interface */
2296		bp_new->bif_next = bp_first->bif_next;
2297		bp_first->bif_next = bp_new;
2298	}
2299
2300	/*
2301	 * Compute the length of the bpf header.  This is not necessarily
2302	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2303	 * that the network layer header begins on a longword boundary (for
2304	 * performance reasons and to alleviate alignment restrictions).
2305	 */
2306	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2307	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2308	    sizeof(struct bpf_hdr_ext)) - hdrlen;
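	/*
	 * For example, assuming SIZEOF_BPF_HDR is 18 and hdrlen is 14
	 * (Ethernet), bif_hdrlen = BPF_WORDALIGN(14 + 18) - 14 = 18, so
	 * header plus link header occupy 32 bytes and the network layer
	 * header starts on a longword boundary.
	 */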
2309
2310	/* Take a reference on the interface */
2311	ifnet_reference(ifp);
2312
2313	lck_mtx_unlock(bpf_mlock);
2314
2315#ifndef __APPLE__
2316	if (bootverbose)
2317		printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
2318#endif
2319
2320	return 0;
2321}
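
/*
 * Illustrative driver usage (a sketch only; the mydrv_* callbacks are
 * hypothetical):
 *
 *	bpf_attach(ifp, DLT_EN10MB, 14, mydrv_bpf_send, mydrv_bpf_tap);
 *
 * and then, per packet:
 *
 *	bpf_tap_in(ifp, DLT_EN10MB, m, NULL, 0);	(inbound)
 *	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);	(outbound)
 */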
2322
2323/*
2324 * Detach bpf from an interface.  This involves detaching each descriptor
2325 * associated with the interface, and leaving bd_bif NULL.  Notify each
2326 * descriptor as it's detached so that any sleepers wake up and get
2327 * ENXIO.
2328 */
2329void
2330bpfdetach(struct ifnet *ifp)
2331{
2332	struct bpf_if	*bp, *bp_prev, *bp_next;
2333	struct bpf_if	*bp_free = NULL;
	struct bpf_d	*d;

	lck_mtx_lock(bpf_mlock);
2338
2339	/* Locate BPF interface information */
2340	bp_prev = NULL;
2341	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2342		bp_next = bp->bif_next;
2343		if (ifp != bp->bif_ifp) {
2344			bp_prev = bp;
2345			continue;
2346		}
2347
2348		while ((d = bp->bif_dlist) != NULL) {
2349			bpf_detachd(d);
2350			bpf_wakeup(d);
2351		}
2352
2353		if (bp_prev) {
2354			bp_prev->bif_next = bp->bif_next;
2355		} else {
2356			bpf_iflist = bp->bif_next;
2357		}
2358
2359		bp->bif_next = bp_free;
2360		bp_free = bp;
2361
2362		ifnet_release(ifp);
2363	}
2364
	lck_mtx_unlock(bpf_mlock);

	/*
	 * bp is NULL when the loop above terminates; free the entries
	 * collected on the bp_free list instead of leaking them.
	 */
	while ((bp = bp_free) != NULL) {
		bp_free = bp->bif_next;
		FREE(bp, M_DEVBUF);
	}
}
2370
2371void
2372bpf_init(__unused void *unused)
2373{
2374#ifdef __APPLE__
2375	int 	i;
2376	int	maj;
2377
2378	if (bpf_devsw_installed == 0) {
2379		bpf_devsw_installed = 1;
2380
		bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
		bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
		bpf_mlock_attr = lck_attr_alloc_init();
		lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2388
2389		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2390		if (maj == -1) {
2391			if (bpf_mlock_attr)
2392				lck_attr_free(bpf_mlock_attr);
2393			if (bpf_mlock_grp)
2394				lck_grp_free(bpf_mlock_grp);
2395			if (bpf_mlock_grp_attr)
2396				lck_grp_attr_free(bpf_mlock_grp_attr);
2397
2398			bpf_mlock = NULL;
2399			bpf_mlock_attr = NULL;
2400			bpf_mlock_grp = NULL;
2401			bpf_mlock_grp_attr = NULL;
2402			bpf_devsw_installed = 0;
2403			printf("bpf_init: failed to allocate a major number!\n");
2404			return;
2405		}
2406
		for (i = 0; i < NBPFILTER; i++)
2408			bpf_make_dev_t(maj);
2409	}
2410#else
2411	cdevsw_add(&bpf_cdevsw);
2412#endif
2413}
2414
2415#ifndef __APPLE__
2416SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
2417#endif
2418
2419#if CONFIG_MACF_NET
2420struct label *
2421mac_bpfdesc_label_get(struct bpf_d *d)
2422{
2423
2424	return (d->bd_label);
2425}
2426
2427void
2428mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2429{
2430
2431	d->bd_label = label;
2432}
2433#endif
2434