1/*
2 * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * packet filter subroutines for tcpdump
22 *	Extraction/creation by Jeffrey Mogul, DECWRL
23 */
24
25#ifndef lint
26static const char rcsid[] _U_ =
27    "@(#) $Header: /tcpdump/master/libpcap/pcap-pf.c,v 1.97 2008-04-14 20:40:58 guy Exp $ (LBL)";
28#endif
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include <sys/types.h>
35#include <sys/time.h>
36#include <sys/timeb.h>
37#include <sys/socket.h>
38#include <sys/file.h>
39#include <sys/ioctl.h>
40#include <net/pfilt.h>
41
42struct mbuf;
43struct rtentry;
44#include <net/if.h>
45
46#include <netinet/in.h>
47#include <netinet/in_systm.h>
48#include <netinet/ip.h>
49#include <netinet/if_ether.h>
50#include <netinet/ip_var.h>
51#include <netinet/udp.h>
52#include <netinet/udp_var.h>
53#include <netinet/tcp.h>
54#include <netinet/tcpip.h>
55
56#include <ctype.h>
57#include <errno.h>
58#include <netdb.h>
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62#include <unistd.h>
63
64/*
65 * Make "pcap.h" not include "pcap/bpf.h"; we are going to include the
66 * native OS version, as we need various BPF ioctls from it.
67 */
68#define PCAP_DONT_INCLUDE_PCAP_BPF_H
69#include <net/bpf.h>
70
71#include "pcap-int.h"
72
73#ifdef HAVE_OS_PROTO_H
74#include "os-proto.h"
75#endif
76
77static int pcap_setfilter_pf(pcap_t *, struct bpf_program *);
78
79/*
80 * BUFSPACE is the size in bytes of the packet read buffer.  Most tcpdump
81 * applications aren't going to need more than 200 bytes of packet header
82 * and the read shouldn't return more packets than packetfilter's internal
83 * queue limit (bounded at 256).
84 */
85#define BUFSPACE (200 * 256)
86
87static int
88pcap_read_pf(pcap_t *pc, int cnt, pcap_handler callback, u_char *user)
89{
90	register u_char *p, *bp;
91	register int cc, n, buflen, inc;
92	register struct enstamp *sp;
93#ifdef LBL_ALIGN
94	struct enstamp stamp;
95#endif
96#ifdef PCAP_FDDIPAD
97	register int pad;
98#endif
99
100 again:
101	cc = pc->cc;
102	if (cc == 0) {
103		cc = read(pc->fd, (char *)pc->buffer + pc->offset, pc->bufsize);
104		if (cc < 0) {
105			if (errno == EWOULDBLOCK)
106				return (0);
107			if (errno == EINVAL &&
108			    lseek(pc->fd, 0L, SEEK_CUR) + pc->bufsize < 0) {
109				/*
110				 * Due to a kernel bug, after 2^31 bytes,
111				 * the kernel file offset overflows and
112				 * read fails with EINVAL. The lseek()
113				 * to 0 will fix things.
114				 */
115				(void)lseek(pc->fd, 0L, SEEK_SET);
116				goto again;
117			}
118			snprintf(pc->errbuf, sizeof(pc->errbuf), "pf read: %s",
119				pcap_strerror(errno));
120			return (-1);
121		}
122		bp = pc->buffer + pc->offset;
123	} else
124		bp = pc->bp;
125	/*
126	 * Loop through each packet.
127	 */
128	n = 0;
129#ifdef PCAP_FDDIPAD
130	pad = pc->fddipad;
131#endif
132	while (cc > 0) {
133		/*
134		 * Has "pcap_breakloop()" been called?
135		 * If so, return immediately - if we haven't read any
136		 * packets, clear the flag and return -2 to indicate
137		 * that we were told to break out of the loop, otherwise
138		 * leave the flag set, so that the *next* call will break
139		 * out of the loop without having read any packets, and
140		 * return the number of packets we've processed so far.
141		 */
142		if (pc->break_loop) {
143			if (n == 0) {
144				pc->break_loop = 0;
145				return (-2);
146			} else {
147				pc->cc = cc;
148				pc->bp = bp;
149				return (n);
150			}
151		}
152		if (cc < sizeof(*sp)) {
153			snprintf(pc->errbuf, sizeof(pc->errbuf),
154			    "pf short read (%d)", cc);
155			return (-1);
156		}
157#ifdef LBL_ALIGN
158		if ((long)bp & 3) {
159			sp = &stamp;
160			memcpy((char *)sp, (char *)bp, sizeof(*sp));
161		} else
162#endif
163			sp = (struct enstamp *)bp;
164		if (sp->ens_stamplen != sizeof(*sp)) {
165			snprintf(pc->errbuf, sizeof(pc->errbuf),
166			    "pf short stamplen (%d)",
167			    sp->ens_stamplen);
168			return (-1);
169		}
170
171		p = bp + sp->ens_stamplen;
172		buflen = sp->ens_count;
173		if (buflen > pc->snapshot)
174			buflen = pc->snapshot;
175
176		/* Calculate inc before possible pad update */
177		inc = ENALIGN(buflen + sp->ens_stamplen);
178		cc -= inc;
179		bp += inc;
180		pc->md.TotPkts++;
181		pc->md.TotDrops += sp->ens_dropped;
182		pc->md.TotMissed = sp->ens_ifoverflows;
183		if (pc->md.OrigMissed < 0)
184			pc->md.OrigMissed = pc->md.TotMissed;
185
186		/*
187		 * Short-circuit evaluation: if using BPF filter
188		 * in kernel, no need to do it now - we already know
189		 * the packet passed the filter.
190		 *
191#ifdef PCAP_FDDIPAD
192		 * Note: the filter code was generated assuming
193		 * that pc->fddipad was the amount of padding
194		 * before the header, as that's what's required
195		 * in the kernel, so we run the filter before
196		 * skipping that padding.
197#endif
198		 */
199		if (pc->md.use_bpf ||
200		    bpf_filter(pc->fcode.bf_insns, p, sp->ens_count, buflen)) {
201			struct pcap_pkthdr h;
202			pc->md.TotAccepted++;
203			h.ts = sp->ens_tstamp;
204#ifdef PCAP_FDDIPAD
205			h.len = sp->ens_count - pad;
206#else
207			h.len = sp->ens_count;
208#endif
209#ifdef PCAP_FDDIPAD
210			p += pad;
211			buflen -= pad;
212#endif
213			h.caplen = buflen;
214			(*callback)(user, &h, p);
215			if (++n >= cnt && cnt > 0) {
216				pc->cc = cc;
217				pc->bp = bp;
218				return (n);
219			}
220		}
221	}
222	pc->cc = 0;
223	return (n);
224}
225
226static int
227pcap_inject_pf(pcap_t *p, const void *buf, size_t size)
228{
229	int ret;
230
231	ret = write(p->fd, buf, size);
232	if (ret == -1) {
233		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "send: %s",
234		    pcap_strerror(errno));
235		return (-1);
236	}
237	return (ret);
238}
239
240static int
241pcap_stats_pf(pcap_t *p, struct pcap_stat *ps)
242{
243
244	/*
245	 * If packet filtering is being done in the kernel:
246	 *
247	 *	"ps_recv" counts only packets that passed the filter.
248	 *	This does not include packets dropped because we
249	 *	ran out of buffer space.  (XXX - perhaps it should,
250	 *	by adding "ps_drop" to "ps_recv", for compatibility
251	 *	with some other platforms.  On the other hand, on
252	 *	some platforms "ps_recv" counts only packets that
253	 *	passed the filter, and on others it counts packets
254	 *	that didn't pass the filter....)
255	 *
256	 *	"ps_drop" counts packets that passed the kernel filter
257	 *	(if any) but were dropped because the input queue was
258	 *	full.
259	 *
260	 *	"ps_ifdrop" counts packets dropped by the network
261	 *	inteface (regardless of whether they would have passed
262	 *	the input filter, of course).
263	 *
264	 * If packet filtering is not being done in the kernel:
265	 *
266	 *	"ps_recv" counts only packets that passed the filter.
267	 *
268	 *	"ps_drop" counts packets that were dropped because the
269	 *	input queue was full, regardless of whether they passed
270	 *	the userland filter.
271	 *
272	 *	"ps_ifdrop" counts packets dropped by the network
273	 *	inteface (regardless of whether they would have passed
274	 *	the input filter, of course).
275	 *
276	 * These statistics don't include packets not yet read from
277	 * the kernel by libpcap, but they may include packets not
278	 * yet read from libpcap by the application.
279	 */
280	ps->ps_recv = p->md.TotAccepted;
281	ps->ps_drop = p->md.TotDrops;
282	ps->ps_ifdrop = p->md.TotMissed - p->md.OrigMissed;
283	return (0);
284}
285
286/*
287 * We include the OS's <net/bpf.h>, not our "pcap/bpf.h", so we probably
288 * don't get DLT_DOCSIS defined.
289 */
290#ifndef DLT_DOCSIS
291#define DLT_DOCSIS	143
292#endif
293
294static int
295pcap_activate_pf(pcap_t *p)
296{
297	short enmode;
298	int backlog = -1;	/* request the most */
299	struct enfilter Filter;
300	struct endevp devparams;
301
302	/*
303	 * Initially try a read/write open (to allow the inject
304	 * method to work).  If that fails due to permission
305	 * issues, fall back to read-only.  This allows a
306	 * non-root user to be granted specific access to pcap
307	 * capabilities via file permissions.
308	 *
309	 * XXX - we should have an API that has a flag that
310	 * controls whether to open read-only or read-write,
311	 * so that denial of permission to send (or inability
312	 * to send, if sending packets isn't supported on
313	 * the device in question) can be indicated at open
314	 * time.
315	 *
316	 * XXX - we assume here that "pfopen()" does not, in fact, modify
317	 * its argument, even though it takes a "char *" rather than a
318	 * "const char *" as its first argument.  That appears to be
319	 * the case, at least on Digital UNIX 4.0.
320	 */
321	p->fd = pfopen(p->opt.source, O_RDWR);
322	if (p->fd == -1 && errno == EACCES)
323		p->fd = pfopen(p->opt.source, O_RDONLY);
324	if (p->fd < 0) {
325		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "pf open: %s: %s\n\
326your system may not be properly configured; see the packetfilter(4) man page\n",
327			p->opt.source, pcap_strerror(errno));
328		goto bad;
329	}
330	p->md.OrigMissed = -1;
331	enmode = ENTSTAMP|ENBATCH|ENNONEXCL;
332	if (p->opt.promisc)
333		enmode |= ENPROMISC;
334	if (ioctl(p->fd, EIOCMBIS, (caddr_t)&enmode) < 0) {
335		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCMBIS: %s",
336		    pcap_strerror(errno));
337		goto bad;
338	}
339#ifdef	ENCOPYALL
340	/* Try to set COPYALL mode so that we see packets to ourself */
341	enmode = ENCOPYALL;
342	(void)ioctl(p->fd, EIOCMBIS, (caddr_t)&enmode);/* OK if this fails */
343#endif
344	/* set the backlog */
345	if (ioctl(p->fd, EIOCSETW, (caddr_t)&backlog) < 0) {
346		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCSETW: %s",
347		    pcap_strerror(errno));
348		goto bad;
349	}
350	/* discover interface type */
351	if (ioctl(p->fd, EIOCDEVP, (caddr_t)&devparams) < 0) {
352		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCDEVP: %s",
353		    pcap_strerror(errno));
354		goto bad;
355	}
356	/* HACK: to compile prior to Ultrix 4.2 */
357#ifndef	ENDT_FDDI
358#define	ENDT_FDDI	4
359#endif
360	switch (devparams.end_dev_type) {
361
362	case ENDT_10MB:
363		p->linktype = DLT_EN10MB;
364		p->offset = 2;
365		/*
366		 * This is (presumably) a real Ethernet capture; give it a
367		 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
368		 * that an application can let you choose it, in case you're
369		 * capturing DOCSIS traffic that a Cisco Cable Modem
370		 * Termination System is putting out onto an Ethernet (it
371		 * doesn't put an Ethernet header onto the wire, it puts raw
372		 * DOCSIS frames out on the wire inside the low-level
373		 * Ethernet framing).
374		 */
375		p->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
376		/*
377		 * If that fails, just leave the list empty.
378		 */
379		if (p->dlt_list != NULL) {
380			p->dlt_list[0] = DLT_EN10MB;
381			p->dlt_list[1] = DLT_DOCSIS;
382			p->dlt_count = 2;
383		}
384		break;
385
386	case ENDT_FDDI:
387		p->linktype = DLT_FDDI;
388		break;
389
390#ifdef ENDT_SLIP
391	case ENDT_SLIP:
392		p->linktype = DLT_SLIP;
393		break;
394#endif
395
396#ifdef ENDT_PPP
397	case ENDT_PPP:
398		p->linktype = DLT_PPP;
399		break;
400#endif
401
402#ifdef ENDT_LOOPBACK
403	case ENDT_LOOPBACK:
404		/*
405		 * It appears to use Ethernet framing, at least on
406		 * Digital UNIX 4.0.
407		 */
408		p->linktype = DLT_EN10MB;
409		p->offset = 2;
410		break;
411#endif
412
413#ifdef ENDT_TRN
414	case ENDT_TRN:
415		p->linktype = DLT_IEEE802;
416		break;
417#endif
418
419	default:
420		/*
421		 * XXX - what about ENDT_IEEE802?  The pfilt.h header
422		 * file calls this "IEEE 802 networks (non-Ethernet)",
423		 * but that doesn't specify a specific link layer type;
424		 * it could be 802.4, or 802.5 (except that 802.5 is
425		 * ENDT_TRN), or 802.6, or 802.11, or....  That's why
426		 * DLT_IEEE802 was hijacked to mean Token Ring in various
427		 * BSDs, and why we went along with that hijacking.
428		 *
429		 * XXX - what about ENDT_HDLC and ENDT_NULL?
430		 * Presumably, as ENDT_OTHER is just "Miscellaneous
431		 * framing", there's not much we can do, as that
432		 * doesn't specify a particular type of header.
433		 */
434		snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
435		    "unknown data-link type %u", devparams.end_dev_type);
436		goto bad;
437	}
438	/* set truncation */
439#ifdef PCAP_FDDIPAD
440	if (p->linktype == DLT_FDDI) {
441		p->fddipad = PCAP_FDDIPAD;
442
443		/* packetfilter includes the padding in the snapshot */
444		p->snapshot += PCAP_FDDIPAD;
445	} else
446		p->fddipad = 0;
447#endif
448	if (ioctl(p->fd, EIOCTRUNCATE, (caddr_t)&p->snapshot) < 0) {
449		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCTRUNCATE: %s",
450		    pcap_strerror(errno));
451		goto bad;
452	}
453	/* accept all packets */
454	memset(&Filter, 0, sizeof(Filter));
455	Filter.enf_Priority = 37;	/* anything > 2 */
456	Filter.enf_FilterLen = 0;	/* means "always true" */
457	if (ioctl(p->fd, EIOCSETF, (caddr_t)&Filter) < 0) {
458		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCSETF: %s",
459		    pcap_strerror(errno));
460		goto bad;
461	}
462
463	if (p->md.timeout != 0) {
464		struct timeval timeout;
465		timeout.tv_sec = p->md.timeout / 1000;
466		timeout.tv_usec = (p->md.timeout * 1000) % 1000000;
467		if (ioctl(p->fd, EIOCSRTIMEOUT, (caddr_t)&timeout) < 0) {
468			snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCSRTIMEOUT: %s",
469				pcap_strerror(errno));
470			goto bad;
471		}
472	}
473
474	p->bufsize = BUFSPACE;
475	p->buffer = (u_char*)malloc(p->bufsize + p->offset);
476	if (p->buffer == NULL) {
477		strlcpy(p->errbuf, pcap_strerror(errno), PCAP_ERRBUF_SIZE);
478		goto bad;
479	}
480
481	/*
482	 * "select()" and "poll()" work on packetfilter devices.
483	 */
484	p->selectable_fd = p->fd;
485
486	p->read_op = pcap_read_pf;
487	p->inject_op = pcap_inject_pf;
488	p->setfilter_op = pcap_setfilter_pf;
489	p->setdirection_op = NULL;	/* Not implemented. */
490	p->set_datalink_op = NULL;	/* can't change data link type */
491	p->getnonblock_op = pcap_getnonblock_fd;
492	p->setnonblock_op = pcap_setnonblock_fd;
493	p->stats_op = pcap_stats_pf;
494
495	return (0);
496 bad:
497	pcap_cleanup_live_common(p);
498	return (PCAP_ERROR);
499}
500
501pcap_t *
502pcap_create_interface(const char *device, char *ebuf)
503{
504	pcap_t *p;
505
506	p = pcap_create_common(device, ebuf);
507	if (p == NULL)
508		return (NULL);
509
510	p->activate_op = pcap_activate_pf;
511	return (p);
512}
513
514int
515pcap_platform_finddevs(pcap_if_t **alldevsp, char *errbuf)
516{
517	return (0);
518}
519
520static int
521pcap_setfilter_pf(pcap_t *p, struct bpf_program *fp)
522{
523	struct bpf_version bv;
524
525	/*
526	 * See if BIOCVERSION works.  If not, we assume the kernel doesn't
527	 * support BPF-style filters (it's not documented in the bpf(7)
528	 * or packetfiler(7) man pages, but the code used to fail if
529	 * BIOCSETF worked but BIOCVERSION didn't, and I've seen it do
530	 * kernel filtering in DU 4.0, so presumably BIOCVERSION works
531	 * there, at least).
532	 */
533	if (ioctl(p->fd, BIOCVERSION, (caddr_t)&bv) >= 0) {
534		/*
535		 * OK, we have the version of the BPF interpreter;
536		 * is it the same major version as us, and the same
537		 * or better minor version?
538		 */
539		if (bv.bv_major == BPF_MAJOR_VERSION &&
540		    bv.bv_minor >= BPF_MINOR_VERSION) {
541			/*
542			 * Yes.  Try to install the filter.
543			 */
544			if (ioctl(p->fd, BIOCSETF, (caddr_t)fp) < 0) {
545				snprintf(p->errbuf, sizeof(p->errbuf),
546				    "BIOCSETF: %s", pcap_strerror(errno));
547				return (-1);
548			}
549
550			/*
551			 * OK, that succeeded.  We're doing filtering in
552			 * the kernel.  (We assume we don't have a
553			 * userland filter installed - that'd require
554			 * a previous version check to have failed but
555			 * this one to succeed.)
556			 *
557			 * XXX - this message should be supplied to the
558			 * application as a warning of some sort,
559			 * except that if it's a GUI application, it's
560			 * not clear that it should be displayed in
561			 * a window to annoy the user.
562			 */
563			fprintf(stderr, "tcpdump: Using kernel BPF filter\n");
564			p->md.use_bpf = 1;
565
566			/*
567			 * Discard any previously-received packets,
568			 * as they might have passed whatever filter
569			 * was formerly in effect, but might not pass
570			 * this filter (BIOCSETF discards packets buffered
571			 * in the kernel, so you can lose packets in any
572			 * case).
573			 */
574			p->cc = 0;
575			return (0);
576		}
577
578		/*
579		 * We can't use the kernel's BPF interpreter; don't give
580		 * up, just log a message and be inefficient.
581		 *
582		 * XXX - this should really be supplied to the application
583		 * as a warning of some sort.
584		 */
585		fprintf(stderr,
586	    "tcpdump: Requires BPF language %d.%d or higher; kernel is %d.%d\n",
587		    BPF_MAJOR_VERSION, BPF_MINOR_VERSION,
588		    bv.bv_major, bv.bv_minor);
589	}
590
591	/*
592	 * We couldn't do filtering in the kernel; do it in userland.
593	 */
594	if (install_bpf_program(p, fp) < 0)
595		return (-1);
596
597	/*
598	 * XXX - this message should be supplied by the application as
599	 * a warning of some sort.
600	 */
601	fprintf(stderr, "tcpdump: Filtering in user process\n");
602	p->md.use_bpf = 0;
603	return (0);
604}
605