1/*
2 * Copyright (C) 2014 Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 *   1. Redistributions of source code must retain the above copyright
9 *      notice, this list of conditions and the following disclaimer.
10 *   2. Redistributions in binary form must reproduce the above copyright
11 *      notice, this list of conditions and the following disclaimer in the
12 *      documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#ifdef HAVE_CONFIG_H
28#include <config.h>
29#endif
30
31#include <poll.h>
32#include <errno.h>
33#include <netdb.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38
39#define NETMAP_WITH_LIBS
40#include <net/netmap_user.h>
41
42#include "pcap-int.h"
43#include "pcap-netmap.h"
44
#ifndef __FreeBSD__
  /*
   * On FreeBSD this file turns promiscuous mode on and off with
   * IFF_PPROMISC, a flag that lives in ifr_flagshigh.  On every other
   * platform, make IFF_PPROMISC an alias for the ordinary IFF_PROMISC
   * flag so the rest of the code can use one name.
   *
   * XXX - DragonFly BSD?
   */
  #define IFF_PPROMISC	IFF_PROMISC
#endif /* __FreeBSD__ */
54
55struct pcap_netmap {
56	struct nm_desc *d;	/* pointer returned by nm_open() */
57	pcap_handler cb;	/* callback and argument */
58	u_char *cb_arg;
59	int must_clear_promisc;	/* flag */
60	uint64_t rx_pkts;	/* # of pkts received before the filter */
61};
62
63
64static int
65pcap_netmap_stats(pcap_t *p, struct pcap_stat *ps)
66{
67	struct pcap_netmap *pn = p->priv;
68
69	ps->ps_recv = (u_int)pn->rx_pkts;
70	ps->ps_drop = 0;
71	ps->ps_ifdrop = 0;
72	return 0;
73}
74
75
76static void
77pcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf)
78{
79	pcap_t *p = (pcap_t *)arg;
80	struct pcap_netmap *pn = p->priv;
81	const struct bpf_insn *pc = p->fcode.bf_insns;
82
83	++pn->rx_pkts;
84	if (pc == NULL || pcap_filter(pc, buf, h->len, h->caplen))
85		pn->cb(pn->cb_arg, h, buf);
86}
87
88
89static int
90pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user)
91{
92	int ret;
93	struct pcap_netmap *pn = p->priv;
94	struct nm_desc *d = pn->d;
95	struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 };
96
97	pn->cb = cb;
98	pn->cb_arg = user;
99
100	for (;;) {
101		if (p->break_loop) {
102			p->break_loop = 0;
103			return PCAP_ERROR_BREAK;
104		}
105		/* nm_dispatch won't run forever */
106
107		ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p);
108		if (ret != 0)
109			break;
110		errno = 0;
111		ret = poll(&pfd, 1, p->opt.timeout);
112	}
113	return ret;
114}
115
116
117/* XXX need to check the NIOCTXSYNC/poll */
118static int
119pcap_netmap_inject(pcap_t *p, const void *buf, int size)
120{
121	struct pcap_netmap *pn = p->priv;
122	struct nm_desc *d = pn->d;
123
124	return nm_inject(d, buf, size);
125}
126
127
128static int
129pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags)
130{
131	struct pcap_netmap *pn = p->priv;
132	struct nm_desc *d = pn->d;
133	struct ifreq ifr;
134	int error, fd = d->fd;
135
136#ifdef linux
137	fd = socket(AF_INET, SOCK_DGRAM, 0);
138	if (fd < 0) {
139		fprintf(stderr, "Error: cannot get device control socket.\n");
140		return -1;
141	}
142#endif /* linux */
143	bzero(&ifr, sizeof(ifr));
144	strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name));
145	switch (what) {
146	case SIOCSIFFLAGS:
147		/*
148		 * The flags we pass in are 32-bit and unsigned.
149		 *
150		 * On most if not all UN*Xes, ifr_flags is 16-bit and
151		 * signed, and the result of assigning a longer
152		 * unsigned value to a shorter signed value is
153		 * implementation-defined (even if, in practice, it'll
154		 * do what's intended on all platforms we support
155		 * result of assigning a 32-bit unsigned value).
156		 * So we mask out the upper 16 bits.
157		 */
158		ifr.ifr_flags = *if_flags & 0xffff;
159#ifdef __FreeBSD__
160		/*
161		 * In FreeBSD, we need to set the high-order flags,
162		 * as we're using IFF_PPROMISC, which is in those bits.
163		 *
164		 * XXX - DragonFly BSD?
165		 */
166		ifr.ifr_flagshigh = *if_flags >> 16;
167#endif /* __FreeBSD__ */
168		break;
169	}
170	error = ioctl(fd, what, &ifr);
171	if (!error) {
172		switch (what) {
173		case SIOCGIFFLAGS:
174			/*
175			 * The flags we return are 32-bit.
176			 *
177			 * On most if not all UN*Xes, ifr_flags is
178			 * 16-bit and signed, and will get sign-
179			 * extended, so that the upper 16 bits of
180			 * those flags will be forced on.  So we
181			 * mask out the upper 16 bits of the
182			 * sign-extended value.
183			 */
184			*if_flags = ifr.ifr_flags & 0xffff;
185#ifdef __FreeBSD__
186			/*
187			 * In FreeBSD, we need to return the
188			 * high-order flags, as we're using
189			 * IFF_PPROMISC, which is in those bits.
190			 *
191			 * XXX - DragonFly BSD?
192			 */
193			*if_flags |= (ifr.ifr_flagshigh << 16);
194#endif /* __FreeBSD__ */
195		}
196	}
197#ifdef linux
198	close(fd);
199#endif /* linux */
200	return error ? -1 : 0;
201}
202
203
204static void
205pcap_netmap_close(pcap_t *p)
206{
207	struct pcap_netmap *pn = p->priv;
208	struct nm_desc *d = pn->d;
209	uint32_t if_flags = 0;
210
211	if (pn->must_clear_promisc) {
212		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
213		if (if_flags & IFF_PPROMISC) {
214			if_flags &= ~IFF_PPROMISC;
215			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
216		}
217	}
218	nm_close(d);
219	pcap_cleanup_live_common(p);
220}
221
222
223static int
224pcap_netmap_activate(pcap_t *p)
225{
226	struct pcap_netmap *pn = p->priv;
227	struct nm_desc *d;
228	uint32_t if_flags = 0;
229
230	d = nm_open(p->opt.device, NULL, 0, NULL);
231	if (d == NULL) {
232		pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
233		    errno, "netmap open: cannot access %s",
234		    p->opt.device);
235		pcap_cleanup_live_common(p);
236		return (PCAP_ERROR);
237	}
238#if 0
239	fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n",
240	    __FUNCTION__, p->opt.device, d, d->fd,
241	    d->first_rx_ring, d->last_rx_ring);
242#endif
243	pn->d = d;
244	p->fd = d->fd;
245
246	/*
247	 * Turn a negative snapshot value (invalid), a snapshot value of
248	 * 0 (unspecified), or a value bigger than the normal maximum
249	 * value, into the maximum allowed value.
250	 *
251	 * If some application really *needs* a bigger snapshot
252	 * length, we should just increase MAXIMUM_SNAPLEN.
253	 */
254	if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
255		p->snapshot = MAXIMUM_SNAPLEN;
256
257	if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) {
258		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
259		if (!(if_flags & IFF_PPROMISC)) {
260			pn->must_clear_promisc = 1;
261			if_flags |= IFF_PPROMISC;
262			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
263		}
264	}
265	p->linktype = DLT_EN10MB;
266	p->selectable_fd = p->fd;
267	p->read_op = pcap_netmap_dispatch;
268	p->inject_op = pcap_netmap_inject;
269	p->setfilter_op = install_bpf_program;
270	p->setdirection_op = NULL;
271	p->set_datalink_op = NULL;
272	p->getnonblock_op = pcap_getnonblock_fd;
273	p->setnonblock_op = pcap_setnonblock_fd;
274	p->stats_op = pcap_netmap_stats;
275	p->cleanup_op = pcap_netmap_close;
276
277	return (0);
278}
279
280
281pcap_t *
282pcap_netmap_create(const char *device, char *ebuf, int *is_ours)
283{
284	pcap_t *p;
285
286	*is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4));
287	if (! *is_ours)
288		return NULL;
289	p = PCAP_CREATE_COMMON(ebuf, struct pcap_netmap);
290	if (p == NULL)
291		return (NULL);
292	p->activate_op = pcap_netmap_activate;
293	return (p);
294}
295
296/*
297 * The "device name" for netmap devices isn't a name for a device, it's
298 * an expression that indicates how the device should be set up, so
299 * there's no way to enumerate them.
300 */
301int
302pcap_netmap_findalldevs(pcap_if_list_t *devlistp _U_, char *err_str _U_)
303{
304	return 0;
305}
306