1231650Sluigi/*
2262153Sluigi * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
3262153Sluigi *
4231650Sluigi * Redistribution and use in source and binary forms, with or without
5262153Sluigi * modification, are permitted provided that the following conditions
6262153Sluigi * are met:
7262153Sluigi *
8231650Sluigi *   1. Redistributions of source code must retain the above copyright
9231650Sluigi *      notice, this list of conditions and the following disclaimer.
10231650Sluigi *   2. Redistributions in binary form must reproduce the above copyright
11231650Sluigi *      notice, this list of conditions and the following disclaimer in the
12262153Sluigi *      documentation and/or other materials provided with the distribution.
13262153Sluigi *
14262153Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15231650Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16262153Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17262153Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18262153Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19262153Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20262153Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21262153Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22262153Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23262153Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24262153Sluigi * SUCH DAMAGE.
25231650Sluigi */
26231650Sluigi
27231650Sluigi/*
28231650Sluigi * $FreeBSD$
29231650Sluigi *
30262153Sluigi * Functions and macros to manipulate netmap structures and packets
31262153Sluigi * in userspace. See netmap(4) for more information.
32231650Sluigi *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
38231650Sluigi *		(so simple, we could just do it manually)
39231650Sluigi *
40231650Sluigi * From there:
41231650Sluigi *	struct netmap_ring *NETMAP_TXRING(nifp, index)
42231650Sluigi *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *		we can access ring->head, ring->cur, ring->tail
44231650Sluigi *
45231650Sluigi *	ring->slot[i] gives us the i-th slot (we can access
46262153Sluigi *		directly len, flags, buf_idx)
47231650Sluigi *
48257768Sluigi *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
49257768Sluigi *		the buffer numbered x
50231650Sluigi *
51262153Sluigi * All ring indexes (head, cur, tail) should always move forward.
52262153Sluigi * To compute the next index in a circular ring you can use
53262153Sluigi *	i = nm_ring_next(ring, i);
54262153Sluigi *
 * To ease porting apps from pcap to netmap we supply a few functions
56262153Sluigi * that can be called to open, close, read and write on netmap in a way
57262153Sluigi * similar to libpcap. Note that the read/write function depend on
58262153Sluigi * an ioctl()/select()/poll() being issued to refill rings or push
59262153Sluigi * packets out.
60262153Sluigi *
61262153Sluigi * In order to use these, include #define NETMAP_WITH_LIBS
62262153Sluigi * in the source file that invokes these functions.
63231650Sluigi */
64231650Sluigi
65231650Sluigi#ifndef _NET_NETMAP_USER_H_
66231650Sluigi#define _NET_NETMAP_USER_H_
67231650Sluigi
68262153Sluigi#include <stdint.h>
69262153Sluigi#include <sys/socket.h>		/* apple needs sockaddr */
70262153Sluigi#include <net/if.h>		/* IFNAMSIZ */
71262153Sluigi
72262153Sluigi#ifndef likely
73262153Sluigi#define likely(x)	__builtin_expect(!!(x), 1)
74262153Sluigi#define unlikely(x)	__builtin_expect(!!(x), 0)
75262153Sluigi#endif /* likely and unlikely */
76262153Sluigi
77262153Sluigi#include <net/netmap.h>
78262153Sluigi
/*
 * Pointer helpers for the shared memory region.
 *
 * _NETMAP_OFFSET(type, ptr, offset) yields, cast to 'type', the address
 * found 'offset' bytes past 'ptr'.
 */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* The struct netmap_if located _ofs bytes past _base. */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* TX ring 'index': located (nifp)->ring_ofs[index] bytes past nifp. */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/*
 * RX ring 'index': its offset is stored in ring_ofs[] after the
 * ni_tx_rings + 1 entries used by the TX rings.
 * 'index' is expanded as written, so pass a simple expression.
 */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *,	\
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/*
 * Address of buffer number 'index', relative to 'ring'.
 * The index is widened to size_t before the multiplication so that the
 * byte offset is not truncated to the (possibly narrower) type of the
 * operands when the buffer region is large.
 */
#define NETMAP_BUF(ring, index)				\
	((char *)(ring) + (ring)->buf_ofs +		\
		((size_t)(index) * (ring)->nr_buf_size))

/* Inverse of NETMAP_BUF(): number of the buffer that starts at 'buf'. */
#define NETMAP_BUF_IDX(ring, buf)			\
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )
97235549Sluigi
98231650Sluigi
99262153Sluigistatic inline uint32_t
100262153Sluiginm_ring_next(struct netmap_ring *r, uint32_t i)
101262153Sluigi{
102262153Sluigi	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
103262153Sluigi}
104235549Sluigi
105262153Sluigi
106231650Sluigi/*
107262153Sluigi * Return 1 if we have pending transmissions in the tx ring.
108262153Sluigi * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
109231650Sluigi */
110262153Sluigistatic inline int
111262153Sluiginm_tx_pending(struct netmap_ring *r)
112262153Sluigi{
113262153Sluigi	return nm_ring_next(r, r->tail) != r->head;
114262153Sluigi}
115231650Sluigi
116262153Sluigi
117262153Sluigistatic inline uint32_t
118262153Sluiginm_ring_space(struct netmap_ring *ring)
119262153Sluigi{
120262153Sluigi        int ret = ring->tail - ring->cur;
121262153Sluigi        if (ret < 0)
122262153Sluigi                ret += ring->num_slots;
123262153Sluigi        return ret;
124262153Sluigi}
125262153Sluigi
126262153Sluigi
127262153Sluigi#ifdef NETMAP_WITH_LIBS
128262153Sluigi/*
129262153Sluigi * Support for simple I/O libraries.
130262153Sluigi * Include other system headers required for compiling this.
131262153Sluigi */
132262153Sluigi
133262153Sluigi#ifndef HAVE_NETMAP_WITH_LIBS
134262153Sluigi#define HAVE_NETMAP_WITH_LIBS
135262153Sluigi
#include <stdio.h>	/* fprintf, stderr (debug macros) */
#include <sys/time.h>
#include <sys/mman.h>
#include <string.h>	/* memset */
#include <sys/ioctl.h>
#include <sys/errno.h>	/* EINVAL */
#include <fcntl.h>	/* O_RDWR */
#include <unistd.h>	/* close() */
#include <signal.h>
#include <stdlib.h>
145262153Sluigi
#ifndef ND /* debug macros */
/*
 * Debug support.
 * ND() takes the same arguments as D() and expands to nothing.
 * D() writes one line to stderr: seconds (modulo 1000) and microseconds
 * from gettimeofday(), the calling function, the line number and the
 * printf-style message.
 */
#define ND(_fmt, ...) do {} while(0)
#define D(_fmt, ...)						\
	do {							\
		struct timeval _t0;				\
		gettimeofday(&_t0, NULL);			\
		fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n",	\
		    (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec,	\
		    __func__, __LINE__, ##__VA_ARGS__);		\
	} while (0)

/*
 * Rate limited version of "D", lps indicates how many per second.
 * Each expansion site keeps its own counter, reset whenever the
 * current second changes. The second is stored as time_t so it is
 * compared with tv_sec without truncation, and lps is parenthesized
 * so that any expression can be passed.
 */
#define RD(lps, format, ...)                                    \
    do {                                                        \
        static time_t t0;                                       \
        static int __cnt;                                       \
        struct timeval __xxts;                                  \
        gettimeofday(&__xxts, NULL);                            \
        if (t0 != __xxts.tv_sec) {                              \
            t0 = __xxts.tv_sec;                                 \
            __cnt = 0;                                          \
        }                                                       \
        if (__cnt++ < (lps)) {                                  \
            D(format, ##__VA_ARGS__);                           \
        }                                                       \
    } while (0)
#endif
173262153Sluigi
/*
 * Per-packet header passed to the receive callback and filled in by
 * nm_nextpkt(). Same layout as pcap_pkthdr.
 */
struct nm_pkthdr {
	struct timeval	ts;	/* timestamp, copied from the ring */
	uint32_t	caplen;	/* set to the slot length */
	uint32_t	len;	/* set to the slot length */
};
179262153Sluigi
180262153Sluigistruct nm_stat {	/* same as pcap_stat	*/
181262153Sluigi	u_int	ps_recv;
182262153Sluigi	u_int	ps_drop;
183262153Sluigi	u_int	ps_ifdrop;
184262153Sluigi#ifdef WIN32
185262153Sluigi	u_int	bs_capt;
186262153Sluigi#endif /* WIN32 */
187262153Sluigi};
188262153Sluigi
189262153Sluigi#define NM_ERRBUF_SIZE	512
190262153Sluigi
191262153Sluigistruct nm_desc {
192262153Sluigi	struct nm_desc *self; /* point to self if netmap. */
193262153Sluigi	int fd;
194262153Sluigi	void *mem;
195262153Sluigi	int memsize;
196262153Sluigi	int done_mmap;	/* set if mem is the result of mmap */
197262153Sluigi	struct netmap_if * const nifp;
198262153Sluigi	uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
199262153Sluigi	uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
200262153Sluigi	struct nmreq req;	/* also contains the nr_name = ifname */
201262153Sluigi	struct nm_pkthdr hdr;
202262153Sluigi
203262153Sluigi	/*
204262153Sluigi	 * The memory contains netmap_if, rings and then buffers.
205262153Sluigi	 * Given a pointer (e.g. to nm_inject) we can compare with
206262153Sluigi	 * mem/buf_start/buf_end to tell if it is a buffer or
207262153Sluigi	 * some other descriptor in our region.
208262153Sluigi	 * We also store a pointer to some ring as it helps in the
209262153Sluigi	 * translation from buffer indexes to addresses.
210262153Sluigi	 */
211262153Sluigi	struct netmap_ring * const some_ring;
212262153Sluigi	void * const buf_start;
213262153Sluigi	void * const buf_end;
214262153Sluigi	/* parameters from pcap_open_live */
215262153Sluigi	int snaplen;
216262153Sluigi	int promisc;
217262153Sluigi	int to_ms;
218262153Sluigi	char *errbuf;
219262153Sluigi
220262153Sluigi	/* save flags so we can restore them on close */
221262153Sluigi	uint32_t if_flags;
222262153Sluigi        uint32_t if_reqcap;
223262153Sluigi        uint32_t if_curcap;
224262153Sluigi
225262153Sluigi	struct nm_stat st;
226262153Sluigi	char msg[NM_ERRBUF_SIZE];
227262153Sluigi};
228262153Sluigi
/*
 * A descriptor that was opened correctly has d->self == d; this is what
 * IS_NETMAP_DESC() checks, after rejecting a null pointer.
 * Eventually we should also use some magic number.
 *
 * P2NMD() casts any pointer to a struct nm_desc pointer, NETMAP_FD()
 * reads the file descriptor stored in it.
 */
#define P2NMD(p)		((struct nm_desc *)(p))
#define IS_NETMAP_DESC(d)	((d) && P2NMD(d) == P2NMD(d)->self)
#define NETMAP_FD(d)		(P2NMD(d)->fd)
236262153Sluigi
237262153Sluigi
/*
 * Slightly optimized packet copy. Lengths below 1024 are copied in
 * whole 64-byte chunks, i.e. the length is effectively rounded up to
 * a multiple of 64; this is often faster than dealing with odd sizes.
 * Lengths of 1024 and above are handed to memcpy() unchanged.
 * We assume there is enough room in the source and destination buffers
 * for the rounded-up length.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
nm_pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *in = (const uint64_t *)_src;
	uint64_t *out = (uint64_t *)_dst;

	if (unlikely(l >= 1024)) {
		memcpy(out, in, l);
		return;
	}
	/* eight 64-bit words, i.e. one 64-byte chunk, per iteration */
	while (likely(l > 0)) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = in[3];
		out[4] = in[4];
		out[5] = in[5];
		out[6] = in[6];
		out[7] = in[7];
		in += 8;
		out += 8;
		l -= 64;
	}
}
267262153Sluigi
268262153Sluigi
269262153Sluigi/*
270262153Sluigi * The callback, invoked on each received packet. Same as libpcap
271262153Sluigi */
272262153Sluigitypedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
273262153Sluigi
274262153Sluigi/*
275262153Sluigi *--- the pcap-like API ---
276262153Sluigi *
277262153Sluigi * nm_open() opens a file descriptor, binds to a port and maps memory.
278262153Sluigi *
279262153Sluigi * ifname	(netmap:foo or vale:foo) is the port name
 *		a suffix can indicate the following:
281262153Sluigi *		^		bind the host (sw) ring pair
282262153Sluigi *		*		bind host and NIC ring pairs (transparent)
283262153Sluigi *		-NN		bind individual NIC ring pair
284262153Sluigi *		{NN		bind master side of pipe NN
285262153Sluigi *		}NN		bind slave side of pipe NN
286262153Sluigi *
287262153Sluigi * req		provides the initial values of nmreq before parsing ifname.
 *		Remember that the ifname parsing will override the ring
 *		number in nr_ringid, and nr_flags;
290262153Sluigi * flags	special functions, normally 0
291262153Sluigi *		indicates which fields of *arg are significant
292262153Sluigi * arg		special functions, normally NULL
293262153Sluigi *		if passed a netmap_desc with mem != NULL,
294262153Sluigi *		use that memory instead of mmap.
295262153Sluigi */
296262153Sluigi
static struct nm_desc *
nm_open(const char *ifname, const struct nmreq *req, uint64_t flags,
	const struct nm_desc *arg);
299262153Sluigi
/*
 * Flags for nm_open(): they select which fields are imported from the
 * parent descriptor passed as 'arg'.
 * The same argument can also carry NETMAP_NO_TX_POLL and
 * NETMAP_DO_RX_POLL, which set the initial value for these flags.
 * Note that the 16 low bits of the flags are reserved for data
 * that may go into the nmreq.
 */
enum {
	NM_OPEN_NO_MMAP =	1 << 18, /* 0x040000 reuse mmap from parent */
	NM_OPEN_IFNAME =	1 << 19, /* 0x080000 nr_name, nr_ringid, nr_flags */
	NM_OPEN_ARG1 =		1 << 20, /* 0x100000 */
	NM_OPEN_ARG2 =		1 << 21, /* 0x200000 */
	NM_OPEN_ARG3 =		1 << 22, /* 0x400000 */
	NM_OPEN_RING_CFG =	1 << 23, /* 0x800000 tx|rx rings|slots */
};
316262153Sluigi
317262153Sluigi
/*
 * nm_close()	closes the descriptor and releases its resources
 *		(mapped memory, file descriptor, the nm_desc itself)
 */

static int nm_close(struct nm_desc *d);
323262153Sluigi
324262153Sluigi/*
325262153Sluigi * nm_inject() is the same as pcap_inject()
326262153Sluigi * nm_dispatch() is the same as pcap_dispatch()
327262153Sluigi * nm_nextpkt() is the same as pcap_next()
328262153Sluigi */
329262153Sluigi
330262153Sluigistatic int nm_inject(struct nm_desc *, const void *, size_t);
331262153Sluigistatic int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
332262153Sluigistatic u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
333262153Sluigi
334262153Sluigi
335262153Sluigi/*
336262153Sluigi * Try to open, return descriptor if successful, NULL otherwise.
337262153Sluigi * An invalid netmap name will return errno = 0;
338262153Sluigi * You can pass a pointer to a pre-filled nm_desc to add special
339262153Sluigi * parameters. Flags is used as follows
340262153Sluigi * NM_OPEN_NO_MMAP	use the memory from arg, only
341262153Sluigi *			if the nr_arg2 (memory block) matches.
342262153Sluigi * NM_OPEN_ARG1		use req.nr_arg1 from arg
343262153Sluigi * NM_OPEN_ARG2		use req.nr_arg2 from arg
344262153Sluigi * NM_OPEN_RING_CFG	user ring config from arg
345262153Sluigi */
346262153Sluigistatic struct nm_desc *
347262153Sluiginm_open(const char *ifname, const struct nmreq *req,
348262153Sluigi	uint64_t new_flags, const struct nm_desc *arg)
349262153Sluigi{
350262153Sluigi	struct nm_desc *d = NULL;
351262153Sluigi	const struct nm_desc *parent = arg;
352262153Sluigi	u_int namelen;
353262153Sluigi	uint32_t nr_ringid = 0, nr_flags;
354262153Sluigi	const char *port = NULL;
355262153Sluigi	const char *errmsg = NULL;
356262153Sluigi
357262153Sluigi	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
358262153Sluigi		errno = 0; /* name not recognised, not an error */
359262153Sluigi		return NULL;
360262153Sluigi	}
361262153Sluigi	if (ifname[0] == 'n')
362262153Sluigi		ifname += 7;
363262153Sluigi	/* scan for a separator */
364262153Sluigi	for (port = ifname; *port && !index("-*^{}", *port); port++)
365262153Sluigi		;
366262153Sluigi	namelen = port - ifname;
367262153Sluigi	if (namelen >= sizeof(d->req.nr_name)) {
368262153Sluigi		errmsg = "name too long";
369262153Sluigi		goto fail;
370262153Sluigi	}
371262153Sluigi	switch (*port) {
372262153Sluigi	default:  /* '\0', no suffix */
373262153Sluigi		nr_flags = NR_REG_ALL_NIC;
374262153Sluigi		break;
375262153Sluigi	case '-': /* one NIC */
376262153Sluigi		nr_flags = NR_REG_ONE_NIC;
377262153Sluigi		nr_ringid = atoi(port + 1);
378262153Sluigi		break;
379262153Sluigi	case '*': /* NIC and SW, ignore port */
380262153Sluigi		nr_flags = NR_REG_NIC_SW;
381262153Sluigi		if (port[1]) {
382262153Sluigi			errmsg = "invalid port for nic+sw";
383262153Sluigi			goto fail;
384262153Sluigi		}
385262153Sluigi		break;
386262153Sluigi	case '^': /* only sw ring */
387262153Sluigi		nr_flags = NR_REG_SW;
388262153Sluigi		if (port[1]) {
389262153Sluigi			errmsg = "invalid port for sw ring";
390262153Sluigi			goto fail;
391262153Sluigi		}
392262153Sluigi		break;
393262153Sluigi	case '{':
394262153Sluigi		nr_flags = NR_REG_PIPE_MASTER;
395262153Sluigi		nr_ringid = atoi(port + 1);
396262153Sluigi		break;
397262153Sluigi	case '}':
398262153Sluigi		nr_flags = NR_REG_PIPE_SLAVE;
399262153Sluigi		nr_ringid = atoi(port + 1);
400262153Sluigi		break;
401262153Sluigi	}
402262153Sluigi
403262153Sluigi	if (nr_ringid >= NETMAP_RING_MASK) {
404262153Sluigi		errmsg = "invalid ringid";
405262153Sluigi		goto fail;
406262153Sluigi	}
407262153Sluigi	/* add the *XPOLL flags */
408262153Sluigi	nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
409262153Sluigi
410262153Sluigi	d = (struct nm_desc *)calloc(1, sizeof(*d));
411262153Sluigi	if (d == NULL) {
412262153Sluigi		errmsg = "nm_desc alloc failure";
413262153Sluigi		errno = ENOMEM;
414262153Sluigi		return NULL;
415262153Sluigi	}
416262153Sluigi	d->self = d;	/* set this early so nm_close() works */
417262153Sluigi	d->fd = open("/dev/netmap", O_RDWR);
418262153Sluigi	if (d->fd < 0) {
419262153Sluigi		errmsg = "cannot open /dev/netmap";
420262153Sluigi		goto fail;
421262153Sluigi	}
422262153Sluigi
423262153Sluigi	if (req)
424262153Sluigi		d->req = *req;
425262153Sluigi	d->req.nr_version = NETMAP_API;
426262153Sluigi	d->req.nr_ringid &= ~NETMAP_RING_MASK;
427262153Sluigi
428262153Sluigi	/* these fields are overridden by ifname and flags processing */
429262153Sluigi	d->req.nr_ringid |= nr_ringid;
430262153Sluigi	d->req.nr_flags = nr_flags;
431262153Sluigi	memcpy(d->req.nr_name, ifname, namelen);
432262153Sluigi	d->req.nr_name[namelen] = '\0';
433262153Sluigi	/* optionally import info from parent */
434262153Sluigi	if (IS_NETMAP_DESC(parent) && new_flags) {
435262153Sluigi		if (new_flags & NM_OPEN_ARG1)
436262153Sluigi			D("overriding ARG1 %d", parent->req.nr_arg1);
437262153Sluigi		d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
438262153Sluigi			parent->req.nr_arg1 : 4;
439262153Sluigi		if (new_flags & NM_OPEN_ARG2)
440262153Sluigi			D("overriding ARG2 %d", parent->req.nr_arg2);
441262153Sluigi		d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
442262153Sluigi			parent->req.nr_arg2 : 0;
443262153Sluigi		if (new_flags & NM_OPEN_ARG3)
444262153Sluigi			D("overriding ARG3 %d", parent->req.nr_arg3);
445262153Sluigi		d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
446262153Sluigi			parent->req.nr_arg3 : 0;
447262153Sluigi		if (new_flags & NM_OPEN_RING_CFG) {
448262153Sluigi			D("overriding RING_CFG");
449262153Sluigi			d->req.nr_tx_slots = parent->req.nr_tx_slots;
450262153Sluigi			d->req.nr_rx_slots = parent->req.nr_rx_slots;
451262153Sluigi			d->req.nr_tx_rings = parent->req.nr_tx_rings;
452262153Sluigi			d->req.nr_rx_rings = parent->req.nr_rx_rings;
453262153Sluigi		}
454262153Sluigi		if (new_flags & NM_OPEN_IFNAME) {
455262153Sluigi			D("overriding ifname %s ringid 0x%x flags 0x%x",
456262153Sluigi				parent->req.nr_name, parent->req.nr_ringid,
457262153Sluigi				parent->req.nr_flags);
458262153Sluigi			memcpy(d->req.nr_name, parent->req.nr_name,
459262153Sluigi				sizeof(d->req.nr_name));
460262153Sluigi			d->req.nr_ringid = parent->req.nr_ringid;
461262153Sluigi			d->req.nr_flags = parent->req.nr_flags;
462262153Sluigi		}
463262153Sluigi	}
464262153Sluigi	if (ioctl(d->fd, NIOCREGIF, &d->req)) {
465262153Sluigi		errmsg = "NIOCREGIF failed";
466262153Sluigi		goto fail;
467262153Sluigi	}
468262153Sluigi
469262153Sluigi	if (IS_NETMAP_DESC(parent) && parent->mem &&
470262153Sluigi	    parent->req.nr_arg2 == d->req.nr_arg2) {
471262153Sluigi		/* do not mmap, inherit from parent */
472262153Sluigi		d->memsize = parent->memsize;
473262153Sluigi		d->mem = parent->mem;
474262153Sluigi	} else {
475262153Sluigi		d->memsize = d->req.nr_memsize;
476262153Sluigi		d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
477262153Sluigi				d->fd, 0);
478262153Sluigi		if (d->mem == NULL) {
479262153Sluigi			errmsg = "mmap failed";
480262153Sluigi			goto fail;
481262153Sluigi		}
482262153Sluigi		d->done_mmap = 1;
483262153Sluigi	}
484262153Sluigi	{
485262153Sluigi		struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
486262153Sluigi		struct netmap_ring *r = NETMAP_RXRING(nifp, );
487262153Sluigi
488262153Sluigi		*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
489262153Sluigi		*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
490262153Sluigi		*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
491262153Sluigi		*(void **)(uintptr_t)&d->buf_end =
492262153Sluigi			(char *)d->mem + d->memsize;
493262153Sluigi	}
494262153Sluigi
495262153Sluigi	if (nr_flags ==  NR_REG_SW) { /* host stack */
496262153Sluigi		d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
497262153Sluigi		d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
498262153Sluigi	} else if (nr_flags ==  NR_REG_ALL_NIC) { /* only nic */
499262153Sluigi		d->first_tx_ring = 0;
500262153Sluigi		d->first_rx_ring = 0;
501262153Sluigi		d->last_tx_ring = d->req.nr_tx_rings - 1;
502262153Sluigi		d->last_rx_ring = d->req.nr_rx_rings - 1;
503262153Sluigi	} else if (nr_flags ==  NR_REG_NIC_SW) {
504262153Sluigi		d->first_tx_ring = 0;
505262153Sluigi		d->first_rx_ring = 0;
506262153Sluigi		d->last_tx_ring = d->req.nr_tx_rings;
507262153Sluigi		d->last_rx_ring = d->req.nr_rx_rings;
508262153Sluigi	} else if (nr_flags == NR_REG_ONE_NIC) {
509262153Sluigi		/* XXX check validity */
510262153Sluigi		d->first_tx_ring = d->last_tx_ring =
511262153Sluigi		d->first_rx_ring = d->last_rx_ring = nr_ringid;
512262153Sluigi	} else { /* pipes */
513262153Sluigi		d->first_tx_ring = d->last_tx_ring = 0;
514262153Sluigi		d->first_rx_ring = d->last_rx_ring = 0;
515262153Sluigi	}
516262153Sluigi
517262153Sluigi#ifdef DEBUG_NETMAP_USER
518262153Sluigi    { /* debugging code */
519262153Sluigi	int i;
520262153Sluigi
521262153Sluigi	D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
522262153Sluigi		d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
523262153Sluigi                d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
524262153Sluigi	for (i = 0; i <= d->req.nr_tx_rings; i++) {
525262153Sluigi		struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
526262153Sluigi		D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
527262153Sluigi	}
528262153Sluigi	for (i = 0; i <= d->req.nr_rx_rings; i++) {
529262153Sluigi		struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
530262153Sluigi		D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
531262153Sluigi	}
532262153Sluigi    }
533262153Sluigi#endif /* debugging */
534262153Sluigi
535262153Sluigi	d->cur_tx_ring = d->first_tx_ring;
536262153Sluigi	d->cur_rx_ring = d->first_rx_ring;
537262153Sluigi	return d;
538262153Sluigi
539262153Sluigifail:
540262153Sluigi	nm_close(d);
541262153Sluigi	if (errmsg)
542262153Sluigi		D("%s %s", errmsg, ifname);
543262153Sluigi	errno = EINVAL;
544262153Sluigi	return NULL;
545262153Sluigi}
546262153Sluigi
547262153Sluigi
548262153Sluigistatic int
549262153Sluiginm_close(struct nm_desc *d)
550262153Sluigi{
551262153Sluigi	/*
552262153Sluigi	 * ugly trick to avoid unused warnings
553262153Sluigi	 */
554262153Sluigi	static void *__xxzt[] __attribute__ ((unused))  =
555262153Sluigi		{ (void *)nm_open, (void *)nm_inject,
556262153Sluigi		  (void *)nm_dispatch, (void *)nm_nextpkt } ;
557262153Sluigi
558262153Sluigi	if (d == NULL || d->self != d)
559262153Sluigi		return EINVAL;
560262153Sluigi	if (d->done_mmap && d->mem)
561262153Sluigi		munmap(d->mem, d->memsize);
562262153Sluigi	if (d->fd != -1)
563262153Sluigi		close(d->fd);
564262153Sluigi	bzero(d, sizeof(*d));
565262153Sluigi	free(d);
566262153Sluigi	return 0;
567262153Sluigi}
568262153Sluigi
569262153Sluigi
570262153Sluigi/*
571262153Sluigi * Same prototype as pcap_inject(), only need to cast.
572262153Sluigi */
573262153Sluigistatic int
574262153Sluiginm_inject(struct nm_desc *d, const void *buf, size_t size)
575262153Sluigi{
576262153Sluigi	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
577262153Sluigi
578262153Sluigi	for (c = 0; c < n ; c++) {
579262153Sluigi		/* compute current ring to use */
580262153Sluigi		struct netmap_ring *ring;
581262153Sluigi		uint32_t i, idx;
582262153Sluigi		uint32_t ri = d->cur_tx_ring + c;
583262153Sluigi
584262153Sluigi		if (ri > d->last_tx_ring)
585262153Sluigi			ri = d->first_tx_ring;
586262153Sluigi		ring = NETMAP_TXRING(d->nifp, ri);
587262153Sluigi		if (nm_ring_empty(ring)) {
588262153Sluigi			continue;
589262153Sluigi		}
590262153Sluigi		i = ring->cur;
591262153Sluigi		idx = ring->slot[i].buf_idx;
592262153Sluigi		ring->slot[i].len = size;
593262153Sluigi		nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
594262153Sluigi		d->cur_tx_ring = ri;
595262153Sluigi		ring->head = ring->cur = nm_ring_next(ring, i);
596262153Sluigi		return size;
597262153Sluigi	}
598262153Sluigi	return 0; /* fail */
599262153Sluigi}
600262153Sluigi
601262153Sluigi
602262153Sluigi/*
603262153Sluigi * Same prototype as pcap_dispatch(), only need to cast.
604262153Sluigi */
605262153Sluigistatic int
606262153Sluiginm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
607262153Sluigi{
608262153Sluigi	int n = d->last_rx_ring - d->first_rx_ring + 1;
609262153Sluigi	int c, got = 0, ri = d->cur_rx_ring;
610262153Sluigi
611262153Sluigi	if (cnt == 0)
612262153Sluigi		cnt = -1;
613262153Sluigi	/* cnt == -1 means infinite, but rings have a finite amount
614262153Sluigi	 * of buffers and the int is large enough that we never wrap,
615262153Sluigi	 * so we can omit checking for -1
616262153Sluigi	 */
617262153Sluigi	for (c=0; c < n && cnt != got; c++) {
618262153Sluigi		/* compute current ring to use */
619262153Sluigi		struct netmap_ring *ring;
620262153Sluigi
621262153Sluigi		ri = d->cur_rx_ring + c;
622262153Sluigi		if (ri > d->last_rx_ring)
623262153Sluigi			ri = d->first_rx_ring;
624262153Sluigi		ring = NETMAP_RXRING(d->nifp, ri);
625262153Sluigi		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
626262153Sluigi			u_int i = ring->cur;
627262153Sluigi			u_int idx = ring->slot[i].buf_idx;
628262153Sluigi			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
629262153Sluigi
630262153Sluigi			// __builtin_prefetch(buf);
631262153Sluigi			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
632262153Sluigi			d->hdr.ts = ring->ts;
633262153Sluigi			cb(arg, &d->hdr, buf);
634262153Sluigi			ring->head = ring->cur = nm_ring_next(ring, i);
635262153Sluigi		}
636262153Sluigi	}
637262153Sluigi	d->cur_rx_ring = ri;
638262153Sluigi	return got;
639262153Sluigi}
640262153Sluigi
641262153Sluigistatic u_char *
642262153Sluiginm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
643262153Sluigi{
644262153Sluigi	int ri = d->cur_rx_ring;
645262153Sluigi
646262153Sluigi	do {
647262153Sluigi		/* compute current ring to use */
648262153Sluigi		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
649262153Sluigi		if (!nm_ring_empty(ring)) {
650262153Sluigi			u_int i = ring->cur;
651262153Sluigi			u_int idx = ring->slot[i].buf_idx;
652262153Sluigi			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
653262153Sluigi
654262153Sluigi			// __builtin_prefetch(buf);
655262153Sluigi			hdr->ts = ring->ts;
656262153Sluigi			hdr->len = hdr->caplen = ring->slot[i].len;
657262153Sluigi			ring->cur = nm_ring_next(ring, i);
658262153Sluigi			/* we could postpone advancing head if we want
659262153Sluigi			 * to hold the buffer. This can be supported in
660262153Sluigi			 * the future.
661262153Sluigi			 */
662262153Sluigi			ring->head = ring->cur;
663262153Sluigi			d->cur_rx_ring = ri;
664262153Sluigi			return buf;
665262153Sluigi		}
666262153Sluigi		ri++;
667262153Sluigi		if (ri > d->last_rx_ring)
668262153Sluigi			ri = d->first_rx_ring;
669262153Sluigi	} while (ri != d->cur_rx_ring);
670262153Sluigi	return NULL; /* nothing found */
671262153Sluigi}
672262153Sluigi
673262153Sluigi#endif /* !HAVE_NETMAP_WITH_LIBS */
674262153Sluigi
675262153Sluigi#endif /* NETMAP_WITH_LIBS */
676262153Sluigi
677231650Sluigi#endif /* _NET_NETMAP_USER_H_ */
678