1/*
2 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 *   1. Redistributions of source code must retain the above copyright
9 *      notice, this list of conditions and the following disclaimer.
10 *   2. Redistributions in binary form must reproduce the above copyright
11 *      notice, this list of conditions and the following disclaimer in the
12 *      documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * $FreeBSD$
29 *
30 * Functions and macros to manipulate netmap structures and packets
31 * in userspace. See netmap(4) for more information.
32 *
33 * The address of the struct netmap_if, say nifp, is computed from the
34 * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
35 *	ioctl(fd, NIOCREG, &req);
36 *	mem = mmap(0, ... );
37 *	nifp = NETMAP_IF(mem, req.nr_nifp);
38 *		(so simple, we could just do it manually)
39 *
40 * From there:
41 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
42 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
43 *		we can access ring->cur, ring->head, ring->tail, etc.
44 *
45 *	ring->slot[i] gives us the i-th slot (we can access
46 *		directly len, flags, buf_idx)
47 *
48 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
49 *		the buffer numbered x
50 *
51 * All ring indexes (head, cur, tail) should always move forward.
52 * To compute the next index in a circular ring you can use
53 *	i = nm_ring_next(ring, i);
54 *
 * To ease porting apps from pcap to netmap we supply a few functions
56 * that can be called to open, close, read and write on netmap in a way
57 * similar to libpcap. Note that the read/write function depend on
58 * an ioctl()/select()/poll() being issued to refill rings or push
59 * packets out.
60 *
61 * In order to use these, include #define NETMAP_WITH_LIBS
62 * in the source file that invokes these functions.
63 */
64
65#ifndef _NET_NETMAP_USER_H_
66#define _NET_NETMAP_USER_H_
67
68#include <stdint.h>
69#include <sys/socket.h>		/* apple needs sockaddr */
70#include <net/if.h>		/* IFNAMSIZ */
71
72#ifndef likely
73#define likely(x)	__builtin_expect(!!(x), 1)
74#define unlikely(x)	__builtin_expect(!!(x), 0)
75#endif /* likely and unlikely */
76
77#include <net/netmap.h>
78
/*
 * Helper: cast (ptr + offset bytes) to the requested pointer type.
 * All netmap data structures live in a single mmap()ed region, so
 * everything is reached via byte offsets from a known base address.
 */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* struct netmap_if of a port, from the mmap base and the nr_offset */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* i-th tx ring of an interface; tx offsets come first in ring_ofs[] */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/* i-th rx ring; rx offsets follow the first ni_tx_rings+1 entries */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *,	\
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/* address of buffer 'index' (buffers have fixed size nr_buf_size) */
#define NETMAP_BUF(ring, index)				\
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

/* inverse of NETMAP_BUF: recover the buffer index from its address */
#define NETMAP_BUF_IDX(ring, buf)			\
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )
97
98
99static inline uint32_t
100nm_ring_next(struct netmap_ring *r, uint32_t i)
101{
102	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
103}
104
105
106/*
107 * Return 1 if we have pending transmissions in the tx ring.
108 * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
109 */
110static inline int
111nm_tx_pending(struct netmap_ring *r)
112{
113	return nm_ring_next(r, r->tail) != r->head;
114}
115
116
117static inline uint32_t
118nm_ring_space(struct netmap_ring *ring)
119{
120        int ret = ring->tail - ring->cur;
121        if (ret < 0)
122                ret += ring->num_slots;
123        return ret;
124}
125
126
127#ifdef NETMAP_WITH_LIBS
128/*
129 * Support for simple I/O libraries.
130 * Include other system headers required for compiling this.
131 */
132
133#ifndef HAVE_NETMAP_WITH_LIBS
134#define HAVE_NETMAP_WITH_LIBS
135
136#include <stdio.h>
137#include <sys/time.h>
138#include <sys/mman.h>
139#include <string.h>	/* memset */
140#include <sys/ioctl.h>
141#include <sys/errno.h>	/* EINVAL */
142#include <fcntl.h>	/* O_RDWR */
143#include <unistd.h>	/* close() */
144#include <signal.h>
145#include <stdlib.h>
146
#ifndef ND /* debug macros */
/* debug support */
/* ND(): compiled-out variant, used to silence a debug message */
#define ND(_fmt, ...) do {} while(0)
/* D(): log to stderr with seconds.microseconds, function name and line */
#define D(_fmt, ...)						\
	do {							\
		struct timeval _t0;				\
		gettimeofday(&_t0, NULL);			\
		fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n",	\
		    (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec,	\
		    __FUNCTION__, __LINE__, ##__VA_ARGS__);	\
        } while (0)

/* Rate limited version of "D", lps indicates how many per second */
#define RD(lps, format, ...)                                    \
    do {                                                        \
        static int __t0, __cnt;                                 \
        struct timeval __xxts;                                  \
        gettimeofday(&__xxts, NULL);                            \
        if (__t0 != __xxts.tv_sec) {                            \
            __t0 = __xxts.tv_sec;                               \
            __cnt = 0;                                          \
        }                                                       \
        if (__cnt++ < lps) {                                    \
            D(format, ##__VA_ARGS__);                           \
        }                                                       \
    } while (0)
#endif
174
struct nm_pkthdr {	/* same as pcap_pkthdr */
	struct timeval	ts;	/* timestamp, copied from the rx ring */
	uint32_t	caplen;	/* captured length (set equal to len) */
	uint32_t	len;	/* length of this packet (slot len) */
};

struct nm_stat {	/* same as pcap_stat	*/
	u_int	ps_recv;
	u_int	ps_drop;
	u_int	ps_ifdrop;
#ifdef WIN32
	u_int	bs_capt;
#endif /* WIN32 */
};

#define NM_ERRBUF_SIZE	512	/* size of the msg[] buffer below */

/*
 * Descriptor returned by nm_open(), analogous to a pcap_t.
 * Most fields are filled in by nm_open() and must not be
 * modified by the application.
 */
struct nm_desc {
	struct nm_desc *self; /* point to self if netmap. */
	int fd;			/* open("/dev/netmap") descriptor */
	void *mem;		/* mmap()ed (or inherited) region */
	uint32_t memsize;	/* size of the region above */
	int done_mmap;	/* set if mem is the result of mmap */
	struct netmap_if * const nifp;	/* netmap_if inside mem */
	/* range of rings bound to this descriptor; cur_* is the
	 * round-robin pointer used by nm_inject()/nm_dispatch() */
	uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
	uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
	struct nmreq req;	/* also contains the nr_name = ifname */
	struct nm_pkthdr hdr;	/* header passed to the nm_dispatch() callback */

	/*
	 * The memory contains netmap_if, rings and then buffers.
	 * Given a pointer (e.g. to nm_inject) we can compare with
	 * mem/buf_start/buf_end to tell if it is a buffer or
	 * some other descriptor in our region.
	 * We also store a pointer to some ring as it helps in the
	 * translation from buffer indexes to addresses.
	 */
	struct netmap_ring * const some_ring;
	void * const buf_start;
	void * const buf_end;
	/* parameters from pcap_open_live */
	int snaplen;
	int promisc;
	int to_ms;
	char *errbuf;

	/* save flags so we can restore them on close */
	uint32_t if_flags;
        uint32_t if_reqcap;
        uint32_t if_curcap;

	struct nm_stat st;	/* counters, same layout as pcap_stat */
	char msg[NM_ERRBUF_SIZE];	/* last error message */
};

/*
 * when the descriptor is open correctly, d->self == d
 * Eventually we should also use some magic number.
 */
#define P2NMD(p)		((struct nm_desc *)(p))
#define IS_NETMAP_DESC(d)	((d) && P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d)		(P2NMD(d)->fd)
237
238
239/*
240 * this is a slightly optimized copy routine which rounds
241 * to multiple of 64 bytes and is often faster than dealing
242 * with other odd sizes. We assume there is enough room
243 * in the source and destination buffers.
244 *
245 * XXX only for multiples of 64 bytes, non overlapped.
246 */
/*
 * Copy l bytes from _src to _dst, rounding the short-packet path
 * up to a multiple of 64 bytes; the caller guarantees that both
 * buffers are large enough for the rounded-up length.
 */
static inline void
nm_pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = (const uint64_t *)_src;
	uint64_t *dst = (uint64_t *)_dst;
	int k;

	if (l >= 1024) {
		/* for large packets the libc copy is at least as fast */
		memcpy(dst, src, l);
		return;
	}
	/* short packets: copy 64 bytes per iteration, 8 words at a time */
	while (l > 0) {
		for (k = 0; k < 8; k++)
			*dst++ = *src++;
		l -= 64;
	}
}
268
269
/*
 * The callback, invoked on each received packet. Same as libpcap
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);

/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name
 *		a suffix can indicate the following:
 *		^		bind the host (sw) ring pair
 *		*		bind host and NIC ring pairs (transparent)
 *		-NN		bind individual NIC ring pair
 *		{NN		bind master side of pipe NN
 *		}NN		bind slave side of pipe NN
 *
 * req		provides the initial values of nmreq before parsing ifname.
 *		Remember that the ifname parsing will override the ring
 *		number in nm_ringid, and part of nm_flags;
 * flags	special functions, normally 0
 *		indicates which fields of *arg are significant
 * arg		special functions, normally NULL
 *		if passed a netmap_desc with mem != NULL,
 *		use that memory instead of mmap.
 */

static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
	uint64_t flags, const struct nm_desc *arg);

/*
 * nm_open can import some fields from the parent descriptor.
 * These flags control which ones.
 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
 * which set the initial value for these flags.
 * Note that the 16 low bits of the flags are reserved for data
 * that may go into the nmreq.
 */
enum {
	NM_OPEN_NO_MMAP =	0x040000, /* reuse mmap from parent */
	NM_OPEN_IFNAME =	0x080000, /* nr_name, nr_ringid, nr_flags */
	NM_OPEN_ARG1 =		0x100000,
	NM_OPEN_ARG2 =		0x200000,
	NM_OPEN_ARG3 =		0x400000,
	NM_OPEN_RING_CFG =	0x800000, /* tx|rx rings|slots */
};


/*
 * nm_close()	closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc *, const void *, size_t);
static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
334
335
336/*
337 * Try to open, return descriptor if successful, NULL otherwise.
338 * An invalid netmap name will return errno = 0;
339 * You can pass a pointer to a pre-filled nm_desc to add special
340 * parameters. Flags is used as follows
341 * NM_OPEN_NO_MMAP	use the memory from arg, only
342 *			if the nr_arg2 (memory block) matches.
343 * NM_OPEN_ARG1		use req.nr_arg1 from arg
344 * NM_OPEN_ARG2		use req.nr_arg2 from arg
345 * NM_OPEN_RING_CFG	user ring config from arg
346 */
347static struct nm_desc *
348nm_open(const char *ifname, const struct nmreq *req,
349	uint64_t new_flags, const struct nm_desc *arg)
350{
351	struct nm_desc *d = NULL;
352	const struct nm_desc *parent = arg;
353	u_int namelen;
354	uint32_t nr_ringid = 0, nr_flags;
355	const char *port = NULL;
356	const char *errmsg = NULL;
357
358	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
359		errno = 0; /* name not recognised, not an error */
360		return NULL;
361	}
362	if (ifname[0] == 'n')
363		ifname += 7;
364	/* scan for a separator */
365	for (port = ifname; *port && !index("-*^{}", *port); port++)
366		;
367	namelen = port - ifname;
368	if (namelen >= sizeof(d->req.nr_name)) {
369		errmsg = "name too long";
370		goto fail;
371	}
372	switch (*port) {
373	default:  /* '\0', no suffix */
374		nr_flags = NR_REG_ALL_NIC;
375		break;
376	case '-': /* one NIC */
377		nr_flags = NR_REG_ONE_NIC;
378		nr_ringid = atoi(port + 1);
379		break;
380	case '*': /* NIC and SW, ignore port */
381		nr_flags = NR_REG_NIC_SW;
382		if (port[1]) {
383			errmsg = "invalid port for nic+sw";
384			goto fail;
385		}
386		break;
387	case '^': /* only sw ring */
388		nr_flags = NR_REG_SW;
389		if (port[1]) {
390			errmsg = "invalid port for sw ring";
391			goto fail;
392		}
393		break;
394	case '{':
395		nr_flags = NR_REG_PIPE_MASTER;
396		nr_ringid = atoi(port + 1);
397		break;
398	case '}':
399		nr_flags = NR_REG_PIPE_SLAVE;
400		nr_ringid = atoi(port + 1);
401		break;
402	}
403
404	if (nr_ringid >= NETMAP_RING_MASK) {
405		errmsg = "invalid ringid";
406		goto fail;
407	}
408
409	d = (struct nm_desc *)calloc(1, sizeof(*d));
410	if (d == NULL) {
411		errmsg = "nm_desc alloc failure";
412		errno = ENOMEM;
413		return NULL;
414	}
415	d->self = d;	/* set this early so nm_close() works */
416	d->fd = open("/dev/netmap", O_RDWR);
417	if (d->fd < 0) {
418		errmsg = "cannot open /dev/netmap";
419		goto fail;
420	}
421
422	if (req)
423		d->req = *req;
424	d->req.nr_version = NETMAP_API;
425	d->req.nr_ringid &= ~NETMAP_RING_MASK;
426
427	/* these fields are overridden by ifname and flags processing */
428	d->req.nr_ringid |= nr_ringid;
429	d->req.nr_flags = nr_flags;
430	memcpy(d->req.nr_name, ifname, namelen);
431	d->req.nr_name[namelen] = '\0';
432	/* optionally import info from parent */
433	if (IS_NETMAP_DESC(parent) && new_flags) {
434		if (new_flags & NM_OPEN_ARG1)
435			D("overriding ARG1 %d", parent->req.nr_arg1);
436		d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
437			parent->req.nr_arg1 : 4;
438		if (new_flags & NM_OPEN_ARG2)
439			D("overriding ARG2 %d", parent->req.nr_arg2);
440		d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
441			parent->req.nr_arg2 : 0;
442		if (new_flags & NM_OPEN_ARG3)
443			D("overriding ARG3 %d", parent->req.nr_arg3);
444		d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
445			parent->req.nr_arg3 : 0;
446		if (new_flags & NM_OPEN_RING_CFG) {
447			D("overriding RING_CFG");
448			d->req.nr_tx_slots = parent->req.nr_tx_slots;
449			d->req.nr_rx_slots = parent->req.nr_rx_slots;
450			d->req.nr_tx_rings = parent->req.nr_tx_rings;
451			d->req.nr_rx_rings = parent->req.nr_rx_rings;
452		}
453		if (new_flags & NM_OPEN_IFNAME) {
454			D("overriding ifname %s ringid 0x%x flags 0x%x",
455				parent->req.nr_name, parent->req.nr_ringid,
456				parent->req.nr_flags);
457			memcpy(d->req.nr_name, parent->req.nr_name,
458				sizeof(d->req.nr_name));
459			d->req.nr_ringid = parent->req.nr_ringid;
460			d->req.nr_flags = parent->req.nr_flags;
461		}
462	}
463	/* add the *XPOLL flags */
464	d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
465
466	if (ioctl(d->fd, NIOCREGIF, &d->req)) {
467		errmsg = "NIOCREGIF failed";
468		goto fail;
469	}
470
471	if (IS_NETMAP_DESC(parent) && parent->mem &&
472	    parent->req.nr_arg2 == d->req.nr_arg2) {
473		/* do not mmap, inherit from parent */
474		d->memsize = parent->memsize;
475		d->mem = parent->mem;
476	} else {
477		/* XXX TODO: check if memsize is too large (or there is overflow) */
478		d->memsize = d->req.nr_memsize;
479		d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
480				d->fd, 0);
481		if (d->mem == MAP_FAILED) {
482			errmsg = "mmap failed";
483			goto fail;
484		}
485		d->done_mmap = 1;
486	}
487	{
488		struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
489		struct netmap_ring *r = NETMAP_RXRING(nifp, );
490
491		*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
492		*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
493		*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
494		*(void **)(uintptr_t)&d->buf_end =
495			(char *)d->mem + d->memsize;
496	}
497
498	if (d->req.nr_flags ==  NR_REG_SW) { /* host stack */
499		d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
500		d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
501	} else if (d->req.nr_flags ==  NR_REG_ALL_NIC) { /* only nic */
502		d->first_tx_ring = 0;
503		d->first_rx_ring = 0;
504		d->last_tx_ring = d->req.nr_tx_rings - 1;
505		d->last_rx_ring = d->req.nr_rx_rings - 1;
506	} else if (d->req.nr_flags ==  NR_REG_NIC_SW) {
507		d->first_tx_ring = 0;
508		d->first_rx_ring = 0;
509		d->last_tx_ring = d->req.nr_tx_rings;
510		d->last_rx_ring = d->req.nr_rx_rings;
511	} else if (d->req.nr_flags == NR_REG_ONE_NIC) {
512		/* XXX check validity */
513		d->first_tx_ring = d->last_tx_ring =
514		d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK;
515	} else { /* pipes */
516		d->first_tx_ring = d->last_tx_ring = 0;
517		d->first_rx_ring = d->last_rx_ring = 0;
518	}
519
520#ifdef DEBUG_NETMAP_USER
521    { /* debugging code */
522	int i;
523
524	D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
525		d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
526                d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
527	for (i = 0; i <= d->req.nr_tx_rings; i++) {
528		struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
529		D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
530	}
531	for (i = 0; i <= d->req.nr_rx_rings; i++) {
532		struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
533		D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
534	}
535    }
536#endif /* debugging */
537
538	d->cur_tx_ring = d->first_tx_ring;
539	d->cur_rx_ring = d->first_rx_ring;
540	return d;
541
542fail:
543	nm_close(d);
544	if (errmsg)
545		D("%s %s", errmsg, ifname);
546	if (errno == 0)
547		errno = EINVAL;
548	return NULL;
549}
550
551
552static int
553nm_close(struct nm_desc *d)
554{
555	/*
556	 * ugly trick to avoid unused warnings
557	 */
558	static void *__xxzt[] __attribute__ ((unused))  =
559		{ (void *)nm_open, (void *)nm_inject,
560		  (void *)nm_dispatch, (void *)nm_nextpkt } ;
561
562	if (d == NULL || d->self != d)
563		return EINVAL;
564	if (d->done_mmap && d->mem)
565		munmap(d->mem, d->memsize);
566	if (d->fd != -1)
567		close(d->fd);
568	bzero(d, sizeof(*d));
569	free(d);
570	return 0;
571}
572
573
574/*
575 * Same prototype as pcap_inject(), only need to cast.
576 */
577static int
578nm_inject(struct nm_desc *d, const void *buf, size_t size)
579{
580	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
581
582	for (c = 0; c < n ; c++) {
583		/* compute current ring to use */
584		struct netmap_ring *ring;
585		uint32_t i, idx;
586		uint32_t ri = d->cur_tx_ring + c;
587
588		if (ri > d->last_tx_ring)
589			ri = d->first_tx_ring;
590		ring = NETMAP_TXRING(d->nifp, ri);
591		if (nm_ring_empty(ring)) {
592			continue;
593		}
594		i = ring->cur;
595		idx = ring->slot[i].buf_idx;
596		ring->slot[i].len = size;
597		nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
598		d->cur_tx_ring = ri;
599		ring->head = ring->cur = nm_ring_next(ring, i);
600		return size;
601	}
602	return 0; /* fail */
603}
604
605
606/*
607 * Same prototype as pcap_dispatch(), only need to cast.
608 */
609static int
610nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
611{
612	int n = d->last_rx_ring - d->first_rx_ring + 1;
613	int c, got = 0, ri = d->cur_rx_ring;
614
615	if (cnt == 0)
616		cnt = -1;
617	/* cnt == -1 means infinite, but rings have a finite amount
618	 * of buffers and the int is large enough that we never wrap,
619	 * so we can omit checking for -1
620	 */
621	for (c=0; c < n && cnt != got; c++) {
622		/* compute current ring to use */
623		struct netmap_ring *ring;
624
625		ri = d->cur_rx_ring + c;
626		if (ri > d->last_rx_ring)
627			ri = d->first_rx_ring;
628		ring = NETMAP_RXRING(d->nifp, ri);
629		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
630			u_int i = ring->cur;
631			u_int idx = ring->slot[i].buf_idx;
632			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
633
634			// __builtin_prefetch(buf);
635			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
636			d->hdr.ts = ring->ts;
637			cb(arg, &d->hdr, buf);
638			ring->head = ring->cur = nm_ring_next(ring, i);
639		}
640	}
641	d->cur_rx_ring = ri;
642	return got;
643}
644
645static u_char *
646nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
647{
648	int ri = d->cur_rx_ring;
649
650	do {
651		/* compute current ring to use */
652		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
653		if (!nm_ring_empty(ring)) {
654			u_int i = ring->cur;
655			u_int idx = ring->slot[i].buf_idx;
656			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
657
658			// __builtin_prefetch(buf);
659			hdr->ts = ring->ts;
660			hdr->len = hdr->caplen = ring->slot[i].len;
661			ring->cur = nm_ring_next(ring, i);
662			/* we could postpone advancing head if we want
663			 * to hold the buffer. This can be supported in
664			 * the future.
665			 */
666			ring->head = ring->cur;
667			d->cur_rx_ring = ri;
668			return buf;
669		}
670		ri++;
671		if (ri > d->last_rx_ring)
672			ri = d->first_rx_ring;
673	} while (ri != d->cur_rx_ring);
674	return NULL; /* nothing found */
675}
676
677#endif /* !HAVE_NETMAP_WITH_LIBS */
678
679#endif /* NETMAP_WITH_LIBS */
680
681#endif /* _NET_NETMAP_USER_H_ */
682