/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * $FreeBSD$
 *
 * This module supports memory mapped access to network devices,
 * see netmap(4).
 *
 * The module uses a large memory pool allocated by the kernel
 * and accessible as mmapped memory by multiple userspace threads/processes.
 * The memory pool contains packet buffers and "netmap rings",
 * i.e. user-accessible copies of the interface's queues.
 *
 * Access to the network card works like this (a minimal userspace
 * sketch follows this comment):
 * 1. a process/thread issues one or more open() on /dev/netmap, to create
 *    a select()able file descriptor on which events are reported.
 * 2. on each descriptor, the process issues an ioctl() to identify
 *    the interface that should report events to the file descriptor.
 * 3. on each descriptor, the process issues an mmap() request to
 *    map the shared memory region within the process' address space.
 *    The list of interesting queues is indicated by a location in
 *    the shared memory region.
 * 4. using the functions in the netmap(4) userspace API, a process
 *    can look up the occupation state of a queue, access memory buffers,
 *    and retrieve received packets or enqueue packets to transmit.
 * 5. using some ioctl()s the process can synchronize the userspace view
 *    of the queue with the actual status in the kernel. This includes both
 *    receiving the notification of new packets, and transmitting new
 *    packets on the output interface.
 * 6. select() or poll() can be used to wait for events on individual
 *    transmit or receive queues (or all queues for a given interface).
 *

		SYNCHRONIZATION (USER)

The netmap rings and data structures may be shared among multiple
user threads or even independent processes.
Any synchronization among those threads/processes is delegated
to the threads themselves. Only one thread at a time can be in
a system call on the same netmap ring. The OS does not enforce
this and only guarantees against system crashes in case of
invalid usage.

		LOCKING (INTERNAL)

Within the kernel, access to the netmap rings is protected as follows:

- a spinlock on each ring, to handle producer/consumer races on
  RX rings attached to the host stack (against multiple host
  threads writing from the host stack to the same ring),
  and on 'destination' rings attached to a VALE switch
  (i.e. RX rings in VALE ports, and TX rings in NIC/host ports),
  protecting multiple active senders for the same destination.

- an atomic variable to guarantee that there is at most one
  instance of *_*xsync() on the ring at any time.
  For rings connected to user file
  descriptors, an atomic_test_and_set() protects this, and the
  lock on the ring is not actually used.
  For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
  is also used to prevent multiple executions (the driver might indeed
  already guarantee this).
  For NIC TX rings connected to a VALE switch, the lock arbitrates
  access to the queue (both when allocating buffers and when pushing
  them out).

- *xsync() should be protected against initializations of the card.
  On FreeBSD most devices have the reset routine protected by
  a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
  the RING protection on rx_reset(); this should be added.

  On linux there is an external lock on the tx path, which probably
  also arbitrates access to the reset routine. XXX to be revised

- a per-interface core_lock protecting access from the host stack
  while interfaces may be detached from netmap mode.
  XXX there should be no need for this lock if we detach the interfaces
  only while they are down.


--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)

 */
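
/*
 * Minimal userspace sketch of steps 1-6 above. This is illustrative only
 * and not part of this module; it assumes the nmreq layout and the
 * NETMAP_IF()/NETMAP_TXRING() helpers from net/netmap.h and
 * net/netmap_user.h, and it skips all error handling:
 *
 *	int fd = open("/dev/netmap", O_RDWR);		// step 1
 *	struct nmreq req;
 *	bzero(&req, sizeof(req));
 *	strncpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	req.nr_version = NETMAP_API;
 *	ioctl(fd, NIOCREGIF, &req);			// step 2
 *	void *mem = mmap(NULL, req.nr_memsize,
 *	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);	// step 3
 *	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
 *	struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);	// step 4
 *	// ... fill slots between ring->head and ring->tail, then:
 *	ioctl(fd, NIOCTXSYNC, NULL);			// step 5
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);				// step 6
 */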


/* --- internals ----
 *
 * Roadmap to the code that implements the above.
 *
 * > 1. a process/thread issues one or more open() on /dev/netmap, to create
 * >    a select()able file descriptor on which events are reported.
 *
 *  	Internally, we allocate a netmap_priv_d structure that will be
 *  	initialized on ioctl(NIOCREGIF).
 *
 *      os-specific:
 *  	    FreeBSD: netmap_open (netmap_freebsd.c). The priv is
 *  		     per-thread.
 *  	    linux:   linux_netmap_open (netmap_linux.c). The priv is
 *  		     per-open.
 *
 * > 2. on each descriptor, the process issues an ioctl() to identify
 * >    the interface that should report events to the file descriptor.
 *
 * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
 * 	Most important things happen in netmap_get_na() and
 * 	netmap_do_regif(), called from there. Additional details can be
 * 	found in the comments above those functions.
 *
 * 	In all cases, this action creates/takes-a-reference-to a
 * 	netmap_*_adapter describing the port, and allocates a netmap_if
 * 	and all necessary netmap rings, filling them with netmap buffers.
 *
 *      In this phase, the sync callbacks for each ring are set (these are used
 *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
 *      The adapter creation/initialization code puts them in the
 * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
 * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
 * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
 * 	actually call netmap_krings_create() to perform this and the other
 * 	common stuff. netmap_krings_create() also takes care of the host rings,
 * 	if needed, by setting their sync callbacks appropriately.
 *
 * 	Additional actions depend on the kind of netmap_adapter that has been
 * 	registered:
 *
 * 	- netmap_hw_adapter:  	     [netmap.c]
 * 	     This is a system netdev/ifp with native netmap support.
 * 	     The ifp is detached from the host stack by redirecting:
 * 	       - transmissions (from the network stack) to netmap_transmit()
 * 	       - receive notifications to the nm_notify() callback for
 * 	         this adapter. The callback is normally netmap_notify(), unless
 * 	         the ifp is attached to a bridge using bwrap, in which case it
 * 	         is netmap_bwrap_intr_notify().
 *
 * 	- netmap_generic_adapter:      [netmap_generic.c]
 * 	      A system netdev/ifp without native netmap support.
 *
 * 	(the decision about native/non-native support is taken in
 * 	 netmap_get_hw_na(), called by netmap_get_na())
 *
 * 	- netmap_vp_adapter 		[netmap_vale.c]
 * 	      Returned by netmap_get_bdg_na().
 * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
 * 	      are created on the fly if they don't already exist, and are
 * 	      always attached to a bridge.
 * 	      Persistent VALE ports must be created separately, and are
 * 	      then attached like normal NICs. The NIOCREGIF we are examining
 * 	      will find them only if they have previously been created and
 * 	      attached (see VALE_CTL below).
 *
 * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
 * 	      Returned by netmap_get_pipe_na().
 * 	      Both pipe ends are created, if they didn't already exist.
 *
 * 	- netmap_monitor_adapter      [netmap_monitor.c]
 * 	      Returned by netmap_get_monitor_na().
 * 	      If successful, the nm_sync callbacks of the monitored adapter
 * 	      will be intercepted by the returned monitor.
 *
 * 	- netmap_bwrap_adapter	      [netmap_vale.c]
 * 	      Cannot be obtained in this way; see VALE_CTL below.
 *
 *
 * 	os-specific:
 * 	    linux: we first go through linux_netmap_ioctl() to
 * 	           adapt the FreeBSD interface to the linux one.
 *
 *
 * > 3. on each descriptor, the process issues an mmap() request to
 * >    map the shared memory region within the process' address space.
 * >    The list of interesting queues is indicated by a location in
 * >    the shared memory region.
 *
 *      os-specific:
 *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
 *  	    linux:   linux_netmap_mmap (netmap_linux.c).
 *
 * > 4. using the functions in the netmap(4) userspace API, a process
 * >    can look up the occupation state of a queue, access memory buffers,
 * >    and retrieve received packets or enqueue packets to transmit.
 *
 * 	These actions do not involve the kernel.
 *
 * > 5. using some ioctl()s the process can synchronize the userspace view
 * >    of the queue with the actual status in the kernel. This includes both
 * >    receiving the notification of new packets, and transmitting new
 * >    packets on the output interface.
 *
 * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
 * 	cases. They invoke the nm_sync callbacks on the netmap_kring
 * 	structures, as initialized in step 2 and maybe later modified
 * 	by a monitor. Monitors, however, will always call the original
 * 	callback before doing anything else.
 *
 *
 * > 6. select() or poll() can be used to wait for events on individual
 * >    transmit or receive queues (or all queues for a given interface).
 *
 * 	Implemented in netmap_poll(). This will call the same nm_sync()
 * 	callbacks as in step 5 above.
 *
 * 	os-specific:
 * 		linux: we first go through linux_netmap_poll() to adapt
 * 		       the FreeBSD interface to the linux one.
 *
 *
 *  ----  VALE_CTL -----
 *
 *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
 *  nr_cmd in the nmreq structure. These subcommands are handled by
 *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
 *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
 *  subcommands, respectively (an illustrative sequence follows this comment).
 *
 *  Any network interface known to the system (including a persistent VALE
 *  port) can be attached to a VALE switch by issuing the
 *  NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
 *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
 *  attachment of other interfaces, instead, requires the creation of a
 *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
 *  netmap mode. This may require the creation of a netmap_generic_adapter if
 *  we have no native support for the interface, or if generic adapters have
 *  been forced by sysctl.
 *
 *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
 *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
 *  callback.  In the case of the bwrap, the callback creates the
 *  netmap_bwrap_adapter.  The initialization of the bwrap is then
 *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
 *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
 *  A generic adapter for the wrapped ifp will be created if needed, when
 *  netmap_get_bdg_na() calls netmap_get_hw_na().
 *
 *
 *  ---- DATAPATHS -----
 *
 *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
 *
 *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
 *
 *    - tx from netmap userspace:
 *	 concurrently:
 *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
 *                kring->nm_sync() == DEVICE_netmap_txsync()
 *           2) device interrupt handler
 *                na->nm_notify()  == netmap_notify()
 *    - rx from netmap userspace:
 *       concurrently:
 *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
 *                kring->nm_sync() == DEVICE_netmap_rxsync()
 *           2) device interrupt handler
 *                na->nm_notify()  == netmap_notify()
 *    - tx from host stack
 *       concurrently:
 *           1) host stack
 *                netmap_transmit()
 *                  na->nm_notify  == netmap_notify()
 *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
 *                kring->nm_sync() == netmap_rxsync_from_host_compat
 *                  netmap_rxsync_from_host(na, NULL, NULL)
 *    - tx to host stack
 *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
 *             kring->nm_sync() == netmap_txsync_to_host_compat
 *               netmap_txsync_to_host(na)
 *                 NM_SEND_UP()
 *                   FreeBSD: na->if_input() == ?? XXX
 *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
 *
 *
 *
 *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
 *
 *
 *
 *                           -= VALE PORT =-
 *
 *
 *
 *                           -= NETMAP PIPE =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
 *
 *
 *
 */
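
/*
 * Illustrative VALE_CTL sketch (not part of this module). It assumes the
 * nmreq layout and the NETMAP_BDG_* subcommand values from net/netmap.h,
 * uses hypothetical port/interface names, and skips error handling.
 * Create a persistent VALE port, then attach a NIC to the same switch:
 *
 *	struct nmreq req;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "myport", sizeof(req.nr_name));
 *	req.nr_cmd = NETMAP_BDG_NEWIF;		// new persistent VALE port
 *	ioctl(fd, NIOCREGIF, &req);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "vale0:em0", sizeof(req.nr_name));
 *	req.nr_cmd = NETMAP_BDG_ATTACH;		// NIC attached via a bwrap
 *	ioctl(fd, NIOCREGIF, &req);
 */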

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/filio.h>	/* FIONBIO */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <net/vnet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>


/* reduce conditional code */
// linux API, used for the knlist in FreeBSD
#define init_waitqueue_head(x)	knlist_init_mtx(&(x)->si_note, NULL)

void freebsd_selwakeup(struct selinfo *si, int pri);
#define OS_selwakeup(a, b)	freebsd_selwakeup(a, b)

#elif defined(linux)

#include "bsd_glue.h"



#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");

/*
 * The following variables are used by the drivers and replicate
 * fields in the global memory pool. They only refer to buffers
 * used by physical interfaces.
 */
u_int netmap_total_buffers;
u_int netmap_buf_size;
char *netmap_buffer_base;	/* also address of an invalid buffer */

/* user-controlled variables */
int netmap_verbose;

static int netmap_no_timestamp; /* don't timestamp on rxsync */

SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
    CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
int netmap_mitigate = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
    CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
int netmap_txsync_retry = 2;
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
    &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");

int netmap_adaptive_io = 0;
SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
    &netmap_adaptive_io, 0, "Adaptive I/O on paravirt");

int netmap_flags = 0;	/* debug flags */
int netmap_fwd = 0;	/* force transparent mode */
int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */

/*
 * netmap_admode selects the netmap mode to use.
 * Invalid values are reset to NETMAP_ADMODE_BEST
 */
enum { NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
	NETMAP_ADMODE_NATIVE,	/* either native or none */
	NETMAP_ADMODE_GENERIC,	/* force generic */
	NETMAP_ADMODE_LAST };
static int netmap_admode = NETMAP_ADMODE_BEST;

int netmap_generic_mit = 100*1000;   /* Generic mitigation interval in nanoseconds. */
int netmap_generic_ringsize = 1024;   /* Generic ringsize. */
int netmap_generic_rings = 1;   /* number of queues in generic. */

SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0, "");

NMG_LOCK_T	netmap_global_lock;


static void
nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}


/*
 * mark the ring as stopped, and run through the locks
 * to make sure other users get to see it.
 */
static void
netmap_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;
	nm_kr_get(kr);
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);
}

/* stop or enable a single tx ring */
void
netmap_set_txring(struct netmap_adapter *na, u_int ring_id, int stopped)
{
	if (stopped)
		netmap_disable_ring(na->tx_rings + ring_id);
	else
		na->tx_rings[ring_id].nkr_stopped = 0;
	/* notify that the stopped state has changed. This is currently
	 * only used by bwrap to propagate the state to its own krings
	 * (see netmap_bwrap_intr_notify).
	 */
	na->nm_notify(na, ring_id, NR_TX, NAF_DISABLE_NOTIFY);
}

/* stop or enable a single rx ring */
void
netmap_set_rxring(struct netmap_adapter *na, u_int ring_id, int stopped)
{
	if (stopped)
		netmap_disable_ring(na->rx_rings + ring_id);
	else
		na->rx_rings[ring_id].nkr_stopped = 0;
	/* notify that the stopped state has changed. This is currently
	 * only used by bwrap to propagate the state to its own krings
	 * (see netmap_bwrap_intr_notify).
	 */
	na->nm_notify(na, ring_id, NR_RX, NAF_DISABLE_NOTIFY);
}


/* stop or enable all the rings of na */
void
netmap_set_all_rings(struct netmap_adapter *na, int stopped)
{
	int i;
	u_int ntx, nrx;

	if (!nm_netmap_on(na))
		return;

	ntx = netmap_real_tx_rings(na);
	nrx = netmap_real_rx_rings(na);

	for (i = 0; i < ntx; i++) {
		netmap_set_txring(na, i, stopped);
	}

	for (i = 0; i < nrx; i++) {
		netmap_set_rxring(na, i, stopped);
	}
}

/*
 * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
 * to finish and prevents any new one from starting.  Call this before turning
 * netmap mode off, or before removing the hardware rings (e.g., on module
 * unload).  As a rule of thumb for linux drivers, this should be placed near
 * each napi_disable().
 */
void
netmap_disable_all_rings(struct ifnet *ifp)
{
	netmap_set_all_rings(NA(ifp), 1 /* stopped */);
}

/*
 * Convenience function used in drivers.  Re-enables rxsync and txsync on the
 * adapter's rings.  In linux drivers, this should be placed near each
 * napi_enable().
 */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}
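
/*
 * Illustrative driver-side sketch (hypothetical driver, following the two
 * comments above): bracket a hardware ring reinitialization so that netmap
 * syncs are quiesced while the rings are being reprogrammed.
 *
 *	netmap_disable_all_rings(ifp);	// wait for running *xsync()s
 *	// ... reset and reprogram the hardware rings ...
 *	netmap_enable_all_rings(ifp);	// allow syncs again
 */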


/*
 * generic bound_checking function
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int oldv = *v;
	const char *op = NULL;

	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;
		op = "Bump";
	} else if (oldv > hi) {
		*v = hi;
		op = "Clamp";
	}
	if (op && msg)
		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
	return *v;
}
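
/*
 * Example (illustrative only, hypothetical bounds): a sysctl or module
 * parameter can be sanitized before use, e.g.
 *	nm_bound_var(&netmap_generic_ringsize, 1024, 64, 16384,
 *	    "generic ringsize");
 * which bumps/clamps the value into [64, 16384] and logs the adjustment.
 */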


/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] = "0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		for (j=0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;
		for (j=0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
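
/*
 * Example use (mirroring the rx-from-host path further below): dump the
 * first 128 bytes of a buffer into the static scratch buffer when verbose
 * logging is enabled:
 *	D("%s", nm_dump_buf(NMB(na, slot), slot->len, 128, NULL));
 */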


/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with NMG_LOCK held */
int
netmap_update_config(struct netmap_adapter *na)
{
	u_int txr, txd, rxr, rxd;

	txr = txd = rxr = rxd = 0;
	if (na->nm_config) {
		na->nm_config(na, &txr, &txd, &rxr, &rxd);
	} else {
		/* take whatever we had at init time */
		txr = na->num_tx_rings;
		txd = na->num_tx_desc;
		rxr = na->num_rx_rings;
		rxd = na->num_rx_desc;
	}

	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
		return 0; /* nothing changed */
	if (netmap_verbose || na->active_fds > 0) {
		D("stored config %s: txring %d x %d, rxring %d x %d",
			na->name,
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
		D("new config %s: txring %d x %d, rxring %d x %d",
			na->name, txr, txd, rxr, rxd);
	}
	if (na->active_fds == 0) {
		D("configuration changed (but fine)");
		na->num_tx_rings = txr;
		na->num_tx_desc = txd;
		na->num_rx_rings = rxr;
		na->num_rx_desc = rxd;
		return 0;
	}
	D("configuration changed while active, this is bad...");
	return 1;
}

/* kring->nm_sync callback for the host tx ring */
static int
netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
{
	(void)flags; /* unused */
	netmap_txsync_to_host(kring->na);
	return 0;
}

/* kring->nm_sync callback for the host rx ring */
static int
netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
{
	(void)flags; /* unused */
	netmap_rxsync_from_host(kring->na, NULL, NULL);
	return 0;
}



/* create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                    +----------+
 * na->tx_rings ----->|          | \
 *                    |          |  } na->num_tx_rings
 *                    |          | /
 *                    +----------+
 *                    |          |    host tx kring
 * na->rx_rings ----> +----------+
 *                    |          | \
 *                    |          |  } na->num_rx_rings
 *                    |          | /
 *                    +----------+
 *                    |          |    host rx kring
 *                    +----------+
 * na->tailroom ----->|          | \
 *                    |          |  } tailroom bytes
 *                    |          | /
 *                    +----------+
 *
 * Note: for compatibility, host krings are created even when not needed.
 * The tailroom space is currently used by vale ports for allocating leases.
 */
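/* With the layout above, the host rings sit right after the hardware ones,
 * so (as the code below does) they can be reached as
 *	na->tx_rings[na->num_tx_rings]	(host tx kring)
 *	na->rx_rings[na->num_rx_rings]	(host rx kring)
 * and the tailroom area starts at na->rx_rings + na->num_rx_rings + 1.
 */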
/* call with NMG_LOCK held */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int ntx, nrx;

	/* account for the (possibly fake) host rings */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;

	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	na->rx_rings = na->tx_rings + ntx;

	/*
	 * All fields in krings are 0 except the ones initialized below,
	 * but better be explicit on important kring fields.
	 */
	ndesc = na->num_tx_desc;
	for (i = 0; i < ntx; i++) { /* Transmit rings */
		kring = &na->tx_rings[i];
		bzero(kring, sizeof(*kring));
		kring->na = na;
		kring->ring_id = i;
		kring->nkr_num_slots = ndesc;
		if (i < na->num_tx_rings) {
			kring->nm_sync = na->nm_txsync;
		} else if (i == na->num_tx_rings) {
			kring->nm_sync = netmap_txsync_to_host_compat;
		}
		/*
		 * IMPORTANT: Always keep one slot empty.
		 */
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = ndesc - 1;
		snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", na->name, i);
		ND("ktx %s h %d c %d t %d",
			kring->name, kring->rhead, kring->rcur, kring->rtail);
		mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
		init_waitqueue_head(&kring->si);
	}

	ndesc = na->num_rx_desc;
	for (i = 0; i < nrx; i++) { /* Receive rings */
		kring = &na->rx_rings[i];
		bzero(kring, sizeof(*kring));
		kring->na = na;
		kring->ring_id = i;
		kring->nkr_num_slots = ndesc;
		if (i < na->num_rx_rings) {
			kring->nm_sync = na->nm_rxsync;
		} else if (i == na->num_rx_rings) {
			kring->nm_sync = netmap_rxsync_from_host_compat;
		}
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = 0;
		snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", na->name, i);
		ND("krx %s h %d c %d t %d",
			kring->name, kring->rhead, kring->rcur, kring->rtail);
		mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
		init_waitqueue_head(&kring->si);
	}
	init_waitqueue_head(&na->tx_si);
	init_waitqueue_head(&na->rx_si);

	na->tailroom = na->rx_rings + nrx;

	return 0;
}


/* undo the actions performed by netmap_krings_create */
/* call with NMG_LOCK held */
void
netmap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_kring *kring = na->tx_rings;

	/* we rely on the krings layout described above */
	for ( ; kring != na->tailroom; kring++) {
		mtx_destroy(&kring->q_lock);
	}
	free(na->tx_rings, M_DEVBUF);
	na->tx_rings = na->rx_rings = na->tailroom = NULL;
}


/*
 * Destructor for NIC ports. They also have an mbuf queue
 * on the rings connected to the host so we need to purge
 * them first.
 */
/* call with NMG_LOCK held */
static void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;

	ND("destroy sw mbq with len %d", mbq_len(q));
	mbq_purge(q);
	mbq_safe_destroy(q);
	netmap_krings_delete(na);
}


/* create a new netmap_if for a newly registered fd.
 * If this is the first registration of the adapter,
 * also create the netmap rings and their in-kernel view,
 * the netmap krings.
 */
/* call with NMG_LOCK held */
static struct netmap_if*
netmap_if_new(struct netmap_adapter *na)
{
	struct netmap_if *nifp;

	if (netmap_update_config(na)) {
		/* configuration mismatch, report and fail */
		return NULL;
	}

	if (na->active_fds)	/* already registered */
		goto final;

	/* create and init the krings arrays.
	 * Depending on the adapter, this may also create
	 * the netmap rings themselves
	 */
	if (na->nm_krings_create(na))
		return NULL;

	/* create all missing netmap rings */
	if (netmap_mem_rings_create(na))
		goto cleanup;

final:

	/* in all cases, create a new netmap if */
	nifp = netmap_mem_if_new(na);
	if (nifp == NULL)
		goto cleanup;

	return (nifp);

cleanup:

	if (na->active_fds == 0) {
		netmap_mem_rings_delete(na);
		na->nm_krings_delete(na);
	}

	return NULL;
}


/* grab a reference to the memory allocator, if we don't have one already.  The
 * reference is taken from the netmap_adapter registered with the priv.
 */
/* call with NMG_LOCK held */
static int
netmap_get_memory_locked(struct netmap_priv_d* p)
{
	struct netmap_mem_d *nmd;
	int error = 0;

	if (p->np_na == NULL) {
		if (!netmap_mmap_unreg)
			return ENODEV;
		/* for compatibility with older versions of the API
		 * we use the global allocator when no interface has been
		 * registered
		 */
		nmd = &nm_mem;
	} else {
		nmd = p->np_na->nm_mem;
	}
	if (p->np_mref == NULL) {
		error = netmap_mem_finalize(nmd, p->np_na);
		if (!error)
			p->np_mref = nmd;
	} else if (p->np_mref != nmd) {
		/* a virtual port has been registered, but previous
		 * syscalls already used the global allocator.
		 * We cannot continue
		 */
		error = ENODEV;
	}
	return error;
}


/* call with NMG_LOCK *not* held */
int
netmap_get_memory(struct netmap_priv_d* p)
{
	int error;
	NMG_LOCK();
	error = netmap_get_memory_locked(p);
	NMG_UNLOCK();
	return error;
}


/* call with NMG_LOCK held */
static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
	return p->np_mref != NULL;
}


/* call with NMG_LOCK held */
static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
	if (p->np_mref) {
		netmap_mem_deref(p->np_mref, p->np_na);
		p->np_mref = NULL;
	}
}


/*
 * Call nm_register(ifp,0) to stop netmap mode on the interface and
 * revert to normal operation.
 * The second argument is the nifp to work on. In some cases it is
 * not attached yet to the netmap_priv_d so we need to pass it as
 * a separate argument.
 */
/* call with NMG_LOCK held */
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
	struct netmap_adapter *na = priv->np_na;

	NMG_LOCK_ASSERT();
	na->active_fds--;
	if (na->active_fds <= 0) {	/* last instance */

		if (netmap_verbose)
			D("deleting last instance for %s", na->name);
		/*
		 * (TO CHECK) This function is only called
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		na->nm_register(na, 0); /* off, clear flags */
		/* Wake up any sleeping threads. netmap_poll will
		 * then return POLLERR
		 * XXX The wake up now must happen during *_down(), when
		 * we order all activities to stop. -gl
		 */
		/* XXX kqueue(9) needed; these will mirror knlist_init. */
		/* knlist_destroy(&na->tx_si.si_note); */
		/* knlist_destroy(&na->rx_si.si_note); */

		/* delete rings and buffers */
		netmap_mem_rings_delete(na);
		na->nm_krings_delete(na);
	}
	/* delete the nifp */
	netmap_mem_if_delete(na, nifp);
}

/* call with NMG_LOCK held */
static __inline int
nm_tx_si_user(struct netmap_priv_d *priv)
{
	return (priv->np_na != NULL &&
		(priv->np_txqlast - priv->np_txqfirst > 1));
}

/* call with NMG_LOCK held */
static __inline int
nm_rx_si_user(struct netmap_priv_d *priv)
{
	return (priv->np_na != NULL &&
		(priv->np_rxqlast - priv->np_rxqfirst > 1));
}


/*
 * Destructor of the netmap_priv_d, called when the fd has
 * no active open() and mmap(). Also called in error paths.
 *
 * returns 1 if this is the last instance and we can free priv
 */
/* call with NMG_LOCK held */
int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

#ifdef __FreeBSD__
	/*
	 * np_refcount is the number of active mmaps on
	 * this file descriptor
	 */
	if (--priv->np_refcount > 0) {
		return 0;
	}
#endif /* __FreeBSD__ */
	if (!na) {
	    return 1; //XXX is it correct?
	}
	netmap_do_unregif(priv, priv->np_nifp);
	priv->np_nifp = NULL;
	netmap_drop_memory_locked(priv);
	if (priv->np_na) {
		if (nm_tx_si_user(priv))
			na->tx_si_users--;
		if (nm_rx_si_user(priv))
			na->rx_si_users--;
		netmap_adapter_put(na);
		priv->np_na = NULL;
	}
	return 1;
}


/* call with NMG_LOCK *not* held */
void
netmap_dtor(void *data)
{
	struct netmap_priv_d *priv = data;
	int last_instance;

	NMG_LOCK();
	last_instance = netmap_dtor_locked(priv);
	NMG_UNLOCK();
	if (last_instance) {
		bzero(priv, sizeof(*priv));	/* for safety */
		free(priv, M_DEVBUF);
	}
}




/*
 * Handlers for synchronization of the queues from/to the host.
 * Netmap has two operating modes:
 * - in the default mode, the rings connected to the host stack are
 *   just another ring pair managed by userspace;
 * - in transparent mode (XXX to be defined) incoming packets
 *   (from the host or the NIC) are marked as NS_FORWARD upon
 *   arrival, and the user application has a chance to reset the
 *   flag for packets that should be dropped (see the sketch after
 *   this comment).
 *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
 *   to the other side.
 * The transfer NIC --> host is relatively easy, just encapsulate
 * into mbufs and we are done. The host --> NIC side is slightly
 * harder because there might not be room in the tx ring so it
 * might take a while before releasing the buffer.
 */
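
/*
 * Userspace-side sketch of transparent mode (illustrative only; it assumes
 * the netmap(4) slot flags and the nm_ring_next() helper from
 * net/netmap_user.h, and should_drop() is a hypothetical application
 * policy): after an rxsync the application scans the newly received slots
 * and clears NS_FORWARD on those it wants dropped; slots still carrying
 * NS_FORWARD are forwarded by the routines below on the next sync.
 *
 *	for (i = ring->head; i != ring->tail; i = nm_ring_next(ring, i)) {
 *		struct netmap_slot *slot = &ring->slot[i];
 *		if (should_drop(slot))
 *			slot->flags &= ~NS_FORWARD;
 *	}
 *	ring->head = ring->cur = ring->tail;	// release the slots
 *	ioctl(fd, NIOCRXSYNC, NULL);
 */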
1103245836Sluigi
1104251139Sluigi
1105245836Sluigi/*
1106245836Sluigi * pass a chain of buffers to the host stack as coming from 'dst'
1107262151Sluigi * We do not need to lock because the queue is private.
1108245836Sluigi */
1109227614Sluigistatic void
1110262151Sluiginetmap_send_up(struct ifnet *dst, struct mbq *q)
1111227614Sluigi{
1112245836Sluigi	struct mbuf *m;
1113227614Sluigi
1114245836Sluigi	/* send packets up, outside the lock */
1115262151Sluigi	while ((m = mbq_dequeue(q)) != NULL) {
1116245836Sluigi		if (netmap_verbose & NM_VERB_HOST)
1117245836Sluigi			D("sending up pkt %p size %d", m, MBUF_LEN(m));
1118245836Sluigi		NM_SEND_UP(dst, m);
1119228280Sluigi	}
1120262151Sluigi	mbq_destroy(q);
1121245836Sluigi}
1122227614Sluigi
1123245836Sluigi
1124245836Sluigi/*
1125245836Sluigi * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
1126262151Sluigi * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
1127262151Sluigi * and pass them up. Drop remaining packets in the unlikely event
1128262151Sluigi * of an mbuf shortage.
1129245836Sluigi */
1130245836Sluigistatic void
1131245836Sluiginetmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1132245836Sluigi{
1133262151Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1134262151Sluigi	u_int const head = kring->ring->head;
1135262151Sluigi	u_int n;
1136262151Sluigi	struct netmap_adapter *na = kring->na;
1137245836Sluigi
1138262151Sluigi	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
1139262151Sluigi		struct mbuf *m;
1140245836Sluigi		struct netmap_slot *slot = &kring->ring->slot[n];
1141227614Sluigi
1142245836Sluigi		if ((slot->flags & NS_FORWARD) == 0 && !force)
1143245836Sluigi			continue;
1144270252Sluigi		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
1145262151Sluigi			RD(5, "bad pkt at %d len %d", n, slot->len);
1146227614Sluigi			continue;
1147227614Sluigi		}
1148245836Sluigi		slot->flags &= ~NS_FORWARD; // XXX needed ?
1149262151Sluigi		/* XXX TODO: adapt to the case of a multisegment packet */
1150270252Sluigi		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);
1151227614Sluigi
1152227614Sluigi		if (m == NULL)
1153227614Sluigi			break;
1154262151Sluigi		mbq_enqueue(q, m);
1155227614Sluigi	}
1156245836Sluigi}
1157245836Sluigi
1158251139Sluigi
1159245836Sluigi/*
1160262151Sluigi * Send to the NIC rings packets marked NS_FORWARD between
1161262151Sluigi * kring->nr_hwcur and kring->rhead
1162262151Sluigi * Called under kring->rx_queue.lock on the sw rx ring,
1163245836Sluigi */
1164262151Sluigistatic u_int
1165245836Sluiginetmap_sw_to_nic(struct netmap_adapter *na)
1166245836Sluigi{
1167245836Sluigi	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1168262151Sluigi	struct netmap_slot *rxslot = kring->ring->slot;
1169262151Sluigi	u_int i, rxcur = kring->nr_hwcur;
1170262151Sluigi	u_int const head = kring->rhead;
1171262151Sluigi	u_int const src_lim = kring->nkr_num_slots - 1;
1172262151Sluigi	u_int sent = 0;
1173245836Sluigi
1174262151Sluigi	/* scan rings to find space, then fill as much as possible */
1175262151Sluigi	for (i = 0; i < na->num_tx_rings; i++) {
1176262151Sluigi		struct netmap_kring *kdst = &na->tx_rings[i];
1177262151Sluigi		struct netmap_ring *rdst = kdst->ring;
1178262151Sluigi		u_int const dst_lim = kdst->nkr_num_slots - 1;
1179245836Sluigi
1180262151Sluigi		/* XXX do we trust ring or kring->rcur,rtail ? */
1181262151Sluigi		for (; rxcur != head && !nm_ring_empty(rdst);
1182262151Sluigi		     rxcur = nm_next(rxcur, src_lim) ) {
1183245836Sluigi			struct netmap_slot *src, *dst, tmp;
1184262151Sluigi			u_int dst_cur = rdst->cur;
1185262151Sluigi
1186262151Sluigi			src = &rxslot[rxcur];
1187262151Sluigi			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
1188262151Sluigi				continue;
1189262151Sluigi
1190262151Sluigi			sent++;
1191262151Sluigi
1192262151Sluigi			dst = &rdst->slot[dst_cur];
1193262151Sluigi
1194245836Sluigi			tmp = *src;
1195262151Sluigi
1196245836Sluigi			src->buf_idx = dst->buf_idx;
1197245836Sluigi			src->flags = NS_BUF_CHANGED;
1198245836Sluigi
1199245836Sluigi			dst->buf_idx = tmp.buf_idx;
1200245836Sluigi			dst->len = tmp.len;
1201245836Sluigi			dst->flags = NS_BUF_CHANGED;
1202245836Sluigi
1203270252Sluigi			rdst->cur = nm_next(dst_cur, dst_lim);
1204245836Sluigi		}
1205262151Sluigi		/* if (sent) XXX txsync ? */
1206245836Sluigi	}
1207262151Sluigi	return sent;
1208245836Sluigi}
1209245836Sluigi
1210251139Sluigi
1211245836Sluigi/*
1212262151Sluigi * netmap_txsync_to_host() passes packets up. We are called from a
1213245836Sluigi * system call in user process context, and the only contention
1214245836Sluigi * can be among multiple user threads erroneously calling
1215245836Sluigi * this routine concurrently.
1216245836Sluigi */
1217262151Sluigivoid
1218262151Sluiginetmap_txsync_to_host(struct netmap_adapter *na)
1219245836Sluigi{
1220245836Sluigi	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
1221245836Sluigi	struct netmap_ring *ring = kring->ring;
1222262151Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1223262151Sluigi	u_int const head = kring->rhead;
1224262151Sluigi	struct mbq q;
1225245836Sluigi
1226262151Sluigi	/* Take packets from hwcur to head and pass them up.
1227262151Sluigi	 * force head = cur since netmap_grab_packets() stops at head
1228245836Sluigi	 * In case of no buffers we give up. At the end of the loop,
1229245836Sluigi	 * the queue is drained in all cases.
1230245836Sluigi	 */
1231262151Sluigi	mbq_init(&q);
1232262151Sluigi	ring->cur = head;
1233262151Sluigi	netmap_grab_packets(kring, &q, 1 /* force */);
1234262151Sluigi	ND("have %d pkts in queue", mbq_len(&q));
1235262151Sluigi	kring->nr_hwcur = head;
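	/* All packets up to head have been passed up: place hwtail one
	 * slot behind head (modulo the ring size) so that the whole ring
	 * is again available to userspace.
	 */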
1236262151Sluigi	kring->nr_hwtail = head + lim;
1237262151Sluigi	if (kring->nr_hwtail > lim)
1238262151Sluigi		kring->nr_hwtail -= lim + 1;
1239262151Sluigi	nm_txsync_finalize(kring);
1240227614Sluigi
1241262151Sluigi	netmap_send_up(na->ifp, &q);
1242227614Sluigi}
1243227614Sluigi
1244251139Sluigi
1245227614Sluigi/*
1246228280Sluigi * rxsync backend for packets coming from the host stack.
1247262151Sluigi * They have been put in kring->rx_queue by netmap_transmit().
1248262151Sluigi * We protect access to the kring using kring->rx_queue.lock
1249228280Sluigi *
1250270252Sluigi * This routine also does the selrecord if called from the poll handler
1251270252Sluigi * (we know because td != NULL).
1252270252Sluigi *
1253270252Sluigi * NOTE: on linux, selrecord() is defined as a macro and uses pwait
1254270252Sluigi *     as an additional hidden argument.
 * Returns the number of packets delivered to the tx queues in
 * transparent mode, or a negative value on error.
1257227614Sluigi */
1258262151Sluigiint
1259262151Sluiginetmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
1260227614Sluigi{
1261234227Sluigi	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1262227614Sluigi	struct netmap_ring *ring = kring->ring;
1263262151Sluigi	u_int nm_i, n;
1264262151Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1265262151Sluigi	u_int const head = kring->rhead;
1266262151Sluigi	int ret = 0;
1267262151Sluigi	struct mbq *q = &kring->rx_queue;
1268227614Sluigi
1269238837Sluigi	(void)pwait;	/* disable unused warnings */
1270262151Sluigi	(void)td;
1271262151Sluigi
1272267282Sluigi	mbq_lock(q);
1273262151Sluigi
1274262151Sluigi	/* First part: import newly received packets */
1275262151Sluigi	n = mbq_len(q);
1276262151Sluigi	if (n) { /* grab packets from the queue */
1277262151Sluigi		struct mbuf *m;
1278262151Sluigi		uint32_t stop_i;
1279262151Sluigi
1280262151Sluigi		nm_i = kring->nr_hwtail;
1281262151Sluigi		stop_i = nm_prev(nm_i, lim);
1282267282Sluigi		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
1283262151Sluigi			int len = MBUF_LEN(m);
1284262151Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1285262151Sluigi
1286270252Sluigi			m_copydata(m, 0, len, NMB(na, slot));
1287262151Sluigi			ND("nm %d len %d", nm_i, len);
1288262151Sluigi			if (netmap_verbose)
				D("%s", nm_dump_buf(NMB(na, slot), len, 128, NULL));
1290262151Sluigi
1291262151Sluigi			slot->len = len;
1292262151Sluigi			slot->flags = kring->nkr_slot_flags;
1293262151Sluigi			nm_i = nm_next(nm_i, lim);
1294270252Sluigi			m_freem(m);
1295232238Sluigi		}
1296262151Sluigi		kring->nr_hwtail = nm_i;
1297232238Sluigi	}
1298262151Sluigi
1299262151Sluigi	/*
1300262151Sluigi	 * Second part: skip past packets that userspace has released.
1301262151Sluigi	 */
1302262151Sluigi	nm_i = kring->nr_hwcur;
1303262151Sluigi	if (nm_i != head) { /* something was released */
1304262151Sluigi		if (netmap_fwd || kring->ring->flags & NR_FORWARD)
1305262151Sluigi			ret = netmap_sw_to_nic(na);
1306262151Sluigi		kring->nr_hwcur = head;
1307262151Sluigi	}
1308262151Sluigi
1309262151Sluigi	nm_rxsync_finalize(kring);
1310262151Sluigi
1311270252Sluigi	/* access copies of cur,tail in the kring */
1312270252Sluigi	if (kring->rcur == kring->rtail && td) /* no bufs available */
1313270252Sluigi		selrecord(td, &kring->si);
1314270252Sluigi
1315267282Sluigi	mbq_unlock(q);
1316262151Sluigi	return ret;
1317227614Sluigi}
1318227614Sluigi
1319227614Sluigi
1320262151Sluigi/* Get a netmap adapter for the port.
1321251139Sluigi *
1322262151Sluigi * If it is possible to satisfy the request, return 0
1323262151Sluigi * with *na containing the netmap adapter found.
1324262151Sluigi * Otherwise return an error code, with *na containing NULL.
1325251139Sluigi *
1326262151Sluigi * When the port is attached to a bridge, we always return
1327262151Sluigi * EBUSY.
1328262151Sluigi * Otherwise, if the port is already bound to a file descriptor,
1329262151Sluigi * then we unconditionally return the existing adapter into *na.
1330262151Sluigi * In all the other cases, we return (into *na) either native,
1331262151Sluigi * generic or NULL, according to the following table:
1332262151Sluigi *
1333262151Sluigi *					native_support
1334262151Sluigi * active_fds   dev.netmap.admode         YES     NO
1335262151Sluigi * -------------------------------------------------------
1336262151Sluigi *    >0              *                 NA(ifp) NA(ifp)
1337262151Sluigi *
1338262151Sluigi *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1339262151Sluigi *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1340262151Sluigi *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1341262151Sluigi *
1342227614Sluigi */
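/*
 * A concrete example (a sketch, assuming the usual values of the
 * NETMAP_ADMODE_* constants): with no active file descriptors on a
 * port with native support,
 *
 *	# sysctl dev.netmap.admode=2	(NETMAP_ADMODE_GENERIC)
 *
 * makes subsequent bindings use the emulated (generic) adapter, while
 * admode=1 (NETMAP_ADMODE_NATIVE) on a port without native support
 * makes the lookup fail with EOPNOTSUPP, as per the table above.
 */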
1343262151Sluigi
1344262151Sluigiint
1345262151Sluiginetmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
1346227614Sluigi{
1347262151Sluigi	/* generic support */
1348262151Sluigi	int i = netmap_admode;	/* Take a snapshot. */
1349262151Sluigi	int error = 0;
1350262151Sluigi	struct netmap_adapter *prev_na;
1351262151Sluigi	struct netmap_generic_adapter *gna;
1352238812Sluigi
1353262151Sluigi	*na = NULL; /* default */
1354238812Sluigi
1355262151Sluigi	/* reset in case of invalid value */
1356262151Sluigi	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
1357262151Sluigi		i = netmap_admode = NETMAP_ADMODE_BEST;
1358262151Sluigi
1359262151Sluigi	if (NETMAP_CAPABLE(ifp)) {
1360270252Sluigi		prev_na = NA(ifp);
1361262151Sluigi		/* If an adapter already exists, return it if
1362262151Sluigi		 * there are active file descriptors or if
1363262151Sluigi		 * netmap is not forced to use generic
1364262151Sluigi		 * adapters.
1365251139Sluigi		 */
1366270252Sluigi		if (NETMAP_OWNED_BY_ANY(prev_na)
1367270252Sluigi			|| i != NETMAP_ADMODE_GENERIC
1368270252Sluigi			|| prev_na->na_flags & NAF_FORCE_NATIVE
1369270252Sluigi#ifdef WITH_PIPES
1370270252Sluigi			/* ugly, but we cannot allow an adapter switch
1371270252Sluigi			 * if some pipe is referring to this one
1372270252Sluigi			 */
1373270252Sluigi			|| prev_na->na_next_pipe > 0
1374270252Sluigi#endif
1375270252Sluigi		) {
1376270252Sluigi			*na = prev_na;
1377262151Sluigi			return 0;
1378262151Sluigi		}
1379262151Sluigi	}
1380251139Sluigi
1381262151Sluigi	/* If there isn't native support and netmap is not allowed
1382262151Sluigi	 * to use generic adapters, we cannot satisfy the request.
1383262151Sluigi	 */
1384262151Sluigi	if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE)
1385262151Sluigi		return EOPNOTSUPP;
1386251139Sluigi
1387262151Sluigi	/* Otherwise, create a generic adapter and return it,
1388262151Sluigi	 * saving the previously used netmap adapter, if any.
1389262151Sluigi	 *
1390262151Sluigi	 * Note that here 'prev_na', if not NULL, MUST be a
1391262151Sluigi	 * native adapter, and CANNOT be a generic one. This is
1392262151Sluigi	 * true because generic adapters are created on demand, and
1393262151Sluigi	 * destroyed when not used anymore. Therefore, if the adapter
1394262151Sluigi	 * currently attached to an interface 'ifp' is generic, it
1395262151Sluigi	 * must be that
1396262151Sluigi	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
1397262151Sluigi	 * Consequently, if NA(ifp) is generic, we will enter one of
1398262151Sluigi	 * the branches above. This ensures that we never override
1399262151Sluigi	 * a generic adapter with another generic adapter.
1400227614Sluigi	 */
1401262151Sluigi	prev_na = NA(ifp);
1402262151Sluigi	error = generic_netmap_attach(ifp);
1403262151Sluigi	if (error)
1404262151Sluigi		return error;
1405262151Sluigi
1406262151Sluigi	*na = NA(ifp);
1407262151Sluigi	gna = (struct netmap_generic_adapter*)NA(ifp);
1408262151Sluigi	gna->prev = prev_na; /* save old na */
1409262151Sluigi	if (prev_na != NULL) {
1410262151Sluigi		ifunit_ref(ifp->if_xname);
1411262151Sluigi		// XXX add a refcount ?
1412262151Sluigi		netmap_adapter_get(prev_na);
1413251139Sluigi	}
1414262151Sluigi	ND("Created generic NA %p (prev %p)", gna, gna->prev);
1415262151Sluigi
1416262151Sluigi	return 0;
1417227614Sluigi}
1418227614Sluigi
1419227614Sluigi
1420227614Sluigi/*
1421262151Sluigi * MUST BE CALLED UNDER NMG_LOCK()
1422262151Sluigi *
1423262151Sluigi * Get a refcounted reference to a netmap adapter attached
1424262151Sluigi * to the interface specified by nmr.
1425262151Sluigi * This is always called in the execution of an ioctl().
1426262151Sluigi *
1427262151Sluigi * Return ENXIO if the interface specified by the request does
1428262151Sluigi * not exist, ENOTSUP if netmap is not supported by the interface,
1429262151Sluigi * EBUSY if the interface is already attached to a bridge,
1430262151Sluigi * EINVAL if parameters are invalid, ENOMEM if needed resources
1431262151Sluigi * could not be allocated.
1432262151Sluigi * If successful, hold a reference to the netmap adapter.
1433262151Sluigi *
1434262151Sluigi * No reference is kept on the real interface, which may then
1435262151Sluigi * disappear at any time.
1436262151Sluigi */
1437262151Sluigiint
1438262151Sluiginetmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
1439262151Sluigi{
1440262151Sluigi	struct ifnet *ifp = NULL;
1441262151Sluigi	int error = 0;
1442262151Sluigi	struct netmap_adapter *ret = NULL;
1443262151Sluigi
1444262151Sluigi	*na = NULL;     /* default return value */
1445262151Sluigi
1446262151Sluigi	NMG_LOCK_ASSERT();
1447262151Sluigi
	/* We cascade through all possible types of netmap adapter.
1449270252Sluigi	 * All netmap_get_*_na() functions return an error and an na,
1450270252Sluigi	 * with the following combinations:
1451270252Sluigi	 *
1452270252Sluigi	 * error    na
1453270252Sluigi	 *   0	   NULL		type doesn't match
1454270252Sluigi	 *  !0	   NULL		type matches, but na creation/lookup failed
1455270252Sluigi	 *   0	  !NULL		type matches and na created/found
1456270252Sluigi	 *  !0    !NULL		impossible
1457270252Sluigi	 */
1458270252Sluigi
1459270252Sluigi	/* try to see if this is a monitor port */
1460270252Sluigi	error = netmap_get_monitor_na(nmr, na, create);
1461270252Sluigi	if (error || *na != NULL)
1462270252Sluigi		return error;
1463270252Sluigi
1464270252Sluigi	/* try to see if this is a pipe port */
1465262151Sluigi	error = netmap_get_pipe_na(nmr, na, create);
1466262151Sluigi	if (error || *na != NULL)
1467262151Sluigi		return error;
1468262151Sluigi
1469270252Sluigi	/* try to see if this is a bridge port */
1470262151Sluigi	error = netmap_get_bdg_na(nmr, na, create);
1471262151Sluigi	if (error)
1472262151Sluigi		return error;
1473262151Sluigi
1474262151Sluigi	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
1475262151Sluigi		goto pipes;
1476262151Sluigi
1477267282Sluigi	/*
1478267282Sluigi	 * This must be a hardware na, lookup the name in the system.
1479267282Sluigi	 * Note that by hardware we actually mean "it shows up in ifconfig".
1480267282Sluigi	 * This may still be a tap, a veth/epair, or even a
1481267282Sluigi	 * persistent VALE port.
1482267282Sluigi	 */
1483262151Sluigi	ifp = ifunit_ref(nmr->nr_name);
1484262151Sluigi	if (ifp == NULL) {
		return ENXIO;
1486262151Sluigi	}
1487262151Sluigi
1488262151Sluigi	error = netmap_get_hw_na(ifp, &ret);
1489262151Sluigi	if (error)
1490262151Sluigi		goto out;
1491262151Sluigi
1492262151Sluigi	*na = ret;
1493262151Sluigi	netmap_adapter_get(ret);
1494262151Sluigi
1495262151Sluigipipes:
1496267282Sluigi	/*
1497267282Sluigi	 * If we are opening a pipe whose parent was not in netmap mode,
1498267282Sluigi	 * we have to allocate the pipe array now.
1499267282Sluigi	 * XXX get rid of this clumsiness (2014-03-15)
1500267282Sluigi	 */
1501262151Sluigi	error = netmap_pipe_alloc(*na, nmr);
1502262151Sluigi
1503262151Sluigiout:
1504262151Sluigi	if (error && ret != NULL)
1505262151Sluigi		netmap_adapter_put(ret);
1506262151Sluigi
1507262151Sluigi	if (ifp)
1508267282Sluigi		if_rele(ifp); /* allow live unloading of drivers modules */
1509262151Sluigi
1510262151Sluigi	return error;
1511262151Sluigi}
1512262151Sluigi
1513262151Sluigi
1514262151Sluigi/*
1515262151Sluigi * validate parameters on entry for *_txsync()
 * Returns ring->head if ok, or something >= kring->nkr_num_slots
1517262151Sluigi * in case of error.
1518262151Sluigi *
1519262151Sluigi * rhead, rcur and rtail=hwtail are stored from previous round.
1520262151Sluigi * hwcur is the next packet to send to the ring.
1521262151Sluigi *
1522262151Sluigi * We want
1523262151Sluigi *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1524262151Sluigi *
1525262151Sluigi * hwcur, rhead, rtail and hwtail are reliable
1526262151Sluigi */
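/*
 * Worked example (a sketch, not taken from a real trace): with
 * nkr_num_slots = 8 and, from the previous round, rhead = rcur = 2
 * and rtail = 6, the checks below accept head = 4, cur = 5
 * (2 <= 4 <= 6 and 4 <= 5 <= 6), while head = 1 (moved back past
 * rhead) or cur = 7 (beyond tail) take the error path, and the
 * caller then reinitializes the ring.
 */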
1527262151Sluigiu_int
1528262151Sluiginm_txsync_prologue(struct netmap_kring *kring)
1529262151Sluigi{
1530262151Sluigi	struct netmap_ring *ring = kring->ring;
1531262151Sluigi	u_int head = ring->head; /* read only once */
1532262151Sluigi	u_int cur = ring->cur; /* read only once */
1533262151Sluigi	u_int n = kring->nkr_num_slots;
1534262151Sluigi
1535262151Sluigi	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
1536262151Sluigi		kring->name,
1537262151Sluigi		kring->nr_hwcur, kring->nr_hwtail,
1538262151Sluigi		ring->head, ring->cur, ring->tail);
1539262151Sluigi#if 1 /* kernel sanity checks; but we can trust the kring. */
1540262151Sluigi	if (kring->nr_hwcur >= n || kring->rhead >= n ||
1541262151Sluigi	    kring->rtail >= n ||  kring->nr_hwtail >= n)
1542262151Sluigi		goto error;
1543262151Sluigi#endif /* kernel sanity checks */
1544262151Sluigi	/*
1545262151Sluigi	 * user sanity checks. We only use 'cur',
1546262151Sluigi	 * A, B, ... are possible positions for cur:
1547262151Sluigi	 *
1548262151Sluigi	 *  0    A  cur   B  tail  C  n-1
1549262151Sluigi	 *  0    D  tail  E  cur   F  n-1
1550262151Sluigi	 *
1551262151Sluigi	 * B, F, D are valid. A, C, E are wrong
1552262151Sluigi	 */
1553262151Sluigi	if (kring->rtail >= kring->rhead) {
1554262151Sluigi		/* want rhead <= head <= rtail */
1555262151Sluigi		if (head < kring->rhead || head > kring->rtail)
1556262151Sluigi			goto error;
1557262151Sluigi		/* and also head <= cur <= rtail */
1558262151Sluigi		if (cur < head || cur > kring->rtail)
1559262151Sluigi			goto error;
1560262151Sluigi	} else { /* here rtail < rhead */
1561262151Sluigi		/* we need head outside rtail .. rhead */
1562262151Sluigi		if (head > kring->rtail && head < kring->rhead)
1563262151Sluigi			goto error;
1564262151Sluigi
1565262151Sluigi		/* two cases now: head <= rtail or head >= rhead  */
1566262151Sluigi		if (head <= kring->rtail) {
1567262151Sluigi			/* want head <= cur <= rtail */
1568262151Sluigi			if (cur < head || cur > kring->rtail)
1569262151Sluigi				goto error;
1570262151Sluigi		} else { /* head >= rhead */
1571262151Sluigi			/* cur must be outside rtail..head */
1572262151Sluigi			if (cur > kring->rtail && cur < head)
1573262151Sluigi				goto error;
1574262151Sluigi		}
1575262151Sluigi	}
1576262151Sluigi	if (ring->tail != kring->rtail) {
1577262151Sluigi		RD(5, "tail overwritten was %d need %d",
1578262151Sluigi			ring->tail, kring->rtail);
1579262151Sluigi		ring->tail = kring->rtail;
1580262151Sluigi	}
1581262151Sluigi	kring->rhead = head;
1582262151Sluigi	kring->rcur = cur;
1583262151Sluigi	return head;
1584262151Sluigi
1585262151Sluigierror:
1586262151Sluigi	RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
1587262151Sluigi		kring->name,
1588262151Sluigi		kring->nr_hwcur,
1589262151Sluigi		kring->rcur, kring->nr_hwtail,
1590262151Sluigi		cur, ring->tail);
1591262151Sluigi	return n;
1592262151Sluigi}
1593262151Sluigi
1594262151Sluigi
1595262151Sluigi/*
1596262151Sluigi * validate parameters on entry for *_rxsync()
1597262151Sluigi * Returns ring->head if ok, kring->nkr_num_slots on error.
1598262151Sluigi *
1599262151Sluigi * For a valid configuration,
1600262151Sluigi * hwcur <= head <= cur <= tail <= hwtail
1601262151Sluigi *
1602262151Sluigi * We only consider head and cur.
1603262151Sluigi * hwcur and hwtail are reliable.
1604262151Sluigi *
1605262151Sluigi */
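/*
 * Example (a sketch): with nkr_num_slots = 8, nr_hwcur = 1 and
 * nr_hwtail = 5, userspace may advance to head = 3, cur = 4
 * (1 <= 3 <= 5 and 3 <= 4 <= 5); head = 6 or cur = 7 fall outside
 * [hwcur, hwtail] and take the error path.
 */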
1606262151Sluigiu_int
1607262151Sluiginm_rxsync_prologue(struct netmap_kring *kring)
1608262151Sluigi{
1609262151Sluigi	struct netmap_ring *ring = kring->ring;
1610262151Sluigi	uint32_t const n = kring->nkr_num_slots;
1611262151Sluigi	uint32_t head, cur;
1612262151Sluigi
1613262151Sluigi	ND("%s kc %d kt %d h %d c %d t %d",
1614262151Sluigi		kring->name,
1615262151Sluigi		kring->nr_hwcur, kring->nr_hwtail,
1616262151Sluigi		ring->head, ring->cur, ring->tail);
1617262151Sluigi	/*
1618262151Sluigi	 * Before storing the new values, we should check they do not
1619262151Sluigi	 * move backwards. However:
1620262151Sluigi	 * - head is not an issue because the previous value is hwcur;
1621262151Sluigi	 * - cur could in principle go back, however it does not matter
1622262151Sluigi	 *   because we are processing a brand new rxsync()
1623262151Sluigi	 */
1624262151Sluigi	cur = kring->rcur = ring->cur;	/* read only once */
1625262151Sluigi	head = kring->rhead = ring->head;	/* read only once */
1626262151Sluigi#if 1 /* kernel sanity checks */
1627262151Sluigi	if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
1628262151Sluigi		goto error;
1629262151Sluigi#endif /* kernel sanity checks */
1630262151Sluigi	/* user sanity checks */
1631262151Sluigi	if (kring->nr_hwtail >= kring->nr_hwcur) {
1632262151Sluigi		/* want hwcur <= rhead <= hwtail */
1633262151Sluigi		if (head < kring->nr_hwcur || head > kring->nr_hwtail)
1634262151Sluigi			goto error;
1635262151Sluigi		/* and also rhead <= rcur <= hwtail */
1636262151Sluigi		if (cur < head || cur > kring->nr_hwtail)
1637262151Sluigi			goto error;
1638262151Sluigi	} else {
1639262151Sluigi		/* we need rhead outside hwtail..hwcur */
1640262151Sluigi		if (head < kring->nr_hwcur && head > kring->nr_hwtail)
1641262151Sluigi			goto error;
1642262151Sluigi		/* two cases now: head <= hwtail or head >= hwcur  */
1643262151Sluigi		if (head <= kring->nr_hwtail) {
1644262151Sluigi			/* want head <= cur <= hwtail */
1645262151Sluigi			if (cur < head || cur > kring->nr_hwtail)
1646262151Sluigi				goto error;
1647262151Sluigi		} else {
1648262151Sluigi			/* cur must be outside hwtail..head */
1649262151Sluigi			if (cur < head && cur > kring->nr_hwtail)
1650262151Sluigi				goto error;
1651262151Sluigi		}
1652262151Sluigi	}
1653262151Sluigi	if (ring->tail != kring->rtail) {
1654262151Sluigi		RD(5, "%s tail overwritten was %d need %d",
1655262151Sluigi			kring->name,
1656262151Sluigi			ring->tail, kring->rtail);
1657262151Sluigi		ring->tail = kring->rtail;
1658262151Sluigi	}
1659262151Sluigi	return head;
1660262151Sluigi
1661262151Sluigierror:
1662262151Sluigi	RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
1663262151Sluigi		kring->nr_hwcur,
1664262151Sluigi		kring->rcur, kring->nr_hwtail,
1665262151Sluigi		kring->rhead, kring->rcur, ring->tail);
1666262151Sluigi	return n;
1667262151Sluigi}
1668262151Sluigi
1669262151Sluigi
1670262151Sluigi/*
1671227614Sluigi * Error routine called when txsync/rxsync detects an error.
 * Can't do much more than resetting head = cur = hwcur, tail = hwtail.
1673227614Sluigi * Return 1 on reinit.
1674228276Sluigi *
1675228276Sluigi * This routine is only called by the upper half of the kernel.
1676228276Sluigi * It only reads hwcur (which is changed only by the upper half, too)
1677262151Sluigi * and hwtail (which may be changed by the lower half, but only on
1678228276Sluigi * a tx ring and only to increase it, so any error will be recovered
1679228276Sluigi * on the next call). For the above, we don't strictly need to call
1680228276Sluigi * it under lock.
1681227614Sluigi */
1682227614Sluigiint
1683227614Sluiginetmap_ring_reinit(struct netmap_kring *kring)
1684227614Sluigi{
1685227614Sluigi	struct netmap_ring *ring = kring->ring;
1686227614Sluigi	u_int i, lim = kring->nkr_num_slots - 1;
1687227614Sluigi	int errors = 0;
1688227614Sluigi
1689262151Sluigi	// XXX KASSERT nm_kr_tryget
1690270252Sluigi	RD(10, "called for %s", kring->name);
1691262151Sluigi	// XXX probably wrong to trust userspace
1692262151Sluigi	kring->rhead = ring->head;
1693262151Sluigi	kring->rcur  = ring->cur;
1694262151Sluigi	kring->rtail = ring->tail;
1695262151Sluigi
1696227614Sluigi	if (ring->cur > lim)
1697227614Sluigi		errors++;
1698262151Sluigi	if (ring->head > lim)
1699262151Sluigi		errors++;
1700262151Sluigi	if (ring->tail > lim)
1701262151Sluigi		errors++;
1702227614Sluigi	for (i = 0; i <= lim; i++) {
1703227614Sluigi		u_int idx = ring->slot[i].buf_idx;
1704227614Sluigi		u_int len = ring->slot[i].len;
1705227614Sluigi		if (idx < 2 || idx >= netmap_total_buffers) {
1706262151Sluigi			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
1707227614Sluigi			ring->slot[i].buf_idx = 0;
1708227614Sluigi			ring->slot[i].len = 0;
1709270252Sluigi		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
1710227614Sluigi			ring->slot[i].len = 0;
1711262151Sluigi			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
1712227614Sluigi		}
1713227614Sluigi	}
1714227614Sluigi	if (errors) {
1715241719Sluigi		RD(10, "total %d errors", errors);
1716262151Sluigi		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
1717262151Sluigi			kring->name,
1718227614Sluigi			ring->cur, kring->nr_hwcur,
1719262151Sluigi			ring->tail, kring->nr_hwtail);
1720262151Sluigi		ring->head = kring->rhead = kring->nr_hwcur;
1721262151Sluigi		ring->cur  = kring->rcur  = kring->nr_hwcur;
1722262151Sluigi		ring->tail = kring->rtail = kring->nr_hwtail;
1723227614Sluigi	}
1724227614Sluigi	return (errors ? 1 : 0);
1725227614Sluigi}
1726227614Sluigi
/* Interpret the ringid and flags fields of an nmreq by translating them
1728270252Sluigi * into a pair of intervals of ring indices:
1729270252Sluigi *
1730270252Sluigi * [priv->np_txqfirst, priv->np_txqlast) and
1731270252Sluigi * [priv->np_rxqfirst, priv->np_rxqlast)
1732270252Sluigi *
1733227614Sluigi */
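/*
 * For example (a sketch of the cases handled below), on an adapter
 * with 4 tx and 4 rx hardware rings plus the host rings:
 *
 *	NR_REG_ALL_NIC			-> tx [0,4)  rx [0,4)
 *	NR_REG_ONE_NIC, ring id 2	-> tx [2,3)  rx [2,3)
 *	NR_REG_SW			-> tx [4,5)  rx [4,5)	(host rings)
 *	NR_REG_NIC_SW			-> tx [0,5)  rx [0,5)
 */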
1734270252Sluigiint
1735270252Sluiginetmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
1736227614Sluigi{
1737262151Sluigi	struct netmap_adapter *na = priv->np_na;
1738262151Sluigi	u_int j, i = ringid & NETMAP_RING_MASK;
1739262151Sluigi	u_int reg = flags & NR_REG_MASK;
1740227614Sluigi
1741262151Sluigi	if (reg == NR_REG_DEFAULT) {
1742262151Sluigi		/* convert from old ringid to flags */
1743262151Sluigi		if (ringid & NETMAP_SW_RING) {
1744262151Sluigi			reg = NR_REG_SW;
1745262151Sluigi		} else if (ringid & NETMAP_HW_RING) {
1746262151Sluigi			reg = NR_REG_ONE_NIC;
1747262151Sluigi		} else {
1748262151Sluigi			reg = NR_REG_ALL_NIC;
1749262151Sluigi		}
1750262151Sluigi		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
1751227614Sluigi	}
1752262151Sluigi	switch (reg) {
1753262151Sluigi	case NR_REG_ALL_NIC:
1754262151Sluigi	case NR_REG_PIPE_MASTER:
1755262151Sluigi	case NR_REG_PIPE_SLAVE:
1756262151Sluigi		priv->np_txqfirst = 0;
1757262151Sluigi		priv->np_txqlast = na->num_tx_rings;
1758262151Sluigi		priv->np_rxqfirst = 0;
1759262151Sluigi		priv->np_rxqlast = na->num_rx_rings;
1760262151Sluigi		ND("%s %d %d", "ALL/PIPE",
1761262151Sluigi			priv->np_rxqfirst, priv->np_rxqlast);
1762262151Sluigi		break;
1763262151Sluigi	case NR_REG_SW:
1764262151Sluigi	case NR_REG_NIC_SW:
1765262151Sluigi		if (!(na->na_flags & NAF_HOST_RINGS)) {
1766262151Sluigi			D("host rings not supported");
1767262151Sluigi			return EINVAL;
1768262151Sluigi		}
1769262151Sluigi		priv->np_txqfirst = (reg == NR_REG_SW ?
1770262151Sluigi			na->num_tx_rings : 0);
1771262151Sluigi		priv->np_txqlast = na->num_tx_rings + 1;
1772262151Sluigi		priv->np_rxqfirst = (reg == NR_REG_SW ?
1773262151Sluigi			na->num_rx_rings : 0);
1774262151Sluigi		priv->np_rxqlast = na->num_rx_rings + 1;
1775262151Sluigi		ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
1776262151Sluigi			priv->np_rxqfirst, priv->np_rxqlast);
1777262151Sluigi		break;
1778262151Sluigi	case NR_REG_ONE_NIC:
1779262151Sluigi		if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
1780262151Sluigi			D("invalid ring id %d", i);
1781262151Sluigi			return EINVAL;
1782262151Sluigi		}
1783262151Sluigi		/* if not enough rings, use the first one */
1784262151Sluigi		j = i;
1785262151Sluigi		if (j >= na->num_tx_rings)
1786262151Sluigi			j = 0;
1787262151Sluigi		priv->np_txqfirst = j;
1788262151Sluigi		priv->np_txqlast = j + 1;
1789262151Sluigi		j = i;
1790262151Sluigi		if (j >= na->num_rx_rings)
1791262151Sluigi			j = 0;
1792262151Sluigi		priv->np_rxqfirst = j;
1793262151Sluigi		priv->np_rxqlast = j + 1;
1794262151Sluigi		break;
1795262151Sluigi	default:
1796262151Sluigi		D("invalid regif type %d", reg);
1797262151Sluigi		return EINVAL;
1798227614Sluigi	}
1799262151Sluigi	priv->np_flags = (flags & ~NR_REG_MASK) | reg;
1800270252Sluigi
1801262151Sluigi	if (netmap_verbose) {
1802267282Sluigi		D("%s: tx [%d,%d) rx [%d,%d) id %d",
1803270252Sluigi			na->name,
1804262151Sluigi			priv->np_txqfirst,
1805262151Sluigi			priv->np_txqlast,
1806262151Sluigi			priv->np_rxqfirst,
1807262151Sluigi			priv->np_rxqlast,
1808262151Sluigi			i);
1809262151Sluigi	}
1810227614Sluigi	return 0;
1811227614Sluigi}
1812227614Sluigi
1813270252Sluigi
1814227614Sluigi/*
1815270252Sluigi * Set the ring ID. For devices with a single queue, a request
1816270252Sluigi * for all rings is the same as a single ring.
1817270252Sluigi */
1818270252Sluigistatic int
1819270252Sluiginetmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
1820270252Sluigi{
1821270252Sluigi	struct netmap_adapter *na = priv->np_na;
1822270252Sluigi	int error;
1823270252Sluigi
1824270252Sluigi	error = netmap_interp_ringid(priv, ringid, flags);
1825270252Sluigi	if (error) {
1826270252Sluigi		return error;
1827270252Sluigi	}
1828270252Sluigi
1829270252Sluigi	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
1830270252Sluigi
1831270252Sluigi	/* optimization: count the users registered for more than
1832270252Sluigi	 * one ring, which are the ones sleeping on the global queue.
1833270252Sluigi	 * The default netmap_notify() callback will then
1834270252Sluigi	 * avoid signaling the global queue if nobody is using it
1835270252Sluigi	 */
1836270252Sluigi	if (nm_tx_si_user(priv))
1837270252Sluigi		na->tx_si_users++;
1838270252Sluigi	if (nm_rx_si_user(priv))
1839270252Sluigi		na->rx_si_users++;
1840270252Sluigi	return 0;
1841270252Sluigi}
1842270252Sluigi
1843270252Sluigi/*
 * Possibly move the interface to netmap mode.
 * On success it returns a pointer to a netmap_if, otherwise NULL.
1846262151Sluigi * This must be called with NMG_LOCK held.
1847270252Sluigi *
1848270252Sluigi * The following na callbacks are called in the process:
1849270252Sluigi *
1850270252Sluigi * na->nm_config()			[by netmap_update_config]
1851270252Sluigi * (get current number and size of rings)
1852270252Sluigi *
1853270252Sluigi *  	We have a generic one for linux (netmap_linux_config).
1854270252Sluigi *  	The bwrap has to override this, since it has to forward
1855270252Sluigi *  	the request to the wrapped adapter (netmap_bwrap_config).
1856270252Sluigi *
1857270252Sluigi *    	XXX netmap_if_new calls this again (2014-03-15)
1858270252Sluigi *
1859270252Sluigi * na->nm_krings_create()		[by netmap_if_new]
1860270252Sluigi * (create and init the krings array)
1861270252Sluigi *
1862270252Sluigi * 	One of the following:
1863270252Sluigi *
1864270252Sluigi *	* netmap_hw_krings_create, 			(hw ports)
1865270252Sluigi *		creates the standard layout for the krings
1866270252Sluigi * 		and adds the mbq (used for the host rings).
1867270252Sluigi *
1868270252Sluigi * 	* netmap_vp_krings_create			(VALE ports)
1869270252Sluigi * 		add leases and scratchpads
1870270252Sluigi *
1871270252Sluigi * 	* netmap_pipe_krings_create			(pipes)
1872270252Sluigi * 		create the krings and rings of both ends and
1873270252Sluigi * 		cross-link them
1874270252Sluigi *
1875270252Sluigi *      * netmap_monitor_krings_create 			(monitors)
1876270252Sluigi *      	avoid allocating the mbq
1877270252Sluigi *
1878270252Sluigi *      * netmap_bwrap_krings_create			(bwraps)
 *      	create both the bwrap krings array,
1880270252Sluigi *      	the krings array of the wrapped adapter, and
1881270252Sluigi *      	(if needed) the fake array for the host adapter
1882270252Sluigi *
1883270252Sluigi * na->nm_register(, 1)
1884270252Sluigi * (put the adapter in netmap mode)
1885270252Sluigi *
1886270252Sluigi * 	This may be one of the following:
1887270252Sluigi * 	(XXX these should be either all *_register or all *_reg 2014-03-15)
1888270252Sluigi *
1889270252Sluigi * 	* netmap_hw_register				(hw ports)
1890270252Sluigi * 		checks that the ifp is still there, then calls
1891270252Sluigi * 		the hardware specific callback;
1892270252Sluigi *
1893270252Sluigi * 	* netmap_vp_reg					(VALE ports)
1894270252Sluigi *		If the port is connected to a bridge,
1895270252Sluigi *		set the NAF_NETMAP_ON flag under the
1896270252Sluigi *		bridge write lock.
1897270252Sluigi *
1898270252Sluigi *	* netmap_pipe_reg				(pipes)
1899270252Sluigi *		inform the other pipe end that it is no
 *		longer responsible for the lifetime of this
1901270252Sluigi *		pipe end
1902270252Sluigi *
1903270252Sluigi *	* netmap_monitor_reg				(monitors)
1904270252Sluigi *		intercept the sync callbacks of the monitored
1905270252Sluigi *		rings
1906270252Sluigi *
1907270252Sluigi *	* netmap_bwrap_register				(bwraps)
1908270252Sluigi *		cross-link the bwrap and hwna rings,
1909270252Sluigi *		forward the request to the hwna, override
1910270252Sluigi *		the hwna notify callback (to get the frames
1911270252Sluigi *		coming from outside go through the bridge).
1912270252Sluigi *
1913270252Sluigi * XXX maybe netmap_if_new() should be merged with this (2014-03-15).
1914270252Sluigi *
1915251139Sluigi */
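/*
 * For a plain hardware port the calls made below thus boil down,
 * roughly, to the following sketch (nm_register() only runs for the
 * first user of the adapter):
 *
 *	netmap_update_config(na);	// na->nm_config()
 *	nifp = netmap_if_new(na);	// na->nm_krings_create() + rings
 *	error = na->nm_register(na, 1);	// put the adapter in netmap mode
 */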
1916262151Sluigistruct netmap_if *
1917262151Sluiginetmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
1918262151Sluigi	uint16_t ringid, uint32_t flags, int *err)
1919251139Sluigi{
1920251139Sluigi	struct netmap_if *nifp = NULL;
1921262151Sluigi	int error, need_mem = 0;
1922251139Sluigi
1923262151Sluigi	NMG_LOCK_ASSERT();
1924251139Sluigi	/* ring configuration may have changed, fetch from the card */
1925251139Sluigi	netmap_update_config(na);
1926262151Sluigi	priv->np_na = na;     /* store the reference */
1927262151Sluigi	error = netmap_set_ringid(priv, ringid, flags);
1928251139Sluigi	if (error)
1929251139Sluigi		goto out;
1930262151Sluigi	/* ensure allocators are ready */
1931262151Sluigi	need_mem = !netmap_have_memory_locked(priv);
1932262151Sluigi	if (need_mem) {
1933262151Sluigi		error = netmap_get_memory_locked(priv);
1934262151Sluigi		ND("get_memory returned %d", error);
1935262151Sluigi		if (error)
1936262151Sluigi			goto out;
1937262151Sluigi	}
1938267282Sluigi	/* Allocate a netmap_if and, if necessary, all the netmap_ring's */
1939270252Sluigi	nifp = netmap_if_new(na);
1940251139Sluigi	if (nifp == NULL) { /* allocation failed */
1941251139Sluigi		error = ENOMEM;
1942262151Sluigi		goto out;
1943262151Sluigi	}
1944262151Sluigi	na->active_fds++;
1945270252Sluigi	if (!nm_netmap_on(na)) {
1946270252Sluigi		/* Netmap not active, set the card in netmap mode
1947251139Sluigi		 * and make it use the shared buffers.
1948251139Sluigi		 */
1949267282Sluigi		/* cache the allocator info in the na */
1950270252Sluigi		na->na_lut = netmap_mem_get_lut(na->nm_mem);
1951262151Sluigi		ND("%p->na_lut == %p", na, na->na_lut);
1952270252Sluigi		na->na_lut_objtotal = netmap_mem_get_buftotal(na->nm_mem);
1953270252Sluigi		na->na_lut_objsize = netmap_mem_get_bufsize(na->nm_mem);
1954262151Sluigi		error = na->nm_register(na, 1); /* mode on */
1955251139Sluigi		if (error) {
1956262151Sluigi			netmap_do_unregif(priv, nifp);
1957251139Sluigi			nifp = NULL;
1958251139Sluigi		}
1959251139Sluigi	}
1960251139Sluigiout:
1961251139Sluigi	*err = error;
1962251139Sluigi	if (error) {
1963267282Sluigi		/* we should drop the allocator, but only
1964267282Sluigi		 * if we were the ones who grabbed it
1965267282Sluigi		 */
1966262151Sluigi		if (need_mem)
1967262151Sluigi			netmap_drop_memory_locked(priv);
1968270252Sluigi		priv->np_na = NULL;
1969251139Sluigi	}
1970262151Sluigi	if (nifp != NULL) {
1971262151Sluigi		/*
		 * advertise that the interface is ready by setting priv->np_nifp.
1973262151Sluigi		 * The barrier is needed because readers (poll and *SYNC)
1974262151Sluigi		 * check for priv->np_nifp != NULL without locking
1975251139Sluigi		 */
1976262151Sluigi		wmb(); /* make sure previous writes are visible to all CPUs */
1977262151Sluigi		priv->np_nifp = nifp;
1978251139Sluigi	}
1979262151Sluigi	return nifp;
1980251139Sluigi}
1981251139Sluigi
1982251139Sluigi
1983251139Sluigi
1984251139Sluigi/*
1985227614Sluigi * ioctl(2) support for the "netmap" device.
1986227614Sluigi *
1987227614Sluigi * Following a list of accepted commands:
1988227614Sluigi * - NIOCGINFO
1989227614Sluigi * - SIOCGIFADDR	just for convenience
1990227614Sluigi * - NIOCREGIF
1991227614Sluigi * - NIOCTXSYNC
1992227614Sluigi * - NIOCRXSYNC
1993227614Sluigi *
1994227614Sluigi * Return 0 on success, errno otherwise.
1995227614Sluigi */
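/*
 * Minimal userspace sketch of the register/txsync path (error
 * handling omitted; "em0" is just an example interface name):
 *
 *	struct nmreq req;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	ioctl(fd, NIOCREGIF, &req);	// bind to all hw rings
 *	void *mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
 *		MAP_SHARED, fd, 0);
 *	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
 *	...
 *	ioctl(fd, NIOCTXSYNC, NULL);	// flush pending transmissions
 */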
1996262151Sluigiint
1997238912Sluiginetmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
1998238912Sluigi	int fflag, struct thread *td)
1999227614Sluigi{
2000227614Sluigi	struct netmap_priv_d *priv = NULL;
2001227614Sluigi	struct nmreq *nmr = (struct nmreq *) data;
2002262151Sluigi	struct netmap_adapter *na = NULL;
2003227614Sluigi	int error;
2004262151Sluigi	u_int i, qfirst, qlast;
2005227614Sluigi	struct netmap_if *nifp;
2006262151Sluigi	struct netmap_kring *krings;
2007227614Sluigi
2008238912Sluigi	(void)dev;	/* UNUSED */
2009238912Sluigi	(void)fflag;	/* UNUSED */
2010238812Sluigi
2011262151Sluigi	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
2012262151Sluigi		/* truncate name */
2013262151Sluigi		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
2014262151Sluigi		if (nmr->nr_version != NETMAP_API) {
2015262151Sluigi			D("API mismatch for %s got %d need %d",
2016262151Sluigi				nmr->nr_name,
2017262151Sluigi				nmr->nr_version, NETMAP_API);
2018262151Sluigi			nmr->nr_version = NETMAP_API;
2019262151Sluigi		}
2020262151Sluigi		if (nmr->nr_version < NETMAP_MIN_API ||
2021262151Sluigi		    nmr->nr_version > NETMAP_MAX_API) {
2022262151Sluigi			return EINVAL;
2023262151Sluigi		}
2024262151Sluigi	}
2025228276Sluigi	CURVNET_SET(TD_TO_VNET(td));
2026228276Sluigi
2027227614Sluigi	error = devfs_get_cdevpriv((void **)&priv);
2028241719Sluigi	if (error) {
2029228276Sluigi		CURVNET_RESTORE();
2030241719Sluigi		/* XXX ENOENT should be impossible, since the priv
2031241719Sluigi		 * is now created in the open */
2032241719Sluigi		return (error == ENOENT ? ENXIO : error);
2033228276Sluigi	}
2034227614Sluigi
2035227614Sluigi	switch (cmd) {
2036227614Sluigi	case NIOCGINFO:		/* return capabilities etc */
2037251139Sluigi		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2038251139Sluigi			error = netmap_bdg_ctl(nmr, NULL);
2039251139Sluigi			break;
2040251139Sluigi		}
2041262151Sluigi
2042262151Sluigi		NMG_LOCK();
2043262151Sluigi		do {
2044262151Sluigi			/* memsize is always valid */
2045262151Sluigi			struct netmap_mem_d *nmd = &nm_mem;
2046262151Sluigi			u_int memflags;
2047262151Sluigi
2048262151Sluigi			if (nmr->nr_name[0] != '\0') {
2049262151Sluigi				/* get a refcount */
2050262151Sluigi				error = netmap_get_na(nmr, &na, 1 /* create */);
2051262151Sluigi				if (error)
2052262151Sluigi					break;
2053262151Sluigi				nmd = na->nm_mem; /* get memory allocator */
2054262151Sluigi			}
2055262151Sluigi
2056262151Sluigi			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
2057262151Sluigi				&nmr->nr_arg2);
2058262151Sluigi			if (error)
2059262151Sluigi				break;
2060262151Sluigi			if (na == NULL) /* only memory info */
2061262151Sluigi				break;
2062262151Sluigi			nmr->nr_offset = 0;
2063262151Sluigi			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2064262151Sluigi			netmap_update_config(na);
2065262151Sluigi			nmr->nr_rx_rings = na->num_rx_rings;
2066262151Sluigi			nmr->nr_tx_rings = na->num_tx_rings;
2067262151Sluigi			nmr->nr_rx_slots = na->num_rx_desc;
2068262151Sluigi			nmr->nr_tx_slots = na->num_tx_desc;
2069262151Sluigi			netmap_adapter_put(na);
2070262151Sluigi		} while (0);
2071262151Sluigi		NMG_UNLOCK();
2072227614Sluigi		break;
2073227614Sluigi
2074227614Sluigi	case NIOCREGIF:
2075251139Sluigi		/* possibly attach/detach NIC and VALE switch */
2076251139Sluigi		i = nmr->nr_cmd;
2077262151Sluigi		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
2078270252Sluigi				|| i == NETMAP_BDG_VNET_HDR
2079270252Sluigi				|| i == NETMAP_BDG_NEWIF
2080270252Sluigi				|| i == NETMAP_BDG_DELIF) {
2081251139Sluigi			error = netmap_bdg_ctl(nmr, NULL);
2082251139Sluigi			break;
2083251139Sluigi		} else if (i != 0) {
2084251139Sluigi			D("nr_cmd must be 0 not %d", i);
2085251139Sluigi			error = EINVAL;
2086251139Sluigi			break;
2087251139Sluigi		}
2088251139Sluigi
2089241719Sluigi		/* protect access to priv from concurrent NIOCREGIF */
2090262151Sluigi		NMG_LOCK();
2091262151Sluigi		do {
2092262151Sluigi			u_int memflags;
2093227614Sluigi
2094262151Sluigi			if (priv->np_na != NULL) {	/* thread already registered */
2095262151Sluigi				error = EBUSY;
2096262151Sluigi				break;
2097262151Sluigi			}
2098262151Sluigi			/* find the interface and a reference */
2099262151Sluigi			error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */
2100262151Sluigi			if (error)
2101262151Sluigi				break;
2102262151Sluigi			if (NETMAP_OWNED_BY_KERN(na)) {
2103262151Sluigi				netmap_adapter_put(na);
2104262151Sluigi				error = EBUSY;
2105262151Sluigi				break;
2106262151Sluigi			}
2107262151Sluigi			nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2108262151Sluigi			if (!nifp) {    /* reg. failed, release priv and ref */
2109262151Sluigi				netmap_adapter_put(na);
2110262151Sluigi				priv->np_nifp = NULL;
2111262151Sluigi				break;
2112262151Sluigi			}
2113262151Sluigi			priv->np_td = td; // XXX kqueue, debugging only
2114227614Sluigi
2115262151Sluigi			/* return the offset of the netmap_if object */
2116262151Sluigi			nmr->nr_rx_rings = na->num_rx_rings;
2117262151Sluigi			nmr->nr_tx_rings = na->num_tx_rings;
2118262151Sluigi			nmr->nr_rx_slots = na->num_rx_desc;
2119262151Sluigi			nmr->nr_tx_slots = na->num_tx_desc;
2120262151Sluigi			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
2121262151Sluigi				&nmr->nr_arg2);
2122262151Sluigi			if (error) {
2123262151Sluigi				netmap_adapter_put(na);
2124262151Sluigi				break;
2125262151Sluigi			}
2126262151Sluigi			if (memflags & NETMAP_MEM_PRIVATE) {
2127262151Sluigi				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2128262151Sluigi			}
2129262151Sluigi			priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
2130262151Sluigi				&na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
2131262151Sluigi			priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
2132262151Sluigi				&na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;
2133227614Sluigi
2134262151Sluigi			if (nmr->nr_arg3) {
2135262151Sluigi				D("requested %d extra buffers", nmr->nr_arg3);
2136262151Sluigi				nmr->nr_arg3 = netmap_extra_alloc(na,
2137262151Sluigi					&nifp->ni_bufs_head, nmr->nr_arg3);
2138262151Sluigi				D("got %d extra buffers", nmr->nr_arg3);
2139262151Sluigi			}
2140262151Sluigi			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2141262151Sluigi		} while (0);
2142262151Sluigi		NMG_UNLOCK();
2143241719Sluigi		break;
2144241719Sluigi
2145241719Sluigi	case NIOCTXSYNC:
2146241719Sluigi	case NIOCRXSYNC:
2147241719Sluigi		nifp = priv->np_nifp;
2148241719Sluigi
2149241719Sluigi		if (nifp == NULL) {
2150228276Sluigi			error = ENXIO;
2151228276Sluigi			break;
2152228276Sluigi		}
2153241719Sluigi		rmb(); /* make sure following reads are not from cache */
2154227614Sluigi
2155262151Sluigi		na = priv->np_na;      /* we have a reference */
2156227614Sluigi
2157262151Sluigi		if (na == NULL) {
2158262151Sluigi			D("Internal error: nifp != NULL && na == NULL");
2159262151Sluigi			error = ENXIO;
2160262151Sluigi			break;
2161262151Sluigi		}
2162241719Sluigi
2163270252Sluigi		if (!nm_netmap_on(na)) {
2164228276Sluigi			error = ENXIO;
2165228276Sluigi			break;
2166228276Sluigi		}
2167241719Sluigi
2168262151Sluigi		if (cmd == NIOCTXSYNC) {
2169262151Sluigi			krings = na->tx_rings;
2170262151Sluigi			qfirst = priv->np_txqfirst;
2171262151Sluigi			qlast = priv->np_txqlast;
2172262151Sluigi		} else {
2173262151Sluigi			krings = na->rx_rings;
2174262151Sluigi			qfirst = priv->np_rxqfirst;
2175262151Sluigi			qlast = priv->np_rxqlast;
2176227614Sluigi		}
2177227614Sluigi
2178262151Sluigi		for (i = qfirst; i < qlast; i++) {
2179262151Sluigi			struct netmap_kring *kring = krings + i;
2180262151Sluigi			if (nm_kr_tryget(kring)) {
2181262151Sluigi				error = EBUSY;
2182262151Sluigi				goto out;
2183262151Sluigi			}
2184234174Sluigi			if (cmd == NIOCTXSYNC) {
2185234174Sluigi				if (netmap_verbose & NM_VERB_TXSYNC)
2186234174Sluigi					D("pre txsync ring %d cur %d hwcur %d",
2187234174Sluigi					    i, kring->ring->cur,
2188234174Sluigi					    kring->nr_hwcur);
2189262151Sluigi				if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
2190262151Sluigi					netmap_ring_reinit(kring);
2191262151Sluigi				} else {
2192262151Sluigi					kring->nm_sync(kring, NAF_FORCE_RECLAIM);
2193262151Sluigi				}
2194234174Sluigi				if (netmap_verbose & NM_VERB_TXSYNC)
2195234174Sluigi					D("post txsync ring %d cur %d hwcur %d",
2196234174Sluigi					    i, kring->ring->cur,
2197234174Sluigi					    kring->nr_hwcur);
2198234174Sluigi			} else {
2199262151Sluigi				kring->nm_sync(kring, NAF_FORCE_READ);
2200234174Sluigi				microtime(&na->rx_rings[i].ring->ts);
2201234174Sluigi			}
2202262151Sluigi			nm_kr_put(kring);
2203227614Sluigi		}
2204227614Sluigi
2205234174Sluigi		break;
2206227614Sluigi
2207270252Sluigi	case NIOCCONFIG:
2208270252Sluigi		error = netmap_bdg_config(nmr);
2209270252Sluigi		break;
2210238812Sluigi#ifdef __FreeBSD__
2211262151Sluigi	case FIONBIO:
2212262151Sluigi	case FIOASYNC:
2213262151Sluigi		ND("FIONBIO/FIOASYNC are no-ops");
2214262151Sluigi		break;
2215262151Sluigi
2216227614Sluigi	case BIOCIMMEDIATE:
2217227614Sluigi	case BIOCGHDRCMPLT:
2218227614Sluigi	case BIOCSHDRCMPLT:
2219227614Sluigi	case BIOCSSEESENT:
		D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
2221227614Sluigi		break;
2222227614Sluigi
2223231881Sluigi	default:	/* allow device-specific ioctls */
2224227614Sluigi	    {
2225227614Sluigi		struct socket so;
2226270252Sluigi		struct ifnet *ifp;
2227262151Sluigi
2228227614Sluigi		bzero(&so, sizeof(so));
2229262151Sluigi		NMG_LOCK();
2230262151Sluigi		error = netmap_get_na(nmr, &na, 0 /* don't create */); /* keep reference */
2231262151Sluigi		if (error) {
2232262151Sluigi			netmap_adapter_put(na);
2233262151Sluigi			NMG_UNLOCK();
2234227614Sluigi			break;
2235262151Sluigi		}
2236262151Sluigi		ifp = na->ifp;
2237227614Sluigi		so.so_vnet = ifp->if_vnet;
2238227614Sluigi		// so->so_proto not null.
2239227614Sluigi		error = ifioctl(&so, cmd, data, td);
2240262151Sluigi		netmap_adapter_put(na);
2241262151Sluigi		NMG_UNLOCK();
2242231881Sluigi		break;
2243227614Sluigi	    }
2244238812Sluigi
2245238812Sluigi#else /* linux */
2246238812Sluigi	default:
2247238812Sluigi		error = EOPNOTSUPP;
2248238812Sluigi#endif /* linux */
2249227614Sluigi	}
2250262151Sluigiout:
2251227614Sluigi
2252228276Sluigi	CURVNET_RESTORE();
2253227614Sluigi	return (error);
2254227614Sluigi}
2255227614Sluigi
2256227614Sluigi
2257227614Sluigi/*
2258227614Sluigi * select(2) and poll(2) handlers for the "netmap" device.
2259227614Sluigi *
2260227614Sluigi * Can be called for one or more queues.
 * Return the event mask corresponding to ready events.
2262227614Sluigi * If there are no ready events, do a selrecord on either individual
2263262151Sluigi * selinfo or on the global one.
2264227614Sluigi * Device-dependent parts (locking and sync of tx/rx rings)
2265227614Sluigi * are done through callbacks.
2266238812Sluigi *
 * On linux, the arguments are really pwait, the poll table, and 'td' is a struct file *.
 * The first one is remapped to pwait as selrecord() uses the name as a
 * hidden argument.
2270227614Sluigi */
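/*
 * Typical userspace use, as a sketch: after NIOCREGIF and mmap(),
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	poll(&pfd, 1, 1000);
 *	// on return, the rx rings bound to fd have been synced and can
 *	// be drained without further system calls.
 */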
2271262151Sluigiint
2272238837Sluiginetmap_poll(struct cdev *dev, int events, struct thread *td)
2273227614Sluigi{
2274227614Sluigi	struct netmap_priv_d *priv = NULL;
2275227614Sluigi	struct netmap_adapter *na;
2276227614Sluigi	struct netmap_kring *kring;
2277262151Sluigi	u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
2278262151Sluigi	struct mbq q;		/* packets from hw queues to host stack */
2279238837Sluigi	void *pwait = dev;	/* linux compatibility */
2280262151Sluigi	int is_kevent = 0;
2281227614Sluigi
2282262151Sluigi	/*
2283262151Sluigi	 * In order to avoid nested locks, we need to "double check"
2284262151Sluigi	 * txsync and rxsync if we decide to do a selrecord().
2285262151Sluigi	 * retry_tx (and retry_rx, later) prevent looping forever.
2286262151Sluigi	 */
2287262151Sluigi	int retry_tx = 1, retry_rx = 1;
2288262151Sluigi
2289238837Sluigi	(void)pwait;
2290262151Sluigi	mbq_init(&q);
2291238837Sluigi
2292262151Sluigi	/*
2293262151Sluigi	 * XXX kevent has curthread->tp_fop == NULL,
2294262151Sluigi	 * so devfs_get_cdevpriv() fails. We circumvent this by passing
2295262151Sluigi	 * priv as the first argument, which is also useful to avoid
	 * the selrecord() calls, which are not necessary in that case.
2297262151Sluigi	 */
2298262151Sluigi	if (devfs_get_cdevpriv((void **)&priv) != 0) {
2299262151Sluigi		is_kevent = 1;
2300262151Sluigi		if (netmap_verbose)
2301262151Sluigi			D("called from kevent");
2302262151Sluigi		priv = (struct netmap_priv_d *)dev;
2303262151Sluigi	}
2304262151Sluigi	if (priv == NULL)
2305227614Sluigi		return POLLERR;
2306227614Sluigi
2307241719Sluigi	if (priv->np_nifp == NULL) {
2308241719Sluigi		D("No if registered");
2309241719Sluigi		return POLLERR;
2310241719Sluigi	}
2311241719Sluigi	rmb(); /* make sure following reads are not from cache */
2312241719Sluigi
2313262151Sluigi	na = priv->np_na;
2314262151Sluigi
2315270252Sluigi	if (!nm_netmap_on(na))
2316227614Sluigi		return POLLERR;
2317227614Sluigi
2318227614Sluigi	if (netmap_verbose & 0x8000)
2319270252Sluigi		D("device %s events 0x%x", na->name, events);
2320227614Sluigi	want_tx = events & (POLLOUT | POLLWRNORM);
2321227614Sluigi	want_rx = events & (POLLIN | POLLRDNORM);
2322227614Sluigi
2323227614Sluigi
2324227614Sluigi	/*
2325262151Sluigi	 * check_all_{tx|rx} are set if the card has more than one queue AND
2326262151Sluigi	 * the file descriptor is bound to all of them. If so, we sleep on
2327262151Sluigi	 * the "global" selinfo, otherwise we sleep on individual selinfo
2328262151Sluigi	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wakes one or the other
2330262151Sluigi	 * (or both) depending on which clients are active.
2331227614Sluigi	 *
2332227614Sluigi	 * rxsync() is only called if we run out of buffers on a POLLIN.
2333227614Sluigi	 * txsync() is called if we run out of buffers on POLLOUT, or
2334227614Sluigi	 * there are pending packets to send. The latter can be disabled
2335227614Sluigi	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
2336227614Sluigi	 */
2337262151Sluigi	check_all_tx = nm_tx_si_user(priv);
2338262151Sluigi	check_all_rx = nm_rx_si_user(priv);
2339227614Sluigi
2340227614Sluigi	/*
2341262151Sluigi	 * We start with a lock free round which is cheap if we have
2342262151Sluigi	 * slots available. If this fails, then lock and call the sync
2343227614Sluigi	 * routines.
2344227614Sluigi	 */
2345262151Sluigi	for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) {
2346232238Sluigi		kring = &na->rx_rings[i];
2347262151Sluigi		/* XXX compare ring->cur and kring->tail */
2348262151Sluigi		if (!nm_ring_empty(kring->ring)) {
2349232238Sluigi			revents |= want_rx;
2350232238Sluigi			want_rx = 0;	/* also breaks the loop */
2351227614Sluigi		}
2352232238Sluigi	}
2353262151Sluigi	for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) {
2354232238Sluigi		kring = &na->tx_rings[i];
2355262151Sluigi		/* XXX compare ring->cur and kring->tail */
2356262151Sluigi		if (!nm_ring_empty(kring->ring)) {
2357232238Sluigi			revents |= want_tx;
2358232238Sluigi			want_tx = 0;	/* also breaks the loop */
2359227614Sluigi		}
2360232238Sluigi	}
2361227614Sluigi
2362227614Sluigi	/*
2363262151Sluigi	 * If we want to push packets out (priv->np_txpoll) or
2364262151Sluigi	 * want_tx is still set, we must issue txsync calls
	 * (on all rings, to avoid stalling the tx rings).
2366262151Sluigi	 * XXX should also check cur != hwcur on the tx rings.
2367262151Sluigi	 * Fortunately, normal tx mode has np_txpoll set.
2368227614Sluigi	 */
2369227614Sluigi	if (priv->np_txpoll || want_tx) {
2370262151Sluigi		/*
2371262151Sluigi		 * The first round checks if anyone is ready, if not
2372262151Sluigi		 * do a selrecord and another round to handle races.
2373262151Sluigi		 * want_tx goes to 0 if any space is found, and is
2374262151Sluigi		 * used to skip rings with no pending transmissions.
2375262151Sluigi		 */
2376245836Sluigiflush_tx:
2377262151Sluigi		for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
2378262151Sluigi			int found = 0;
2379262151Sluigi
2380227614Sluigi			kring = &na->tx_rings[i];
2381227614Sluigi			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
2382227614Sluigi				continue;
2383262151Sluigi			/* only one thread does txsync */
2384262151Sluigi			if (nm_kr_tryget(kring)) {
2385267282Sluigi				/* either busy or stopped
2386267282Sluigi				 * XXX if the ring is stopped, sleeping would
2387267282Sluigi				 * be better. In current code, however, we only
2388267282Sluigi				 * stop the rings for brief intervals (2014-03-14)
2389267282Sluigi				 */
2390262151Sluigi				if (netmap_verbose)
2391262151Sluigi					RD(2, "%p lost race on txring %d, ok",
2392262151Sluigi					    priv, i);
2393262151Sluigi				continue;
2394227614Sluigi			}
2395262151Sluigi			if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
2396262151Sluigi				netmap_ring_reinit(kring);
2397227614Sluigi				revents |= POLLERR;
2398262151Sluigi			} else {
2399262151Sluigi				if (kring->nm_sync(kring, 0))
2400262151Sluigi					revents |= POLLERR;
2401262151Sluigi			}
2402227614Sluigi
2403262151Sluigi			/*
2404262151Sluigi			 * If we found new slots, notify potential
2405262151Sluigi			 * listeners on the same ring.
2406262151Sluigi			 * Since we just did a txsync, look at the copies
2407262151Sluigi			 * of cur,tail in the kring.
2408262151Sluigi			 */
2409262151Sluigi			found = kring->rcur != kring->rtail;
2410262151Sluigi			nm_kr_put(kring);
2411262151Sluigi			if (found) { /* notify other listeners */
2412262151Sluigi				revents |= want_tx;
2413262151Sluigi				want_tx = 0;
2414262151Sluigi				na->nm_notify(na, i, NR_TX, 0);
2415227614Sluigi			}
2416227614Sluigi		}
2417262151Sluigi		if (want_tx && retry_tx && !is_kevent) {
2418262151Sluigi			selrecord(td, check_all_tx ?
2419262151Sluigi			    &na->tx_si : &na->tx_rings[priv->np_txqfirst].si);
2420262151Sluigi			retry_tx = 0;
2421262151Sluigi			goto flush_tx;
2422262151Sluigi		}
2423227614Sluigi	}
2424227614Sluigi
2425227614Sluigi	/*
2426262151Sluigi	 * If want_rx is still set scan receive rings.
2427227614Sluigi	 * Do it on all rings because otherwise we starve.
2428227614Sluigi	 */
2429227614Sluigi	if (want_rx) {
2430262151Sluigi		int send_down = 0; /* transparent mode */
2431267282Sluigi		/* two rounds here for race avoidance */
2432262151Sluigido_retry_rx:
2433262151Sluigi		for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
2434262151Sluigi			int found = 0;
2435262151Sluigi
2436227614Sluigi			kring = &na->rx_rings[i];
2437262151Sluigi
2438262151Sluigi			if (nm_kr_tryget(kring)) {
2439262151Sluigi				if (netmap_verbose)
2440262151Sluigi					RD(2, "%p lost race on rxring %d, ok",
2441262151Sluigi					    priv, i);
2442262151Sluigi				continue;
2443227614Sluigi			}
2444262151Sluigi
2445262151Sluigi			/*
2446262151Sluigi			 * transparent mode support: collect packets
2447262151Sluigi			 * from the rxring(s).
2448270252Sluigi			 * XXX NR_FORWARD should only be read on
2449270252Sluigi			 * physical or NIC ports
2450262151Sluigi			 */
			if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
2452245836Sluigi				ND(10, "forwarding some buffers up %d to %d",
2453245836Sluigi				    kring->nr_hwcur, kring->ring->cur);
2454245836Sluigi				netmap_grab_packets(kring, &q, netmap_fwd);
2455245836Sluigi			}
2456227614Sluigi
2457262151Sluigi			if (kring->nm_sync(kring, 0))
2458227614Sluigi				revents |= POLLERR;
2459231198Sluigi			if (netmap_no_timestamp == 0 ||
2460231198Sluigi					kring->ring->flags & NR_TIMESTAMP) {
2461227614Sluigi				microtime(&kring->ring->ts);
2462231198Sluigi			}
2463262151Sluigi			/* after an rxsync we can use kring->rcur, rtail */
2464262151Sluigi			found = kring->rcur != kring->rtail;
2465262151Sluigi			nm_kr_put(kring);
2466262151Sluigi			if (found) {
2467227614Sluigi				revents |= want_rx;
2468262151Sluigi				retry_rx = 0;
2469262151Sluigi				na->nm_notify(na, i, NR_RX, 0);
2470262151Sluigi			}
2471227614Sluigi		}
2472245836Sluigi
2473262151Sluigi		/* transparent mode XXX only during first pass ? */
2474262151Sluigi		if (na->na_flags & NAF_HOST_RINGS) {
2475262151Sluigi			kring = &na->rx_rings[na->num_rx_rings];
2476270252Sluigi			if (check_all_rx
2477270252Sluigi			    && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
2478270252Sluigi				/* XXX fix to use kring fields */
2479270252Sluigi				if (nm_ring_empty(kring->ring))
2480270252Sluigi					send_down = netmap_rxsync_from_host(na, td, dev);
2481270252Sluigi				if (!nm_ring_empty(kring->ring))
2482270252Sluigi					revents |= want_rx;
2483262151Sluigi			}
2484245836Sluigi		}
2485262151Sluigi
2486262151Sluigi		if (retry_rx && !is_kevent)
2487262151Sluigi			selrecord(td, check_all_rx ?
2488262151Sluigi			    &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si);
2489262151Sluigi		if (send_down > 0 || retry_rx) {
2490262151Sluigi			retry_rx = 0;
2491262151Sluigi			if (send_down)
2492262151Sluigi				goto flush_tx; /* and retry_rx */
2493262151Sluigi			else
2494262151Sluigi				goto do_retry_rx;
2495262151Sluigi		}
2496245836Sluigi	}
2497245836Sluigi
2498262151Sluigi	/*
2499262151Sluigi	 * Transparent mode: marked bufs on rx rings between
2500262151Sluigi	 * kring->nr_hwcur and ring->head
2501262151Sluigi	 * are passed to the other endpoint.
2502267282Sluigi	 *
2503262151Sluigi	 * In this mode we also scan the sw rxring, which in
2504262151Sluigi	 * turn passes packets up.
2505262151Sluigi	 *
2506262151Sluigi	 * XXX Transparent mode at the moment requires to bind all
2507262151Sluigi 	 * rings to a single file descriptor.
2508262151Sluigi	 */
2509262151Sluigi
2510270252Sluigi	if (q.head && na->ifp != NULL)
2511262151Sluigi		netmap_send_up(na->ifp, &q);
2512227614Sluigi
2513227614Sluigi	return (revents);
2514227614Sluigi}
2515227614Sluigi
2516227614Sluigi
2517262151Sluigi/*-------------------- driver support routines -------------------*/
2518251139Sluigi
2519262151Sluigistatic int netmap_hw_krings_create(struct netmap_adapter *);
2520262151Sluigi
2521267282Sluigi/* default notify callback */
2522262151Sluigistatic int
2523262151Sluiginetmap_notify(struct netmap_adapter *na, u_int n_ring,
2524262151Sluigi	enum txrx tx, int flags)
2525231594Sluigi{
2526262151Sluigi	struct netmap_kring *kring;
2527231594Sluigi
2528262151Sluigi	if (tx == NR_TX) {
2529262151Sluigi		kring = na->tx_rings + n_ring;
2530262151Sluigi		OS_selwakeup(&kring->si, PI_NET);
2531267282Sluigi		/* optimization: avoid a wake up on the global
2532267282Sluigi		 * queue if nobody has registered for more
2533267282Sluigi		 * than one ring
2534267282Sluigi		 */
2535262151Sluigi		if (na->tx_si_users > 0)
2536262151Sluigi			OS_selwakeup(&na->tx_si, PI_NET);
2537262151Sluigi	} else {
2538262151Sluigi		kring = na->rx_rings + n_ring;
2539262151Sluigi		OS_selwakeup(&kring->si, PI_NET);
2540267282Sluigi		/* optimization: same as above */
2541262151Sluigi		if (na->rx_si_users > 0)
2542262151Sluigi			OS_selwakeup(&na->rx_si, PI_NET);
2543262151Sluigi	}
2544262151Sluigi	return 0;
2545262151Sluigi}
2546231594Sluigi
2547231594Sluigi
2548267282Sluigi/* Called by all routines that create netmap_adapters.
2549267282Sluigi * Attach na to the ifp (if any) and provide defaults
2550267282Sluigi * for optional callbacks. Defaults assume that we
2551267282Sluigi * are creating a hardware netmap_adapter.
2552267282Sluigi */
2553262151Sluigiint
2554262151Sluiginetmap_attach_common(struct netmap_adapter *na)
2555262151Sluigi{
2556262151Sluigi	struct ifnet *ifp = na->ifp;
2557231594Sluigi
2558262151Sluigi	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
2559262151Sluigi		D("%s: invalid rings tx %d rx %d",
2560270252Sluigi			na->name, na->num_tx_rings, na->num_rx_rings);
2561262151Sluigi		return EINVAL;
2562262151Sluigi	}
2563270252Sluigi	/* ifp is NULL for virtual adapters (bwrap, non-persistent VALE ports,
2564270252Sluigi	 * pipes, monitors). For bwrap we actually have a non-null ifp for
2565270252Sluigi	 * use by the external modules, but that is set after this
2566270252Sluigi	 * function has been called.
2567270252Sluigi	 * XXX this is ugly, maybe split this function in two (2014-03-14)
2568270252Sluigi	 */
2569270252Sluigi	if (ifp != NULL) {
2570270252Sluigi		WNA(ifp) = na;
2571231594Sluigi
2572262151Sluigi		/* the following is only needed for na that use the host port.
2573262151Sluigi		 * XXX do we have something similar for linux ?
2574262151Sluigi		 */
2575262151Sluigi#ifdef __FreeBSD__
2576270252Sluigi		na->if_input = ifp->if_input; /* for netmap_send_up */
2577262151Sluigi#endif /* __FreeBSD__ */
2578231594Sluigi
2579270252Sluigi		NETMAP_SET_CAPABLE(ifp);
2580270252Sluigi	}
2581262151Sluigi	if (na->nm_krings_create == NULL) {
2582267282Sluigi		/* we assume that we have been called by a driver,
2583267282Sluigi		 * since other port types all provide their own
2584267282Sluigi		 * nm_krings_create
2585267282Sluigi		 */
2586262151Sluigi		na->nm_krings_create = netmap_hw_krings_create;
2587262151Sluigi		na->nm_krings_delete = netmap_hw_krings_delete;
2588262151Sluigi	}
2589262151Sluigi	if (na->nm_notify == NULL)
2590262151Sluigi		na->nm_notify = netmap_notify;
2591262151Sluigi	na->active_fds = 0;
2592231594Sluigi
2593262151Sluigi	if (na->nm_mem == NULL)
2594270252Sluigi		/* use the global allocator */
2595262151Sluigi		na->nm_mem = &nm_mem;
2596270252Sluigi	if (na->nm_bdg_attach == NULL)
2597270252Sluigi		/* no special nm_bdg_attach callback. On VALE
2598270252Sluigi		 * attach, we need to interpose a bwrap
2599270252Sluigi		 */
2600270252Sluigi		na->nm_bdg_attach = netmap_bwrap_attach;
2601262151Sluigi	return 0;
2602262151Sluigi}
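
/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * virtual adapter fills in its own callbacks before calling
 * netmap_attach_common(), so none of the hardware defaults above get
 * installed. All my_* names are invented for the example.
 */
#if 0	/* example only, never compiled */
static int
my_virt_attach(struct netmap_adapter *na)
{
	na->ifp = NULL;			/* no ifnet behind a virtual port */
	na->num_tx_rings = na->num_rx_rings = 1;
	na->num_tx_desc = na->num_rx_desc = 1024;
	na->nm_register = my_virt_register;
	na->nm_txsync = my_virt_txsync;
	na->nm_rxsync = my_virt_rxsync;
	na->nm_krings_create = my_virt_krings_create;
	na->nm_krings_delete = my_virt_krings_delete;
	na->nm_notify = my_virt_notify;	/* otherwise netmap_notify is used */
	return netmap_attach_common(na);
}
#endif	/* example */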
2603262151Sluigi
2604262151Sluigi
2605267282Sluigi/* standard cleanup, called by all destructors */
2606262151Sluigivoid
2607262151Sluiginetmap_detach_common(struct netmap_adapter *na)
2608262151Sluigi{
2609267282Sluigi	if (na->ifp != NULL)
2610262151Sluigi		WNA(na->ifp) = NULL; /* XXX do we need this? */
2611262151Sluigi
2612262151Sluigi	if (na->tx_rings) { /* XXX should not happen */
2613262151Sluigi		D("freeing leftover tx_rings");
2614262151Sluigi		na->nm_krings_delete(na);
2615231594Sluigi	}
2616262151Sluigi	netmap_pipe_dealloc(na);
2617262151Sluigi	if (na->na_flags & NAF_MEM_OWNER)
2618262151Sluigi		netmap_mem_private_delete(na->nm_mem);
2619262151Sluigi	bzero(na, sizeof(*na));
2620262151Sluigi	free(na, M_DEVBUF);
2621231594Sluigi}
2622231594Sluigi
2623270252Sluigi/* Wrapper for the register callback provided by hardware drivers.
2624270252Sluigi * na->ifp == NULL means that the driver module has been
2625270252Sluigi * unloaded, so we cannot call into it.
2626270252Sluigi * Note that module unloading, in our patched linux drivers,
2627270252Sluigi * happens under NMG_LOCK and after having stopped all the
2628270252Sluigi * nic rings (see netmap_detach). This provides sufficient
2629270252Sluigi * protection for the other driver-provided callbacks
2630270252Sluigi * (i.e., nm_config and nm_*xsync), which therefore don't need
2631270252Sluigi * to be wrapped.
2632270252Sluigi */
2633270252Sluigistatic int
2634270252Sluiginetmap_hw_register(struct netmap_adapter *na, int onoff)
2635270252Sluigi{
2636270252Sluigi	struct netmap_hw_adapter *hwna =
2637270252Sluigi		(struct netmap_hw_adapter*)na;
2638231594Sluigi
2639270252Sluigi	if (na->ifp == NULL)
2640270252Sluigi		return onoff ? ENXIO : 0;
2641270252Sluigi
2642270252Sluigi	return hwna->nm_hw_register(na, onoff);
2643270252Sluigi}
2644270252Sluigi
2645270252Sluigi
2646231594Sluigi/*
2647227614Sluigi * Initialize a ``netmap_adapter`` object created by driver on attach.
2648227614Sluigi * We allocate a block of memory with room for a struct netmap_adapter
2649227614Sluigi * plus two sets of N+2 struct netmap_kring (where N is the number
2650227614Sluigi * of hardware rings):
2651227614Sluigi * krings	0..N-1	are for the hardware queues.
2652227614Sluigi * kring	N	is for the host stack queue
2653262151Sluigi * kring	N+1	is only used for the selinfo for all queues. // XXX still true ?
2654227614Sluigi * Return 0 on success, ENOMEM otherwise.
2655227614Sluigi */
2656227614Sluigiint
2657262151Sluiginetmap_attach(struct netmap_adapter *arg)
2658227614Sluigi{
2659262151Sluigi	struct netmap_hw_adapter *hwna = NULL;
2660262151Sluigi	// XXX when is arg == NULL ?
2661245835Sluigi	struct ifnet *ifp = arg ? arg->ifp : NULL;
2662227614Sluigi
2663245835Sluigi	if (arg == NULL || ifp == NULL)
2664245835Sluigi		goto fail;
2665262151Sluigi	hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO);
2666262151Sluigi	if (hwna == NULL)
2667245835Sluigi		goto fail;
2668262151Sluigi	hwna->up = *arg;
2669262151Sluigi	hwna->up.na_flags |= NAF_HOST_RINGS;
2670270252Sluigi	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
2671270252Sluigi	hwna->nm_hw_register = hwna->up.nm_register;
2672270252Sluigi	hwna->up.nm_register = netmap_hw_register;
2673262151Sluigi	if (netmap_attach_common(&hwna->up)) {
2674262151Sluigi		free(hwna, M_DEVBUF);
2675262151Sluigi		goto fail;
2676227614Sluigi	}
2677262151Sluigi	netmap_adapter_get(&hwna->up);
2678262151Sluigi
2679232238Sluigi#ifdef linux
2680251139Sluigi	if (ifp->netdev_ops) {
2681251139Sluigi		/* prepare a clone of the netdev ops */
2682251139Sluigi#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
2683262151Sluigi		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2684251139Sluigi#else
2685262151Sluigi		hwna->nm_ndo = *ifp->netdev_ops;
2686251139Sluigi#endif
2687238812Sluigi	}
2688262151Sluigi	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
2689270252Sluigi	if (ifp->ethtool_ops) {
2690270252Sluigi		hwna->nm_eto = *ifp->ethtool_ops;
2691270252Sluigi	}
2692270252Sluigi	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
2693270252Sluigi#ifdef ETHTOOL_SCHANNELS
2694270252Sluigi	hwna->nm_eto.set_channels = linux_netmap_set_channels;
2695270252Sluigi#endif
2696270252Sluigi	if (arg->nm_config == NULL) {
2697270252Sluigi		hwna->up.nm_config = netmap_linux_config;
2698270252Sluigi	}
2699262151Sluigi#endif /* linux */
2700262151Sluigi
2701267282Sluigi	D("success for %s tx %d/%d rx %d/%d queues/slots",
2702270252Sluigi		hwna->up.name,
2703267282Sluigi		hwna->up.num_tx_rings, hwna->up.num_tx_desc,
2704267282Sluigi		hwna->up.num_rx_rings, hwna->up.num_rx_desc
2705267282Sluigi		);
2706245835Sluigi	return 0;
2707227614Sluigi
2708245835Sluigifail:
2709262151Sluigi	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
2710267282Sluigi	if (ifp)
2711267282Sluigi		netmap_detach(ifp);
2712262151Sluigi	return (hwna ? EINVAL : ENOMEM);
2713227614Sluigi}
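
/*
 * Illustrative sketch, not part of the original source: roughly how a
 * hypothetical netmap-patched NIC driver would call netmap_attach() at the
 * end of its own attach routine. The my_* names and softc fields are
 * invented; the nm_* callbacks must be implemented by the driver.
 */
#if 0	/* example only, never compiled */
static void
my_driver_netmap_attach(struct my_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
	na.nm_register = my_driver_netmap_reg;
	na.nm_txsync = my_driver_netmap_txsync;
	na.nm_rxsync = my_driver_netmap_rxsync;
	netmap_attach(&na);
}
#endif	/* example */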
2714227614Sluigi
2715227614Sluigi
2716227614Sluigivoid
2717262151SluigiNM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
2718227614Sluigi{
2719262151Sluigi	if (!na) {
2720262151Sluigi		return;
2721262151Sluigi	}
2722227614Sluigi
2723262151Sluigi	refcount_acquire(&na->na_refcount);
2724262151Sluigi}
2725262151Sluigi
2726262151Sluigi
2727262151Sluigi/* returns 1 iff the netmap_adapter is destroyed */
2728262151Sluigiint
2729262151SluigiNM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
2730262151Sluigi{
2731227614Sluigi	if (!na)
2732262151Sluigi		return 1;
2733227614Sluigi
2734262151Sluigi	if (!refcount_release(&na->na_refcount))
2735262151Sluigi		return 0;
2736239149Semaste
2737262151Sluigi	if (na->nm_dtor)
2738262151Sluigi		na->nm_dtor(na);
2739262151Sluigi
2740262151Sluigi	netmap_detach_common(na);
2741262151Sluigi
2742262151Sluigi	return 1;
2743262151Sluigi}
2744262151Sluigi
2745267282Sluigi/* nm_krings_create callback for all hardware native adapters */
2746262151Sluigiint
2747262151Sluiginetmap_hw_krings_create(struct netmap_adapter *na)
2748262151Sluigi{
2749262151Sluigi	int ret = netmap_krings_create(na, 0);
2750262151Sluigi	if (ret == 0) {
2751262151Sluigi		/* initialize the mbq for the sw rx ring */
2752262151Sluigi		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
2753262151Sluigi		ND("initialized sw rx queue %d", na->num_rx_rings);
2754245835Sluigi	}
2755262151Sluigi	return ret;
2756227614Sluigi}
2757227614Sluigi
2758227614Sluigi
2759251139Sluigi
2760262151Sluigi/*
2761267282Sluigi * Called on module unload by the netmap-enabled drivers
2762262151Sluigi */
2763262151Sluigivoid
2764262151Sluiginetmap_detach(struct ifnet *ifp)
2765251139Sluigi{
2766262151Sluigi	struct netmap_adapter *na = NA(ifp);
2767251139Sluigi
2768262151Sluigi	if (!na)
2769262151Sluigi		return;
2770251139Sluigi
2771262151Sluigi	NMG_LOCK();
2772262151Sluigi	netmap_disable_all_rings(ifp);
2773262151Sluigi	if (!netmap_adapter_put(na)) {
2774262151Sluigi		/* someone is still using the adapter,
2775262151Sluigi		 * tell them that the interface is gone
2776262151Sluigi		 */
2777262151Sluigi		na->ifp = NULL;
2778270252Sluigi		// XXX also clear NAF_NATIVE_ON ?
2779270252Sluigi		na->na_flags &= ~NAF_NETMAP_ON;
2780262151Sluigi		/* give them a chance to notice */
2781262151Sluigi		netmap_enable_all_rings(ifp);
2782262151Sluigi	}
2783262151Sluigi	NMG_UNLOCK();
2784251139Sluigi}
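
/*
 * Illustrative sketch, not part of the original source: a netmap-enabled
 * driver is expected to call netmap_detach() from its detach/unload path,
 * before the ifnet itself goes away. Names are hypothetical.
 */
#if 0	/* example only, never compiled */
static int
my_driver_detach(device_t dev)
{
	struct my_softc *sc = device_get_softc(dev);

	netmap_detach(sc->ifp);
	/* ... regular driver teardown continues here ... */
	return 0;
}
#endif	/* example */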
2785251139Sluigi
2786251139Sluigi
2787227614Sluigi/*
2788228280Sluigi * Intercept packets from the network stack and pass them
2789228280Sluigi * to netmap as incoming packets on the 'software' ring.
2790262151Sluigi *
2791262151Sluigi * We only store packets in a bounded mbq and then copy them
2792262151Sluigi * in the relevant rxsync routine.
2793262151Sluigi *
2794262151Sluigi * We rely on the OS to make sure that the ifp and na do not go
2795262151Sluigi * away (typically the caller checks for IFF_DRV_RUNNING or the like).
2796262151Sluigi * In nm_register() or whenever there is a reinitialization,
2797262151Sluigi * we make sure to make the mode change visible here.
2798227614Sluigi */
2799227614Sluigiint
2800262151Sluiginetmap_transmit(struct ifnet *ifp, struct mbuf *m)
2801227614Sluigi{
2802227614Sluigi	struct netmap_adapter *na = NA(ifp);
2803262151Sluigi	struct netmap_kring *kring;
2804262151Sluigi	u_int len = MBUF_LEN(m);
2805262151Sluigi	u_int error = ENOBUFS;
2806262151Sluigi	struct mbq *q;
2807262151Sluigi	int space;
2808227614Sluigi
2809262151Sluigi	// XXX [Linux] we do not need this lock
2810262151Sluigi	// if we follow the down/configure/up protocol -gl
2811262151Sluigi	// mtx_lock(&na->core_lock);
2812262151Sluigi
2813270252Sluigi	if (!nm_netmap_on(na)) {
2814270252Sluigi		D("%s not in netmap mode anymore", na->name);
2815262151Sluigi		error = ENXIO;
2816262151Sluigi		goto done;
2817250107Sluigi	}
2818251139Sluigi
2819262151Sluigi	kring = &na->rx_rings[na->num_rx_rings];
2820262151Sluigi	q = &kring->rx_queue;
2821262151Sluigi
2822262151Sluigi	// XXX reconsider long packets if we handle fragments
2823270252Sluigi	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
2824270252Sluigi		D("%s from_host, drop packet size %d > %d", na->name,
2825270252Sluigi			len, NETMAP_BUF_SIZE(na));
2826262151Sluigi		goto done;
2827227614Sluigi	}
2828227614Sluigi
2829262151Sluigi	/* protect against rxsync_from_host(), netmap_sw_to_nic()
2830262151Sluigi	 * and maybe other instances of netmap_transmit (the latter
2831262151Sluigi	 * not possible on Linux).
2832262151Sluigi	 * Also avoid overflowing the queue.
2833262151Sluigi	 */
2834267282Sluigi	mbq_lock(q);
2835262151Sluigi
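	/* Slots between nr_hwcur and nr_hwtail already hold received
	 * packets that userspace has not returned yet; if those, plus the
	 * mbufs already queued, would fill the ring, drop this mbuf rather
	 * than queue more than the next rxsync can absorb.
	 */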
2836262151Sluigi	space = kring->nr_hwtail - kring->nr_hwcur;
2837262151Sluigi	if (space < 0)
2838262151Sluigi		space += kring->nkr_num_slots;
2839262151Sluigi	if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
2840262151Sluigi		RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
2841270252Sluigi			na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
2842262151Sluigi			len, m);
2843262151Sluigi	} else {
2844262151Sluigi		mbq_enqueue(q, m);
2845262151Sluigi		ND(10, "%s %d bufs in queue len %d m %p",
2846270252Sluigi			na->name, mbq_len(q), len, m);
2847262151Sluigi		/* notify outside the lock */
2848262151Sluigi		m = NULL;
2849262151Sluigi		error = 0;
2850262151Sluigi	}
2851267282Sluigi	mbq_unlock(q);
2852262151Sluigi
2853227614Sluigidone:
2854262151Sluigi	if (m)
2855262151Sluigi		m_freem(m);
2856262151Sluigi	/* unconditionally wake up listeners */
2857262151Sluigi	na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
2858267282Sluigi	/* this is normally netmap_notify(), but for nics
2859267282Sluigi	 * connected to a bridge it is netmap_bwrap_intr_notify(),
2860267282Sluigi	 * that possibly forwards the frames through the switch
2861267282Sluigi	 */
2862227614Sluigi
2863227614Sluigi	return (error);
2864227614Sluigi}
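
/*
 * Illustrative sketch, not part of the original source: where
 * netmap_transmit() sits in the host tx path. The actual interception
 * point is OS-specific glue; the hypothetical wrapper below only shows the
 * intended usage, i.e. while the interface is in netmap mode, mbufs coming
 * from the host stack are handed to netmap_transmit() instead of the NIC.
 */
#if 0	/* example only, never compiled */
static int
my_if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	if (nm_netmap_on(NA(ifp)))
		return netmap_transmit(ifp, m);
	return my_driver_real_transmit(ifp, m);
}
#endif	/* example */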
2865227614Sluigi
2866227614Sluigi
2867227614Sluigi/*
2868227614Sluigi * netmap_reset() is called by the driver routines when reinitializing
2869227614Sluigi * a ring. The driver is in charge of locking to protect the kring.
2870262151Sluigi * If native netmap mode is not set just return NULL.
2871227614Sluigi */
2872227614Sluigistruct netmap_slot *
2873262151Sluiginetmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
2874227614Sluigi	u_int new_cur)
2875227614Sluigi{
2876227614Sluigi	struct netmap_kring *kring;
2877228276Sluigi	int new_hwofs, lim;
2878227614Sluigi
2879270252Sluigi	if (!nm_native_on(na)) {
2880270252Sluigi		ND("interface not in native netmap mode");
2881227614Sluigi		return NULL;	/* nothing to reinitialize */
2882262151Sluigi	}
2883227614Sluigi
2884262151Sluigi	/* XXX note: in the new scheme, we are not guaranteed to be
2885262151Sluigi	 * under lock (e.g. when called on a device reset).
2886262151Sluigi	 * In that case we should set a flag and not trust the
2887262151Sluigi	 * values too much. In practice: TODO
2888262151Sluigi	 * - set a RESET flag somewhere in the kring
2889262151Sluigi	 * - do the processing in a conservative way
2890262151Sluigi	 * - let the *sync() fix things up at the end.
2891262151Sluigi	 */
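	/* The realignment works by adjusting nkr_hwofs so that the slot the
	 * NIC restarts from (new_cur) still maps to the netmap slot where
	 * the user left off: e.g. on a TX ring, nr_hwcur == 10 and
	 * new_cur == 2 give new_hwofs == 8, keeping the netmap view offset
	 * by 8 slots from the reinitialized NIC ring.
	 */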
2892232238Sluigi	if (tx == NR_TX) {
2893241719Sluigi		if (n >= na->num_tx_rings)
2894241719Sluigi			return NULL;
2895232238Sluigi		kring = na->tx_rings + n;
2896262151Sluigi		// XXX check whether we should use hwcur or rcur
2897228276Sluigi		new_hwofs = kring->nr_hwcur - new_cur;
2898232238Sluigi	} else {
2899241719Sluigi		if (n >= na->num_rx_rings)
2900241719Sluigi			return NULL;
2901232238Sluigi		kring = na->rx_rings + n;
2902262151Sluigi		new_hwofs = kring->nr_hwtail - new_cur;
2903232238Sluigi	}
2904232238Sluigi	lim = kring->nkr_num_slots - 1;
2905228276Sluigi	if (new_hwofs > lim)
2906228276Sluigi		new_hwofs -= lim + 1;
2907228276Sluigi
2908262151Sluigi	/* Always set the new offset value and realign the ring. */
2909262151Sluigi	if (netmap_verbose)
2910262151Sluigi	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
2911270252Sluigi		na->name,
2912262151Sluigi		tx == NR_TX ? "TX" : "RX", n,
2913262151Sluigi		kring->nkr_hwofs, new_hwofs,
2914262151Sluigi		kring->nr_hwtail,
2915262151Sluigi		tx == NR_TX ? lim : kring->nr_hwtail);
2916228276Sluigi	kring->nkr_hwofs = new_hwofs;
2917262151Sluigi	if (tx == NR_TX) {
2918262151Sluigi		kring->nr_hwtail = kring->nr_hwcur + lim;
2919262151Sluigi		if (kring->nr_hwtail > lim)
2920262151Sluigi			kring->nr_hwtail -= lim + 1;
2921262151Sluigi	}
2922228276Sluigi
2923238812Sluigi#if 0 // def linux
2924238812Sluigi	/* XXX check that the mappings are correct */
2925238812Sluigi	/* need ring_nr, adapter->pdev, direction */
2926238812Sluigi	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
2927238812Sluigi	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
2928238812Sluigi		D("error mapping rx netmap buffer %d", i);
2929238812Sluigi		// XXX fix error handling
2930238812Sluigi	}
2931238812Sluigi
2932238812Sluigi#endif /* linux */
2933227614Sluigi	/*
2934262151Sluigi	 * Wakeup on the individual and global selwait
2935228276Sluigi	 * We do the wakeup here, but the ring is not yet reconfigured.
2936228276Sluigi	 * However, we are under lock so there are no races.
2937227614Sluigi	 */
2938262151Sluigi	na->nm_notify(na, n, tx, 0);
2939227614Sluigi	return kring->ring->slot;
2940227614Sluigi}
2941227614Sluigi
2942227614Sluigi
2943227614Sluigi/*
2944262151Sluigi * Dispatch rx/tx interrupts to the netmap rings.
2945262151Sluigi *
2946262151Sluigi * "work_done" is non-null on the RX path, NULL for the TX path.
2947262151Sluigi * We rely on the OS to make sure that there is only one active
2948262151Sluigi * instance per queue, and that there is appropriate locking.
2949262151Sluigi *
2950262151Sluigi * The 'notify' routine depends on what the ring is attached to.
2951262151Sluigi * - for a netmap file descriptor, do a selwakeup on the individual
2952262151Sluigi *   waitqueue, plus one on the global one if needed
2953270252Sluigi *   (see netmap_notify)
2954270252Sluigi * - for a nic connected to a switch, call the proper forwarding routine
2955270252Sluigi *   (see netmap_bwrap_intr_notify)
2956251139Sluigi */
2957262151Sluigivoid
2958262151Sluiginetmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
2959251139Sluigi{
2960251139Sluigi	struct netmap_adapter *na = NA(ifp);
2961262151Sluigi	struct netmap_kring *kring;
2962251139Sluigi
2963262151Sluigi	q &= NETMAP_RING_MASK;
2964262151Sluigi
2965262151Sluigi	if (netmap_verbose) {
2966262151Sluigi		RD(5, "received %s queue %d", work_done ? "RX" : "TX", q);
2967251139Sluigi	}
2968251139Sluigi
2969262151Sluigi	if (work_done) { /* RX path */
2970262151Sluigi		if (q >= na->num_rx_rings)
2971262151Sluigi			return;	// not a physical queue
2972262151Sluigi		kring = na->rx_rings + q;
2973262151Sluigi		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
2974262151Sluigi		na->nm_notify(na, q, NR_RX, 0);
2975262151Sluigi		*work_done = 1; /* do not fire napi again */
2976262151Sluigi	} else { /* TX path */
2977262151Sluigi		if (q >= na->num_tx_rings)
2978262151Sluigi			return;	// not a physical queue
2979262151Sluigi		kring = na->tx_rings + q;
2980262151Sluigi		na->nm_notify(na, q, NR_TX, 0);
2981262151Sluigi	}
2982251139Sluigi}
2983251139Sluigi
2984251139Sluigi
2985251139Sluigi/*
2986262151Sluigi * Default functions to handle rx/tx interrupts from a physical device.
2987262151Sluigi * "work_done" is non-null on the RX path, NULL for the TX path.
2988250107Sluigi *
2989262151Sluigi * If the card is not in netmap mode, simply return 0,
2990262151Sluigi * so that the caller proceeds with regular processing.
2991262151Sluigi * Otherwise call netmap_common_irq() and return 1.
2992262151Sluigi *
2993262151Sluigi * If the card is connected to a netmap file descriptor,
2994262151Sluigi * do a selwakeup on the individual queue, plus one on the global one
2995262151Sluigi * if needed (multiqueue card _and_ there are multiqueue listeners),
2996262151Sluigi * and return 1.
2997262151Sluigi *
2998262151Sluigi * Finally, if called on rx from an interface connected to a switch,
2999262151Sluigi * calls the proper forwarding routine, and return 1.
3000231594Sluigi */
3001231881Sluigiint
3002262151Sluiginetmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
3003231594Sluigi{
3004270252Sluigi	struct netmap_adapter *na = NA(ifp);
3005270252Sluigi
3006270252Sluigi	/*
3007270252Sluigi	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
3008270252Sluigi	 * we still use the regular driver even though the previous
3009270252Sluigi	 * check fails. It is unclear whether we should use
3010270252Sluigi	 * nm_native_on() here.
3011270252Sluigi	 */
3012270252Sluigi	if (!nm_netmap_on(na))
3013231594Sluigi		return 0;
3014250107Sluigi
3015270252Sluigi	if (na->na_flags & NAF_SKIP_INTR) {
3016241719Sluigi		ND("use regular interrupt");
3017241719Sluigi		return 0;
3018241719Sluigi	}
3019241719Sluigi
3020262151Sluigi	netmap_common_irq(ifp, q, work_done);
3021231594Sluigi	return 1;
3022231594Sluigi}
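
/*
 * Illustrative sketch, not part of the original source: the hook that a
 * hypothetical netmap-patched driver places at the top of its rx interrupt
 * (or rx cleanup) routine. A non-zero return from netmap_rx_irq() means the
 * ring is in netmap mode and the normal mbuf path must be skipped; tx
 * completions make the same call with a NULL work_done.
 */
#if 0	/* example only, never compiled */
static void
my_driver_rx_intr(struct my_rx_ring *rxr)
{
	u_int work_done = 0;

	if (netmap_rx_irq(rxr->ifp, rxr->ring_id, &work_done))
		return;		/* wakeups already delivered by netmap */
	/* ... regular mbuf receive path ... */
}
#endif	/* example */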
3023231594Sluigi
3024232238Sluigi
3025238837Sluigi/*
3026262151Sluigi * Module loader and unloader
3027238812Sluigi *
3028262151Sluigi * netmap_init() creates the /dev/netmap device and initializes
3029262151Sluigi * all global variables. Returns 0 on success, errno on failure
3030262151Sluigi * (in practice it should never fail)
3031262151Sluigi *
3032262151Sluigi * netmap_fini() destroys everything.
3033238812Sluigi */
3034238812Sluigi
3035262151Sluigistatic struct cdev *netmap_dev; /* /dev/netmap character device. */
3036262151Sluigiextern struct cdevsw netmap_cdevsw;
3037238812Sluigi
3038238812Sluigi
3039262151Sluigivoid
3040262151Sluiginetmap_fini(void)
3041238812Sluigi{
3042262151Sluigi	// XXX destroy_bridges() ?
3043262151Sluigi	if (netmap_dev)
3044262151Sluigi		destroy_dev(netmap_dev);
3045262151Sluigi	netmap_mem_fini();
3046262151Sluigi	NMG_LOCK_DESTROY();
3047262151Sluigi	printf("netmap: unloaded module.\n");
3048238812Sluigi}
3049238812Sluigi
3050238812Sluigi
3051251139Sluigiint
3052227614Sluiginetmap_init(void)
3053227614Sluigi{
3054227614Sluigi	int error;
3055227614Sluigi
3056262151Sluigi	NMG_LOCK_INIT();
3057262151Sluigi
3058262151Sluigi	error = netmap_mem_init();
3059262151Sluigi	if (error != 0)
3060262151Sluigi		goto fail;
3061262151Sluigi	/* XXX could use make_dev_credv() to get error number */
3062227614Sluigi	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
3063227614Sluigi			      "netmap");
3064262151Sluigi	if (!netmap_dev)
3065262151Sluigi		goto fail;
3066238812Sluigi
3067262151Sluigi	netmap_init_bridges();
3068270252Sluigi#ifdef __FreeBSD__
3069270252Sluigi	nm_vi_init_index();
3070270252Sluigi#endif
3071262151Sluigi	printf("netmap: loaded module\n");
3072262151Sluigi	return (0);
3073262151Sluigifail:
3074262151Sluigi	netmap_fini();
3075262151Sluigi	return (EINVAL); /* may be incorrect */
3076227614Sluigi}
3077