/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25227614Sluigi */ 26227614Sluigi 27227614Sluigi/* 28227614Sluigi * $FreeBSD: stable/11/sys/net/netmap.h 343866 2019-02-07 10:44:53Z vmaffione $ 29227614Sluigi * 30232238Sluigi * Definitions of constants and the structures used by the netmap 31232238Sluigi * framework, for the part visible to both kernel and userspace. 32232238Sluigi * Detailed info on netmap is available with "man netmap" or at 33259412Sluigi * 34232238Sluigi * http://info.iet.unipi.it/~luigi/netmap/ 35257529Sluigi * 36257529Sluigi * This API is also used to communicate with the VALE software switch 37227614Sluigi */ 38227614Sluigi 39227614Sluigi#ifndef _NET_NETMAP_H_ 40227614Sluigi#define _NET_NETMAP_H_ 41227614Sluigi 42342033Svmaffione#define NETMAP_API 13 /* current API version */ 43260368Sluigi 44342033Svmaffione#define NETMAP_MIN_API 13 /* min and max versions accepted */ 45261909Sluigi#define NETMAP_MAX_API 15 46227614Sluigi/* 47260368Sluigi * Some fields should be cache-aligned to reduce contention. 48260368Sluigi * The alignment is architecture and OS dependent, but rather than 49260368Sluigi * digging into OS headers to find the exact value we use an estimate 50260368Sluigi * that should cover most architectures. 51260368Sluigi */ 52260368Sluigi#define NM_CACHE_ALIGN 128 53260368Sluigi 54260368Sluigi/* 55227614Sluigi * --- Netmap data structures --- 56227614Sluigi * 57257529Sluigi * The userspace data structures used by netmap are shown below. 58257529Sluigi * They are allocated by the kernel and mmap()ed by userspace threads. 59257529Sluigi * Pointers are implemented as memory offsets or indexes, 60257529Sluigi * so that they can be easily dereferenced in kernel and userspace. 

                              KERNEL (opaque, obviously)

  ====================================================================
                                         |
   USERSPACE                             |      struct netmap_ring
                                         +---->+---------------+
                                             / | head,cur,tail |
   struct netmap_if (nifp, 1 per fd)        /  | buf_ofs       |
    +---------------+                      /   | other fields  |
    | ni_tx_rings   |                     /    +===============+
    | ni_rx_rings   |                    /     | buf_idx, len  | slot[0]
    |               |                   /      | flags, ptr    |
    |               |                  /       +---------------+
    +===============+                 /        | buf_idx, len  | slot[1]
    | txring_ofs[0] | (rel.to nifp)--'         | flags, ptr    |
    | txring_ofs[1] |                          +---------------+
     (tx+1 entries)                           (num_slots entries)
    | txring_ofs[t] |                          | buf_idx, len  | slot[n-1]
    +---------------+                          | flags, ptr    |
    | rxring_ofs[0] |                          +---------------+
    | rxring_ofs[1] |
     (rx+1 entries)
    | rxring_ofs[r] |
    +---------------+

 * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
 * a file descriptor, the mmap()ed region contains a (logically readonly)
 * struct netmap_if pointing to struct netmap_ring's.
 *
 * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
 * pair attached to the host stack (this pair is unused for non-NIC ports).
 *
 * All physical/host stack ports share the same memory region,
 * so that zero-copy can be implemented between them.
 * VALE switch ports instead have separate memory regions.
 *
 * The netmap_ring is the userspace-visible replica of the NIC ring.
 * Each slot has the index of a buffer (MTU-sized and residing in the
 * mmapped region), its length and some flags. An extra 64-bit pointer
 * is provided for user-supplied buffers in the tx path.
 *
 * In user space, the buffer address is computed as
 *	(char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
 *
 * Added in NETMAP_API 11:
 *
 * + NIOCREGIF can request the allocation of extra spare buffers from
 *   the same memory pool. The desired number of buffers must be in
 *   nr_arg3. The ioctl may return fewer buffers, depending on memory
 *   availability. nr_arg3 will return the actual value, and, once
 *   mapped, nifp->ni_bufs_head will be the index of the first buffer.
 *
 *   The buffers are linked to each other using the first uint32_t
 *   as the index. On close, ni_bufs_head must point to the list of
 *   buffers to be released.
 *
 * + NIOCREGIF can request space for extra rings (and buffers)
 *   allocated in the same memory space. The number of extra rings
 *   is in nr_arg1, and is advisory. This is a no-op on NICs where
 *   the size of the memory space is fixed.
 *
 * + NIOCREGIF can attach to PIPE rings sharing the same memory
 *   space with a parent device. The ifname indicates the parent device,
 *   which must already exist. Flags in nr_flags indicate if we want to
 *   bind the master or slave side; the index (from nr_ringid)
 *   is just a cookie and does not need to be sequential.
 *
 * + NIOCREGIF can also attach to 'monitor' rings that replicate
 *   the content of specific rings, also from the same memory space.
 *
 *   Extra flags in nr_flags support the above functions.
 *   Application libraries may use the following naming scheme:
 *	netmap:foo			all NIC ring pairs
 *	netmap:foo^			only host ring pair
 *	netmap:foo+			all NIC ring + host ring pairs
 *	netmap:foo-k			the k-th NIC ring pair
 *	netmap:foo{k			PIPE ring pair k, master side
 *	netmap:foo}k			PIPE ring pair k, slave side
 *
 * Some notes about host rings:
 *
 * + The RX host ring is used to store those packets that the host network
 *   stack is trying to transmit through a NIC queue, but only if that queue
 *   is currently in netmap mode. Netmap will not intercept host stack mbufs
 *   designated to NIC queues that are not in netmap mode. As a consequence,
 *   registering a netmap port with netmap:foo^ is not enough to intercept
 *   mbufs in the RX host ring; the netmap port should be registered with
 *   netmap:foo*, or another registration should be done to open at least a
 *   NIC TX queue in netmap mode.
 *
 * + Netmap is not currently able to deal with intercepted transmit mbufs which
 *   require offloadings like TSO, UFO, checksumming offloadings, etc. It is
 *   the responsibility of the user to disable those offloadings (e.g. using
 *   ifconfig on FreeBSD or ethtool -K on Linux) for an interface that is being
 *   used in netmap mode.
If the offloadings are not disabled, GSO and/or 157341477Svmaffione * unchecksummed packets may be dropped immediately or end up in the host RX 158341477Svmaffione * ring, and will be dropped as soon as the packet reaches another netmap 159341477Svmaffione * adapter. 160257529Sluigi */ 161257529Sluigi 162257529Sluigi/* 163257529Sluigi * struct netmap_slot is a buffer descriptor 164227614Sluigi */ 165227614Sluigistruct netmap_slot { 166257529Sluigi uint32_t buf_idx; /* buffer index */ 167260368Sluigi uint16_t len; /* length for this slot */ 168257529Sluigi uint16_t flags; /* buf changed, etc. */ 169260368Sluigi uint64_t ptr; /* pointer for indirect buffers */ 170260368Sluigi}; 171260368Sluigi 172260368Sluigi/* 173260368Sluigi * The following flags control how the slot is used 174260368Sluigi */ 175260368Sluigi 176257529Sluigi#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ 177260368Sluigi /* 178260368Sluigi * must be set whenever buf_idx is changed (as it might be 179260368Sluigi * necessary to recompute the physical address and mapping) 180285349Sluigi * 181285349Sluigi * It is also set by the kernel whenever the buf_idx is 182285349Sluigi * changed internally (e.g., by pipes). Applications may 183285349Sluigi * use this information to know when they can reuse the 184285349Sluigi * contents of previously prepared buffers. 185260368Sluigi */ 186260368Sluigi 187260368Sluigi#define NS_REPORT 0x0002 /* ask the hardware to report results */ 188260368Sluigi /* 189260368Sluigi * Request notification when slot is used by the hardware. 190260368Sluigi * Normally transmit completions are handled lazily and 191260368Sluigi * may be unreported. This flag lets us know when a slot 192260368Sluigi * has been sent (e.g. to terminate the sender). 193260368Sluigi */ 194260368Sluigi 195260368Sluigi#define NS_FORWARD 0x0004 /* pass packet 'forward' */ 196260368Sluigi /* 197260368Sluigi * (Only for physical ports, rx rings with NR_FORWARD set). 
198260368Sluigi * Slot released to the kernel (i.e. before ring->head) with 199260368Sluigi * this flag set are passed to the peer ring (host/NIC), 200260368Sluigi * thus restoring the host-NIC connection for these slots. 201260368Sluigi * This supports efficient traffic monitoring or firewalling. 202260368Sluigi */ 203260368Sluigi 204260368Sluigi#define NS_NO_LEARN 0x0008 /* disable bridge learning */ 205260368Sluigi /* 206260368Sluigi * On a VALE switch, do not 'learn' the source port for 207260368Sluigi * this buffer. 208260368Sluigi */ 209260368Sluigi 210260368Sluigi#define NS_INDIRECT 0x0010 /* userspace buffer */ 211260368Sluigi /* 212260368Sluigi * (VALE tx rings only) data is in a userspace buffer, 213260368Sluigi * whose address is in the 'ptr' field in the slot. 214260368Sluigi */ 215260368Sluigi 216260368Sluigi#define NS_MOREFRAG 0x0020 /* packet has more fragments */ 217260368Sluigi /* 218341477Svmaffione * (VALE ports, ptnetmap ports and some NIC ports, e.g. 219341477Svmaffione * ixgbe and i40e on Linux) 220260368Sluigi * Set on all but the last slot of a multi-segment packet. 221260368Sluigi * The 'len' field refers to the individual fragment. 222260368Sluigi */ 223260368Sluigi 224245834Sluigi#define NS_PORT_SHIFT 8 225245834Sluigi#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) 226260368Sluigi /* 227260368Sluigi * The high 8 bits of the flag, if not zero, indicate the 228260368Sluigi * destination port for the VALE switch, overriding 229260368Sluigi * the lookup table. 230260368Sluigi */ 231260368Sluigi 232257529Sluigi#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) 233260368Sluigi /* 234260368Sluigi * (VALE rx rings only) the high 8 bits 235260368Sluigi * are the number of fragments. 
236260368Sluigi */ 237227614Sluigi 238341477Svmaffione#define NETMAP_MAX_FRAGS 64 /* max number of fragments */ 239260368Sluigi 240341477Svmaffione 241227614Sluigi/* 242257529Sluigi * struct netmap_ring 243257529Sluigi * 244227614Sluigi * Netmap representation of a TX or RX ring (also known as "queue"). 245227614Sluigi * This is a queue implemented as a fixed-size circular array. 246260368Sluigi * At the software level the important fields are: head, cur, tail. 247227614Sluigi * 248227614Sluigi * In TX rings: 249257529Sluigi * 250260368Sluigi * head first slot available for transmission. 251260368Sluigi * cur wakeup point. select() and poll() will unblock 252260368Sluigi * when 'tail' moves past 'cur' 253260368Sluigi * tail (readonly) first slot reserved to the kernel 254257529Sluigi * 255260368Sluigi * [head .. tail-1] can be used for new packets to send; 256260368Sluigi * 'head' and 'cur' must be incremented as slots are filled 257260368Sluigi * with new packets to be sent; 258260368Sluigi * 'cur' can be moved further ahead if we need more space 259267167Sluigi * for new transmissions. XXX todo (2014-03-12) 260227614Sluigi * 261257529Sluigi * In RX rings: 262227614Sluigi * 263260368Sluigi * head first valid received packet 264260368Sluigi * cur wakeup point. select() and poll() will unblock 265260368Sluigi * when 'tail' moves past 'cur' 266260368Sluigi * tail (readonly) first slot reserved to the kernel 267257529Sluigi * 268260368Sluigi * [head .. tail-1] contain received packets; 269260368Sluigi * 'head' and 'cur' must be incremented as slots are consumed 270260368Sluigi * and can be returned to the kernel; 271260368Sluigi * 'cur' can be moved further ahead if we want to wait for 272260368Sluigi * new packets without returning the previous ones. 273257529Sluigi * 274227614Sluigi * DATA OWNERSHIP/LOCKING: 275260368Sluigi * The netmap_ring, and all slots and buffers in the range 276260368Sluigi * [head .. 
tail-1] are owned by the user program; 277260368Sluigi * the kernel only accesses them during a netmap system call 278260368Sluigi * and in the user thread context. 279227614Sluigi * 280260368Sluigi * Other slots and buffers are reserved for use by the kernel 281227614Sluigi */ 282227614Sluigistruct netmap_ring { 283227614Sluigi /* 284257529Sluigi * buf_ofs is meant to be used through macros. 285227614Sluigi * It contains the offset of the buffer region from this 286227614Sluigi * descriptor. 287227614Sluigi */ 288260368Sluigi const int64_t buf_ofs; 289227614Sluigi const uint32_t num_slots; /* number of slots in the ring. */ 290260368Sluigi const uint32_t nr_buf_size; 291260368Sluigi const uint16_t ringid; 292260368Sluigi const uint16_t dir; /* 0: tx, 1: rx */ 293227614Sluigi 294260368Sluigi uint32_t head; /* (u) first user slot */ 295260368Sluigi uint32_t cur; /* (u) wakeup point */ 296260368Sluigi uint32_t tail; /* (k) first kernel slot */ 297227614Sluigi 298260368Sluigi uint32_t flags; 299227614Sluigi 300260368Sluigi struct timeval ts; /* (k) time of last *sync() */ 301260368Sluigi 302260368Sluigi /* opaque room for a mutex or similar object */ 303341477Svmaffione#if !defined(_WIN32) || defined(__CYGWIN__) 304341477Svmaffione uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; 305341477Svmaffione#else 306341477Svmaffione uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; 307341477Svmaffione#endif 308260368Sluigi 309227614Sluigi /* the slots follow. This struct has variable size */ 310232238Sluigi struct netmap_slot slot[0]; /* array of slots. */ 311227614Sluigi}; 312227614Sluigi 313227614Sluigi 314227614Sluigi/* 315260368Sluigi * RING FLAGS 316260368Sluigi */ 317260368Sluigi#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ 318260368Sluigi /* 319260368Sluigi * updates the 'ts' field on each netmap syscall. 
This saves 320260368Sluigi * saves a separate gettimeofday(), and is not much worse than 321260368Sluigi * software timestamps generated in the interrupt handler. 322260368Sluigi */ 323260368Sluigi 324260368Sluigi#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ 325260368Sluigi /* 326260368Sluigi * Enables the NS_FORWARD slot flag for the ring. 327260368Sluigi */ 328260368Sluigi 329341477Svmaffione/* 330341477Svmaffione * Helper functions for kernel and userspace 331341477Svmaffione */ 332260368Sluigi 333260368Sluigi/* 334342033Svmaffione * Check if space is available in the ring. We use ring->head, which 335342033Svmaffione * points to the next netmap slot to be published to netmap. It is 336342033Svmaffione * possible that the applications moves ring->cur ahead of ring->tail 337342033Svmaffione * (e.g., by setting ring->cur <== ring->tail), if it wants more slots 338342033Svmaffione * than the ones currently available, and it wants to be notified when 339342033Svmaffione * more arrive. See netmap(4) for more details and examples. 340341477Svmaffione */ 341341477Svmaffionestatic inline int 342341477Svmaffionenm_ring_empty(struct netmap_ring *ring) 343341477Svmaffione{ 344342033Svmaffione return (ring->head == ring->tail); 345341477Svmaffione} 346341477Svmaffione 347341477Svmaffione/* 348227614Sluigi * Netmap representation of an interface and its queue(s). 349257529Sluigi * This is initialized by the kernel when binding a file 350257529Sluigi * descriptor to a port, and should be considered as readonly 351257529Sluigi * by user programs. The kernel never uses it. 352257529Sluigi * 353227614Sluigi * There is one netmap_if for each file descriptor on which we want 354257529Sluigi * to select/poll. 355227614Sluigi * select/poll operates on one or all pairs depending on the value of 356227614Sluigi * nmr_queueid passed on the ioctl. 357227614Sluigi */ 358227614Sluigistruct netmap_if { 359232238Sluigi char ni_name[IFNAMSIZ]; /* name of the interface. 
*/ 360257529Sluigi const uint32_t ni_version; /* API version, currently unused */ 361257529Sluigi const uint32_t ni_flags; /* properties */ 362257529Sluigi#define NI_PRIV_MEM 0x1 /* private memory region */ 363257529Sluigi 364227614Sluigi /* 365260368Sluigi * The number of packet rings available in netmap mode. 366260368Sluigi * Physical NICs can have different numbers of tx and rx rings. 367260368Sluigi * Physical NICs also have a 'host' ring pair. 368260368Sluigi * Additionally, clients can request additional ring pairs to 369260368Sluigi * be used for internal communication. 370260368Sluigi */ 371260368Sluigi const uint32_t ni_tx_rings; /* number of HW tx rings */ 372260368Sluigi const uint32_t ni_rx_rings; /* number of HW rx rings */ 373260368Sluigi 374261909Sluigi uint32_t ni_bufs_head; /* head index for extra bufs */ 375261909Sluigi uint32_t ni_spare1[5]; 376260368Sluigi /* 377232238Sluigi * The following array contains the offset of each netmap ring 378260368Sluigi * from this structure, in the following order: 379260368Sluigi * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; 380260368Sluigi * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. 381260368Sluigi * 382257529Sluigi * The area is filled up by the kernel on NIOCREGIF, 383227614Sluigi * and then only read by userspace code. 384227614Sluigi */ 385227614Sluigi const ssize_t ring_ofs[0]; 386227614Sluigi}; 387227614Sluigi 388341477Svmaffione/* Legacy interface to interact with a netmap control device. 389341477Svmaffione * Included for backward compatibility. The user should not include this 390341477Svmaffione * file directly. */ 391341477Svmaffione#include "netmap_legacy.h" 392260368Sluigi 393227614Sluigi/* 394341477Svmaffione * New API to control netmap control devices. New applications should only use 395341477Svmaffione * nmreq_xyz structs with the NIOCCTRL ioctl() command. 
 *
 * NIOCCTRL takes a nmreq_header struct, which contains the required
 * API version, the name of a netmap port, a command type, and pointers
 * to request body and options.
 *
 *	nr_name	(in)
 *		The name of the port (em0, valeXXX:YYY, eth0{pn1 etc.)
 *
 *	nr_version (in/out)
 *		Must match NETMAP_API as used in the kernel, error otherwise.
 *		Always returns the desired value on output.
 *
 *	nr_reqtype (in)
 *		One of the NETMAP_REQ_* command types below
 *
 *	nr_body (in)
 *		Pointer to a command-specific struct, described by one
 *		of the struct nmreq_xyz below.
 *
 *	nr_options (in)
 *		Command specific options, if any.
 *
 * A NETMAP_REQ_REGISTER command activates netmap mode on the netmap
 * port (e.g. physical interface) specified by nmreq_header.nr_name.
 * The request body (struct nmreq_register) has several arguments to
 * specify how the port is to be registered.
 *
 *	nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
 *		On input, non-zero values may be used to reconfigure the port
 *		according to the requested values, but this is not guaranteed.
 *		On output the actual values in use are reported.
 *
 *	nr_mode (in)
 *		Indicate what set of rings must be bound to the netmap
 *		device (e.g. all NIC rings, host rings only, NIC and
 *		host rings, ...). Values are in NR_REG_*.
 *
 *	nr_ringid (in)
 *		If nr_mode == NR_REG_ONE_NIC (only a single couple of TX/RX
 *		rings), indicate which NIC TX and/or RX ring is to be bound
 *		(0..nr_*x_rings-1).
 *
 *	nr_flags (in)
 *		Indicate special options for how to open the port.
 *
 *		NR_NO_TX_POLL can be OR-ed to make select()/poll() push
 *			packets on tx rings only if POLLOUT is set.
 *			The default is to push any pending packet.
 *
 *		NR_DO_RX_POLL can be OR-ed to make select()/poll() release
 *			packets on rx rings also when POLLIN is NOT set.
 *			The default is to touch the rx ring only with POLLIN.
 *			Note that this is the opposite of TX because it
 *			reflects the common usage.
 *
 *		Other options are NR_MONITOR_TX, NR_MONITOR_RX, NR_ZCOPY_MON,
 *		NR_EXCLUSIVE, NR_RX_RINGS_ONLY, NR_TX_RINGS_ONLY and
 *		NR_ACCEPT_VNET_HDR.
 *
 *	nr_mem_id (in/out)
 *		The identity of the memory region used.
 *		On input, 0 means the system decides autonomously,
 *		other values may try to select a specific region.
 *		On return the actual value is reported.
 *		Region '1' is the global allocator, normally shared
 *		by all interfaces. Other values are private regions.
 *		If two ports share the same region, zero-copy is possible.
 *
 *	nr_extra_bufs (in/out)
 *		Number of extra buffers to be allocated.
 *
 * The other NETMAP_REQ_* commands are described below.
468261909Sluigi * 469227614Sluigi */ 470227614Sluigi 471341477Svmaffione/* maximum size of a request, including all options */ 472341477Svmaffione#define NETMAP_REQ_MAXSIZE 4096 473260368Sluigi 474341477Svmaffione/* Header common to all request options. */ 475341477Svmaffionestruct nmreq_option { 476341477Svmaffione /* Pointer ot the next option. */ 477341477Svmaffione uint64_t nro_next; 478341477Svmaffione /* Option type. */ 479341477Svmaffione uint32_t nro_reqtype; 480341477Svmaffione /* (out) status of the option: 481341477Svmaffione * 0: recognized and processed 482341477Svmaffione * !=0: errno value 483341477Svmaffione */ 484341477Svmaffione uint32_t nro_status; 485342033Svmaffione /* Option size, used only for options that can have variable size 486342033Svmaffione * (e.g. because they contain arrays). For fixed-size options this 487342033Svmaffione * field should be set to zero. */ 488342033Svmaffione uint64_t nro_size; 489341477Svmaffione}; 490341477Svmaffione 491341477Svmaffione/* Header common to all requests. Do not reorder these fields, as we need 492341477Svmaffione * the second one (nr_reqtype) to know how much to copy from/to userspace. */ 493341477Svmaffionestruct nmreq_header { 494341477Svmaffione uint16_t nr_version; /* API version */ 495341477Svmaffione uint16_t nr_reqtype; /* nmreq type (NETMAP_REQ_*) */ 496341477Svmaffione uint32_t nr_reserved; /* must be zero */ 497341477Svmaffione#define NETMAP_REQ_IFNAMSIZ 64 498341477Svmaffione char nr_name[NETMAP_REQ_IFNAMSIZ]; /* port name */ 499341477Svmaffione uint64_t nr_options; /* command-specific options */ 500341477Svmaffione uint64_t nr_body; /* ptr to nmreq_xyz struct */ 501341477Svmaffione}; 502341477Svmaffione 503341477Svmaffioneenum { 504341477Svmaffione /* Register a netmap port with the device. */ 505341477Svmaffione NETMAP_REQ_REGISTER = 1, 506341477Svmaffione /* Get information from a netmap port. 
*/ 507341477Svmaffione NETMAP_REQ_PORT_INFO_GET, 508341477Svmaffione /* Attach a netmap port to a VALE switch. */ 509341477Svmaffione NETMAP_REQ_VALE_ATTACH, 510341477Svmaffione /* Detach a netmap port from a VALE switch. */ 511341477Svmaffione NETMAP_REQ_VALE_DETACH, 512341477Svmaffione /* List the ports attached to a VALE switch. */ 513341477Svmaffione NETMAP_REQ_VALE_LIST, 514341477Svmaffione /* Set the port header length (was virtio-net header length). */ 515341477Svmaffione NETMAP_REQ_PORT_HDR_SET, 516341477Svmaffione /* Get the port header length (was virtio-net header length). */ 517341477Svmaffione NETMAP_REQ_PORT_HDR_GET, 518341477Svmaffione /* Create a new persistent VALE port. */ 519341477Svmaffione NETMAP_REQ_VALE_NEWIF, 520341477Svmaffione /* Delete a persistent VALE port. */ 521341477Svmaffione NETMAP_REQ_VALE_DELIF, 522341477Svmaffione /* Enable polling kernel thread(s) on an attached VALE port. */ 523341477Svmaffione NETMAP_REQ_VALE_POLLING_ENABLE, 524341477Svmaffione /* Disable polling kernel thread(s) on an attached VALE port. */ 525341477Svmaffione NETMAP_REQ_VALE_POLLING_DISABLE, 526341477Svmaffione /* Get info about the pools of a memory allocator. */ 527341477Svmaffione NETMAP_REQ_POOLS_INFO_GET, 528342033Svmaffione /* Start an in-kernel loop that syncs the rings periodically or 529342033Svmaffione * on notifications. The loop runs in the context of the ioctl 530342033Svmaffione * syscall, and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. */ 531342033Svmaffione NETMAP_REQ_SYNC_KLOOP_START, 532342033Svmaffione /* Stops the thread executing the in-kernel loop. The thread 533342033Svmaffione * returns from the ioctl syscall. */ 534342033Svmaffione NETMAP_REQ_SYNC_KLOOP_STOP, 535342033Svmaffione /* Enable CSB mode on a registered netmap control device. 
*/ 536342033Svmaffione NETMAP_REQ_CSB_ENABLE, 537341477Svmaffione}; 538341477Svmaffione 539341477Svmaffioneenum { 540341477Svmaffione /* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated 541343866Svmaffione * from user-space allocated memory pools (e.g. hugepages). 542343866Svmaffione */ 543341477Svmaffione NETMAP_REQ_OPT_EXTMEM = 1, 544342033Svmaffione 545342033Svmaffione /* ON NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based 546342033Svmaffione * notifications to synchronize the kernel loop with the application. 547342033Svmaffione */ 548342033Svmaffione NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, 549342033Svmaffione 550342033Svmaffione /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where 551342033Svmaffione * head, cur and tail pointers are not exchanged through the 552342033Svmaffione * struct netmap_ring header, but rather using an user-provided 553343866Svmaffione * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). 554343866Svmaffione */ 555342033Svmaffione NETMAP_REQ_OPT_CSB, 556343866Svmaffione 557343866Svmaffione /* An extension to NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, which specifies 558343866Svmaffione * if the TX and/or RX rings are synced in the context of the VM exit. 559343866Svmaffione * This requires the 'ioeventfd' fields to be valid (cannot be < 0). 560343866Svmaffione */ 561343866Svmaffione NETMAP_REQ_OPT_SYNC_KLOOP_MODE, 562341477Svmaffione}; 563341477Svmaffione 564227614Sluigi/* 565341477Svmaffione * nr_reqtype: NETMAP_REQ_REGISTER 566341477Svmaffione * Bind (register) a netmap port to this control device. 
567227614Sluigi */ 568341477Svmaffionestruct nmreq_register { 569341477Svmaffione uint64_t nr_offset; /* nifp offset in the shared region */ 570341477Svmaffione uint64_t nr_memsize; /* size of the shared region */ 571232238Sluigi uint32_t nr_tx_slots; /* slots in tx rings */ 572232238Sluigi uint32_t nr_rx_slots; /* slots in rx rings */ 573232238Sluigi uint16_t nr_tx_rings; /* number of tx rings */ 574232238Sluigi uint16_t nr_rx_rings; /* number of rx rings */ 575261909Sluigi 576341477Svmaffione uint16_t nr_mem_id; /* id of the memory allocator */ 577227614Sluigi uint16_t nr_ringid; /* ring(s) we care about */ 578341477Svmaffione uint32_t nr_mode; /* specify NR_REG_* modes */ 579342033Svmaffione uint32_t nr_extra_bufs; /* number of requested extra buffers */ 580261909Sluigi 581341477Svmaffione uint64_t nr_flags; /* additional flags (see below) */ 582341477Svmaffione/* monitors use nr_ringid and nr_mode to select the rings to monitor */ 583341477Svmaffione#define NR_MONITOR_TX 0x100 584341477Svmaffione#define NR_MONITOR_RX 0x200 585341477Svmaffione#define NR_ZCOPY_MON 0x400 586341477Svmaffione/* request exclusive access to the selected rings */ 587341477Svmaffione#define NR_EXCLUSIVE 0x800 588342033Svmaffione/* 0x1000 unused */ 589341477Svmaffione#define NR_RX_RINGS_ONLY 0x2000 590341477Svmaffione#define NR_TX_RINGS_ONLY 0x4000 591341477Svmaffione/* Applications set this flag if they are able to deal with virtio-net headers, 592341477Svmaffione * that is send/receive frames that start with a virtio-net header. 593341477Svmaffione * If not set, NIOCREGIF will fail with netmap ports that require applications 594341477Svmaffione * to use those headers. If the flag is set, the application can use the 595341477Svmaffione * NETMAP_VNET_HDR_GET command to figure out the header length. */ 596341477Svmaffione#define NR_ACCEPT_VNET_HDR 0x8000 597341477Svmaffione/* The following two have the same meaning of NETMAP_NO_TX_POLL and 598341477Svmaffione * NETMAP_DO_RX_POLL. 
*/ 599341477Svmaffione#define NR_DO_RX_POLL 0x10000 600341477Svmaffione#define NR_NO_TX_POLL 0x20000 601227614Sluigi}; 602227614Sluigi 603341477Svmaffione/* Valid values for nmreq_register.nr_mode (see above). */ 604261909Sluigienum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ 605261909Sluigi NR_REG_ALL_NIC = 1, 606261909Sluigi NR_REG_SW = 2, 607261909Sluigi NR_REG_NIC_SW = 3, 608261909Sluigi NR_REG_ONE_NIC = 4, 609341477Svmaffione NR_REG_PIPE_MASTER = 5, /* deprecated, use "x{y" port name syntax */ 610341477Svmaffione NR_REG_PIPE_SLAVE = 6, /* deprecated, use "x}y" port name syntax */ 611342033Svmaffione NR_REG_NULL = 7, 612261909Sluigi}; 613260368Sluigi 614341477Svmaffione/* A single ioctl number is shared by all the new API command. 615342033Svmaffione * Demultiplexing is done using the hdr.nr_reqtype field. 616341477Svmaffione * FreeBSD uses the size value embedded in the _IOWR to determine 617341477Svmaffione * how much to copy in/out, so we define the ioctl() command 618341477Svmaffione * specifying only nmreq_header, and copyin/copyout the rest. */ 619341477Svmaffione#define NIOCCTRL _IOWR('i', 151, struct nmreq_header) 620261909Sluigi 621341477Svmaffione/* The ioctl commands to sync TX/RX netmap rings. 622341477Svmaffione * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, 623341477Svmaffione * whose identity is set in NIOCREGIF through nr_ringid. 624341477Svmaffione * These are non blocking and take no argument. */ 625227614Sluigi#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ 626227614Sluigi#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ 627227614Sluigi 628341477Svmaffione/* 629341477Svmaffione * nr_reqtype: NETMAP_REQ_PORT_INFO_GET 630341477Svmaffione * Get information about a netmap port, including number of rings. 631342033Svmaffione * slots per ring, id of the memory allocator, etc. The netmap 632342033Svmaffione * control device used for this operation does not need to be bound 633342033Svmaffione * to a netmap port. 
634341477Svmaffione */ 635341477Svmaffionestruct nmreq_port_info_get { 636341477Svmaffione uint64_t nr_memsize; /* size of the shared region */ 637341477Svmaffione uint32_t nr_tx_slots; /* slots in tx rings */ 638341477Svmaffione uint32_t nr_rx_slots; /* slots in rx rings */ 639341477Svmaffione uint16_t nr_tx_rings; /* number of tx rings */ 640341477Svmaffione uint16_t nr_rx_rings; /* number of rx rings */ 641342033Svmaffione uint16_t nr_mem_id; /* memory allocator id (in/out) */ 642342033Svmaffione uint16_t pad1; 643341477Svmaffione}; 644260368Sluigi 645341477Svmaffione#define NM_BDG_NAME "vale" /* prefix for bridge port name */ 646341477Svmaffione 647260368Sluigi/* 648341477Svmaffione * nr_reqtype: NETMAP_REQ_VALE_ATTACH 649341477Svmaffione * Attach a netmap port to a VALE switch. Both the name of the netmap 650341477Svmaffione * port and the VALE switch are specified through the nr_name argument. 651341477Svmaffione * The attach operation could need to register a port, so at least 652341477Svmaffione * the same arguments are available. 653341477Svmaffione * port_index will contain the index where the port has been attached. 654260368Sluigi */ 655341477Svmaffionestruct nmreq_vale_attach { 656341477Svmaffione struct nmreq_register reg; 657341477Svmaffione uint32_t port_index; 658342033Svmaffione uint32_t pad1; 659341477Svmaffione}; 660260368Sluigi 661260368Sluigi/* 662341477Svmaffione * nr_reqtype: NETMAP_REQ_VALE_DETACH 663341477Svmaffione * Detach a netmap port from a VALE switch. Both the name of the netmap 664341477Svmaffione * port and the VALE switch are specified through the nr_name argument. 665341477Svmaffione * port_index will contain the index where the port was attached. 
666260368Sluigi */ 667341477Svmaffionestruct nmreq_vale_detach { 668341477Svmaffione uint32_t port_index; 669342033Svmaffione uint32_t pad1; 670341477Svmaffione}; 671260368Sluigi 672270063Sluigi/* 673341477Svmaffione * nr_reqtype: NETMAP_REQ_VALE_LIST 674341477Svmaffione * List the ports of a VALE switch. 675270063Sluigi */ 676341477Svmaffionestruct nmreq_vale_list { 677341477Svmaffione /* Name of the VALE port (valeXXX:YYY) or empty. */ 678341477Svmaffione uint16_t nr_bridge_idx; 679342033Svmaffione uint16_t pad1; 680341477Svmaffione uint32_t nr_port_idx; 681270063Sluigi}; 682270063Sluigi 683341477Svmaffione/* 684341477Svmaffione * nr_reqtype: NETMAP_REQ_PORT_HDR_SET or NETMAP_REQ_PORT_HDR_GET 685342033Svmaffione * Set or get the port header length of the port identified by hdr.nr_name. 686342033Svmaffione * The control device does not need to be bound to a netmap port. 687341477Svmaffione */ 688341477Svmaffionestruct nmreq_port_hdr { 689341477Svmaffione uint32_t nr_hdr_len; 690342033Svmaffione uint32_t pad1; 691341477Svmaffione}; 692341477Svmaffione 693341477Svmaffione/* 694341477Svmaffione * nr_reqtype: NETMAP_REQ_VALE_NEWIF 695341477Svmaffione * Create a new persistent VALE port. 696341477Svmaffione */ 697341477Svmaffionestruct nmreq_vale_newif { 698341477Svmaffione uint32_t nr_tx_slots; /* slots in tx rings */ 699341477Svmaffione uint32_t nr_rx_slots; /* slots in rx rings */ 700341477Svmaffione uint16_t nr_tx_rings; /* number of tx rings */ 701341477Svmaffione uint16_t nr_rx_rings; /* number of rx rings */ 702341477Svmaffione uint16_t nr_mem_id; /* id of the memory allocator */ 703342033Svmaffione uint16_t pad1; 704341477Svmaffione}; 705341477Svmaffione 706341477Svmaffione/* 707341477Svmaffione * nr_reqtype: NETMAP_REQ_VALE_POLLING_ENABLE or NETMAP_REQ_VALE_POLLING_DISABLE 708341477Svmaffione * Enable or disable polling kthreads on a VALE port. 
709341477Svmaffione */ 710341477Svmaffionestruct nmreq_vale_polling { 711341477Svmaffione uint32_t nr_mode; 712341477Svmaffione#define NETMAP_POLLING_MODE_SINGLE_CPU 1 713341477Svmaffione#define NETMAP_POLLING_MODE_MULTI_CPU 2 714341477Svmaffione uint32_t nr_first_cpu_id; 715341477Svmaffione uint32_t nr_num_polling_cpus; 716342033Svmaffione uint32_t pad1; 717341477Svmaffione}; 718341477Svmaffione 719341477Svmaffione/* 720341477Svmaffione * nr_reqtype: NETMAP_REQ_POOLS_INFO_GET 721342033Svmaffione * Get info about the pools of the memory allocator of the netmap 722342033Svmaffione * port specified by hdr.nr_name and nr_mem_id. The netmap control 723342033Svmaffione * device used for this operation does not need to be bound to a netmap 724342033Svmaffione * port. 725341477Svmaffione */ 726341477Svmaffionestruct nmreq_pools_info { 727341477Svmaffione uint64_t nr_memsize; 728342033Svmaffione uint16_t nr_mem_id; /* in/out argument */ 729342033Svmaffione uint16_t pad1[3]; 730341477Svmaffione uint64_t nr_if_pool_offset; 731341477Svmaffione uint32_t nr_if_pool_objtotal; 732341477Svmaffione uint32_t nr_if_pool_objsize; 733341477Svmaffione uint64_t nr_ring_pool_offset; 734341477Svmaffione uint32_t nr_ring_pool_objtotal; 735341477Svmaffione uint32_t nr_ring_pool_objsize; 736341477Svmaffione uint64_t nr_buf_pool_offset; 737341477Svmaffione uint32_t nr_buf_pool_objtotal; 738341477Svmaffione uint32_t nr_buf_pool_objsize; 739341477Svmaffione}; 740341477Svmaffione 741341477Svmaffione/* 742342033Svmaffione * nr_reqtype: NETMAP_REQ_SYNC_KLOOP_START 743342033Svmaffione * Start an in-kernel loop that syncs the rings periodically or on 744342033Svmaffione * notifications. The loop runs in the context of the ioctl syscall, 745342033Svmaffione * and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. 746342033Svmaffione * The registered netmap port must be open in CSB mode. 
747342033Svmaffione */ 748342033Svmaffionestruct nmreq_sync_kloop_start { 749342033Svmaffione /* Sleeping is the default synchronization method for the kloop. 750342033Svmaffione * The 'sleep_us' field specifies how many microsconds to sleep for 751342033Svmaffione * when there is no work to do, before doing another kloop iteration. 752342033Svmaffione */ 753342033Svmaffione uint32_t sleep_us; 754342033Svmaffione uint32_t pad1; 755342033Svmaffione}; 756342033Svmaffione 757342033Svmaffione/* A CSB entry for the application --> kernel direction. */ 758342033Svmaffionestruct nm_csb_atok { 759342033Svmaffione uint32_t head; /* AW+ KR+ the head of the appl netmap_ring */ 760342033Svmaffione uint32_t cur; /* AW+ KR+ the cur of the appl netmap_ring */ 761342033Svmaffione uint32_t appl_need_kick; /* AW+ KR+ kern --> appl notification enable */ 762342033Svmaffione uint32_t sync_flags; /* AW+ KR+ the flags of the appl [tx|rx]sync() */ 763342033Svmaffione uint32_t pad[12]; /* pad to a 64 bytes cacheline */ 764342033Svmaffione}; 765342033Svmaffione 766342033Svmaffione/* A CSB entry for the application <-- kernel direction. 
*/ 767342033Svmaffionestruct nm_csb_ktoa { 768342033Svmaffione uint32_t hwcur; /* AR+ KW+ the hwcur of the kern netmap_kring */ 769342033Svmaffione uint32_t hwtail; /* AR+ KW+ the hwtail of the kern netmap_kring */ 770342033Svmaffione uint32_t kern_need_kick; /* AR+ KW+ appl-->kern notification enable */ 771342033Svmaffione uint32_t pad[13]; 772342033Svmaffione}; 773342033Svmaffione 774342033Svmaffione#ifdef __linux__ 775342033Svmaffione 776342033Svmaffione#ifdef __KERNEL__ 777342033Svmaffione#define nm_stst_barrier smp_wmb 778343832Svmaffione#define nm_ldld_barrier smp_rmb 779343832Svmaffione#define nm_stld_barrier smp_mb 780342033Svmaffione#else /* !__KERNEL__ */ 781342033Svmaffionestatic inline void nm_stst_barrier(void) 782342033Svmaffione{ 783342033Svmaffione /* A memory barrier with release semantic has the combined 784342033Svmaffione * effect of a store-store barrier and a load-store barrier, 785342033Svmaffione * which is fine for us. */ 786342033Svmaffione __atomic_thread_fence(__ATOMIC_RELEASE); 787342033Svmaffione} 788343832Svmaffionestatic inline void nm_ldld_barrier(void) 789343832Svmaffione{ 790343832Svmaffione /* A memory barrier with acquire semantic has the combined 791343832Svmaffione * effect of a load-load barrier and a store-load barrier, 792343832Svmaffione * which is fine for us. 
*/ 793343832Svmaffione __atomic_thread_fence(__ATOMIC_ACQUIRE); 794343832Svmaffione} 795342033Svmaffione#endif /* !__KERNEL__ */ 796342033Svmaffione 797342033Svmaffione#elif defined(__FreeBSD__) 798342033Svmaffione 799342033Svmaffione#ifdef _KERNEL 800342033Svmaffione#define nm_stst_barrier atomic_thread_fence_rel 801343832Svmaffione#define nm_ldld_barrier atomic_thread_fence_acq 802343832Svmaffione#define nm_stld_barrier atomic_thread_fence_seq_cst 803342033Svmaffione#else /* !_KERNEL */ 804342033Svmaffione#include <stdatomic.h> 805342033Svmaffionestatic inline void nm_stst_barrier(void) 806342033Svmaffione{ 807342033Svmaffione atomic_thread_fence(memory_order_release); 808342033Svmaffione} 809343832Svmaffionestatic inline void nm_ldld_barrier(void) 810343832Svmaffione{ 811343832Svmaffione atomic_thread_fence(memory_order_acquire); 812343832Svmaffione} 813342033Svmaffione#endif /* !_KERNEL */ 814342033Svmaffione 815342033Svmaffione#else /* !__linux__ && !__FreeBSD__ */ 816342033Svmaffione#error "OS not supported" 817342033Svmaffione#endif /* !__linux__ && !__FreeBSD__ */ 818342033Svmaffione 819342033Svmaffione/* Application side of sync-kloop: Write ring pointers (cur, head) to the CSB. 820342033Svmaffione * This routine is coupled with sync_kloop_kernel_read(). */ 821342033Svmaffionestatic inline void 822342033Svmaffionenm_sync_kloop_appl_write(struct nm_csb_atok *atok, uint32_t cur, 823342033Svmaffione uint32_t head) 824342033Svmaffione{ 825343832Svmaffione /* Issue a first store-store barrier to make sure writes to the 826343832Svmaffione * netmap ring do not overcome updates on atok->cur and atok->head. */ 827343832Svmaffione nm_stst_barrier(); 828343832Svmaffione 829342033Svmaffione /* 830342033Svmaffione * We need to write cur and head to the CSB but we cannot do it atomically. 831342033Svmaffione * There is no way we can prevent the host from reading the updated value 832342033Svmaffione * of one of the two and the old value of the other. 
However, if we make 833342033Svmaffione * sure that the host never reads a value of head more recent than the 834342033Svmaffione * value of cur we are safe. We can allow the host to read a value of cur 835342033Svmaffione * more recent than the value of head, since in the netmap ring cur can be 836342033Svmaffione * ahead of head and cur cannot wrap around head because it must be behind 837342033Svmaffione * tail. Inverting the order of writes below could instead result into the 838342033Svmaffione * host to think head went ahead of cur, which would cause the sync 839342033Svmaffione * prologue to fail. 840342033Svmaffione * 841342033Svmaffione * The following memory barrier scheme is used to make this happen: 842342033Svmaffione * 843343832Svmaffione * Guest Host 844342033Svmaffione * 845343832Svmaffione * STORE(cur) LOAD(head) 846343832Svmaffione * wmb() <-----------> rmb() 847343832Svmaffione * STORE(head) LOAD(cur) 848342033Svmaffione * 849342033Svmaffione */ 850342033Svmaffione atok->cur = cur; 851342033Svmaffione nm_stst_barrier(); 852342033Svmaffione atok->head = head; 853342033Svmaffione} 854342033Svmaffione 855342033Svmaffione/* Application side of sync-kloop: Read kring pointers (hwcur, hwtail) from 856342033Svmaffione * the CSB. This routine is coupled with sync_kloop_kernel_write(). */ 857342033Svmaffionestatic inline void 858342033Svmaffionenm_sync_kloop_appl_read(struct nm_csb_ktoa *ktoa, uint32_t *hwtail, 859342033Svmaffione uint32_t *hwcur) 860342033Svmaffione{ 861342033Svmaffione /* 862342033Svmaffione * We place a memory barrier to make sure that the update of hwtail never 863342033Svmaffione * overtakes the update of hwcur. 864342033Svmaffione * (see explanation in sync_kloop_kernel_write). 
865342033Svmaffione */ 866342033Svmaffione *hwtail = ktoa->hwtail; 867343832Svmaffione nm_ldld_barrier(); 868342033Svmaffione *hwcur = ktoa->hwcur; 869343832Svmaffione 870343832Svmaffione /* Make sure that loads from ktoa->hwtail and ktoa->hwcur are not delayed 871343832Svmaffione * after the loads from the netmap ring. */ 872343832Svmaffione nm_ldld_barrier(); 873342033Svmaffione} 874342033Svmaffione 875342033Svmaffione/* 876341477Svmaffione * data for NETMAP_REQ_OPT_* options 877341477Svmaffione */ 878341477Svmaffione 879342033Svmaffionestruct nmreq_opt_sync_kloop_eventfds { 880342033Svmaffione struct nmreq_option nro_opt; /* common header */ 881342033Svmaffione /* An array of N entries for bidirectional notifications between 882342033Svmaffione * the kernel loop and the application. The number of entries and 883342033Svmaffione * their order must agree with the CSB arrays passed in the 884342033Svmaffione * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor 885342033Svmaffione * backed by an eventfd. 886343866Svmaffione * 887343866Svmaffione * If any of the 'ioeventfd' entries is < 0, the event loop uses 888343866Svmaffione * the sleeping synchronization strategy (according to sleep_us), 889343866Svmaffione * and keeps kern_need_kick always disabled. 890343866Svmaffione * Each 'irqfd' can be < 0, and in that case the corresponding queue 891343866Svmaffione * is never notified. 892342033Svmaffione */ 893342033Svmaffione struct { 894342033Svmaffione /* Notifier for the application --> kernel loop direction. */ 895342033Svmaffione int32_t ioeventfd; 896342033Svmaffione /* Notifier for the kernel loop --> application direction. 
*/ 897342033Svmaffione int32_t irqfd; 898342033Svmaffione } eventfds[0]; 899342033Svmaffione}; 900342033Svmaffione 901343866Svmaffionestruct nmreq_opt_sync_kloop_mode { 902343866Svmaffione struct nmreq_option nro_opt; /* common header */ 903343866Svmaffione#define NM_OPT_SYNC_KLOOP_DIRECT_TX (1 << 0) 904343866Svmaffione#define NM_OPT_SYNC_KLOOP_DIRECT_RX (1 << 1) 905343866Svmaffione uint32_t mode; 906343866Svmaffione}; 907343866Svmaffione 908341477Svmaffionestruct nmreq_opt_extmem { 909341477Svmaffione struct nmreq_option nro_opt; /* common header */ 910341477Svmaffione uint64_t nro_usrptr; /* (in) ptr to usr memory */ 911341477Svmaffione struct nmreq_pools_info nro_info; /* (in/out) */ 912341477Svmaffione}; 913341477Svmaffione 914342033Svmaffionestruct nmreq_opt_csb { 915342033Svmaffione struct nmreq_option nro_opt; 916342033Svmaffione 917342033Svmaffione /* Array of CSB entries for application --> kernel communication 918342033Svmaffione * (N entries). */ 919342033Svmaffione uint64_t csb_atok; 920342033Svmaffione 921342033Svmaffione /* Array of CSB entries for kernel --> application communication 922342033Svmaffione * (N entries). */ 923342033Svmaffione uint64_t csb_ktoa; 924342033Svmaffione}; 925342033Svmaffione 926227614Sluigi#endif /* _NET_NETMAP_H_ */ 927