/*-
 * Copyright (c) 2001-2002 Luigi Rizzo
 *
 * Supported by: the Xorp Project (www.xorp.org)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/eventhandler.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>			/* needed by net/if.h		*/
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/if.h>			/* for IFF_* flags		*/
#include <net/netisr.h>			/* for NETISR_POLL		*/
#include <net/vnet.h>

void hardclock_device_poll(void);	/* hook from hardclock		*/

static struct mtx	poll_mtx;

/*
 * Polling support for [network] device drivers.
 *
 * Drivers which support this feature can register with the
 * polling code.
 *
 * If registration is successful, the driver must disable interrupts,
 * and further I/O is performed through the handler, which is invoked
 * (at least once per clock tick) with 3 arguments: the "arg" passed at
 * register time (a struct ifnet pointer), a command, and a "count" limit.
 *
 * The command can be one of the following:
 *  POLL_ONLY: quick move of "count" packets from input/output queues.
 *  POLL_AND_CHECK_STATUS: as above, plus check status registers or do
 *	other more expensive operations. This command is issued periodically
 *	but less frequently than POLL_ONLY.
 *
 * The count limit specifies how much work the handler can do during the
 * call -- typically this is the number of packets to be received, or
 * transmitted, etc. (drivers are free to interpret this number, as long
 * as the max time spent in the function grows roughly linearly with the
 * count).
 *
 * Polling is enabled and disabled by setting the IFCAP_POLLING flag on
 * the interface. The driver's ioctl handler should register the interface
 * with the polling code and disable interrupts if registration succeeds.
 *
 * A sysctl variable controls the sharing of CPU between polling/kernel
 * network processing, and other activities (typically userlevel tasks):
 * kern.polling.user_frac (between 0 and 99, default 50) sets the share
 * of CPU allocated to user tasks. CPU is allocated proportionally to the
 * shares, by dynamically adjusting the "count" (poll_burst).
 *
 * Other parameters should be left at their default values.
 * The following constraints hold:
 *
 *	1 <= poll_each_burst <= poll_burst <= poll_burst_max
 *	MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
 */
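
/*
 * For illustration only: a minimal sketch of a driver poll handler.
 * The "foo" driver and its foo_rxeof()/foo_txeof()/foo_update_stats()
 * helpers are hypothetical, and the int-returning poll_handler_t
 * signature (the handler reports how many packets it processed) is
 * assumed here.  The handler must respect "count" and must not block:
 *
 *	static int
 *	foo_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 *	{
 *		struct foo_softc *sc = ifp->if_softc;
 *		int rx_done = 0;
 *
 *		FOO_LOCK(sc);
 *		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 *			rx_done = foo_rxeof(sc, count);	// <= count packets
 *			foo_txeof(sc);			// reclaim tx descriptors
 *			if (cmd == POLL_AND_CHECK_STATUS)
 *				foo_update_stats(sc);	// expensive, infrequent
 *		}
 *		FOO_UNLOCK(sc);
 *		return (rx_done);
 *	}
 */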

#define MIN_POLL_BURST_MAX	10
#define MAX_POLL_BURST_MAX	20000

static uint32_t poll_burst = 5;
static uint32_t poll_burst_max = 150;	/* good for 100Mbit net and HZ=1000 */
static uint32_t poll_each_burst = 5;

static SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
	"Device polling parameters");

SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RD,
	&poll_burst, 0, "Current polling burst size");

static int	netisr_poll_scheduled;
static int	netisr_pollmore_scheduled;
static int	poll_shutting_down;

static int poll_burst_max_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = poll_burst_max;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val < MIN_POLL_BURST_MAX || val > MAX_POLL_BURST_MAX)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	poll_burst_max = val;
	if (poll_burst > poll_burst_max)
		poll_burst = poll_burst_max;
	if (poll_each_burst > poll_burst_max)
		poll_each_burst = MIN_POLL_BURST_MAX;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, burst_max, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), poll_burst_max_sysctl, "I", "Max Polling burst size");
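
/*
 * Example (from userland):
 *	sysctl kern.polling.burst_max=1000
 * Values outside [MIN_POLL_BURST_MAX, MAX_POLL_BURST_MAX] are rejected
 * with EINVAL.  Note that lowering the limit clamps the current
 * poll_burst, and resets poll_each_burst to MIN_POLL_BURST_MAX if it
 * no longer fits under the new maximum.
 */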

static int poll_each_burst_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = poll_each_burst;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val < 1)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	if (val > poll_burst_max) {
		mtx_unlock(&poll_mtx);
		return (EINVAL);
	}
	poll_each_burst = val;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, each_burst, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), poll_each_burst_sysctl, "I",
	"Max size of each burst");

static uint32_t poll_in_idle_loop = 0;	/* do we poll in the idle loop? */
SYSCTL_UINT(_kern_polling, OID_AUTO, idle_poll, CTLFLAG_RW,
	&poll_in_idle_loop, 0, "Enable device polling in idle loop");

static uint32_t user_frac = 50;
static int user_frac_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = user_frac;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val > 99)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	user_frac = val;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, user_frac, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), user_frac_sysctl, "I",
	"Desired user fraction of cpu time");

static uint32_t reg_frac_count = 0;
static uint32_t reg_frac = 20;
static int reg_frac_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = reg_frac;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val < 1 || val > hz)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	reg_frac = val;
	if (reg_frac_count >= reg_frac)
		reg_frac_count = 0;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, reg_frac, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), reg_frac_sysctl, "I",
	"Check status registers every this many polling cycles");

static uint32_t short_ticks;
SYSCTL_UINT(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RD,
	&short_ticks, 0, "Hardclock ticks shorter than they should be");

static uint32_t lost_polls;
SYSCTL_UINT(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RD,
	&lost_polls, 0, "How many times we would have lost a poll tick");

static uint32_t pending_polls;
SYSCTL_UINT(_kern_polling, OID_AUTO, pending_polls, CTLFLAG_RD,
	&pending_polls, 0, "Do we need to poll again");

static int residual_burst = 0;
SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RD,
	&residual_burst, 0, "# of residual cycles in burst");

static uint32_t poll_handlers; /* next free entry in pr[]. */
SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
	&poll_handlers, 0, "Number of registered poll handlers");

static uint32_t phase;
SYSCTL_UINT(_kern_polling, OID_AUTO, phase, CTLFLAG_RD,
	&phase, 0, "Polling phase");
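
/*
 * The phase variable tracks where we are in the polling cycle, as can
 * be seen from the code below: 0 idle, 1 hardclock about to schedule a
 * poll, 2 poll scheduled, 3 netisr_poll() running the handlers,
 * 4 netisr_poll() done, 5 netisr_pollmore() running, 6 netisr_pollmore()
 * rescheduled another poll.  A nonzero phase at hardclock time means the
 * previous tick's work did not complete; the "suspect" counter below
 * records such cases.
 */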

static uint32_t suspect;
SYSCTL_UINT(_kern_polling, OID_AUTO, suspect, CTLFLAG_RD,
	&suspect, 0, "suspect event");

static uint32_t stalled;
SYSCTL_UINT(_kern_polling, OID_AUTO, stalled, CTLFLAG_RD,
	&stalled, 0, "potential stalls");

static uint32_t idlepoll_sleeping; /* idlepoll is sleeping */
SYSCTL_UINT(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
	&idlepoll_sleeping, 0, "idlepoll is sleeping");

#define POLL_LIST_LEN  128
struct pollrec {
	poll_handler_t	*handler;
	struct ifnet	*ifp;
};

static struct pollrec pr[POLL_LIST_LEN];

static void
poll_shutdown(void *arg, int howto)
{

	poll_shutting_down = 1;
}

static void
init_device_poll(void)
{

	mtx_init(&poll_mtx, "polling", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(shutdown_post_sync, poll_shutdown, NULL,
	    SHUTDOWN_PRI_LAST);
}
SYSINIT(device_poll, SI_SUB_SOFTINTR, SI_ORDER_MIDDLE, init_device_poll, NULL);

/*
 * Hook from hardclock. Tries to schedule a netisr, but keeps track
 * of lost ticks due to the previous handler taking too long.
 * Normally, this should not happen, because the polling handler should
 * run for a short time. However, in some cases (e.g. when there are
 * changes in link status etc.) the drivers take a very long time
 * (even on the order of milliseconds) to reset and reconfigure the
 * device, causing apparent lost polls.
 *
 * The first part of the code is just for debugging purposes, and tries
 * to count how often hardclock ticks are shorter than they should be,
 * meaning either stray interrupts or delayed events.
 */
void
hardclock_device_poll(void)
{
	static struct timeval prev_t, t;
	int delta;

	if (poll_handlers == 0 || poll_shutting_down)
		return;

	microuptime(&t);
	delta = (t.tv_usec - prev_t.tv_usec) +
		(t.tv_sec - prev_t.tv_sec)*1000000;
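	/*
	 * delta is in microseconds; a full tick is 1000000 / hz
	 * microseconds, so the check below counts ticks that were
	 * shorter than half their nominal length.
	 */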
	if (delta * hz < 500000)
		short_ticks++;
	else
		prev_t = t;

	if (pending_polls > 100) {
		/*
		 * Too many, assume it has stalled (not always true,
		 * see the comment above).
		 */
		stalled++;
		pending_polls = 0;
		phase = 0;
	}

	if (phase <= 2) {
		if (phase != 0)
			suspect++;
		phase = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 2;
	}
	if (pending_polls++ > 0)
		lost_polls++;
}

/*
 * ether_poll is called from the idle loop.
 */
static void
ether_poll(int count)
{
	int i;

	mtx_lock(&poll_mtx);

	if (count > poll_each_burst)
		count = poll_each_burst;

	for (i = 0; i < poll_handlers; i++)
		pr[i].handler(pr[i].ifp, POLL_ONLY, count);

	mtx_unlock(&poll_mtx);
}

/*
 * netisr_pollmore is called after other netisr's, possibly scheduling
 * another NETISR_POLL call, or adapting the burst size for the next cycle.
 *
 * It is very bad to fetch large bursts of packets from a single card at once,
 * because the burst could take a long time to be completely processed, or
 * could saturate the intermediate queue (ipintrq or similar) leading to
 * losses or unfairness. To reduce the problem, and also to account better for
 * time spent in network-related processing, we split the burst into smaller
 * chunks of fixed size, giving control to the other netisr's between chunks.
 * This helps improve fairness, reduce livelock (because we emulate more
 * closely the "process to completion" that we have with fastforwarding)
 * and account for the work performed in low level handling and forwarding.
 */

static struct timeval poll_start_t;

void
netisr_pollmore(void)
{
	struct timeval t;
	int kern_load;

	mtx_lock(&poll_mtx);
	if (!netisr_pollmore_scheduled) {
		mtx_unlock(&poll_mtx);
		return;
	}
	netisr_pollmore_scheduled = 0;
	phase = 5;
	if (residual_burst > 0) {
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		mtx_unlock(&poll_mtx);
		/* will run immediately on return, followed by netisrs */
		return;
	}
	/* here we can account for time spent in netisr's in this tick */
	microuptime(&t);
	kern_load = (t.tv_usec - poll_start_t.tv_usec) +
		(t.tv_sec - poll_start_t.tv_sec)*1000000;	/* us */
	kern_load = (kern_load * hz) / 10000;			/* 0..100 */
	if (kern_load > (100 - user_frac)) { /* kernel load too high, reduce burst */
		if (poll_burst > 1)
			poll_burst--;
	} else {
		if (poll_burst < poll_burst_max)
			poll_burst++;
	}

	pending_polls--;
	if (pending_polls == 0) /* we are done */
		phase = 0;
	else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks. Restart processing again, but slightly
		 * reduce the burst size to prevent this from happening again.
		 */
		poll_burst -= (poll_burst / 8);
		if (poll_burst < 1)
			poll_burst = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 6;
	}
	mtx_unlock(&poll_mtx);
}

/*
 * netisr_poll is typically scheduled once per tick.
 */
void
netisr_poll(void)
{
	int i, cycles;
	enum poll_cmd arg = POLL_ONLY;

	mtx_lock(&poll_mtx);
	if (!netisr_poll_scheduled) {
		mtx_unlock(&poll_mtx);
		return;
	}
	netisr_poll_scheduled = 0;
	phase = 3;
	if (residual_burst == 0) { /* first call in this tick */
		microuptime(&poll_start_t);
		if (++reg_frac_count == reg_frac) {
			arg = POLL_AND_CHECK_STATUS;
			reg_frac_count = 0;
		}

		residual_burst = poll_burst;
	}
	cycles = (residual_burst < poll_each_burst) ?
		residual_burst : poll_each_burst;
	residual_burst -= cycles;

	for (i = 0; i < poll_handlers; i++)
		pr[i].handler(pr[i].ifp, arg, cycles);

	phase = 4;
	mtx_unlock(&poll_mtx);
}

/*
 * Try to register a routine for polling. Returns 0 if successful
 * (and polling should be enabled), an error code otherwise.
 * A device is not supposed to register itself multiple times.
 *
 * This is called from within the *_ioctl() functions.
 */
int
ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
{
	int i;

	KASSERT(h != NULL, ("%s: handler is NULL", __func__));
	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	mtx_lock(&poll_mtx);
	if (poll_handlers >= POLL_LIST_LEN) {
		/*
		 * List full, cannot register more entries.
		 * This should never happen; if it does, it is probably a
		 * broken driver trying to register multiple times. Checking
		 * this at runtime is expensive, and won't solve the problem
		 * anyway, so just report a few times and then give up.
		 */
		static int verbose = 10;
		if (verbose > 0) {
			log(LOG_ERR, "poll handlers list full, "
			    "maybe a broken driver?\n");
			verbose--;
		}
		mtx_unlock(&poll_mtx);
		return (ENOMEM); /* no polling for you */
	}

	for (i = 0; i < poll_handlers; i++)
		if (pr[i].ifp == ifp && pr[i].handler != NULL) {
			mtx_unlock(&poll_mtx);
			log(LOG_DEBUG, "ether_poll_register: %s: handler"
			    " already registered\n", ifp->if_xname);
			return (EEXIST);
		}

	pr[poll_handlers].handler = h;
	pr[poll_handlers].ifp = ifp;
	poll_handlers++;
	mtx_unlock(&poll_mtx);
	if (idlepoll_sleeping)
		wakeup(&idlepoll_sleeping);
	return (0);
}
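
/*
 * For illustration only: how a driver's ioctl routine typically turns
 * polling on and off in response to SIOCSIFCAP.  This is a sketch for
 * the same hypothetical "foo" driver; foo_poll(), foo_intr_enable()
 * and foo_intr_disable() are assumed helpers:
 *
 *	case SIOCSIFCAP:
 *		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 *		if (mask & IFCAP_POLLING) {
 *			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 *				error = ether_poll_register(foo_poll, ifp);
 *				if (error)
 *					return (error);
 *				FOO_LOCK(sc);
 *				foo_intr_disable(sc);
 *				ifp->if_capenable |= IFCAP_POLLING;
 *				FOO_UNLOCK(sc);
 *			} else {
 *				error = ether_poll_deregister(ifp);
 *				FOO_LOCK(sc);
 *				foo_intr_enable(sc);
 *				ifp->if_capenable &= ~IFCAP_POLLING;
 *				FOO_UNLOCK(sc);
 *			}
 *		}
 *		break;
 */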

/*
 * Remove the interface from the polling list. Called from *_ioctl(), too.
 */
int
ether_poll_deregister(struct ifnet *ifp)
{
	int i;

	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	mtx_lock(&poll_mtx);

	for (i = 0; i < poll_handlers; i++)
		if (pr[i].ifp == ifp) /* found it */
			break;
	if (i == poll_handlers) {
		log(LOG_DEBUG, "ether_poll_deregister: %s: not found!\n",
		    ifp->if_xname);
		mtx_unlock(&poll_mtx);
		return (ENOENT);
	}
	poll_handlers--;
	if (i < poll_handlers) { /* Last entry replaces this one. */
		pr[i].handler = pr[poll_handlers].handler;
		pr[i].ifp = pr[poll_handlers].ifp;
	}
	mtx_unlock(&poll_mtx);
	return (0);
}

static void
poll_idle(void)
{
	struct thread *td = curthread;
	struct rtprio rtp;

	rtp.prio = RTP_PRIO_MAX;	/* lowest priority */
	rtp.type = RTP_PRIO_IDLE;
	PROC_SLOCK(td->td_proc);
	rtp_to_pri(&rtp, td);
	PROC_SUNLOCK(td->td_proc);

	for (;;) {
		if (poll_in_idle_loop && poll_handlers > 0) {
			idlepoll_sleeping = 0;
			ether_poll(poll_each_burst);
			thread_lock(td);
			mi_switch(SW_VOL, NULL);
			thread_unlock(td);
		} else {
			idlepoll_sleeping = 1;
			tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3);
		}
	}
}

static struct proc *idlepoll;
static struct kproc_desc idlepoll_kp = {
	 "idlepoll",
	 poll_idle,
	 &idlepoll
};
SYSINIT(idlepoll, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start,
    &idlepoll_kp);