1/*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/
2
3/*
4 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include <sys/types.h>
34#ifdef HAVE_SYS_TIME_H
35#include <sys/time.h>
36#else
37#include <sys/_time.h>
38#endif
39#include <sys/queue.h>
40#include <sys/event.h>
41#include <signal.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46#include <errno.h>
47#ifdef HAVE_INTTYPES_H
48#include <inttypes.h>
49#endif
50
/*
 * INTPTR(x) prepares a value for assignment to the udata member of a
 * struct kevent.  When <inttypes.h> is available and the platform is
 * neither OpenBSD nor FreeBSD the value is cast to intptr_t; otherwise it
 * is passed through unchanged.
 *
 * Both the argument and the whole expansion are parenthesized so the macro
 * behaves like a single primary expression wherever it is used.
 */
#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__)
#define INTPTR(x)	((intptr_t)(x))
#else
#define INTPTR(x)	(x)
#endif
56
57#include "event.h"
58#include "log.h"
59
/*
 * Backend-private bit kept in ev_flags: set by kq_add() once the event's
 * change has been queued, cleared again by kq_del().
 */
#define EVLIST_X_KQINKERNEL	0x1000

/* Initial number of slots in both the change array and the result array. */
#define NEVENT		64
63
/*
 * State of one kqueue backend instance, allocated by kq_init() and
 * released by kq_dealloc().
 */
struct kqop {
	/* Changes queued by kq_insert(), handed to kevent() on dispatch. */
	struct kevent *changes;
	/* Number of entries currently queued in changes. */
	int nchanges;
	/* Buffer that kevent() fills with triggered events. */
	struct kevent *events;
	/* Allocated slot count of the changes and events arrays. */
	int nevents;
	/* Descriptor returned by kqueue(). */
	int kq;
};
71
/*
 * Entry points of the kqueue backend.  Everything except kq_insert() is
 * referenced from the kqops method table.
 */
void *kq_init(void);
int kq_add(void *arg, struct event *ev);
int kq_del(void *arg, struct event *ev);
int kq_recalc(struct event_base *base, void *arg, int max);
int kq_dispatch(struct event_base *base, void *arg, struct timeval *tv);
int kq_insert(struct kqop *kqop, struct kevent *kev);
void kq_dealloc(void *arg);
79
80const struct eventop kqops = {
81	"kqueue",
82	kq_init,
83	kq_add,
84	kq_del,
85	kq_recalc,
86	kq_dispatch,
87	kq_dealloc
88};
89
90void *
91kq_init(void)
92{
93	int kq;
94	struct kqop *kqueueop;
95
96	/* Disable kqueue when this environment variable is set */
97	if (getenv("EVENT_NOKQUEUE"))
98		return (NULL);
99
100	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
101		return (NULL);
102
103	/* Initalize the kernel queue */
104
105	if ((kq = kqueue()) == -1) {
106		event_warn("kqueue");
107		free (kqueueop);
108		return (NULL);
109	}
110
111	kqueueop->kq = kq;
112
113	/* Initalize fields */
114	kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
115	if (kqueueop->changes == NULL) {
116		free (kqueueop);
117		return (NULL);
118	}
119	kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
120	if (kqueueop->events == NULL) {
121		free (kqueueop->changes);
122		free (kqueueop);
123		return (NULL);
124	}
125	kqueueop->nevents = NEVENT;
126
127	/* Check for Mac OS X kqueue bug. */
128	kqueueop->changes[0].ident = -1;
129	kqueueop->changes[0].filter = EVFILT_READ;
130	kqueueop->changes[0].flags = EV_ADD;
131	/*
132	 * If kqueue works, then kevent will succeed, and it will
133	 * stick an error in events[0].  If kqueue is broken, then
134	 * kevent will fail.
135	 */
136	if (kevent(kq,
137		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
138	    kqueueop->events[0].ident != -1 ||
139	    kqueueop->events[0].flags != EV_ERROR) {
140		event_warn("%s: detected broken kqueue; not using.", __func__);
141		free(kqueueop->changes);
142		free(kqueueop->events);
143		free(kqueueop);
144		close(kq);
145		return (NULL);
146	}
147
148	return (kqueueop);
149}
150
/*
 * Recalculation hook of the backend interface.  This backend has nothing
 * to recompute: all three arguments are ignored and 0 is always returned.
 */
int
kq_recalc(struct event_base *base, void *arg, int max)
{
	(void)base;
	(void)arg;
	(void)max;

	return 0;
}
156
157int
158kq_insert(struct kqop *kqop, struct kevent *kev)
159{
160	int nevents = kqop->nevents;
161
162	if (kqop->nchanges == nevents) {
163		struct kevent *newchange;
164		struct kevent *newresult;
165
166		nevents *= 2;
167
168		newchange = realloc(kqop->changes,
169				    nevents * sizeof(struct kevent));
170		if (newchange == NULL) {
171			event_warn("%s: malloc", __func__);
172			return (-1);
173		}
174		kqop->changes = newchange;
175
176		newresult = realloc(kqop->events,
177				    nevents * sizeof(struct kevent));
178
179		/*
180		 * If we fail, we don't have to worry about freeing,
181		 * the next realloc will pick it up.
182		 */
183		if (newresult == NULL) {
184			event_warn("%s: malloc", __func__);
185			return (-1);
186		}
187		kqop->events = newresult;
188
189		kqop->nevents = nevents;
190	}
191
192	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
193
194	event_debug(("%s: fd %d %s%s",
195		 __func__, kev->ident,
196		 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
197		 kev->flags == EV_DELETE ? " (del)" : ""));
198
199	return (0);
200}
201
/*
 * Empty signal handler that kq_add() installs with signal() for every
 * signal event.  Presumably it exists only so that the signal's default
 * action does not run while delivery is reported through EVFILT_SIGNAL.
 */
static void
kq_sighandler(int sig)
{
	(void)sig;	/* intentionally does nothing */
}
207
208int
209kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
210{
211	struct kqop *kqop = arg;
212	struct kevent *changes = kqop->changes;
213	struct kevent *events = kqop->events;
214	struct event *ev;
215	struct timespec ts;
216	int i, res;
217
218	TIMEVAL_TO_TIMESPEC(tv, &ts);
219
220	res = kevent(kqop->kq, changes, kqop->nchanges,
221	    events, kqop->nevents, &ts);
222	kqop->nchanges = 0;
223	if (res == -1) {
224		if (errno != EINTR) {
225                        event_warn("kevent");
226			return (-1);
227		}
228
229		return (0);
230	}
231
232	event_debug(("%s: kevent reports %d", __func__, res));
233
234	for (i = 0; i < res; i++) {
235		int which = 0;
236
237		if (events[i].flags & EV_ERROR) {
238			/*
239			 * Error messages that can happen, when a delete fails.
240			 *   EBADF happens when the file discriptor has been
241			 *   closed,
242			 *   ENOENT when the file discriptor was closed and
243			 *   then reopened.
244			 *   EINVAL for some reasons not understood; EINVAL
245			 *   should not be returned ever; but FreeBSD does :-\
246			 * An error is also indicated when a callback deletes
247			 * an event we are still processing.  In that case
248			 * the data field is set to ENOENT.
249			 */
250			if (events[i].data == EBADF ||
251			    events[i].data == EINVAL ||
252			    events[i].data == ENOENT)
253				continue;
254			errno = events[i].data;
255			return (-1);
256		}
257
258		ev = (struct event *)events[i].udata;
259
260		if (events[i].filter == EVFILT_READ) {
261			which |= EV_READ;
262		} else if (events[i].filter == EVFILT_WRITE) {
263			which |= EV_WRITE;
264		} else if (events[i].filter == EVFILT_SIGNAL) {
265			which |= EV_SIGNAL;
266		}
267
268		if (!which)
269			continue;
270
271		if (!(ev->ev_events & EV_PERSIST))
272			event_del(ev);
273
274		event_active(ev, which,
275		    ev->ev_events & EV_SIGNAL ? events[i].data : 1);
276	}
277
278	return (0);
279}
280
281
282int
283kq_add(void *arg, struct event *ev)
284{
285	struct kqop *kqop = arg;
286	struct kevent kev;
287
288	if (ev->ev_events & EV_SIGNAL) {
289		int nsignal = EVENT_SIGNAL(ev);
290
291 		memset(&kev, 0, sizeof(kev));
292		kev.ident = nsignal;
293		kev.filter = EVFILT_SIGNAL;
294		kev.flags = EV_ADD;
295		if (!(ev->ev_events & EV_PERSIST))
296			kev.flags |= EV_ONESHOT;
297		kev.udata = INTPTR(ev);
298
299		if (kq_insert(kqop, &kev) == -1)
300			return (-1);
301
302		if (signal(nsignal, kq_sighandler) == SIG_ERR)
303			return (-1);
304
305		ev->ev_flags |= EVLIST_X_KQINKERNEL;
306		return (0);
307	}
308
309	if (ev->ev_events & EV_READ) {
310 		memset(&kev, 0, sizeof(kev));
311		kev.ident = ev->ev_fd;
312		kev.filter = EVFILT_READ;
313#ifdef NOTE_EOF
314		/* Make it behave like select() and poll() */
315		kev.fflags = NOTE_EOF;
316#endif
317		kev.flags = EV_ADD;
318		if (!(ev->ev_events & EV_PERSIST))
319			kev.flags |= EV_ONESHOT;
320		kev.udata = INTPTR(ev);
321
322		if (kq_insert(kqop, &kev) == -1)
323			return (-1);
324
325		ev->ev_flags |= EVLIST_X_KQINKERNEL;
326	}
327
328	if (ev->ev_events & EV_WRITE) {
329 		memset(&kev, 0, sizeof(kev));
330		kev.ident = ev->ev_fd;
331		kev.filter = EVFILT_WRITE;
332		kev.flags = EV_ADD;
333		if (!(ev->ev_events & EV_PERSIST))
334			kev.flags |= EV_ONESHOT;
335		kev.udata = INTPTR(ev);
336
337		if (kq_insert(kqop, &kev) == -1)
338			return (-1);
339
340		ev->ev_flags |= EVLIST_X_KQINKERNEL;
341	}
342
343	return (0);
344}
345
346int
347kq_del(void *arg, struct event *ev)
348{
349	struct kqop *kqop = arg;
350	struct kevent kev;
351
352	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
353		return (0);
354
355	if (ev->ev_events & EV_SIGNAL) {
356		int nsignal = EVENT_SIGNAL(ev);
357
358 		memset(&kev, 0, sizeof(kev));
359		kev.ident = nsignal;
360		kev.filter = EVFILT_SIGNAL;
361		kev.flags = EV_DELETE;
362
363		if (kq_insert(kqop, &kev) == -1)
364			return (-1);
365
366		if (signal(nsignal, SIG_DFL) == SIG_ERR)
367			return (-1);
368
369		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
370		return (0);
371	}
372
373	if (ev->ev_events & EV_READ) {
374 		memset(&kev, 0, sizeof(kev));
375		kev.ident = ev->ev_fd;
376		kev.filter = EVFILT_READ;
377		kev.flags = EV_DELETE;
378
379		if (kq_insert(kqop, &kev) == -1)
380			return (-1);
381
382		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
383	}
384
385	if (ev->ev_events & EV_WRITE) {
386 		memset(&kev, 0, sizeof(kev));
387		kev.ident = ev->ev_fd;
388		kev.filter = EVFILT_WRITE;
389		kev.flags = EV_DELETE;
390
391		if (kq_insert(kqop, &kev) == -1)
392			return (-1);
393
394		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
395	}
396
397	return (0);
398}
399
400void
401kq_dealloc(void *arg)
402{
403	struct kqop *kqop = arg;
404
405	if (kqop->changes)
406		free(kqop->changes);
407	if (kqop->events)
408		free(kqop->events);
409	if (kqop->kq)
410		close(kqop->kq);
411	memset(kqop, 0, sizeof(struct kqop));
412	free(kqop);
413}
414