/* kqueue.c revision 289166 */
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "apr.h"
18#include "apr_poll.h"
19#include "apr_time.h"
20#include "apr_portable.h"
21#include "apr_arch_file_io.h"
22#include "apr_arch_networkio.h"
23#include "apr_arch_poll_private.h"
24#include "apr_arch_inherit.h"
25
26#ifdef HAVE_KQUEUE
27
28static apr_int16_t get_kqueue_revent(apr_int16_t event, apr_int16_t flags)
29{
30    apr_int16_t rv = 0;
31
32    if (event == EVFILT_READ)
33        rv |= APR_POLLIN;
34    else if (event == EVFILT_WRITE)
35        rv |= APR_POLLOUT;
36    if (flags & EV_EOF)
37        rv |= APR_POLLHUP;
38    /* APR_POLLPRI, APR_POLLERR, and APR_POLLNVAL are not handled by this
39     * implementation.
40     * TODO: See if EV_ERROR + certain system errors in the returned data field
41     * should map to APR_POLLNVAL.
42     */
43    return rv;
44}
45
46struct apr_pollset_private_t
47{
48    int kqueue_fd;
49    struct kevent kevent;
50    apr_uint32_t setsize;
51    struct kevent *ke_set;
52    apr_pollfd_t *result_set;
53#if APR_HAS_THREADS
54    /* A thread mutex to protect operations on the rings */
55    apr_thread_mutex_t *ring_lock;
56#endif
57    /* A ring containing all of the pollfd_t that are active */
58    APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring;
59    /* A ring of pollfd_t that have been used, and then _remove'd */
60    APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring;
61    /* A ring of pollfd_t where rings that have been _remove'd but
62       might still be inside a _poll */
63    APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring;
64};
65
66static apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset)
67{
68    close(pollset->p->kqueue_fd);
69    return APR_SUCCESS;
70}
71
72static apr_status_t impl_pollset_create(apr_pollset_t *pollset,
73                                        apr_uint32_t size,
74                                        apr_pool_t *p,
75                                        apr_uint32_t flags)
76{
77    apr_status_t rv;
78    pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t));
79#if APR_HAS_THREADS
80    if (flags & APR_POLLSET_THREADSAFE &&
81        ((rv = apr_thread_mutex_create(&pollset->p->ring_lock,
82                                       APR_THREAD_MUTEX_DEFAULT,
83                                       p)) != APR_SUCCESS)) {
84        pollset->p = NULL;
85        return rv;
86    }
87#else
88    if (flags & APR_POLLSET_THREADSAFE) {
89        pollset->p = NULL;
90        return APR_ENOTIMPL;
91    }
92#endif
93
94    /* POLLIN and POLLOUT are represented in different returned
95     * events, so we need 2 entries per descriptor in the result set,
96     * both for what is returned by kevent() and what is returned to
97     * the caller of apr_pollset_poll() (since it doesn't spend the
98     * CPU to coalesce separate APR_POLLIN and APR_POLLOUT events
99     * for the same descriptor)
100     */
101    pollset->p->setsize = 2 * size;
102
103    pollset->p->ke_set =
104        (struct kevent *) apr_palloc(p, pollset->p->setsize * sizeof(struct kevent));
105
106    memset(pollset->p->ke_set, 0, pollset->p->setsize * sizeof(struct kevent));
107
108    pollset->p->kqueue_fd = kqueue();
109
110    if (pollset->p->kqueue_fd == -1) {
111        pollset->p = NULL;
112        return apr_get_netos_error();
113    }
114
115    {
116        int flags;
117
118        if ((flags = fcntl(pollset->p->kqueue_fd, F_GETFD)) == -1) {
119            rv = errno;
120            close(pollset->p->kqueue_fd);
121            pollset->p = NULL;
122            return rv;
123        }
124
125        flags |= FD_CLOEXEC;
126        if (fcntl(pollset->p->kqueue_fd, F_SETFD, flags) == -1) {
127            rv = errno;
128            close(pollset->p->kqueue_fd);
129            pollset->p = NULL;
130            return rv;
131        }
132    }
133
134    pollset->p->result_set = apr_palloc(p, pollset->p->setsize * sizeof(apr_pollfd_t));
135
136    APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link);
137    APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link);
138    APR_RING_INIT(&pollset->p->dead_ring, pfd_elem_t, link);
139
140    return APR_SUCCESS;
141}
142
143static apr_status_t impl_pollset_add(apr_pollset_t *pollset,
144                                     const apr_pollfd_t *descriptor)
145{
146    apr_os_sock_t fd;
147    pfd_elem_t *elem;
148    apr_status_t rv = APR_SUCCESS;
149
150    pollset_lock_rings();
151
152    if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) {
153        elem = APR_RING_FIRST(&(pollset->p->free_ring));
154        APR_RING_REMOVE(elem, link);
155    }
156    else {
157        elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t));
158        APR_RING_ELEM_INIT(elem, link);
159    }
160    elem->pfd = *descriptor;
161
162    if (descriptor->desc_type == APR_POLL_SOCKET) {
163        fd = descriptor->desc.s->socketdes;
164    }
165    else {
166        fd = descriptor->desc.f->filedes;
167    }
168
169    if (descriptor->reqevents & APR_POLLIN) {
170        EV_SET(&pollset->p->kevent, fd, EVFILT_READ, EV_ADD, 0, 0, elem);
171
172        if (kevent(pollset->p->kqueue_fd, &pollset->p->kevent, 1, NULL, 0,
173                   NULL) == -1) {
174            rv = apr_get_netos_error();
175        }
176    }
177
178    if (descriptor->reqevents & APR_POLLOUT && rv == APR_SUCCESS) {
179        EV_SET(&pollset->p->kevent, fd, EVFILT_WRITE, EV_ADD, 0, 0, elem);
180
181        if (kevent(pollset->p->kqueue_fd, &pollset->p->kevent, 1, NULL, 0,
182                   NULL) == -1) {
183            rv = apr_get_netos_error();
184        }
185    }
186
187    if (rv == APR_SUCCESS) {
188        APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link);
189    }
190    else {
191        APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link);
192    }
193
194    pollset_unlock_rings();
195
196    return rv;
197}
198
199static apr_status_t impl_pollset_remove(apr_pollset_t *pollset,
200                                        const apr_pollfd_t *descriptor)
201{
202    pfd_elem_t *ep;
203    apr_status_t rv;
204    apr_os_sock_t fd;
205
206    pollset_lock_rings();
207
208    if (descriptor->desc_type == APR_POLL_SOCKET) {
209        fd = descriptor->desc.s->socketdes;
210    }
211    else {
212        fd = descriptor->desc.f->filedes;
213    }
214
215    rv = APR_NOTFOUND; /* unless at least one of the specified conditions is */
216    if (descriptor->reqevents & APR_POLLIN) {
217        EV_SET(&pollset->p->kevent, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
218
219        if (kevent(pollset->p->kqueue_fd, &pollset->p->kevent, 1, NULL, 0,
220                   NULL) != -1) {
221            rv = APR_SUCCESS;
222        }
223    }
224
225    if (descriptor->reqevents & APR_POLLOUT) {
226        EV_SET(&pollset->p->kevent, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
227
228        if (kevent(pollset->p->kqueue_fd, &pollset->p->kevent, 1, NULL, 0,
229                   NULL) != -1) {
230            rv = APR_SUCCESS;
231        }
232    }
233
234    for (ep = APR_RING_FIRST(&(pollset->p->query_ring));
235         ep != APR_RING_SENTINEL(&(pollset->p->query_ring),
236                                 pfd_elem_t, link);
237         ep = APR_RING_NEXT(ep, link)) {
238
239        if (descriptor->desc.s == ep->pfd.desc.s) {
240            APR_RING_REMOVE(ep, link);
241            APR_RING_INSERT_TAIL(&(pollset->p->dead_ring),
242                                 ep, pfd_elem_t, link);
243            break;
244        }
245    }
246
247    pollset_unlock_rings();
248
249    return rv;
250}
251
252static apr_status_t impl_pollset_poll(apr_pollset_t *pollset,
253                                      apr_interval_time_t timeout,
254                                      apr_int32_t *num,
255                                      const apr_pollfd_t **descriptors)
256{
257    int ret, i, j;
258    struct timespec tv, *tvptr;
259    apr_status_t rv = APR_SUCCESS;
260    apr_pollfd_t fd;
261
262    if (timeout < 0) {
263        tvptr = NULL;
264    }
265    else {
266        tv.tv_sec = (long) apr_time_sec(timeout);
267        tv.tv_nsec = (long) apr_time_usec(timeout) * 1000;
268        tvptr = &tv;
269    }
270
271    ret = kevent(pollset->p->kqueue_fd, NULL, 0, pollset->p->ke_set,
272                 pollset->p->setsize, tvptr);
273    (*num) = ret;
274    if (ret < 0) {
275        rv = apr_get_netos_error();
276    }
277    else if (ret == 0) {
278        rv = APR_TIMEUP;
279    }
280    else {
281        for (i = 0, j = 0; i < ret; i++) {
282            fd = (((pfd_elem_t*)(pollset->p->ke_set[i].udata))->pfd);
283            if ((pollset->flags & APR_POLLSET_WAKEABLE) &&
284                fd.desc_type == APR_POLL_FILE &&
285                fd.desc.f == pollset->wakeup_pipe[0]) {
286                apr_pollset_drain_wakeup_pipe(pollset);
287                rv = APR_EINTR;
288            }
289            else {
290                pollset->p->result_set[j] = fd;
291                pollset->p->result_set[j].rtnevents =
292                        get_kqueue_revent(pollset->p->ke_set[i].filter,
293                                          pollset->p->ke_set[i].flags);
294                j++;
295            }
296        }
297        if ((*num = j)) { /* any event besides wakeup pipe? */
298            rv = APR_SUCCESS;
299            if (descriptors) {
300                *descriptors = pollset->p->result_set;
301            }
302        }
303    }
304
305
306    pollset_lock_rings();
307
308    /* Shift all PFDs in the Dead Ring to the Free Ring */
309    APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring),
310                    pfd_elem_t, link);
311
312    pollset_unlock_rings();
313
314    return rv;
315}
316
317static apr_pollset_provider_t impl = {
318    impl_pollset_create,
319    impl_pollset_add,
320    impl_pollset_remove,
321    impl_pollset_poll,
322    impl_pollset_cleanup,
323    "kqueue"
324};
325
326apr_pollset_provider_t *apr_pollset_provider_kqueue = &impl;
327
328static apr_status_t cb_cleanup(void *b_)
329{
330    apr_pollcb_t *pollcb = (apr_pollcb_t *) b_;
331    close(pollcb->fd);
332    return APR_SUCCESS;
333}
334
335static apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb,
336                                       apr_uint32_t size,
337                                       apr_pool_t *p,
338                                       apr_uint32_t flags)
339{
340    int fd;
341
342    fd = kqueue();
343    if (fd < 0) {
344        return apr_get_netos_error();
345    }
346
347    {
348        int flags;
349        apr_status_t rv;
350
351        if ((flags = fcntl(fd, F_GETFD)) == -1) {
352            rv = errno;
353            close(fd);
354            pollcb->fd = -1;
355            return rv;
356        }
357
358        flags |= FD_CLOEXEC;
359        if (fcntl(fd, F_SETFD, flags) == -1) {
360            rv = errno;
361            close(fd);
362            pollcb->fd = -1;
363            return rv;
364        }
365    }
366
367    pollcb->fd = fd;
368    pollcb->pollset.ke = (struct kevent *)apr_pcalloc(p, 2 * size * sizeof(struct kevent));
369    apr_pool_cleanup_register(p, pollcb, cb_cleanup, apr_pool_cleanup_null);
370
371    return APR_SUCCESS;
372}
373
374static apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb,
375                                    apr_pollfd_t *descriptor)
376{
377    apr_os_sock_t fd;
378    struct kevent ev;
379    apr_status_t rv = APR_SUCCESS;
380
381    if (descriptor->desc_type == APR_POLL_SOCKET) {
382        fd = descriptor->desc.s->socketdes;
383    }
384    else {
385        fd = descriptor->desc.f->filedes;
386    }
387
388    if (descriptor->reqevents & APR_POLLIN) {
389        EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, descriptor);
390
391        if (kevent(pollcb->fd, &ev, 1, NULL, 0, NULL) == -1) {
392            rv = apr_get_netos_error();
393        }
394    }
395
396    if (descriptor->reqevents & APR_POLLOUT && rv == APR_SUCCESS) {
397        EV_SET(&ev, fd, EVFILT_WRITE, EV_ADD, 0, 0, descriptor);
398
399        if (kevent(pollcb->fd, &ev, 1, NULL, 0, NULL) == -1) {
400            rv = apr_get_netos_error();
401        }
402    }
403
404    return rv;
405}
406
407static apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb,
408                                       apr_pollfd_t *descriptor)
409{
410    apr_status_t rv;
411    struct kevent ev;
412    apr_os_sock_t fd;
413
414    if (descriptor->desc_type == APR_POLL_SOCKET) {
415        fd = descriptor->desc.s->socketdes;
416    }
417    else {
418        fd = descriptor->desc.f->filedes;
419    }
420
421    rv = APR_NOTFOUND; /* unless at least one of the specified conditions is */
422    if (descriptor->reqevents & APR_POLLIN) {
423        EV_SET(&ev, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
424
425        if (kevent(pollcb->fd, &ev, 1, NULL, 0, NULL) != -1) {
426            rv = APR_SUCCESS;
427        }
428    }
429
430    if (descriptor->reqevents & APR_POLLOUT) {
431        EV_SET(&ev, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
432
433        if (kevent(pollcb->fd, &ev, 1, NULL, 0, NULL) != -1) {
434            rv = APR_SUCCESS;
435        }
436    }
437
438    return rv;
439}
440
441
442static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb,
443                                     apr_interval_time_t timeout,
444                                     apr_pollcb_cb_t func,
445                                     void *baton)
446{
447    int ret, i;
448    struct timespec tv, *tvptr;
449    apr_status_t rv = APR_SUCCESS;
450
451    if (timeout < 0) {
452        tvptr = NULL;
453    }
454    else {
455        tv.tv_sec = (long) apr_time_sec(timeout);
456        tv.tv_nsec = (long) apr_time_usec(timeout) * 1000;
457        tvptr = &tv;
458    }
459
460    ret = kevent(pollcb->fd, NULL, 0, pollcb->pollset.ke, 2 * pollcb->nalloc,
461                 tvptr);
462
463    if (ret < 0) {
464        rv = apr_get_netos_error();
465    }
466    else if (ret == 0) {
467        rv = APR_TIMEUP;
468    }
469    else {
470        for (i = 0; i < ret; i++) {
471            apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.ke[i].udata);
472
473            pollfd->rtnevents = get_kqueue_revent(pollcb->pollset.ke[i].filter,
474                                                  pollcb->pollset.ke[i].flags);
475
476            rv = func(baton, pollfd);
477
478            if (rv) {
479                return rv;
480            }
481        }
482    }
483
484    return rv;
485}
486
487static apr_pollcb_provider_t impl_cb = {
488    impl_pollcb_create,
489    impl_pollcb_add,
490    impl_pollcb_remove,
491    impl_pollcb_poll,
492    "kqueue"
493};
494
495apr_pollcb_provider_t *apr_pollcb_provider_kqueue = &impl_cb;
496
497#endif /* HAVE_KQUEUE */
498