1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr.h"
18#include "apr_poll.h"
19#include "apr_time.h"
20#include "apr_portable.h"
21#include "apr_arch_file_io.h"
22#include "apr_arch_networkio.h"
23#include "apr_arch_poll_private.h"
24#include "apr_arch_inherit.h"
25
26#if defined(HAVE_EPOLL)
27
28static apr_int16_t get_epoll_event(apr_int16_t event)
29{
30    apr_int16_t rv = 0;
31
32    if (event & APR_POLLIN)
33        rv |= EPOLLIN;
34    if (event & APR_POLLPRI)
35        rv |= EPOLLPRI;
36    if (event & APR_POLLOUT)
37        rv |= EPOLLOUT;
38    /* APR_POLLNVAL is not handled by epoll.  EPOLLERR and EPOLLHUP are return-only */
39
40    return rv;
41}
42
43static apr_int16_t get_epoll_revent(apr_int16_t event)
44{
45    apr_int16_t rv = 0;
46
47    if (event & EPOLLIN)
48        rv |= APR_POLLIN;
49    if (event & EPOLLPRI)
50        rv |= APR_POLLPRI;
51    if (event & EPOLLOUT)
52        rv |= APR_POLLOUT;
53    if (event & EPOLLERR)
54        rv |= APR_POLLERR;
55    if (event & EPOLLHUP)
56        rv |= APR_POLLHUP;
57    /* APR_POLLNVAL is not handled by epoll. */
58
59    return rv;
60}
61
62struct apr_pollset_private_t
63{
64    int epoll_fd;
65    struct epoll_event *pollset;
66    apr_pollfd_t *result_set;
67#if APR_HAS_THREADS
68    /* A thread mutex to protect operations on the rings */
69    apr_thread_mutex_t *ring_lock;
70#endif
71    /* A ring containing all of the pollfd_t that are active */
72    APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring;
73    /* A ring of pollfd_t that have been used, and then _remove()'d */
74    APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring;
75    /* A ring of pollfd_t where rings that have been _remove()`ed but
76        might still be inside a _poll() */
77    APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring;
78};
79
80static apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset)
81{
82    close(pollset->p->epoll_fd);
83    return APR_SUCCESS;
84}
85
86
87static apr_status_t impl_pollset_create(apr_pollset_t *pollset,
88                                        apr_uint32_t size,
89                                        apr_pool_t *p,
90                                        apr_uint32_t flags)
91{
92    apr_status_t rv;
93    int fd;
94
95#ifdef HAVE_EPOLL_CREATE1
96    fd = epoll_create1(EPOLL_CLOEXEC);
97#else
98    fd = epoll_create(size);
99#endif
100    if (fd < 0) {
101        pollset->p = NULL;
102        return apr_get_netos_error();
103    }
104
105#ifndef HAVE_EPOLL_CREATE1
106    {
107        int flags;
108
109        if ((flags = fcntl(fd, F_GETFD)) == -1)
110            return errno;
111
112        flags |= FD_CLOEXEC;
113        if (fcntl(fd, F_SETFD, flags) == -1)
114            return errno;
115    }
116#endif
117
118    pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t));
119#if APR_HAS_THREADS
120    if ((flags & APR_POLLSET_THREADSAFE) &&
121        !(flags & APR_POLLSET_NOCOPY) &&
122        ((rv = apr_thread_mutex_create(&pollset->p->ring_lock,
123                                       APR_THREAD_MUTEX_DEFAULT,
124                                       p)) != APR_SUCCESS)) {
125        pollset->p = NULL;
126        return rv;
127    }
128#else
129    if (flags & APR_POLLSET_THREADSAFE) {
130        pollset->p = NULL;
131        return APR_ENOTIMPL;
132    }
133#endif
134    pollset->p->epoll_fd = fd;
135    pollset->p->pollset = apr_palloc(p, size * sizeof(struct epoll_event));
136    pollset->p->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t));
137
138    if (!(flags & APR_POLLSET_NOCOPY)) {
139        APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link);
140        APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link);
141        APR_RING_INIT(&pollset->p->dead_ring, pfd_elem_t, link);
142    }
143    return APR_SUCCESS;
144}
145
146static apr_status_t impl_pollset_add(apr_pollset_t *pollset,
147                                     const apr_pollfd_t *descriptor)
148{
149    struct epoll_event ev = {0};
150    int ret = -1;
151    pfd_elem_t *elem = NULL;
152    apr_status_t rv = APR_SUCCESS;
153
154    ev.events = get_epoll_event(descriptor->reqevents);
155
156    if (pollset->flags & APR_POLLSET_NOCOPY) {
157        ev.data.ptr = (void *)descriptor;
158    }
159    else {
160        pollset_lock_rings();
161
162        if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) {
163            elem = APR_RING_FIRST(&(pollset->p->free_ring));
164            APR_RING_REMOVE(elem, link);
165        }
166        else {
167            elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t));
168            APR_RING_ELEM_INIT(elem, link);
169        }
170        elem->pfd = *descriptor;
171        ev.data.ptr = elem;
172    }
173    if (descriptor->desc_type == APR_POLL_SOCKET) {
174        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD,
175                        descriptor->desc.s->socketdes, &ev);
176    }
177    else {
178        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD,
179                        descriptor->desc.f->filedes, &ev);
180    }
181
182    if (0 != ret) {
183        rv = apr_get_netos_error();
184    }
185
186    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
187        if (rv != APR_SUCCESS) {
188            APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link);
189        }
190        else {
191            APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link);
192        }
193        pollset_unlock_rings();
194    }
195
196    return rv;
197}
198
199static apr_status_t impl_pollset_remove(apr_pollset_t *pollset,
200                                        const apr_pollfd_t *descriptor)
201{
202    pfd_elem_t *ep;
203    apr_status_t rv = APR_SUCCESS;
204    struct epoll_event ev = {0}; /* ignored, but must be passed with
205                                  * kernel < 2.6.9
206                                  */
207    int ret = -1;
208
209    if (descriptor->desc_type == APR_POLL_SOCKET) {
210        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL,
211                        descriptor->desc.s->socketdes, &ev);
212    }
213    else {
214        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL,
215                        descriptor->desc.f->filedes, &ev);
216    }
217    if (ret < 0) {
218        rv = APR_NOTFOUND;
219    }
220
221    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
222        pollset_lock_rings();
223
224        for (ep = APR_RING_FIRST(&(pollset->p->query_ring));
225             ep != APR_RING_SENTINEL(&(pollset->p->query_ring),
226                                     pfd_elem_t, link);
227             ep = APR_RING_NEXT(ep, link)) {
228
229            if (descriptor->desc.s == ep->pfd.desc.s) {
230                APR_RING_REMOVE(ep, link);
231                APR_RING_INSERT_TAIL(&(pollset->p->dead_ring),
232                                     ep, pfd_elem_t, link);
233                break;
234            }
235        }
236
237        pollset_unlock_rings();
238    }
239
240    return rv;
241}
242
243static apr_status_t impl_pollset_poll(apr_pollset_t *pollset,
244                                           apr_interval_time_t timeout,
245                                           apr_int32_t *num,
246                                           const apr_pollfd_t **descriptors)
247{
248    int ret, i, j;
249    apr_status_t rv = APR_SUCCESS;
250    apr_pollfd_t *fdptr;
251
252    if (timeout > 0) {
253        timeout /= 1000;
254    }
255
256    ret = epoll_wait(pollset->p->epoll_fd, pollset->p->pollset, pollset->nalloc,
257                     timeout);
258    (*num) = ret;
259
260    if (ret < 0) {
261        rv = apr_get_netos_error();
262    }
263    else if (ret == 0) {
264        rv = APR_TIMEUP;
265    }
266    else {
267        for (i = 0, j = 0; i < ret; i++) {
268            if (pollset->flags & APR_POLLSET_NOCOPY) {
269                fdptr = (apr_pollfd_t *)(pollset->p->pollset[i].data.ptr);
270            }
271            else {
272                fdptr = &(((pfd_elem_t *) (pollset->p->pollset[i].data.ptr))->pfd);
273            }
274            /* Check if the polled descriptor is our
275             * wakeup pipe. In that case do not put it result set.
276             */
277            if ((pollset->flags & APR_POLLSET_WAKEABLE) &&
278                fdptr->desc_type == APR_POLL_FILE &&
279                fdptr->desc.f == pollset->wakeup_pipe[0]) {
280                apr_pollset_drain_wakeup_pipe(pollset);
281                rv = APR_EINTR;
282            }
283            else {
284                pollset->p->result_set[j] = *fdptr;
285                pollset->p->result_set[j].rtnevents =
286                    get_epoll_revent(pollset->p->pollset[i].events);
287                j++;
288            }
289        }
290        if (((*num) = j)) { /* any event besides wakeup pipe? */
291            rv = APR_SUCCESS;
292
293            if (descriptors) {
294                *descriptors = pollset->p->result_set;
295            }
296        }
297    }
298
299    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
300        pollset_lock_rings();
301
302        /* Shift all PFDs in the Dead Ring to the Free Ring */
303        APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring), pfd_elem_t, link);
304
305        pollset_unlock_rings();
306    }
307
308    return rv;
309}
310
311static apr_pollset_provider_t impl = {
312    impl_pollset_create,
313    impl_pollset_add,
314    impl_pollset_remove,
315    impl_pollset_poll,
316    impl_pollset_cleanup,
317    "epoll"
318};
319
320apr_pollset_provider_t *apr_pollset_provider_epoll = &impl;
321
322static apr_status_t cb_cleanup(void *p_)
323{
324    apr_pollcb_t *pollcb = (apr_pollcb_t *) p_;
325    close(pollcb->fd);
326    return APR_SUCCESS;
327}
328
329static apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb,
330                                       apr_uint32_t size,
331                                       apr_pool_t *p,
332                                       apr_uint32_t flags)
333{
334    int fd;
335
336#ifdef HAVE_EPOLL_CREATE1
337    fd = epoll_create1(EPOLL_CLOEXEC);
338#else
339    fd = epoll_create(size);
340#endif
341
342    if (fd < 0) {
343        return apr_get_netos_error();
344    }
345
346#ifndef HAVE_EPOLL_CREATE1
347    {
348        int flags;
349
350        if ((flags = fcntl(fd, F_GETFD)) == -1)
351            return errno;
352
353        flags |= FD_CLOEXEC;
354        if (fcntl(fd, F_SETFD, flags) == -1)
355            return errno;
356    }
357#endif
358
359    pollcb->fd = fd;
360    pollcb->pollset.epoll = apr_palloc(p, size * sizeof(struct epoll_event));
361    apr_pool_cleanup_register(p, pollcb, cb_cleanup, apr_pool_cleanup_null);
362
363    return APR_SUCCESS;
364}
365
366static apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb,
367                                    apr_pollfd_t *descriptor)
368{
369    struct epoll_event ev;
370    int ret;
371
372    ev.events = get_epoll_event(descriptor->reqevents);
373    ev.data.ptr = (void *)descriptor;
374
375    if (descriptor->desc_type == APR_POLL_SOCKET) {
376        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD,
377                        descriptor->desc.s->socketdes, &ev);
378    }
379    else {
380        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD,
381                        descriptor->desc.f->filedes, &ev);
382    }
383
384    if (ret == -1) {
385        return apr_get_netos_error();
386    }
387
388    return APR_SUCCESS;
389}
390
391static apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb,
392                                       apr_pollfd_t *descriptor)
393{
394    apr_status_t rv = APR_SUCCESS;
395    struct epoll_event ev = {0}; /* ignored, but must be passed with
396                                  * kernel < 2.6.9
397                                  */
398    int ret = -1;
399
400    if (descriptor->desc_type == APR_POLL_SOCKET) {
401        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL,
402                        descriptor->desc.s->socketdes, &ev);
403    }
404    else {
405        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL,
406                        descriptor->desc.f->filedes, &ev);
407    }
408
409    if (ret < 0) {
410        rv = APR_NOTFOUND;
411    }
412
413    return rv;
414}
415
416
417static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb,
418                                     apr_interval_time_t timeout,
419                                     apr_pollcb_cb_t func,
420                                     void *baton)
421{
422    int ret, i;
423    apr_status_t rv = APR_SUCCESS;
424
425    if (timeout > 0) {
426        timeout /= 1000;
427    }
428
429    ret = epoll_wait(pollcb->fd, pollcb->pollset.epoll, pollcb->nalloc,
430                     timeout);
431    if (ret < 0) {
432        rv = apr_get_netos_error();
433    }
434    else if (ret == 0) {
435        rv = APR_TIMEUP;
436    }
437    else {
438        for (i = 0; i < ret; i++) {
439            apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.epoll[i].data.ptr);
440            pollfd->rtnevents = get_epoll_revent(pollcb->pollset.epoll[i].events);
441
442            rv = func(baton, pollfd);
443            if (rv) {
444                return rv;
445            }
446        }
447    }
448
449    return rv;
450}
451
452static apr_pollcb_provider_t impl_cb = {
453    impl_pollcb_create,
454    impl_pollcb_add,
455    impl_pollcb_remove,
456    impl_pollcb_poll,
457    "epoll"
458};
459
460apr_pollcb_provider_t *apr_pollcb_provider_epoll = &impl_cb;
461
462#endif /* HAVE_EPOLL */
463