1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "apr.h" 18#include "apr_poll.h" 19#include "apr_time.h" 20#include "apr_portable.h" 21#include "apr_arch_file_io.h" 22#include "apr_arch_networkio.h" 23#include "apr_arch_poll_private.h" 24#include "apr_arch_inherit.h" 25 26#if defined(HAVE_EPOLL) 27 28static apr_int16_t get_epoll_event(apr_int16_t event) 29{ 30 apr_int16_t rv = 0; 31 32 if (event & APR_POLLIN) 33 rv |= EPOLLIN; 34 if (event & APR_POLLPRI) 35 rv |= EPOLLPRI; 36 if (event & APR_POLLOUT) 37 rv |= EPOLLOUT; 38 /* APR_POLLNVAL is not handled by epoll. EPOLLERR and EPOLLHUP are return-only */ 39 40 return rv; 41} 42 43static apr_int16_t get_epoll_revent(apr_int16_t event) 44{ 45 apr_int16_t rv = 0; 46 47 if (event & EPOLLIN) 48 rv |= APR_POLLIN; 49 if (event & EPOLLPRI) 50 rv |= APR_POLLPRI; 51 if (event & EPOLLOUT) 52 rv |= APR_POLLOUT; 53 if (event & EPOLLERR) 54 rv |= APR_POLLERR; 55 if (event & EPOLLHUP) 56 rv |= APR_POLLHUP; 57 /* APR_POLLNVAL is not handled by epoll. */ 58 59 return rv; 60} 61 62struct apr_pollset_private_t 63{ 64 int epoll_fd; 65 struct epoll_event *pollset; 66 apr_pollfd_t *result_set; 67#if APR_HAS_THREADS 68 /* A thread mutex to protect operations on the rings */ 69 apr_thread_mutex_t *ring_lock; 70#endif 71 /* A ring containing all of the pollfd_t that are active */ 72 APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring; 73 /* A ring of pollfd_t that have been used, and then _remove()'d */ 74 APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring; 75 /* A ring of pollfd_t where rings that have been _remove()`ed but 76 might still be inside a _poll() */ 77 APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring; 78}; 79 80static apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset) 81{ 82 close(pollset->p->epoll_fd); 83 return APR_SUCCESS; 84} 85 86 87static apr_status_t impl_pollset_create(apr_pollset_t *pollset, 88 apr_uint32_t size, 89 apr_pool_t *p, 90 apr_uint32_t flags) 91{ 92 apr_status_t rv; 93 int fd; 94 95#ifdef HAVE_EPOLL_CREATE1 96 fd = epoll_create1(EPOLL_CLOEXEC); 97#else 98 fd = epoll_create(size); 99#endif 100 if (fd < 0) { 101 pollset->p = NULL; 102 return apr_get_netos_error(); 103 } 104 105#ifndef HAVE_EPOLL_CREATE1 106 { 107 int flags; 108 109 if ((flags = fcntl(fd, F_GETFD)) == -1) 110 return errno; 111 112 flags |= FD_CLOEXEC; 113 if (fcntl(fd, F_SETFD, flags) == -1) 114 return errno; 115 } 116#endif 117 118 pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t)); 119#if APR_HAS_THREADS 120 if ((flags & APR_POLLSET_THREADSAFE) && 121 !(flags & APR_POLLSET_NOCOPY) && 122 ((rv = apr_thread_mutex_create(&pollset->p->ring_lock, 123 APR_THREAD_MUTEX_DEFAULT, 124 p)) != APR_SUCCESS)) { 125 pollset->p = NULL; 126 return rv; 127 } 128#else 129 if (flags & APR_POLLSET_THREADSAFE) { 130 pollset->p = NULL; 131 return APR_ENOTIMPL; 132 } 133#endif 134 pollset->p->epoll_fd = fd; 135 pollset->p->pollset = apr_palloc(p, size * sizeof(struct epoll_event)); 136 pollset->p->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t)); 137 138 if (!(flags & APR_POLLSET_NOCOPY)) { 139 APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link); 140 APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link); 141 APR_RING_INIT(&pollset->p->dead_ring, pfd_elem_t, link); 142 } 143 return APR_SUCCESS; 144} 145 146static apr_status_t impl_pollset_add(apr_pollset_t *pollset, 147 const apr_pollfd_t *descriptor) 148{ 149 struct epoll_event ev = {0}; 150 int ret = -1; 151 pfd_elem_t *elem = NULL; 152 apr_status_t rv = APR_SUCCESS; 153 154 ev.events = get_epoll_event(descriptor->reqevents); 155 156 if (pollset->flags & APR_POLLSET_NOCOPY) { 157 ev.data.ptr = (void *)descriptor; 158 } 159 else { 160 pollset_lock_rings(); 161 162 if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) { 163 elem = APR_RING_FIRST(&(pollset->p->free_ring)); 164 APR_RING_REMOVE(elem, link); 165 } 166 else { 167 elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t)); 168 APR_RING_ELEM_INIT(elem, link); 169 } 170 elem->pfd = *descriptor; 171 ev.data.ptr = elem; 172 } 173 if (descriptor->desc_type == APR_POLL_SOCKET) { 174 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD, 175 descriptor->desc.s->socketdes, &ev); 176 } 177 else { 178 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD, 179 descriptor->desc.f->filedes, &ev); 180 } 181 182 if (0 != ret) { 183 rv = apr_get_netos_error(); 184 } 185 186 if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 187 if (rv != APR_SUCCESS) { 188 APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link); 189 } 190 else { 191 APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link); 192 } 193 pollset_unlock_rings(); 194 } 195 196 return rv; 197} 198 199static apr_status_t impl_pollset_remove(apr_pollset_t *pollset, 200 const apr_pollfd_t *descriptor) 201{ 202 pfd_elem_t *ep; 203 apr_status_t rv = APR_SUCCESS; 204 struct epoll_event ev = {0}; /* ignored, but must be passed with 205 * kernel < 2.6.9 206 */ 207 int ret = -1; 208 209 if (descriptor->desc_type == APR_POLL_SOCKET) { 210 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL, 211 descriptor->desc.s->socketdes, &ev); 212 } 213 else { 214 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL, 215 descriptor->desc.f->filedes, &ev); 216 } 217 if (ret < 0) { 218 rv = APR_NOTFOUND; 219 } 220 221 if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 222 pollset_lock_rings(); 223 224 for (ep = APR_RING_FIRST(&(pollset->p->query_ring)); 225 ep != APR_RING_SENTINEL(&(pollset->p->query_ring), 226 pfd_elem_t, link); 227 ep = APR_RING_NEXT(ep, link)) { 228 229 if (descriptor->desc.s == ep->pfd.desc.s) { 230 APR_RING_REMOVE(ep, link); 231 APR_RING_INSERT_TAIL(&(pollset->p->dead_ring), 232 ep, pfd_elem_t, link); 233 break; 234 } 235 } 236 237 pollset_unlock_rings(); 238 } 239 240 return rv; 241} 242 243static apr_status_t impl_pollset_poll(apr_pollset_t *pollset, 244 apr_interval_time_t timeout, 245 apr_int32_t *num, 246 const apr_pollfd_t **descriptors) 247{ 248 int ret, i, j; 249 apr_status_t rv = APR_SUCCESS; 250 apr_pollfd_t *fdptr; 251 252 if (timeout > 0) { 253 timeout /= 1000; 254 } 255 256 ret = epoll_wait(pollset->p->epoll_fd, pollset->p->pollset, pollset->nalloc, 257 timeout); 258 (*num) = ret; 259 260 if (ret < 0) { 261 rv = apr_get_netos_error(); 262 } 263 else if (ret == 0) { 264 rv = APR_TIMEUP; 265 } 266 else { 267 for (i = 0, j = 0; i < ret; i++) { 268 if (pollset->flags & APR_POLLSET_NOCOPY) { 269 fdptr = (apr_pollfd_t *)(pollset->p->pollset[i].data.ptr); 270 } 271 else { 272 fdptr = &(((pfd_elem_t *) (pollset->p->pollset[i].data.ptr))->pfd); 273 } 274 /* Check if the polled descriptor is our 275 * wakeup pipe. In that case do not put it result set. 276 */ 277 if ((pollset->flags & APR_POLLSET_WAKEABLE) && 278 fdptr->desc_type == APR_POLL_FILE && 279 fdptr->desc.f == pollset->wakeup_pipe[0]) { 280 apr_pollset_drain_wakeup_pipe(pollset); 281 rv = APR_EINTR; 282 } 283 else { 284 pollset->p->result_set[j] = *fdptr; 285 pollset->p->result_set[j].rtnevents = 286 get_epoll_revent(pollset->p->pollset[i].events); 287 j++; 288 } 289 } 290 if (((*num) = j)) { /* any event besides wakeup pipe? */ 291 rv = APR_SUCCESS; 292 293 if (descriptors) { 294 *descriptors = pollset->p->result_set; 295 } 296 } 297 } 298 299 if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 300 pollset_lock_rings(); 301 302 /* Shift all PFDs in the Dead Ring to the Free Ring */ 303 APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring), pfd_elem_t, link); 304 305 pollset_unlock_rings(); 306 } 307 308 return rv; 309} 310 311static apr_pollset_provider_t impl = { 312 impl_pollset_create, 313 impl_pollset_add, 314 impl_pollset_remove, 315 impl_pollset_poll, 316 impl_pollset_cleanup, 317 "epoll" 318}; 319 320apr_pollset_provider_t *apr_pollset_provider_epoll = &impl; 321 322static apr_status_t cb_cleanup(void *p_) 323{ 324 apr_pollcb_t *pollcb = (apr_pollcb_t *) p_; 325 close(pollcb->fd); 326 return APR_SUCCESS; 327} 328 329static apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb, 330 apr_uint32_t size, 331 apr_pool_t *p, 332 apr_uint32_t flags) 333{ 334 int fd; 335 336#ifdef HAVE_EPOLL_CREATE1 337 fd = epoll_create1(EPOLL_CLOEXEC); 338#else 339 fd = epoll_create(size); 340#endif 341 342 if (fd < 0) { 343 return apr_get_netos_error(); 344 } 345 346#ifndef HAVE_EPOLL_CREATE1 347 { 348 int flags; 349 350 if ((flags = fcntl(fd, F_GETFD)) == -1) 351 return errno; 352 353 flags |= FD_CLOEXEC; 354 if (fcntl(fd, F_SETFD, flags) == -1) 355 return errno; 356 } 357#endif 358 359 pollcb->fd = fd; 360 pollcb->pollset.epoll = apr_palloc(p, size * sizeof(struct epoll_event)); 361 apr_pool_cleanup_register(p, pollcb, cb_cleanup, apr_pool_cleanup_null); 362 363 return APR_SUCCESS; 364} 365 366static apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb, 367 apr_pollfd_t *descriptor) 368{ 369 struct epoll_event ev; 370 int ret; 371 372 ev.events = get_epoll_event(descriptor->reqevents); 373 ev.data.ptr = (void *)descriptor; 374 375 if (descriptor->desc_type == APR_POLL_SOCKET) { 376 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD, 377 descriptor->desc.s->socketdes, &ev); 378 } 379 else { 380 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD, 381 descriptor->desc.f->filedes, &ev); 382 } 383 384 if (ret == -1) { 385 return apr_get_netos_error(); 386 } 387 388 return APR_SUCCESS; 389} 390 391static apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb, 392 apr_pollfd_t *descriptor) 393{ 394 apr_status_t rv = APR_SUCCESS; 395 struct epoll_event ev = {0}; /* ignored, but must be passed with 396 * kernel < 2.6.9 397 */ 398 int ret = -1; 399 400 if (descriptor->desc_type == APR_POLL_SOCKET) { 401 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL, 402 descriptor->desc.s->socketdes, &ev); 403 } 404 else { 405 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL, 406 descriptor->desc.f->filedes, &ev); 407 } 408 409 if (ret < 0) { 410 rv = APR_NOTFOUND; 411 } 412 413 return rv; 414} 415 416 417static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb, 418 apr_interval_time_t timeout, 419 apr_pollcb_cb_t func, 420 void *baton) 421{ 422 int ret, i; 423 apr_status_t rv = APR_SUCCESS; 424 425 if (timeout > 0) { 426 timeout /= 1000; 427 } 428 429 ret = epoll_wait(pollcb->fd, pollcb->pollset.epoll, pollcb->nalloc, 430 timeout); 431 if (ret < 0) { 432 rv = apr_get_netos_error(); 433 } 434 else if (ret == 0) { 435 rv = APR_TIMEUP; 436 } 437 else { 438 for (i = 0; i < ret; i++) { 439 apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.epoll[i].data.ptr); 440 pollfd->rtnevents = get_epoll_revent(pollcb->pollset.epoll[i].events); 441 442 rv = func(baton, pollfd); 443 if (rv) { 444 return rv; 445 } 446 } 447 } 448 449 return rv; 450} 451 452static apr_pollcb_provider_t impl_cb = { 453 impl_pollcb_create, 454 impl_pollcb_add, 455 impl_pollcb_remove, 456 impl_pollcb_poll, 457 "epoll" 458}; 459 460apr_pollcb_provider_t *apr_pollcb_provider_epoll = &impl_cb; 461 462#endif /* HAVE_EPOLL */ 463