/* linux_event.c — FreeBSD stable/10, revision 293606 */
1/*- 2 * Copyright (c) 2007 Roman Divacky 3 * Copyright (c) 2014 Dmitry Chagin 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_event.c 293606 2016-01-09 18:23:34Z dchagin $"); 30 31#include "opt_compat.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/imgact.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/capability.h> 41#include <sys/types.h> 42#include <sys/file.h> 43#include <sys/filedesc.h> 44#include <sys/errno.h> 45#include <sys/event.h> 46#include <sys/poll.h> 47#include <sys/proc.h> 48#include <sys/selinfo.h> 49#include <sys/sx.h> 50#include <sys/syscallsubr.h> 51#include <sys/timespec.h> 52 53#ifdef COMPAT_LINUX32 54#include <machine/../linux32/linux.h> 55#include <machine/../linux32/linux32_proto.h> 56#else 57#include <machine/../linux/linux.h> 58#include <machine/../linux/linux_proto.h> 59#endif 60 61#include <compat/linux/linux_emul.h> 62#include <compat/linux/linux_event.h> 63#include <compat/linux/linux_file.h> 64#include <compat/linux/linux_util.h> 65 66/* 67 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 68 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 69 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 70 * data verbatuim. Therefore we allocate 64-bit memory block to pass 71 * user supplied data for every file descriptor. 
72 */ 73 74typedef uint64_t epoll_udata_t; 75 76struct epoll_emuldata { 77 uint32_t fdc; /* epoll udata max index */ 78 epoll_udata_t udata[1]; /* epoll user data vector */ 79}; 80 81#define EPOLL_DEF_SZ 16 82#define EPOLL_SIZE(fdn) \ 83 (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 84 85struct epoll_event { 86 uint32_t events; 87 epoll_udata_t data; 88} 89#if defined(__amd64__) 90__attribute__((packed)) 91#endif 92; 93 94#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 95 96static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 97static int epoll_to_kevent(struct thread *td, struct file *epfp, 98 int fd, struct epoll_event *l_event, int *kev_flags, 99 struct kevent *kevent, int *nkevents); 100static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 101static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 102static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 103static int epoll_delete_event(struct thread *td, struct file *epfp, 104 int fd, int filter); 105static int epoll_delete_all_events(struct thread *td, struct file *epfp, 106 int fd); 107 108struct epoll_copyin_args { 109 struct kevent *changelist; 110}; 111 112struct epoll_copyout_args { 113 struct epoll_event *leventlist; 114 struct proc *p; 115 uint32_t count; 116 int error; 117}; 118 119/* eventfd */ 120typedef uint64_t eventfd_t; 121 122static fo_rdwr_t eventfd_read; 123static fo_rdwr_t eventfd_write; 124static fo_truncate_t eventfd_truncate; 125static fo_ioctl_t eventfd_ioctl; 126static fo_poll_t eventfd_poll; 127static fo_kqfilter_t eventfd_kqfilter; 128static fo_stat_t eventfd_stat; 129static fo_close_t eventfd_close; 130 131static struct fileops eventfdops = { 132 .fo_read = eventfd_read, 133 .fo_write = eventfd_write, 134 .fo_truncate = eventfd_truncate, 135 .fo_ioctl = eventfd_ioctl, 136 .fo_poll = eventfd_poll, 137 .fo_kqfilter = eventfd_kqfilter, 138 .fo_stat = eventfd_stat, 139 
.fo_close = eventfd_close, 140 .fo_chmod = invfo_chmod, 141 .fo_chown = invfo_chown, 142 .fo_sendfile = invfo_sendfile, 143 .fo_flags = DFLAG_PASSABLE 144}; 145 146static void filt_eventfddetach(struct knote *kn); 147static int filt_eventfdread(struct knote *kn, long hint); 148static int filt_eventfdwrite(struct knote *kn, long hint); 149 150static struct filterops eventfd_rfiltops = { 151 .f_isfd = 1, 152 .f_detach = filt_eventfddetach, 153 .f_event = filt_eventfdread 154}; 155static struct filterops eventfd_wfiltops = { 156 .f_isfd = 1, 157 .f_detach = filt_eventfddetach, 158 .f_event = filt_eventfdwrite 159}; 160 161struct eventfd { 162 eventfd_t efd_count; 163 uint32_t efd_flags; 164 struct selinfo efd_sel; 165 struct mtx efd_lock; 166}; 167 168static int eventfd_create(struct thread *td, uint32_t initval, int flags); 169 170 171static void 172epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 173{ 174 struct linux_pemuldata *pem; 175 struct epoll_emuldata *emd; 176 struct proc *p; 177 178 p = td->td_proc; 179 180 pem = pem_find(p); 181 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 182 183 LINUX_PEM_XLOCK(pem); 184 if (pem->epoll == NULL) { 185 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 186 emd->fdc = fd; 187 pem->epoll = emd; 188 } else { 189 emd = pem->epoll; 190 if (fd > emd->fdc) { 191 emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 192 emd->fdc = fd; 193 pem->epoll = emd; 194 } 195 } 196 emd->udata[fd] = udata; 197 LINUX_PEM_XUNLOCK(pem); 198} 199 200static int 201epoll_create_common(struct thread *td, int flags) 202{ 203 int error; 204 205 error = kern_kqueue(td, flags); 206 if (error) 207 return (error); 208 209 epoll_fd_install(td, EPOLL_DEF_SZ, 0); 210 211 return (0); 212} 213 214int 215linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 216{ 217 218 /* 219 * args->size is unused. Linux just tests it 220 * and then forgets it as well. 
221 */ 222 if (args->size <= 0) 223 return (EINVAL); 224 225 return (epoll_create_common(td, 0)); 226} 227 228int 229linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 230{ 231 int flags; 232 233 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 234 return (EINVAL); 235 236 flags = 0; 237 if ((args->flags & LINUX_O_CLOEXEC) != 0) 238 flags |= O_CLOEXEC; 239 240 return (epoll_create_common(td, flags)); 241} 242 243/* Structure converting function from epoll to kevent. */ 244static int 245epoll_to_kevent(struct thread *td, struct file *epfp, 246 int fd, struct epoll_event *l_event, int *kev_flags, 247 struct kevent *kevent, int *nkevents) 248{ 249 uint32_t levents = l_event->events; 250 struct linux_pemuldata *pem; 251 struct proc *p; 252 253 /* flags related to how event is registered */ 254 if ((levents & LINUX_EPOLLONESHOT) != 0) 255 *kev_flags |= EV_ONESHOT; 256 if ((levents & LINUX_EPOLLET) != 0) 257 *kev_flags |= EV_CLEAR; 258 if ((levents & LINUX_EPOLLERR) != 0) 259 *kev_flags |= EV_ERROR; 260 if ((levents & LINUX_EPOLLRDHUP) != 0) 261 *kev_flags |= EV_EOF; 262 263 /* flags related to what event is registered */ 264 if ((levents & LINUX_EPOLL_EVRD) != 0) { 265 EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 266 ++(*nkevents); 267 } 268 if ((levents & LINUX_EPOLL_EVWR) != 0) { 269 EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 270 ++(*nkevents); 271 } 272 273 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 274 p = td->td_proc; 275 276 pem = pem_find(p); 277 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 278 KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 279 280 LINUX_PEM_XLOCK(pem); 281 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 282 pem->flags |= LINUX_XUNSUP_EPOLL; 283 LINUX_PEM_XUNLOCK(pem); 284 linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 285 levents); 286 } else 287 LINUX_PEM_XUNLOCK(pem); 288 return (EINVAL); 289 } 290 291 return (0); 292} 293 294/* 295 * Structure converting 
function from kevent to epoll. In a case 296 * this is called on error in registration we store the error in 297 * event->data and pick it up later in linux_epoll_ctl(). 298 */ 299static void 300kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 301{ 302 303 if ((kevent->flags & EV_ERROR) != 0) { 304 l_event->events = LINUX_EPOLLERR; 305 return; 306 } 307 308 switch (kevent->filter) { 309 case EVFILT_READ: 310 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 311 if ((kevent->flags & EV_EOF) != 0) 312 l_event->events |= LINUX_EPOLLRDHUP; 313 break; 314 case EVFILT_WRITE: 315 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 316 break; 317 } 318} 319 320/* 321 * Copyout callback used by kevent. This converts kevent 322 * events to epoll events and copies them back to the 323 * userspace. This is also called on error on registering 324 * of the filter. 325 */ 326static int 327epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 328{ 329 struct epoll_copyout_args *args; 330 struct linux_pemuldata *pem; 331 struct epoll_emuldata *emd; 332 struct epoll_event *eep; 333 int error, fd, i; 334 335 args = (struct epoll_copyout_args*) arg; 336 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 337 338 pem = pem_find(args->p); 339 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 340 LINUX_PEM_SLOCK(pem); 341 emd = pem->epoll; 342 KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 343 344 for (i = 0; i < count; i++) { 345 kevent_to_epoll(&kevp[i], &eep[i]); 346 347 fd = kevp[i].ident; 348 KASSERT(fd <= emd->fdc, ("epoll user data vector" 349 " is too small.\n")); 350 eep[i].data = emd->udata[fd]; 351 } 352 LINUX_PEM_SUNLOCK(pem); 353 354 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 355 if (error == 0) { 356 args->leventlist += count; 357 args->count += count; 358 } else if (args->error == 0) 359 args->error = error; 360 361 free(eep, M_EPOLL); 362 return (error); 363} 364 365/* 366 * 
Copyin callback used by kevent. This copies already 367 * converted filters from kernel memory to the kevent 368 * internal kernel memory. Hence the memcpy instead of 369 * copyin. 370 */ 371static int 372epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 373{ 374 struct epoll_copyin_args *args; 375 376 args = (struct epoll_copyin_args*) arg; 377 378 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 379 args->changelist += count; 380 381 return (0); 382} 383 384/* 385 * Load epoll filter, convert it to kevent filter 386 * and load it into kevent subsystem. 387 */ 388int 389linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 390{ 391 struct file *epfp, *fp; 392 struct epoll_copyin_args ciargs; 393 struct kevent kev[2]; 394 struct kevent_copyops k_ops = { &ciargs, 395 NULL, 396 epoll_kev_copyin}; 397 struct epoll_event le; 398 cap_rights_t rights; 399 int kev_flags; 400 int nchanges = 0; 401 int error; 402 403 if (args->op != LINUX_EPOLL_CTL_DEL) { 404 error = copyin(args->event, &le, sizeof(le)); 405 if (error != 0) 406 return (error); 407 } 408 409 error = fget(td, args->epfd, 410 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 411 if (error != 0) 412 return (error); 413 if (epfp->f_type != DTYPE_KQUEUE) 414 goto leave1; 415 416 /* Protect user data vector from incorrectly supplied fd. 
*/ 417 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 418 if (error != 0) 419 goto leave1; 420 421 /* Linux disallows spying on himself */ 422 if (epfp == fp) { 423 error = EINVAL; 424 goto leave0; 425 } 426 427 ciargs.changelist = kev; 428 429 switch (args->op) { 430 case LINUX_EPOLL_CTL_MOD: 431 /* 432 * We don't memorize which events were set for this FD 433 * on this level, so just delete all we could have set: 434 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 435 */ 436 error = epoll_delete_all_events(td, epfp, args->fd); 437 if (error) 438 goto leave0; 439 /* FALLTHROUGH */ 440 441 case LINUX_EPOLL_CTL_ADD: 442 kev_flags = EV_ADD | EV_ENABLE; 443 break; 444 445 case LINUX_EPOLL_CTL_DEL: 446 /* CTL_DEL means unregister this fd with this epoll */ 447 error = epoll_delete_all_events(td, epfp, args->fd); 448 goto leave0; 449 450 default: 451 error = EINVAL; 452 goto leave0; 453 } 454 455 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 456 kev, &nchanges); 457 if (error) 458 goto leave0; 459 460 epoll_fd_install(td, args->fd, le.data); 461 462 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 463 464leave0: 465 fdrop(fp, td); 466 467leave1: 468 fdrop(epfp, td); 469 return (error); 470} 471 472/* 473 * Wait for a filter to be triggered on the epoll file descriptor. 
474 */ 475static int 476linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 477 int maxevents, int timeout, sigset_t *uset) 478{ 479 struct file *epfp; 480 struct timespec ts, *tsp; 481 cap_rights_t rights; 482 struct epoll_copyout_args coargs; 483 struct kevent_copyops k_ops = { &coargs, 484 epoll_kev_copyout, 485 NULL}; 486 int error; 487 488 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 489 return (EINVAL); 490 491 if (uset != NULL) { 492 error = kern_sigprocmask(td, SIG_SETMASK, uset, 493 &td->td_oldsigmask, 0); 494 if (error != 0) 495 return (error); 496 td->td_pflags |= TDP_OLDMASK; 497 /* 498 * Make sure that ast() is called on return to 499 * usermode and TDP_OLDMASK is cleared, restoring old 500 * sigmask. 501 */ 502 thread_lock(td); 503 td->td_flags |= TDF_ASTPENDING; 504 thread_unlock(td); 505 } 506 507 error = fget(td, epfd, 508 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 509 if (error != 0) 510 return (error); 511 512 coargs.leventlist = events; 513 coargs.p = td->td_proc; 514 coargs.count = 0; 515 coargs.error = 0; 516 517 if (timeout != -1) { 518 if (timeout < 0) { 519 error = EINVAL; 520 goto leave; 521 } 522 /* Convert from milliseconds to timespec. */ 523 ts.tv_sec = timeout / 1000; 524 ts.tv_nsec = (timeout % 1000) * 1000000; 525 tsp = &ts; 526 } else { 527 tsp = NULL; 528 } 529 530 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 531 if (error == 0 && coargs.error != 0) 532 error = coargs.error; 533 534 /* 535 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 536 * Maybe we should translate that but I don't think it matters at all. 
537 */ 538 if (error == 0) 539 td->td_retval[0] = coargs.count; 540leave: 541 fdrop(epfp, td); 542 return (error); 543} 544 545int 546linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 547{ 548 549 return (linux_epoll_wait_common(td, args->epfd, args->events, 550 args->maxevents, args->timeout, NULL)); 551} 552 553int 554linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 555{ 556 sigset_t mask, *pmask; 557 l_sigset_t lmask; 558 int error; 559 560 if (args->mask != NULL) { 561 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 562 if (error != 0) 563 return (error); 564 linux_to_bsd_sigset(&lmask, &mask); 565 pmask = &mask; 566 } else 567 pmask = NULL; 568 return (linux_epoll_wait_common(td, args->epfd, args->events, 569 args->maxevents, args->timeout, pmask)); 570} 571 572static int 573epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 574{ 575 struct epoll_copyin_args ciargs; 576 struct kevent kev; 577 struct kevent_copyops k_ops = { &ciargs, 578 NULL, 579 epoll_kev_copyin}; 580 int error; 581 582 ciargs.changelist = &kev; 583 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 584 585 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 586 587 /* 588 * here we ignore ENONT, because we don't keep track of events here 589 */ 590 if (error == ENOENT) 591 error = 0; 592 return (error); 593} 594 595static int 596epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 597{ 598 int error1, error2; 599 600 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 601 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 602 603 /* report any errors we got */ 604 return (error1 == 0 ? 
error2 : error1); 605} 606 607static int 608eventfd_create(struct thread *td, uint32_t initval, int flags) 609{ 610 struct filedesc *fdp; 611 struct eventfd *efd; 612 struct file *fp; 613 int fflags, fd, error; 614 615 fflags = 0; 616 if ((flags & LINUX_O_CLOEXEC) != 0) 617 fflags |= O_CLOEXEC; 618 619 fdp = td->td_proc->p_fd; 620 error = falloc(td, &fp, &fd, fflags); 621 if (error) 622 return (error); 623 624 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 625 efd->efd_flags = flags; 626 efd->efd_count = initval; 627 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 628 629 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 630 631 fflags = FREAD | FWRITE; 632 if ((flags & LINUX_O_NONBLOCK) != 0) 633 fflags |= FNONBLOCK; 634 635 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 636 fdrop(fp, td); 637 638 td->td_retval[0] = fd; 639 return (error); 640} 641 642int 643linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 644{ 645 646 return (eventfd_create(td, args->initval, 0)); 647} 648 649int 650linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 651{ 652 653 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 654 return (EINVAL); 655 656 return (eventfd_create(td, args->initval, args->flags)); 657} 658 659static int 660eventfd_close(struct file *fp, struct thread *td) 661{ 662 struct eventfd *efd; 663 664 efd = fp->f_data; 665 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 666 return (EBADF); 667 668 seldrain(&efd->efd_sel); 669 knlist_destroy(&efd->efd_sel.si_note); 670 671 fp->f_ops = &badfileops; 672 mtx_destroy(&efd->efd_lock); 673 free(efd, M_EPOLL); 674 675 return (0); 676} 677 678static int 679eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 680 int flags, struct thread *td) 681{ 682 struct eventfd *efd; 683 eventfd_t count; 684 int error; 685 686 efd = fp->f_data; 687 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 688 return (EBADF); 689 690 if 
(uio->uio_resid < sizeof(eventfd_t)) 691 return (EINVAL); 692 693 error = 0; 694 mtx_lock(&efd->efd_lock); 695retry: 696 if (efd->efd_count == 0) { 697 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 698 mtx_unlock(&efd->efd_lock); 699 return (EAGAIN); 700 } 701 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 702 if (error == 0) 703 goto retry; 704 } 705 if (error == 0) { 706 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 707 count = 1; 708 --efd->efd_count; 709 } else { 710 count = efd->efd_count; 711 efd->efd_count = 0; 712 } 713 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 714 selwakeup(&efd->efd_sel); 715 wakeup(&efd->efd_count); 716 mtx_unlock(&efd->efd_lock); 717 error = uiomove(&count, sizeof(eventfd_t), uio); 718 } else 719 mtx_unlock(&efd->efd_lock); 720 721 return (error); 722} 723 724static int 725eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 726 int flags, struct thread *td) 727{ 728 struct eventfd *efd; 729 eventfd_t count; 730 int error; 731 732 efd = fp->f_data; 733 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 734 return (EBADF); 735 736 if (uio->uio_resid < sizeof(eventfd_t)) 737 return (EINVAL); 738 739 error = uiomove(&count, sizeof(eventfd_t), uio); 740 if (error) 741 return (error); 742 if (count == UINT64_MAX) 743 return (EINVAL); 744 745 mtx_lock(&efd->efd_lock); 746retry: 747 if (UINT64_MAX - efd->efd_count <= count) { 748 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 749 mtx_unlock(&efd->efd_lock); 750 return (EAGAIN); 751 } 752 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 753 PCATCH, "lefdwr", 0); 754 if (error == 0) 755 goto retry; 756 } 757 if (error == 0) { 758 efd->efd_count += count; 759 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 760 selwakeup(&efd->efd_sel); 761 wakeup(&efd->efd_count); 762 } 763 mtx_unlock(&efd->efd_lock); 764 765 return (error); 766} 767 768static int 769eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 770 struct thread *td) 771{ 772 
struct eventfd *efd; 773 int revents = 0; 774 775 efd = fp->f_data; 776 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 777 return (POLLERR); 778 779 mtx_lock(&efd->efd_lock); 780 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 781 revents |= events & (POLLIN|POLLRDNORM); 782 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 783 revents |= events & (POLLOUT|POLLWRNORM); 784 if (revents == 0) 785 selrecord(td, &efd->efd_sel); 786 mtx_unlock(&efd->efd_lock); 787 788 return (revents); 789} 790 791/*ARGSUSED*/ 792static int 793eventfd_kqfilter(struct file *fp, struct knote *kn) 794{ 795 struct eventfd *efd; 796 797 efd = fp->f_data; 798 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 799 return (EINVAL); 800 801 mtx_lock(&efd->efd_lock); 802 switch (kn->kn_filter) { 803 case EVFILT_READ: 804 kn->kn_fop = &eventfd_rfiltops; 805 break; 806 case EVFILT_WRITE: 807 kn->kn_fop = &eventfd_wfiltops; 808 break; 809 default: 810 mtx_unlock(&efd->efd_lock); 811 return (EINVAL); 812 } 813 814 kn->kn_hook = efd; 815 knlist_add(&efd->efd_sel.si_note, kn, 1); 816 mtx_unlock(&efd->efd_lock); 817 818 return (0); 819} 820 821static void 822filt_eventfddetach(struct knote *kn) 823{ 824 struct eventfd *efd = kn->kn_hook; 825 826 mtx_lock(&efd->efd_lock); 827 knlist_remove(&efd->efd_sel.si_note, kn, 1); 828 mtx_unlock(&efd->efd_lock); 829} 830 831/*ARGSUSED*/ 832static int 833filt_eventfdread(struct knote *kn, long hint) 834{ 835 struct eventfd *efd = kn->kn_hook; 836 int ret; 837 838 mtx_assert(&efd->efd_lock, MA_OWNED); 839 ret = (efd->efd_count > 0); 840 841 return (ret); 842} 843 844/*ARGSUSED*/ 845static int 846filt_eventfdwrite(struct knote *kn, long hint) 847{ 848 struct eventfd *efd = kn->kn_hook; 849 int ret; 850 851 mtx_assert(&efd->efd_lock, MA_OWNED); 852 ret = (UINT64_MAX - 1 > efd->efd_count); 853 854 return (ret); 855} 856 857/*ARGSUSED*/ 858static int 859eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 860 struct 
thread *td) 861{ 862 863 return (ENXIO); 864} 865 866/*ARGSUSED*/ 867static int 868eventfd_ioctl(struct file *fp, u_long cmd, void *data, 869 struct ucred *active_cred, struct thread *td) 870{ 871 872 return (ENXIO); 873} 874 875/*ARGSUSED*/ 876static int 877eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 878 struct thread *td) 879{ 880 881 return (ENXIO); 882} 883