/* linux_event.c revision 297502 */
1/*- 2 * Copyright (c) 2007 Roman Divacky 3 * Copyright (c) 2014 Dmitry Chagin 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_event.c 297502 2016-04-02 06:15:14Z dchagin $"); 30 31#include "opt_compat.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/imgact.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/capability.h> 41#include <sys/types.h> 42#include <sys/file.h> 43#include <sys/filedesc.h> 44#include <sys/filio.h> 45#include <sys/errno.h> 46#include <sys/event.h> 47#include <sys/poll.h> 48#include <sys/proc.h> 49#include <sys/selinfo.h> 50#include <sys/sx.h> 51#include <sys/syscallsubr.h> 52#include <sys/timespec.h> 53 54#ifdef COMPAT_LINUX32 55#include <machine/../linux32/linux.h> 56#include <machine/../linux32/linux32_proto.h> 57#else 58#include <machine/../linux/linux.h> 59#include <machine/../linux/linux_proto.h> 60#endif 61 62#include <compat/linux/linux_emul.h> 63#include <compat/linux/linux_event.h> 64#include <compat/linux/linux_file.h> 65#include <compat/linux/linux_util.h> 66 67/* 68 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 69 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 70 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 71 * data verbatuim. Therefore we allocate 64-bit memory block to pass 72 * user supplied data for every file descriptor. 
 */

typedef uint64_t	epoll_udata_t;

/*
 * Per-process store of the 64-bit epoll user data, indexed directly by
 * file descriptor.  Grown on demand by epoll_fd_install().
 * (M_EPOLL malloc type is declared elsewhere — not visible in this file.)
 */
struct epoll_emuldata {
	uint32_t fdc;			/* epoll udata max index */
	epoll_udata_t udata[1];		/* epoll user data vector */
};

#define	EPOLL_DEF_SZ		16
#define	EPOLL_SIZE(fdn)			\
	(sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t))

/* Linux ABI layout of struct epoll_event; packed on amd64 to match Linux. */
struct epoll_event {
	uint32_t	events;
	epoll_udata_t	data;
}
#if defined(__amd64__)
__attribute__((packed))
#endif
;

#define	LINUX_MAX_EVENTS	(INT_MAX / sizeof(struct epoll_event))

static void	epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
static int	epoll_to_kevent(struct thread *td, struct file *epfp,
		    int fd, struct epoll_event *l_event, int *kev_flags,
		    struct kevent *kevent, int *nkevents);
static void	kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
static int	epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
static int	epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
static int	epoll_delete_event(struct thread *td, struct file *epfp,
		    int fd, int filter);
static int	epoll_delete_all_events(struct thread *td, struct file *epfp,
		    int fd);

/* State for the epoll_kev_copyin() kevent callback. */
struct epoll_copyin_args {
	struct kevent	*changelist;
};

/* State for the epoll_kev_copyout() kevent callback. */
struct epoll_copyout_args {
	struct epoll_event	*leventlist;	/* user space destination */
	struct proc		*p;		/* calling process */
	uint32_t		count;		/* events copied out so far */
	int			error;		/* first copyout() error */
};

/* eventfd */
typedef uint64_t	eventfd_t;

static fo_rdwr_t	eventfd_read;
static fo_rdwr_t	eventfd_write;
static fo_truncate_t	eventfd_truncate;
static fo_ioctl_t	eventfd_ioctl;
static fo_poll_t	eventfd_poll;
static fo_kqfilter_t	eventfd_kqfilter;
static fo_stat_t	eventfd_stat;
static fo_close_t	eventfd_close;

static struct fileops eventfdops = {
	.fo_read = eventfd_read,
	.fo_write = eventfd_write,
	.fo_truncate = eventfd_truncate,
	.fo_ioctl = eventfd_ioctl,
	.fo_poll = eventfd_poll,
	.fo_kqfilter = eventfd_kqfilter,
	.fo_stat = eventfd_stat,
	.fo_close = eventfd_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_eventfddetach(struct knote *kn);
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);

static struct filterops eventfd_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_eventfddetach,
	.f_event = filt_eventfdread
};
static struct filterops eventfd_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_eventfddetach,
	.f_event = filt_eventfdwrite
};

/* Private state of an eventfd descriptor; efd_lock protects the counter. */
struct eventfd {
	eventfd_t	efd_count;
	uint32_t	efd_flags;
	struct selinfo	efd_sel;
	struct mtx	efd_lock;
};

static int	eventfd_create(struct thread *td, uint32_t initval, int flags);


/*
 * Remember the 64-bit epoll user data for descriptor 'fd' in the
 * per-process emulation data, growing the vector if 'fd' exceeds its
 * current capacity.  Runs under the exclusive pemuldata lock.
 */
static void
epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata)
{
	struct linux_pemuldata *pem;
	struct epoll_emuldata *emd;
	struct proc *p;

	p = td->td_proc;

	pem = pem_find(p);
	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));

	LINUX_PEM_XLOCK(pem);
	if (pem->epoll == NULL) {
		emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
		emd->fdc = fd;
		pem->epoll = emd;
	} else {
		emd = pem->epoll;
		if (fd > emd->fdc) {
			/* M_WAITOK realloc cannot fail. */
			emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
			emd->fdc = fd;
			pem->epoll = emd;
		}
	}
	emd->udata[fd] = udata;
	LINUX_PEM_XUNLOCK(pem);
}

/*
 * Common code for epoll_create(2)/epoll_create1(2): an epoll descriptor
 * is emulated by a native kqueue.
 */
static int
epoll_create_common(struct thread *td, int flags)
{
	int error;

	error = kern_kqueue(td, flags);
	if (error)
		return (error);

	/* Pre-size the udata vector to a reasonable default. */
	epoll_fd_install(td, EPOLL_DEF_SZ, 0);

	return (0);
}

int
linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
{

	/*
	 * args->size is unused. Linux just tests it
	 * and then forgets it as well.
	 */
	if (args->size <= 0)
		return (EINVAL);

	return (epoll_create_common(td, 0));
}

int
linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
{
	int flags;

	if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
		return (EINVAL);

	flags = 0;
	if ((args->flags & LINUX_O_CLOEXEC) != 0)
		flags |= O_CLOEXEC;

	return (epoll_create_common(td, flags));
}

/* Structure converting function from epoll to kevent. */
static int
epoll_to_kevent(struct thread *td, struct file *epfp,
    int fd, struct epoll_event *l_event, int *kev_flags,
    struct kevent *kevent, int *nkevents)
{
	uint32_t levents = l_event->events;
	struct linux_pemuldata *pem;
	struct proc *p;

	/* flags related to how event is registered */
	if ((levents & LINUX_EPOLLONESHOT) != 0)
		*kev_flags |= EV_ONESHOT;
	if ((levents & LINUX_EPOLLET) != 0)
		*kev_flags |= EV_CLEAR;
	if ((levents & LINUX_EPOLLERR) != 0)
		*kev_flags |= EV_ERROR;
	if ((levents & LINUX_EPOLLRDHUP) != 0)
		*kev_flags |= EV_EOF;

	/* flags related to what event is registered */
	if ((levents & LINUX_EPOLL_EVRD) != 0) {
		EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0);
		++(*nkevents);
	}
	if ((levents & LINUX_EPOLL_EVWR) != 0) {
		EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
		++(*nkevents);
	}

	/*
	 * Reject events we cannot emulate, warning once per process so
	 * the log is not flooded.
	 */
	if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
		p = td->td_proc;

		pem = pem_find(p);
		KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
		KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n"));

		LINUX_PEM_XLOCK(pem);
		if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
			pem->flags |= LINUX_XUNSUP_EPOLL;
			LINUX_PEM_XUNLOCK(pem);
			linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n",
			    levents);
		} else
			LINUX_PEM_XUNLOCK(pem);
		return (EINVAL);
	}

	return (0);
}

/*
 * Structure converting
 * function from kevent to epoll. In a case
 * this is called on error in registration we store the error in
 * event->data and pick it up later in linux_epoll_ctl().
 */
static void
kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
{

	if ((kevent->flags & EV_ERROR) != 0) {
		l_event->events = LINUX_EPOLLERR;
		return;
	}

	switch (kevent->filter) {
	case EVFILT_READ:
		l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
		/* EV_EOF on a read filter maps to the Linux half-close bit. */
		if ((kevent->flags & EV_EOF) != 0)
			l_event->events |= LINUX_EPOLLRDHUP;
		break;
	case EVFILT_WRITE:
		l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
		break;
	}
}

/*
 * Copyout callback used by kevent. This converts kevent
 * events to epoll events and copies them back to the
 * userspace. This is also called on error on registering
 * of the filter.
 */
static int
epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
{
	struct epoll_copyout_args *args;
	struct linux_pemuldata *pem;
	struct epoll_emuldata *emd;
	struct epoll_event *eep;
	int error, fd, i;

	args = (struct epoll_copyout_args*) arg;
	eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);

	pem = pem_find(args->p);
	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
	/* Shared lock: we only read the udata vector here. */
	LINUX_PEM_SLOCK(pem);
	emd = pem->epoll;
	KASSERT(emd != NULL, ("epoll proc epolldata not found.\n"));

	for (i = 0; i < count; i++) {
		kevent_to_epoll(&kevp[i], &eep[i]);

		fd = kevp[i].ident;
		KASSERT(fd <= emd->fdc, ("epoll user data vector"
		    " is too small.\n"));
		/* Return the 64-bit udata recorded at registration time. */
		eep[i].data = emd->udata[fd];
	}
	LINUX_PEM_SUNLOCK(pem);

	error = copyout(eep, args->leventlist, count * sizeof(*eep));
	if (error == 0) {
		args->leventlist += count;
		args->count += count;
	} else if (args->error == 0)
		args->error = error;	/* remember only the first failure */

	free(eep, M_EPOLL);
	return (error);
}

/*
 *
Copyin callback used by kevent. This copies already 368 * converted filters from kernel memory to the kevent 369 * internal kernel memory. Hence the memcpy instead of 370 * copyin. 371 */ 372static int 373epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 374{ 375 struct epoll_copyin_args *args; 376 377 args = (struct epoll_copyin_args*) arg; 378 379 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 380 args->changelist += count; 381 382 return (0); 383} 384 385/* 386 * Load epoll filter, convert it to kevent filter 387 * and load it into kevent subsystem. 388 */ 389int 390linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 391{ 392 struct file *epfp, *fp; 393 struct epoll_copyin_args ciargs; 394 struct kevent kev[2]; 395 struct kevent_copyops k_ops = { &ciargs, 396 NULL, 397 epoll_kev_copyin}; 398 struct epoll_event le; 399 cap_rights_t rights; 400 int kev_flags; 401 int nchanges = 0; 402 int error; 403 404 if (args->op != LINUX_EPOLL_CTL_DEL) { 405 error = copyin(args->event, &le, sizeof(le)); 406 if (error != 0) 407 return (error); 408 } 409 410 error = fget(td, args->epfd, 411 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 412 if (error != 0) 413 return (error); 414 if (epfp->f_type != DTYPE_KQUEUE) 415 goto leave1; 416 417 /* Protect user data vector from incorrectly supplied fd. 
*/ 418 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 419 if (error != 0) 420 goto leave1; 421 422 /* Linux disallows spying on himself */ 423 if (epfp == fp) { 424 error = EINVAL; 425 goto leave0; 426 } 427 428 ciargs.changelist = kev; 429 430 switch (args->op) { 431 case LINUX_EPOLL_CTL_MOD: 432 /* 433 * We don't memorize which events were set for this FD 434 * on this level, so just delete all we could have set: 435 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 436 */ 437 error = epoll_delete_all_events(td, epfp, args->fd); 438 if (error) 439 goto leave0; 440 /* FALLTHROUGH */ 441 442 case LINUX_EPOLL_CTL_ADD: 443 kev_flags = EV_ADD | EV_ENABLE; 444 break; 445 446 case LINUX_EPOLL_CTL_DEL: 447 /* CTL_DEL means unregister this fd with this epoll */ 448 error = epoll_delete_all_events(td, epfp, args->fd); 449 goto leave0; 450 451 default: 452 error = EINVAL; 453 goto leave0; 454 } 455 456 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 457 kev, &nchanges); 458 if (error) 459 goto leave0; 460 461 epoll_fd_install(td, args->fd, le.data); 462 463 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 464 465leave0: 466 fdrop(fp, td); 467 468leave1: 469 fdrop(epfp, td); 470 return (error); 471} 472 473/* 474 * Wait for a filter to be triggered on the epoll file descriptor. 
475 */ 476static int 477linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 478 int maxevents, int timeout, sigset_t *uset) 479{ 480 struct file *epfp; 481 struct timespec ts, *tsp; 482 cap_rights_t rights; 483 struct epoll_copyout_args coargs; 484 struct kevent_copyops k_ops = { &coargs, 485 epoll_kev_copyout, 486 NULL}; 487 int error; 488 489 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 490 return (EINVAL); 491 492 if (uset != NULL) { 493 error = kern_sigprocmask(td, SIG_SETMASK, uset, 494 &td->td_oldsigmask, 0); 495 if (error != 0) 496 return (error); 497 td->td_pflags |= TDP_OLDMASK; 498 /* 499 * Make sure that ast() is called on return to 500 * usermode and TDP_OLDMASK is cleared, restoring old 501 * sigmask. 502 */ 503 thread_lock(td); 504 td->td_flags |= TDF_ASTPENDING; 505 thread_unlock(td); 506 } 507 508 error = fget(td, epfd, 509 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 510 if (error != 0) 511 return (error); 512 513 coargs.leventlist = events; 514 coargs.p = td->td_proc; 515 coargs.count = 0; 516 coargs.error = 0; 517 518 if (timeout != -1) { 519 if (timeout < 0) { 520 error = EINVAL; 521 goto leave; 522 } 523 /* Convert from milliseconds to timespec. */ 524 ts.tv_sec = timeout / 1000; 525 ts.tv_nsec = (timeout % 1000) * 1000000; 526 tsp = &ts; 527 } else { 528 tsp = NULL; 529 } 530 531 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 532 if (error == 0 && coargs.error != 0) 533 error = coargs.error; 534 535 /* 536 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 537 * Maybe we should translate that but I don't think it matters at all. 
538 */ 539 if (error == 0) 540 td->td_retval[0] = coargs.count; 541leave: 542 fdrop(epfp, td); 543 return (error); 544} 545 546int 547linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 548{ 549 550 return (linux_epoll_wait_common(td, args->epfd, args->events, 551 args->maxevents, args->timeout, NULL)); 552} 553 554int 555linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 556{ 557 sigset_t mask, *pmask; 558 l_sigset_t lmask; 559 int error; 560 561 if (args->mask != NULL) { 562 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 563 if (error != 0) 564 return (error); 565 linux_to_bsd_sigset(&lmask, &mask); 566 pmask = &mask; 567 } else 568 pmask = NULL; 569 return (linux_epoll_wait_common(td, args->epfd, args->events, 570 args->maxevents, args->timeout, pmask)); 571} 572 573static int 574epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 575{ 576 struct epoll_copyin_args ciargs; 577 struct kevent kev; 578 struct kevent_copyops k_ops = { &ciargs, 579 NULL, 580 epoll_kev_copyin}; 581 int error; 582 583 ciargs.changelist = &kev; 584 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 585 586 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 587 588 /* 589 * here we ignore ENONT, because we don't keep track of events here 590 */ 591 if (error == ENOENT) 592 error = 0; 593 return (error); 594} 595 596static int 597epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 598{ 599 int error1, error2; 600 601 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 602 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 603 604 /* report any errors we got */ 605 return (error1 == 0 ? 
error2 : error1); 606} 607 608static int 609eventfd_create(struct thread *td, uint32_t initval, int flags) 610{ 611 struct filedesc *fdp; 612 struct eventfd *efd; 613 struct file *fp; 614 int fflags, fd, error; 615 616 fflags = 0; 617 if ((flags & LINUX_O_CLOEXEC) != 0) 618 fflags |= O_CLOEXEC; 619 620 fdp = td->td_proc->p_fd; 621 error = falloc(td, &fp, &fd, fflags); 622 if (error) 623 return (error); 624 625 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 626 efd->efd_flags = flags; 627 efd->efd_count = initval; 628 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 629 630 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 631 632 fflags = FREAD | FWRITE; 633 if ((flags & LINUX_O_NONBLOCK) != 0) 634 fflags |= FNONBLOCK; 635 636 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 637 fdrop(fp, td); 638 639 td->td_retval[0] = fd; 640 return (error); 641} 642 643int 644linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 645{ 646 647 return (eventfd_create(td, args->initval, 0)); 648} 649 650int 651linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 652{ 653 654 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 655 return (EINVAL); 656 657 return (eventfd_create(td, args->initval, args->flags)); 658} 659 660static int 661eventfd_close(struct file *fp, struct thread *td) 662{ 663 struct eventfd *efd; 664 665 efd = fp->f_data; 666 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 667 return (EBADF); 668 669 seldrain(&efd->efd_sel); 670 knlist_destroy(&efd->efd_sel.si_note); 671 672 fp->f_ops = &badfileops; 673 mtx_destroy(&efd->efd_lock); 674 free(efd, M_EPOLL); 675 676 return (0); 677} 678 679static int 680eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 681 int flags, struct thread *td) 682{ 683 struct eventfd *efd; 684 eventfd_t count; 685 int error; 686 687 efd = fp->f_data; 688 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 689 return (EBADF); 690 691 if 
(uio->uio_resid < sizeof(eventfd_t)) 692 return (EINVAL); 693 694 error = 0; 695 mtx_lock(&efd->efd_lock); 696retry: 697 if (efd->efd_count == 0) { 698 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 699 mtx_unlock(&efd->efd_lock); 700 return (EAGAIN); 701 } 702 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 703 if (error == 0) 704 goto retry; 705 } 706 if (error == 0) { 707 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 708 count = 1; 709 --efd->efd_count; 710 } else { 711 count = efd->efd_count; 712 efd->efd_count = 0; 713 } 714 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 715 selwakeup(&efd->efd_sel); 716 wakeup(&efd->efd_count); 717 mtx_unlock(&efd->efd_lock); 718 error = uiomove(&count, sizeof(eventfd_t), uio); 719 } else 720 mtx_unlock(&efd->efd_lock); 721 722 return (error); 723} 724 725static int 726eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 727 int flags, struct thread *td) 728{ 729 struct eventfd *efd; 730 eventfd_t count; 731 int error; 732 733 efd = fp->f_data; 734 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 735 return (EBADF); 736 737 if (uio->uio_resid < sizeof(eventfd_t)) 738 return (EINVAL); 739 740 error = uiomove(&count, sizeof(eventfd_t), uio); 741 if (error) 742 return (error); 743 if (count == UINT64_MAX) 744 return (EINVAL); 745 746 mtx_lock(&efd->efd_lock); 747retry: 748 if (UINT64_MAX - efd->efd_count <= count) { 749 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 750 mtx_unlock(&efd->efd_lock); 751 return (EAGAIN); 752 } 753 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 754 PCATCH, "lefdwr", 0); 755 if (error == 0) 756 goto retry; 757 } 758 if (error == 0) { 759 efd->efd_count += count; 760 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 761 selwakeup(&efd->efd_sel); 762 wakeup(&efd->efd_count); 763 } 764 mtx_unlock(&efd->efd_lock); 765 766 return (error); 767} 768 769static int 770eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 771 struct thread *td) 772{ 773 
struct eventfd *efd; 774 int revents = 0; 775 776 efd = fp->f_data; 777 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 778 return (POLLERR); 779 780 mtx_lock(&efd->efd_lock); 781 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 782 revents |= events & (POLLIN|POLLRDNORM); 783 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 784 revents |= events & (POLLOUT|POLLWRNORM); 785 if (revents == 0) 786 selrecord(td, &efd->efd_sel); 787 mtx_unlock(&efd->efd_lock); 788 789 return (revents); 790} 791 792/*ARGSUSED*/ 793static int 794eventfd_kqfilter(struct file *fp, struct knote *kn) 795{ 796 struct eventfd *efd; 797 798 efd = fp->f_data; 799 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 800 return (EINVAL); 801 802 mtx_lock(&efd->efd_lock); 803 switch (kn->kn_filter) { 804 case EVFILT_READ: 805 kn->kn_fop = &eventfd_rfiltops; 806 break; 807 case EVFILT_WRITE: 808 kn->kn_fop = &eventfd_wfiltops; 809 break; 810 default: 811 mtx_unlock(&efd->efd_lock); 812 return (EINVAL); 813 } 814 815 kn->kn_hook = efd; 816 knlist_add(&efd->efd_sel.si_note, kn, 1); 817 mtx_unlock(&efd->efd_lock); 818 819 return (0); 820} 821 822static void 823filt_eventfddetach(struct knote *kn) 824{ 825 struct eventfd *efd = kn->kn_hook; 826 827 mtx_lock(&efd->efd_lock); 828 knlist_remove(&efd->efd_sel.si_note, kn, 1); 829 mtx_unlock(&efd->efd_lock); 830} 831 832/*ARGSUSED*/ 833static int 834filt_eventfdread(struct knote *kn, long hint) 835{ 836 struct eventfd *efd = kn->kn_hook; 837 int ret; 838 839 mtx_assert(&efd->efd_lock, MA_OWNED); 840 ret = (efd->efd_count > 0); 841 842 return (ret); 843} 844 845/*ARGSUSED*/ 846static int 847filt_eventfdwrite(struct knote *kn, long hint) 848{ 849 struct eventfd *efd = kn->kn_hook; 850 int ret; 851 852 mtx_assert(&efd->efd_lock, MA_OWNED); 853 ret = (UINT64_MAX - 1 > efd->efd_count); 854 855 return (ret); 856} 857 858/*ARGSUSED*/ 859static int 860eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 861 struct 
thread *td) 862{ 863 864 return (ENXIO); 865} 866 867/*ARGSUSED*/ 868static int 869eventfd_ioctl(struct file *fp, u_long cmd, void *data, 870 struct ucred *active_cred, struct thread *td) 871{ 872 struct eventfd *efd; 873 874 efd = fp->f_data; 875 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 876 return (EINVAL); 877 878 switch (cmd) 879 { 880 case FIONBIO: 881 if (*(int *)data) 882 efd->efd_flags |= LINUX_O_NONBLOCK; 883 else 884 efd->efd_flags &= ~LINUX_O_NONBLOCK; 885 case FIOASYNC: 886 return (0); 887 default: 888 return (ENXIO); 889 } 890} 891 892/*ARGSUSED*/ 893static int 894eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 895 struct thread *td) 896{ 897 898 return (ENXIO); 899} 900