/* linux_event.c revision 293585 */
1/*- 2 * Copyright (c) 2007 Roman Divacky 3 * Copyright (c) 2014 Dmitry Chagin 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_event.c 293585 2016-01-09 17:45:02Z dchagin $"); 30 31#include "opt_compat.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/imgact.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/capability.h> 41#include <sys/types.h> 42#include <sys/file.h> 43#include <sys/filedesc.h> 44#include <sys/errno.h> 45#include <sys/event.h> 46#include <sys/poll.h> 47#include <sys/proc.h> 48#include <sys/selinfo.h> 49#include <sys/sx.h> 50#include <sys/syscallsubr.h> 51#include <sys/timespec.h> 52 53#ifdef COMPAT_LINUX32 54#include <machine/../linux32/linux.h> 55#include <machine/../linux32/linux32_proto.h> 56#else 57#include <machine/../linux/linux.h> 58#include <machine/../linux/linux_proto.h> 59#endif 60 61#include <compat/linux/linux_emul.h> 62#include <compat/linux/linux_event.h> 63#include <compat/linux/linux_file.h> 64#include <compat/linux/linux_util.h> 65 66/* 67 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 68 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 69 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 70 * data verbatuim. Therefore we allocate 64-bit memory block to pass 71 * user supplied data for every file descriptor. 
72 */ 73 74typedef uint64_t epoll_udata_t; 75 76struct epoll_emuldata { 77 uint32_t fdc; /* epoll udata max index */ 78 epoll_udata_t udata[1]; /* epoll user data vector */ 79}; 80 81#define EPOLL_DEF_SZ 16 82#define EPOLL_SIZE(fdn) \ 83 (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 84 85struct epoll_event { 86 uint32_t events; 87 epoll_udata_t data; 88} 89#if defined(__amd64__) 90__attribute__((packed)) 91#endif 92; 93 94#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 95 96static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 97static int epoll_to_kevent(struct thread *td, struct file *epfp, 98 int fd, struct epoll_event *l_event, int *kev_flags, 99 struct kevent *kevent, int *nkevents); 100static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 101static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 102static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 103static int epoll_delete_event(struct thread *td, struct file *epfp, 104 int fd, int filter); 105static int epoll_delete_all_events(struct thread *td, struct file *epfp, 106 int fd); 107 108struct epoll_copyin_args { 109 struct kevent *changelist; 110}; 111 112struct epoll_copyout_args { 113 struct epoll_event *leventlist; 114 struct proc *p; 115 uint32_t count; 116 int error; 117}; 118 119/* eventfd */ 120typedef uint64_t eventfd_t; 121 122static fo_rdwr_t eventfd_read; 123static fo_rdwr_t eventfd_write; 124static fo_truncate_t eventfd_truncate; 125static fo_ioctl_t eventfd_ioctl; 126static fo_poll_t eventfd_poll; 127static fo_kqfilter_t eventfd_kqfilter; 128static fo_stat_t eventfd_stat; 129static fo_close_t eventfd_close; 130 131static struct fileops eventfdops = { 132 .fo_read = eventfd_read, 133 .fo_write = eventfd_write, 134 .fo_truncate = eventfd_truncate, 135 .fo_ioctl = eventfd_ioctl, 136 .fo_poll = eventfd_poll, 137 .fo_kqfilter = eventfd_kqfilter, 138 .fo_stat = eventfd_stat, 139 
.fo_close = eventfd_close, 140 .fo_chmod = invfo_chmod, 141 .fo_chown = invfo_chown, 142 .fo_sendfile = invfo_sendfile, 143 .fo_flags = DFLAG_PASSABLE 144}; 145 146static void filt_eventfddetach(struct knote *kn); 147static int filt_eventfdread(struct knote *kn, long hint); 148static int filt_eventfdwrite(struct knote *kn, long hint); 149 150static struct filterops eventfd_rfiltops = { 151 .f_isfd = 1, 152 .f_detach = filt_eventfddetach, 153 .f_event = filt_eventfdread 154}; 155static struct filterops eventfd_wfiltops = { 156 .f_isfd = 1, 157 .f_detach = filt_eventfddetach, 158 .f_event = filt_eventfdwrite 159}; 160 161struct eventfd { 162 eventfd_t efd_count; 163 uint32_t efd_flags; 164 struct selinfo efd_sel; 165 struct mtx efd_lock; 166}; 167 168static int eventfd_create(struct thread *td, uint32_t initval, int flags); 169 170 171static void 172epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 173{ 174 struct linux_pemuldata *pem; 175 struct epoll_emuldata *emd; 176 struct proc *p; 177 178 p = td->td_proc; 179 180 pem = pem_find(p); 181 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 182 183 LINUX_PEM_XLOCK(pem); 184 if (pem->epoll == NULL) { 185 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 186 emd->fdc = fd; 187 pem->epoll = emd; 188 } else { 189 emd = pem->epoll; 190 if (fd > emd->fdc) { 191 emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 192 emd->fdc = fd; 193 pem->epoll = emd; 194 } 195 } 196 emd->udata[fd] = udata; 197 LINUX_PEM_XUNLOCK(pem); 198} 199 200static int 201epoll_create_common(struct thread *td, int flags) 202{ 203 int error; 204 205 error = kern_kqueue(td, flags); 206 if (error) 207 return (error); 208 209 epoll_fd_install(td, EPOLL_DEF_SZ, 0); 210 211 return (0); 212} 213 214int 215linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 216{ 217 218 /* 219 * args->size is unused. Linux just tests it 220 * and then forgets it as well. 
221 */ 222 if (args->size <= 0) 223 return (EINVAL); 224 225 return (epoll_create_common(td, 0)); 226} 227 228int 229linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 230{ 231 int flags; 232 233 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 234 return (EINVAL); 235 236 flags = 0; 237 if ((args->flags & LINUX_O_CLOEXEC) != 0) 238 flags |= O_CLOEXEC; 239 240 return (epoll_create_common(td, flags)); 241} 242 243/* Structure converting function from epoll to kevent. */ 244static int 245epoll_to_kevent(struct thread *td, struct file *epfp, 246 int fd, struct epoll_event *l_event, int *kev_flags, 247 struct kevent *kevent, int *nkevents) 248{ 249 uint32_t levents = l_event->events; 250 struct linux_pemuldata *pem; 251 struct proc *p; 252 253 /* flags related to how event is registered */ 254 if ((levents & LINUX_EPOLLONESHOT) != 0) 255 *kev_flags |= EV_ONESHOT; 256 if ((levents & LINUX_EPOLLET) != 0) 257 *kev_flags |= EV_CLEAR; 258 if ((levents & LINUX_EPOLLERR) != 0) 259 *kev_flags |= EV_ERROR; 260 261 /* flags related to what event is registered */ 262 if ((levents & LINUX_EPOLL_EVRD) != 0) { 263 EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 264 ++(*nkevents); 265 } 266 if ((levents & LINUX_EPOLL_EVWR) != 0) { 267 EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 268 ++(*nkevents); 269 } 270 271 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 272 p = td->td_proc; 273 274 pem = pem_find(p); 275 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 276 KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 277 278 LINUX_PEM_XLOCK(pem); 279 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 280 pem->flags |= LINUX_XUNSUP_EPOLL; 281 LINUX_PEM_XUNLOCK(pem); 282 linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 283 levents); 284 } else 285 LINUX_PEM_XUNLOCK(pem); 286 return (EINVAL); 287 } 288 289 return (0); 290} 291 292/* 293 * Structure converting function from kevent to epoll. 
In a case 294 * this is called on error in registration we store the error in 295 * event->data and pick it up later in linux_epoll_ctl(). 296 */ 297static void 298kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 299{ 300 301 if ((kevent->flags & EV_ERROR) != 0) { 302 l_event->events = LINUX_EPOLLERR; 303 return; 304 } 305 306 switch (kevent->filter) { 307 case EVFILT_READ: 308 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 309 break; 310 case EVFILT_WRITE: 311 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 312 break; 313 } 314} 315 316/* 317 * Copyout callback used by kevent. This converts kevent 318 * events to epoll events and copies them back to the 319 * userspace. This is also called on error on registering 320 * of the filter. 321 */ 322static int 323epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 324{ 325 struct epoll_copyout_args *args; 326 struct linux_pemuldata *pem; 327 struct epoll_emuldata *emd; 328 struct epoll_event *eep; 329 int error, fd, i; 330 331 args = (struct epoll_copyout_args*) arg; 332 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 333 334 pem = pem_find(args->p); 335 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 336 LINUX_PEM_SLOCK(pem); 337 emd = pem->epoll; 338 KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 339 340 for (i = 0; i < count; i++) { 341 kevent_to_epoll(&kevp[i], &eep[i]); 342 343 fd = kevp[i].ident; 344 KASSERT(fd <= emd->fdc, ("epoll user data vector" 345 " is too small.\n")); 346 eep[i].data = emd->udata[fd]; 347 } 348 LINUX_PEM_SUNLOCK(pem); 349 350 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 351 if (error == 0) { 352 args->leventlist += count; 353 args->count += count; 354 } else if (args->error == 0) 355 args->error = error; 356 357 free(eep, M_EPOLL); 358 return (error); 359} 360 361/* 362 * Copyin callback used by kevent. 
This copies already 363 * converted filters from kernel memory to the kevent 364 * internal kernel memory. Hence the memcpy instead of 365 * copyin. 366 */ 367static int 368epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 369{ 370 struct epoll_copyin_args *args; 371 372 args = (struct epoll_copyin_args*) arg; 373 374 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 375 args->changelist += count; 376 377 return (0); 378} 379 380/* 381 * Load epoll filter, convert it to kevent filter 382 * and load it into kevent subsystem. 383 */ 384int 385linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 386{ 387 struct file *epfp, *fp; 388 struct epoll_copyin_args ciargs; 389 struct kevent kev[2]; 390 struct kevent_copyops k_ops = { &ciargs, 391 NULL, 392 epoll_kev_copyin}; 393 struct epoll_event le; 394 cap_rights_t rights; 395 int kev_flags; 396 int nchanges = 0; 397 int error; 398 399 if (args->op != LINUX_EPOLL_CTL_DEL) { 400 error = copyin(args->event, &le, sizeof(le)); 401 if (error != 0) 402 return (error); 403 } 404 405 error = fget(td, args->epfd, 406 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 407 if (error != 0) 408 return (error); 409 if (epfp->f_type != DTYPE_KQUEUE) 410 goto leave1; 411 412 /* Protect user data vector from incorrectly supplied fd. 
*/ 413 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 414 if (error != 0) 415 goto leave1; 416 417 /* Linux disallows spying on himself */ 418 if (epfp == fp) { 419 error = EINVAL; 420 goto leave0; 421 } 422 423 ciargs.changelist = kev; 424 425 switch (args->op) { 426 case LINUX_EPOLL_CTL_MOD: 427 /* 428 * We don't memorize which events were set for this FD 429 * on this level, so just delete all we could have set: 430 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 431 */ 432 error = epoll_delete_all_events(td, epfp, args->fd); 433 if (error) 434 goto leave0; 435 /* FALLTHROUGH */ 436 437 case LINUX_EPOLL_CTL_ADD: 438 kev_flags = EV_ADD | EV_ENABLE; 439 break; 440 441 case LINUX_EPOLL_CTL_DEL: 442 /* CTL_DEL means unregister this fd with this epoll */ 443 error = epoll_delete_all_events(td, epfp, args->fd); 444 goto leave0; 445 446 default: 447 error = EINVAL; 448 goto leave0; 449 } 450 451 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 452 kev, &nchanges); 453 if (error) 454 goto leave0; 455 456 epoll_fd_install(td, args->fd, le.data); 457 458 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 459 460leave0: 461 fdrop(fp, td); 462 463leave1: 464 fdrop(epfp, td); 465 return (error); 466} 467 468/* 469 * Wait for a filter to be triggered on the epoll file descriptor. 
470 */ 471static int 472linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 473 int maxevents, int timeout, sigset_t *uset) 474{ 475 struct file *epfp; 476 struct timespec ts, *tsp; 477 cap_rights_t rights; 478 struct epoll_copyout_args coargs; 479 struct kevent_copyops k_ops = { &coargs, 480 epoll_kev_copyout, 481 NULL}; 482 int error; 483 484 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 485 return (EINVAL); 486 487 if (uset != NULL) { 488 error = kern_sigprocmask(td, SIG_SETMASK, uset, 489 &td->td_oldsigmask, 0); 490 if (error != 0) 491 return (error); 492 td->td_pflags |= TDP_OLDMASK; 493 /* 494 * Make sure that ast() is called on return to 495 * usermode and TDP_OLDMASK is cleared, restoring old 496 * sigmask. 497 */ 498 thread_lock(td); 499 td->td_flags |= TDF_ASTPENDING; 500 thread_unlock(td); 501 } 502 503 error = fget(td, epfd, 504 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 505 if (error != 0) 506 return (error); 507 508 coargs.leventlist = events; 509 coargs.p = td->td_proc; 510 coargs.count = 0; 511 coargs.error = 0; 512 513 if (timeout != -1) { 514 if (timeout < 0) { 515 error = EINVAL; 516 goto leave; 517 } 518 /* Convert from milliseconds to timespec. */ 519 ts.tv_sec = timeout / 1000; 520 ts.tv_nsec = (timeout % 1000) * 1000000; 521 tsp = &ts; 522 } else { 523 tsp = NULL; 524 } 525 526 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 527 if (error == 0 && coargs.error != 0) 528 error = coargs.error; 529 530 /* 531 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 532 * Maybe we should translate that but I don't think it matters at all. 
533 */ 534 if (error == 0) 535 td->td_retval[0] = coargs.count; 536leave: 537 fdrop(epfp, td); 538 return (error); 539} 540 541int 542linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 543{ 544 545 return (linux_epoll_wait_common(td, args->epfd, args->events, 546 args->maxevents, args->timeout, NULL)); 547} 548 549int 550linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 551{ 552 sigset_t mask, *pmask; 553 l_sigset_t lmask; 554 int error; 555 556 if (args->mask != NULL) { 557 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 558 if (error != 0) 559 return (error); 560 linux_to_bsd_sigset(&lmask, &mask); 561 pmask = &mask; 562 } else 563 pmask = NULL; 564 return (linux_epoll_wait_common(td, args->epfd, args->events, 565 args->maxevents, args->timeout, pmask)); 566} 567 568static int 569epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 570{ 571 struct epoll_copyin_args ciargs; 572 struct kevent kev; 573 struct kevent_copyops k_ops = { &ciargs, 574 NULL, 575 epoll_kev_copyin}; 576 int error; 577 578 ciargs.changelist = &kev; 579 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 580 581 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 582 583 /* 584 * here we ignore ENONT, because we don't keep track of events here 585 */ 586 if (error == ENOENT) 587 error = 0; 588 return (error); 589} 590 591static int 592epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 593{ 594 int error1, error2; 595 596 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 597 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 598 599 /* report any errors we got */ 600 return (error1 == 0 ? 
error2 : error1); 601} 602 603static int 604eventfd_create(struct thread *td, uint32_t initval, int flags) 605{ 606 struct filedesc *fdp; 607 struct eventfd *efd; 608 struct file *fp; 609 int fflags, fd, error; 610 611 fflags = 0; 612 if ((flags & LINUX_O_CLOEXEC) != 0) 613 fflags |= O_CLOEXEC; 614 615 fdp = td->td_proc->p_fd; 616 error = falloc(td, &fp, &fd, fflags); 617 if (error) 618 return (error); 619 620 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 621 efd->efd_flags = flags; 622 efd->efd_count = initval; 623 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 624 625 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 626 627 fflags = FREAD | FWRITE; 628 if ((flags & LINUX_O_NONBLOCK) != 0) 629 fflags |= FNONBLOCK; 630 631 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 632 fdrop(fp, td); 633 634 td->td_retval[0] = fd; 635 return (error); 636} 637 638int 639linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 640{ 641 642 return (eventfd_create(td, args->initval, 0)); 643} 644 645int 646linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 647{ 648 649 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 650 return (EINVAL); 651 652 return (eventfd_create(td, args->initval, args->flags)); 653} 654 655static int 656eventfd_close(struct file *fp, struct thread *td) 657{ 658 struct eventfd *efd; 659 660 efd = fp->f_data; 661 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 662 return (EBADF); 663 664 seldrain(&efd->efd_sel); 665 knlist_destroy(&efd->efd_sel.si_note); 666 667 fp->f_ops = &badfileops; 668 mtx_destroy(&efd->efd_lock); 669 free(efd, M_EPOLL); 670 671 return (0); 672} 673 674static int 675eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 676 int flags, struct thread *td) 677{ 678 struct eventfd *efd; 679 eventfd_t count; 680 int error; 681 682 efd = fp->f_data; 683 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 684 return (EBADF); 685 686 if 
(uio->uio_resid < sizeof(eventfd_t)) 687 return (EINVAL); 688 689 error = 0; 690 mtx_lock(&efd->efd_lock); 691retry: 692 if (efd->efd_count == 0) { 693 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 694 mtx_unlock(&efd->efd_lock); 695 return (EAGAIN); 696 } 697 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 698 if (error == 0) 699 goto retry; 700 } 701 if (error == 0) { 702 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 703 count = 1; 704 --efd->efd_count; 705 } else { 706 count = efd->efd_count; 707 efd->efd_count = 0; 708 } 709 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 710 selwakeup(&efd->efd_sel); 711 wakeup(&efd->efd_count); 712 mtx_unlock(&efd->efd_lock); 713 error = uiomove(&count, sizeof(eventfd_t), uio); 714 } else 715 mtx_unlock(&efd->efd_lock); 716 717 return (error); 718} 719 720static int 721eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 722 int flags, struct thread *td) 723{ 724 struct eventfd *efd; 725 eventfd_t count; 726 int error; 727 728 efd = fp->f_data; 729 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 730 return (EBADF); 731 732 if (uio->uio_resid < sizeof(eventfd_t)) 733 return (EINVAL); 734 735 error = uiomove(&count, sizeof(eventfd_t), uio); 736 if (error) 737 return (error); 738 if (count == UINT64_MAX) 739 return (EINVAL); 740 741 mtx_lock(&efd->efd_lock); 742retry: 743 if (UINT64_MAX - efd->efd_count <= count) { 744 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 745 mtx_unlock(&efd->efd_lock); 746 return (EAGAIN); 747 } 748 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 749 PCATCH, "lefdwr", 0); 750 if (error == 0) 751 goto retry; 752 } 753 if (error == 0) { 754 efd->efd_count += count; 755 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 756 selwakeup(&efd->efd_sel); 757 wakeup(&efd->efd_count); 758 } 759 mtx_unlock(&efd->efd_lock); 760 761 return (error); 762} 763 764static int 765eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 766 struct thread *td) 767{ 768 
struct eventfd *efd; 769 int revents = 0; 770 771 efd = fp->f_data; 772 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 773 return (POLLERR); 774 775 mtx_lock(&efd->efd_lock); 776 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 777 revents |= events & (POLLIN|POLLRDNORM); 778 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 779 revents |= events & (POLLOUT|POLLWRNORM); 780 if (revents == 0) 781 selrecord(td, &efd->efd_sel); 782 mtx_unlock(&efd->efd_lock); 783 784 return (revents); 785} 786 787/*ARGSUSED*/ 788static int 789eventfd_kqfilter(struct file *fp, struct knote *kn) 790{ 791 struct eventfd *efd; 792 793 efd = fp->f_data; 794 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 795 return (EINVAL); 796 797 mtx_lock(&efd->efd_lock); 798 switch (kn->kn_filter) { 799 case EVFILT_READ: 800 kn->kn_fop = &eventfd_rfiltops; 801 break; 802 case EVFILT_WRITE: 803 kn->kn_fop = &eventfd_wfiltops; 804 break; 805 default: 806 mtx_unlock(&efd->efd_lock); 807 return (EINVAL); 808 } 809 810 kn->kn_hook = efd; 811 knlist_add(&efd->efd_sel.si_note, kn, 1); 812 mtx_unlock(&efd->efd_lock); 813 814 return (0); 815} 816 817static void 818filt_eventfddetach(struct knote *kn) 819{ 820 struct eventfd *efd = kn->kn_hook; 821 822 mtx_lock(&efd->efd_lock); 823 knlist_remove(&efd->efd_sel.si_note, kn, 1); 824 mtx_unlock(&efd->efd_lock); 825} 826 827/*ARGSUSED*/ 828static int 829filt_eventfdread(struct knote *kn, long hint) 830{ 831 struct eventfd *efd = kn->kn_hook; 832 int ret; 833 834 mtx_assert(&efd->efd_lock, MA_OWNED); 835 ret = (efd->efd_count > 0); 836 837 return (ret); 838} 839 840/*ARGSUSED*/ 841static int 842filt_eventfdwrite(struct knote *kn, long hint) 843{ 844 struct eventfd *efd = kn->kn_hook; 845 int ret; 846 847 mtx_assert(&efd->efd_lock, MA_OWNED); 848 ret = (UINT64_MAX - 1 > efd->efd_count); 849 850 return (ret); 851} 852 853/*ARGSUSED*/ 854static int 855eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 856 struct 
thread *td) 857{ 858 859 return (ENXIO); 860} 861 862/*ARGSUSED*/ 863static int 864eventfd_ioctl(struct file *fp, u_long cmd, void *data, 865 struct ucred *active_cred, struct thread *td) 866{ 867 868 return (ENXIO); 869} 870 871/*ARGSUSED*/ 872static int 873eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 874 struct thread *td) 875{ 876 877 return (ENXIO); 878} 879