1/*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/param.h> 29#include <sys/systm.h> 30#include <sys/kernel.h> 31#include <sys/malloc.h> 32#include <sys/limits.h> 33#include <sys/lock.h> 34#include <sys/mutex.h> 35#include <sys/types.h> 36#include <sys/user.h> 37#include <sys/fcntl.h> 38#include <sys/file.h> 39#include <sys/filedesc.h> 40#include <sys/filio.h> 41#include <sys/stat.h> 42#include <sys/errno.h> 43#include <sys/event.h> 44#include <sys/poll.h> 45#include <sys/proc.h> 46#include <sys/uio.h> 47#include <sys/selinfo.h> 48#include <sys/eventfd.h> 49 50#include <security/audit/audit.h> 51 52_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC"); 53_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK"); 54 55MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); 56 57static fo_rdwr_t eventfd_read; 58static fo_rdwr_t eventfd_write; 59static fo_ioctl_t eventfd_ioctl; 60static fo_poll_t eventfd_poll; 61static fo_kqfilter_t eventfd_kqfilter; 62static fo_stat_t eventfd_stat; 63static fo_close_t eventfd_close; 64static fo_fill_kinfo_t eventfd_fill_kinfo; 65 66static struct fileops eventfdops = { 67 .fo_read = eventfd_read, 68 .fo_write = eventfd_write, 69 .fo_truncate = invfo_truncate, 70 .fo_ioctl = eventfd_ioctl, 71 .fo_poll = eventfd_poll, 72 .fo_kqfilter = eventfd_kqfilter, 73 .fo_stat = eventfd_stat, 74 .fo_close = eventfd_close, 75 .fo_chmod = invfo_chmod, 76 .fo_chown = invfo_chown, 77 .fo_sendfile = invfo_sendfile, 78 .fo_fill_kinfo = eventfd_fill_kinfo, 79 .fo_cmp = file_kcmp_generic, 80 .fo_flags = DFLAG_PASSABLE 81}; 82 83static void filt_eventfddetach(struct knote *kn); 84static int filt_eventfdread(struct knote *kn, long hint); 85static int filt_eventfdwrite(struct knote *kn, long hint); 86 87static struct filterops eventfd_rfiltops = { 88 .f_isfd = 1, 89 .f_detach = filt_eventfddetach, 90 .f_event = filt_eventfdread 91}; 92 93static struct filterops eventfd_wfiltops = { 94 .f_isfd = 1, 95 .f_detach = filt_eventfddetach, 96 .f_event = filt_eventfdwrite 97}; 98 99struct eventfd { 100 eventfd_t efd_count; 101 uint32_t efd_flags; 102 struct selinfo efd_sel; 103 struct mtx efd_lock; 104}; 105 106int 107eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 108 int flags) 109{ 110 struct eventfd *efd; 111 int fflags; 112 113 AUDIT_ARG_FFLAGS(flags); 114 AUDIT_ARG_VALUE(initval); 115 116 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); 117 efd->efd_flags = flags; 118 efd->efd_count = initval; 119 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 120 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 121 122 fflags = FREAD | FWRITE; 123 if ((flags & EFD_NONBLOCK) != 0) 124 fflags |= FNONBLOCK; 125 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); 126 127 return (0); 128} 129 130static int 131eventfd_close(struct file *fp, struct thread *td) 132{ 133 struct eventfd *efd; 134 135 efd = fp->f_data; 136 seldrain(&efd->efd_sel); 137 knlist_destroy(&efd->efd_sel.si_note); 138 mtx_destroy(&efd->efd_lock); 139 free(efd, M_EVENTFD); 140 return (0); 141} 142 143static int 144eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 145 int flags, struct thread *td) 146{ 147 struct eventfd *efd; 148 eventfd_t count; 149 int error; 150 151 if (uio->uio_resid < sizeof(eventfd_t)) 152 return (EINVAL); 153 154 error = 0; 155 efd = fp->f_data; 156 mtx_lock(&efd->efd_lock); 157 while (error == 0 && efd->efd_count == 0) { 158 if ((fp->f_flag & FNONBLOCK) != 0) { 159 mtx_unlock(&efd->efd_lock); 160 return (EAGAIN); 161 } 162 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, 163 "efdrd", 0); 164 } 165 if (error == 0) { 166 MPASS(efd->efd_count > 0); 167 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { 168 count = 1; 169 --efd->efd_count; 170 } else { 171 count = efd->efd_count; 172 efd->efd_count = 0; 173 } 174 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 175 selwakeup(&efd->efd_sel); 176 wakeup(&efd->efd_count); 177 mtx_unlock(&efd->efd_lock); 178 error = uiomove(&count, sizeof(eventfd_t), uio); 179 } else 180 mtx_unlock(&efd->efd_lock); 181 182 return (error); 183} 184 185static int 186eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 187 int flags, struct thread *td) 188{ 189 struct eventfd *efd; 190 eventfd_t count; 191 int error; 192 193 if (uio->uio_resid < sizeof(eventfd_t)) 194 return (EINVAL); 195 196 error = uiomove(&count, sizeof(eventfd_t), uio); 197 if (error != 0) 198 return (error); 199 if (count == UINT64_MAX) 200 return (EINVAL); 201 202 efd = fp->f_data; 203 mtx_lock(&efd->efd_lock); 204retry: 205 if (UINT64_MAX - efd->efd_count <= count) { 206 if ((fp->f_flag & FNONBLOCK) != 0) { 207 mtx_unlock(&efd->efd_lock); 208 /* Do not not return the number of bytes written */ 209 uio->uio_resid += sizeof(eventfd_t); 210 return (EAGAIN); 211 } 212 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 213 PCATCH, "efdwr", 0); 214 if (error == 0) 215 goto retry; 216 } 217 if (error == 0) { 218 MPASS(UINT64_MAX - efd->efd_count > count); 219 efd->efd_count += count; 220 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 221 selwakeup(&efd->efd_sel); 222 wakeup(&efd->efd_count); 223 } 224 mtx_unlock(&efd->efd_lock); 225 226 return (error); 227} 228 229static int 230eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 231 struct thread *td) 232{ 233 struct eventfd *efd; 234 int revents; 235 236 efd = fp->f_data; 237 revents = 0; 238 mtx_lock(&efd->efd_lock); 239 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) 240 revents |= events & (POLLIN | POLLRDNORM); 241 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > 242 efd->efd_count) 243 revents |= events & (POLLOUT | POLLWRNORM); 244 if (revents == 0) 245 selrecord(td, &efd->efd_sel); 246 mtx_unlock(&efd->efd_lock); 247 248 return (revents); 249} 250 251static int 252eventfd_kqfilter(struct file *fp, struct knote *kn) 253{ 254 struct eventfd *efd = fp->f_data; 255 256 mtx_lock(&efd->efd_lock); 257 switch (kn->kn_filter) { 258 case EVFILT_READ: 259 kn->kn_fop = &eventfd_rfiltops; 260 break; 261 case EVFILT_WRITE: 262 kn->kn_fop = &eventfd_wfiltops; 263 break; 264 default: 265 mtx_unlock(&efd->efd_lock); 266 return (EINVAL); 267 } 268 269 kn->kn_hook = efd; 270 knlist_add(&efd->efd_sel.si_note, kn, 1); 271 mtx_unlock(&efd->efd_lock); 272 273 return (0); 274} 275 276static void 277filt_eventfddetach(struct knote *kn) 278{ 279 struct eventfd *efd = kn->kn_hook; 280 281 mtx_lock(&efd->efd_lock); 282 knlist_remove(&efd->efd_sel.si_note, kn, 1); 283 mtx_unlock(&efd->efd_lock); 284} 285 286static int 287filt_eventfdread(struct knote *kn, long hint) 288{ 289 struct eventfd *efd = kn->kn_hook; 290 int ret; 291 292 mtx_assert(&efd->efd_lock, MA_OWNED); 293 kn->kn_data = (int64_t)efd->efd_count; 294 ret = efd->efd_count > 0; 295 296 return (ret); 297} 298 299static int 300filt_eventfdwrite(struct knote *kn, long hint) 301{ 302 struct eventfd *efd = kn->kn_hook; 303 int ret; 304 305 mtx_assert(&efd->efd_lock, MA_OWNED); 306 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); 307 ret = UINT64_MAX - 1 > efd->efd_count; 308 309 return (ret); 310} 311 312static int 313eventfd_ioctl(struct file *fp, u_long cmd, void *data, 314 struct ucred *active_cred, struct thread *td) 315{ 316 switch (cmd) { 317 case FIONBIO: 318 case FIOASYNC: 319 return (0); 320 } 321 322 return (ENOTTY); 323} 324 325static int 326eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 327{ 328 bzero((void *)st, sizeof *st); 329 st->st_mode = S_IFIFO; 330 return (0); 331} 332 333static int 334eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 335{ 336 struct eventfd *efd = fp->f_data; 337 338 kif->kf_type = KF_TYPE_EVENTFD; 339 mtx_lock(&efd->efd_lock); 340 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; 341 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; 342 kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd; 343 mtx_unlock(&efd->efd_lock); 344 return (0); 345} 346