1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2007 Roman Divacky
5 * Copyright (c) 2014 Dmitry Chagin
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/malloc.h>
37#include <sys/limits.h>
38#include <sys/lock.h>
39#include <sys/mutex.h>
40#include <sys/types.h>
41#include <sys/user.h>
42#include <sys/fcntl.h>
43#include <sys/file.h>
44#include <sys/filedesc.h>
45#include <sys/filio.h>
46#include <sys/stat.h>
47#include <sys/errno.h>
48#include <sys/event.h>
49#include <sys/poll.h>
50#include <sys/proc.h>
51#include <sys/uio.h>
52#include <sys/selinfo.h>
53#include <sys/eventfd.h>
54
55#include <security/audit/audit.h>
56
57_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
58_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");
59
60MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
61
62static fo_rdwr_t	eventfd_read;
63static fo_rdwr_t	eventfd_write;
64static fo_ioctl_t	eventfd_ioctl;
65static fo_poll_t	eventfd_poll;
66static fo_kqfilter_t	eventfd_kqfilter;
67static fo_stat_t	eventfd_stat;
68static fo_close_t	eventfd_close;
69static fo_fill_kinfo_t	eventfd_fill_kinfo;
70
71static struct fileops eventfdops = {
72	.fo_read = eventfd_read,
73	.fo_write = eventfd_write,
74	.fo_truncate = invfo_truncate,
75	.fo_ioctl = eventfd_ioctl,
76	.fo_poll = eventfd_poll,
77	.fo_kqfilter = eventfd_kqfilter,
78	.fo_stat = eventfd_stat,
79	.fo_close = eventfd_close,
80	.fo_chmod = invfo_chmod,
81	.fo_chown = invfo_chown,
82	.fo_sendfile = invfo_sendfile,
83	.fo_fill_kinfo = eventfd_fill_kinfo,
84	.fo_flags = DFLAG_PASSABLE
85};
86
/* kqueue filter callbacks referenced by the filterops tables in this file. */
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);
static void	filt_eventfddetach(struct knote *kn);
90
91static struct filterops eventfd_rfiltops = {
92	.f_isfd = 1,
93	.f_detach = filt_eventfddetach,
94	.f_event = filt_eventfdread
95};
96
97static struct filterops eventfd_wfiltops = {
98	.f_isfd = 1,
99	.f_detach = filt_eventfddetach,
100	.f_event = filt_eventfdwrite
101};
102
103struct eventfd {
104	eventfd_t	efd_count;
105	uint32_t	efd_flags;
106	struct selinfo	efd_sel;
107	struct mtx	efd_lock;
108};
109
110int
111eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
112    int flags)
113{
114	struct eventfd *efd;
115	int fflags;
116
117	AUDIT_ARG_FFLAGS(flags);
118	AUDIT_ARG_VALUE(initval);
119
120	efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
121	efd->efd_flags = flags;
122	efd->efd_count = initval;
123	mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
124	knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
125
126	fflags = FREAD | FWRITE;
127	if ((flags & EFD_NONBLOCK) != 0)
128		fflags |= FNONBLOCK;
129	finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
130
131	return (0);
132}
133
134static int
135eventfd_close(struct file *fp, struct thread *td)
136{
137	struct eventfd *efd;
138
139	efd = fp->f_data;
140	seldrain(&efd->efd_sel);
141	knlist_destroy(&efd->efd_sel.si_note);
142	mtx_destroy(&efd->efd_lock);
143	free(efd, M_EVENTFD);
144	return (0);
145}
146
147static int
148eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
149    int flags, struct thread *td)
150{
151	struct eventfd *efd;
152	eventfd_t count;
153	int error;
154
155	if (uio->uio_resid < sizeof(eventfd_t))
156		return (EINVAL);
157
158	error = 0;
159	efd = fp->f_data;
160	mtx_lock(&efd->efd_lock);
161	while (error == 0 && efd->efd_count == 0) {
162		if ((fp->f_flag & FNONBLOCK) != 0) {
163			mtx_unlock(&efd->efd_lock);
164			return (EAGAIN);
165		}
166		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
167		    "efdrd", 0);
168	}
169	if (error == 0) {
170		MPASS(efd->efd_count > 0);
171		if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
172			count = 1;
173			--efd->efd_count;
174		} else {
175			count = efd->efd_count;
176			efd->efd_count = 0;
177		}
178		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
179		selwakeup(&efd->efd_sel);
180		wakeup(&efd->efd_count);
181		mtx_unlock(&efd->efd_lock);
182		error = uiomove(&count, sizeof(eventfd_t), uio);
183	} else
184		mtx_unlock(&efd->efd_lock);
185
186	return (error);
187}
188
189static int
190eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
191    int flags, struct thread *td)
192{
193	struct eventfd *efd;
194	eventfd_t count;
195	int error;
196
197	if (uio->uio_resid < sizeof(eventfd_t))
198		return (EINVAL);
199
200	error = uiomove(&count, sizeof(eventfd_t), uio);
201	if (error != 0)
202		return (error);
203	if (count == UINT64_MAX)
204		return (EINVAL);
205
206	efd = fp->f_data;
207	mtx_lock(&efd->efd_lock);
208retry:
209	if (UINT64_MAX - efd->efd_count <= count) {
210		if ((fp->f_flag & FNONBLOCK) != 0) {
211			mtx_unlock(&efd->efd_lock);
212			/* Do not not return the number of bytes written */
213			uio->uio_resid += sizeof(eventfd_t);
214			return (EAGAIN);
215		}
216		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
217		    PCATCH, "efdwr", 0);
218		if (error == 0)
219			goto retry;
220	}
221	if (error == 0) {
222		MPASS(UINT64_MAX - efd->efd_count > count);
223		efd->efd_count += count;
224		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
225		selwakeup(&efd->efd_sel);
226		wakeup(&efd->efd_count);
227	}
228	mtx_unlock(&efd->efd_lock);
229
230	return (error);
231}
232
233static int
234eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
235    struct thread *td)
236{
237	struct eventfd *efd;
238	int revents;
239
240	efd = fp->f_data;
241	revents = 0;
242	mtx_lock(&efd->efd_lock);
243	if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
244		revents |= events & (POLLIN | POLLRDNORM);
245	if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
246	    efd->efd_count)
247		revents |= events & (POLLOUT | POLLWRNORM);
248	if (revents == 0)
249		selrecord(td, &efd->efd_sel);
250	mtx_unlock(&efd->efd_lock);
251
252	return (revents);
253}
254
255static int
256eventfd_kqfilter(struct file *fp, struct knote *kn)
257{
258	struct eventfd *efd = fp->f_data;
259
260	mtx_lock(&efd->efd_lock);
261	switch (kn->kn_filter) {
262	case EVFILT_READ:
263		kn->kn_fop = &eventfd_rfiltops;
264		break;
265	case EVFILT_WRITE:
266		kn->kn_fop = &eventfd_wfiltops;
267		break;
268	default:
269		mtx_unlock(&efd->efd_lock);
270		return (EINVAL);
271	}
272
273	kn->kn_hook = efd;
274	knlist_add(&efd->efd_sel.si_note, kn, 1);
275	mtx_unlock(&efd->efd_lock);
276
277	return (0);
278}
279
280static void
281filt_eventfddetach(struct knote *kn)
282{
283	struct eventfd *efd = kn->kn_hook;
284
285	mtx_lock(&efd->efd_lock);
286	knlist_remove(&efd->efd_sel.si_note, kn, 1);
287	mtx_unlock(&efd->efd_lock);
288}
289
290static int
291filt_eventfdread(struct knote *kn, long hint)
292{
293	struct eventfd *efd = kn->kn_hook;
294	int ret;
295
296	mtx_assert(&efd->efd_lock, MA_OWNED);
297	kn->kn_data = (int64_t)efd->efd_count;
298	ret = efd->efd_count > 0;
299
300	return (ret);
301}
302
303static int
304filt_eventfdwrite(struct knote *kn, long hint)
305{
306	struct eventfd *efd = kn->kn_hook;
307	int ret;
308
309	mtx_assert(&efd->efd_lock, MA_OWNED);
310	kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
311	ret = UINT64_MAX - 1 > efd->efd_count;
312
313	return (ret);
314}
315
316static int
317eventfd_ioctl(struct file *fp, u_long cmd, void *data,
318    struct ucred *active_cred, struct thread *td)
319{
320	switch (cmd) {
321	case FIONBIO:
322	case FIOASYNC:
323		return (0);
324	}
325
326	return (ENOTTY);
327}
328
329static int
330eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
331    struct thread *td)
332{
333	bzero((void *)st, sizeof *st);
334	st->st_mode = S_IFIFO;
335	return (0);
336}
337
338static int
339eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
340{
341	struct eventfd *efd = fp->f_data;
342
343	kif->kf_type = KF_TYPE_EVENTFD;
344	mtx_lock(&efd->efd_lock);
345	kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
346	kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
347	mtx_unlock(&efd->efd_lock);
348	return (0);
349}
350