1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/malloc.h>
32#include <sys/limits.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <sys/types.h>
36#include <sys/user.h>
37#include <sys/fcntl.h>
38#include <sys/file.h>
39#include <sys/filedesc.h>
40#include <sys/filio.h>
41#include <sys/stat.h>
42#include <sys/errno.h>
43#include <sys/event.h>
44#include <sys/poll.h>
45#include <sys/proc.h>
46#include <sys/uio.h>
47#include <sys/selinfo.h>
48#include <sys/eventfd.h>
49
50#include <security/audit/audit.h>
51
/*
 * The EFD_* flag values are required to equal the matching O_* open flags;
 * fail the build if the definitions ever diverge.
 */
_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");

/* Malloc type used for the struct eventfd allocations made in this file. */
MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
56
/*
 * Forward declarations of the file operation handlers implemented in this
 * file; they are installed in the eventfdops table.
 */
static fo_rdwr_t	eventfd_read;
static fo_rdwr_t	eventfd_write;
static fo_ioctl_t	eventfd_ioctl;
static fo_poll_t	eventfd_poll;
static fo_kqfilter_t	eventfd_kqfilter;
static fo_stat_t	eventfd_stat;
static fo_close_t	eventfd_close;
static fo_fill_kinfo_t	eventfd_fill_kinfo;
65
66static struct fileops eventfdops = {
67	.fo_read = eventfd_read,
68	.fo_write = eventfd_write,
69	.fo_truncate = invfo_truncate,
70	.fo_ioctl = eventfd_ioctl,
71	.fo_poll = eventfd_poll,
72	.fo_kqfilter = eventfd_kqfilter,
73	.fo_stat = eventfd_stat,
74	.fo_close = eventfd_close,
75	.fo_chmod = invfo_chmod,
76	.fo_chown = invfo_chown,
77	.fo_sendfile = invfo_sendfile,
78	.fo_fill_kinfo = eventfd_fill_kinfo,
79	.fo_cmp = file_kcmp_generic,
80	.fo_flags = DFLAG_PASSABLE
81};
82
/*
 * Forward declarations of the kqueue filter callbacks referenced by the
 * eventfd_rfiltops and eventfd_wfiltops tables.
 */
static void	filt_eventfddetach(struct knote *kn);
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);
86
87static struct filterops eventfd_rfiltops = {
88	.f_isfd = 1,
89	.f_detach = filt_eventfddetach,
90	.f_event = filt_eventfdread
91};
92
93static struct filterops eventfd_wfiltops = {
94	.f_isfd = 1,
95	.f_detach = filt_eventfddetach,
96	.f_event = filt_eventfdwrite
97};
98
/*
 * Per-descriptor eventfd state, allocated in eventfd_create_file() and
 * released in eventfd_close().
 */
struct eventfd {
	eventfd_t	efd_count;	/* current counter value */
	uint32_t	efd_flags;	/* flags passed at creation (EFD_*) */
	struct selinfo	efd_sel;	/* poll/select state and knote list */
	struct mtx	efd_lock;	/* held around efd_count accesses; also
					   the mutex given to the knote list */
};
105
106int
107eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
108    int flags)
109{
110	struct eventfd *efd;
111	int fflags;
112
113	AUDIT_ARG_FFLAGS(flags);
114	AUDIT_ARG_VALUE(initval);
115
116	efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
117	efd->efd_flags = flags;
118	efd->efd_count = initval;
119	mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
120	knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
121
122	fflags = FREAD | FWRITE;
123	if ((flags & EFD_NONBLOCK) != 0)
124		fflags |= FNONBLOCK;
125	finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
126
127	return (0);
128}
129
130static int
131eventfd_close(struct file *fp, struct thread *td)
132{
133	struct eventfd *efd;
134
135	efd = fp->f_data;
136	seldrain(&efd->efd_sel);
137	knlist_destroy(&efd->efd_sel.si_note);
138	mtx_destroy(&efd->efd_lock);
139	free(efd, M_EVENTFD);
140	return (0);
141}
142
143static int
144eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
145    int flags, struct thread *td)
146{
147	struct eventfd *efd;
148	eventfd_t count;
149	int error;
150
151	if (uio->uio_resid < sizeof(eventfd_t))
152		return (EINVAL);
153
154	error = 0;
155	efd = fp->f_data;
156	mtx_lock(&efd->efd_lock);
157	while (error == 0 && efd->efd_count == 0) {
158		if ((fp->f_flag & FNONBLOCK) != 0) {
159			mtx_unlock(&efd->efd_lock);
160			return (EAGAIN);
161		}
162		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
163		    "efdrd", 0);
164	}
165	if (error == 0) {
166		MPASS(efd->efd_count > 0);
167		if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
168			count = 1;
169			--efd->efd_count;
170		} else {
171			count = efd->efd_count;
172			efd->efd_count = 0;
173		}
174		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
175		selwakeup(&efd->efd_sel);
176		wakeup(&efd->efd_count);
177		mtx_unlock(&efd->efd_lock);
178		error = uiomove(&count, sizeof(eventfd_t), uio);
179	} else
180		mtx_unlock(&efd->efd_lock);
181
182	return (error);
183}
184
185static int
186eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
187    int flags, struct thread *td)
188{
189	struct eventfd *efd;
190	eventfd_t count;
191	int error;
192
193	if (uio->uio_resid < sizeof(eventfd_t))
194		return (EINVAL);
195
196	error = uiomove(&count, sizeof(eventfd_t), uio);
197	if (error != 0)
198		return (error);
199	if (count == UINT64_MAX)
200		return (EINVAL);
201
202	efd = fp->f_data;
203	mtx_lock(&efd->efd_lock);
204retry:
205	if (UINT64_MAX - efd->efd_count <= count) {
206		if ((fp->f_flag & FNONBLOCK) != 0) {
207			mtx_unlock(&efd->efd_lock);
208			/* Do not not return the number of bytes written */
209			uio->uio_resid += sizeof(eventfd_t);
210			return (EAGAIN);
211		}
212		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
213		    PCATCH, "efdwr", 0);
214		if (error == 0)
215			goto retry;
216	}
217	if (error == 0) {
218		MPASS(UINT64_MAX - efd->efd_count > count);
219		efd->efd_count += count;
220		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
221		selwakeup(&efd->efd_sel);
222		wakeup(&efd->efd_count);
223	}
224	mtx_unlock(&efd->efd_lock);
225
226	return (error);
227}
228
229static int
230eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
231    struct thread *td)
232{
233	struct eventfd *efd;
234	int revents;
235
236	efd = fp->f_data;
237	revents = 0;
238	mtx_lock(&efd->efd_lock);
239	if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
240		revents |= events & (POLLIN | POLLRDNORM);
241	if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
242	    efd->efd_count)
243		revents |= events & (POLLOUT | POLLWRNORM);
244	if (revents == 0)
245		selrecord(td, &efd->efd_sel);
246	mtx_unlock(&efd->efd_lock);
247
248	return (revents);
249}
250
251static int
252eventfd_kqfilter(struct file *fp, struct knote *kn)
253{
254	struct eventfd *efd = fp->f_data;
255
256	mtx_lock(&efd->efd_lock);
257	switch (kn->kn_filter) {
258	case EVFILT_READ:
259		kn->kn_fop = &eventfd_rfiltops;
260		break;
261	case EVFILT_WRITE:
262		kn->kn_fop = &eventfd_wfiltops;
263		break;
264	default:
265		mtx_unlock(&efd->efd_lock);
266		return (EINVAL);
267	}
268
269	kn->kn_hook = efd;
270	knlist_add(&efd->efd_sel.si_note, kn, 1);
271	mtx_unlock(&efd->efd_lock);
272
273	return (0);
274}
275
276static void
277filt_eventfddetach(struct knote *kn)
278{
279	struct eventfd *efd = kn->kn_hook;
280
281	mtx_lock(&efd->efd_lock);
282	knlist_remove(&efd->efd_sel.si_note, kn, 1);
283	mtx_unlock(&efd->efd_lock);
284}
285
286static int
287filt_eventfdread(struct knote *kn, long hint)
288{
289	struct eventfd *efd = kn->kn_hook;
290	int ret;
291
292	mtx_assert(&efd->efd_lock, MA_OWNED);
293	kn->kn_data = (int64_t)efd->efd_count;
294	ret = efd->efd_count > 0;
295
296	return (ret);
297}
298
299static int
300filt_eventfdwrite(struct knote *kn, long hint)
301{
302	struct eventfd *efd = kn->kn_hook;
303	int ret;
304
305	mtx_assert(&efd->efd_lock, MA_OWNED);
306	kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
307	ret = UINT64_MAX - 1 > efd->efd_count;
308
309	return (ret);
310}
311
312static int
313eventfd_ioctl(struct file *fp, u_long cmd, void *data,
314    struct ucred *active_cred, struct thread *td)
315{
316	switch (cmd) {
317	case FIONBIO:
318	case FIOASYNC:
319		return (0);
320	}
321
322	return (ENOTTY);
323}
324
325static int
326eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
327{
328	bzero((void *)st, sizeof *st);
329	st->st_mode = S_IFIFO;
330	return (0);
331}
332
333static int
334eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
335{
336	struct eventfd *efd = fp->f_data;
337
338	kif->kf_type = KF_TYPE_EVENTFD;
339	mtx_lock(&efd->efd_lock);
340	kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
341	kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
342	kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd;
343	mtx_unlock(&efd->efd_lock);
344	return (0);
345}
346