1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/callout.h>
32#include <sys/fcntl.h>
33#include <sys/file.h>
34#include <sys/filedesc.h>
35#include <sys/filio.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mount.h>
40#include <sys/mutex.h>
41#include <sys/poll.h>
42#include <sys/proc.h>
43#include <sys/queue.h>
44#include <sys/selinfo.h>
45#include <sys/stat.h>
46#include <sys/sx.h>
47#include <sys/syscallsubr.h>
48#include <sys/sysctl.h>
49#include <sys/sysent.h>
50#include <sys/sysproto.h>
51#include <sys/timerfd.h>
52#include <sys/timespec.h>
53#include <sys/uio.h>
54#include <sys/user.h>
55
56#include <security/audit/audit.h>
57
58static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
59
60static struct mtx timerfd_list_lock;
61static LIST_HEAD(, timerfd) timerfd_list;
62MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);
63
64static struct unrhdr64 tfdino_unr;
65
/*
 * Values stored in tfd_jumped.  TFD_ZREAD and TFD_CANCELED are distinct
 * bits so that TFD_JUMPED can test for either with a single mask.
 */
#define	TFD_NOJUMP	0x0	/* Realtime clock has not jumped. */
#define	TFD_READ	0x1	/* Jumped, tfd has been read since. */
#define	TFD_ZREAD	0x2	/* Jumped backwards, CANCEL_ON_SET=false. */
#define	TFD_CANCELED	0x4	/* Jumped, CANCEL_ON_SET=true. */
#define	TFD_JUMPED	(TFD_ZREAD | TFD_CANCELED)
71
72/*
73 * One structure allocated per timerfd descriptor.
74 *
75 * Locking semantics:
76 * (t)	locked by tfd_lock mtx
77 * (l)	locked by timerfd_list_lock sx
78 * (c)	const until freeing
79 */
80struct timerfd {
81	/* User specified. */
82	struct itimerspec tfd_time;	/* (t) tfd timer */
83	clockid_t	tfd_clockid;	/* (c) timing base */
84	int		tfd_flags;	/* (c) creation flags */
85	int		tfd_timflags;	/* (t) timer flags */
86
87	/* Used internally. */
88	timerfd_t	tfd_count;	/* (t) expiration count since read */
89	bool		tfd_expired;	/* (t) true upon initial expiration */
90	struct mtx	tfd_lock;	/* tfd mtx lock */
91	struct callout	tfd_callout;	/* (t) expiration notification */
92	struct selinfo	tfd_sel;	/* (t) I/O alerts */
93	struct timespec	tfd_boottim;	/* (t) cached boottime */
94	int		tfd_jumped;	/* (t) timer jump status */
95	LIST_ENTRY(timerfd) entry;	/* (l) entry in list */
96
97	/* For stat(2). */
98	ino_t		tfd_ino;	/* (c) inode number */
99	struct timespec	tfd_atim;	/* (t) time of last read */
100	struct timespec	tfd_mtim;	/* (t) time of last settime */
101	struct timespec tfd_birthtim;	/* (c) creation time */
102};
103
104static void
105timerfd_init(void *data)
106{
107	new_unrhdr64(&tfdino_unr, 1);
108}
109
110SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
111
112static inline void
113timerfd_getboottime(struct timespec *ts)
114{
115	struct timeval tv;
116
117	getboottime(&tv);
118	TIMEVAL_TO_TIMESPEC(&tv, ts);
119}
120
121/*
122 * Call when a discontinuous jump has occured in CLOCK_REALTIME and
123 * update timerfd's cached boottime. A jump can be triggered using
124 * functions like clock_settime(2) or settimeofday(2).
125 *
126 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
127 * and the realtime clock jumps.
128 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
129 * but the realtime clock jumps backwards.
130 */
131void
132timerfd_jumped(void)
133{
134	struct timerfd *tfd;
135	struct timespec boottime, diff;
136
137	if (LIST_EMPTY(&timerfd_list))
138		return;
139
140	timerfd_getboottime(&boottime);
141	mtx_lock(&timerfd_list_lock);
142	LIST_FOREACH(tfd, &timerfd_list, entry) {
143		mtx_lock(&tfd->tfd_lock);
144		if (tfd->tfd_clockid != CLOCK_REALTIME ||
145		    (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
146		    timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
147			mtx_unlock(&tfd->tfd_lock);
148			continue;
149		}
150
151		if (callout_active(&tfd->tfd_callout)) {
152			if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
153				tfd->tfd_jumped = TFD_CANCELED;
154			else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
155				tfd->tfd_jumped = TFD_ZREAD;
156
157			/*
158			 * Do not reschedule callout when
159			 * inside interval time loop.
160			 */
161			if (!tfd->tfd_expired) {
162				timespecsub(&boottime,
163				    &tfd->tfd_boottim, &diff);
164				timespecsub(&tfd->tfd_time.it_value,
165				    &diff, &tfd->tfd_time.it_value);
166				if (callout_stop(&tfd->tfd_callout) == 1) {
167					callout_schedule_sbt(&tfd->tfd_callout,
168					    tstosbt(tfd->tfd_time.it_value),
169					    0, C_ABSOLUTE);
170				}
171			}
172		}
173
174		tfd->tfd_boottim = boottime;
175		mtx_unlock(&tfd->tfd_lock);
176	}
177	mtx_unlock(&timerfd_list_lock);
178}
179
180static int
181timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
182    int flags, struct thread *td)
183{
184	struct timerfd *tfd = fp->f_data;
185	timerfd_t count;
186	int error = 0;
187
188	if (uio->uio_resid < sizeof(timerfd_t))
189		return (EINVAL);
190
191	mtx_lock(&tfd->tfd_lock);
192retry:
193	getnanotime(&tfd->tfd_atim);
194	if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
195		if (tfd->tfd_jumped == TFD_CANCELED)
196			error = ECANCELED;
197		tfd->tfd_jumped = TFD_READ;
198		tfd->tfd_count = 0;
199		mtx_unlock(&tfd->tfd_lock);
200		return (error);
201	} else {
202		tfd->tfd_jumped = TFD_NOJUMP;
203	}
204	if (tfd->tfd_count == 0) {
205		if ((fp->f_flag & FNONBLOCK) != 0) {
206			mtx_unlock(&tfd->tfd_lock);
207			return (EAGAIN);
208		}
209		td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
210		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
211		    PCATCH, "tfdrd", 0);
212		if (error == 0) {
213			goto retry;
214		} else {
215			mtx_unlock(&tfd->tfd_lock);
216			return (error);
217		}
218	}
219
220	count = tfd->tfd_count;
221	tfd->tfd_count = 0;
222	mtx_unlock(&tfd->tfd_lock);
223	error = uiomove(&count, sizeof(timerfd_t), uio);
224
225	return (error);
226}
227
228static int
229timerfd_ioctl(struct file *fp, u_long cmd, void *data,
230    struct ucred *active_cred, struct thread *td)
231{
232	switch (cmd) {
233	case FIOASYNC:
234		if (*(int *)data != 0)
235			atomic_set_int(&fp->f_flag, FASYNC);
236		else
237			atomic_clear_int(&fp->f_flag, FASYNC);
238		return (0);
239	case FIONBIO:
240		if (*(int *)data != 0)
241			atomic_set_int(&fp->f_flag, FNONBLOCK);
242		else
243			atomic_clear_int(&fp->f_flag, FNONBLOCK);
244		return (0);
245	}
246	return (ENOTTY);
247}
248
249static int
250timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
251    struct thread *td)
252{
253	struct timerfd *tfd = fp->f_data;
254	int revents = 0;
255
256	mtx_lock(&tfd->tfd_lock);
257	if ((events & (POLLIN | POLLRDNORM)) != 0 &&
258	    tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ)
259		revents |= events & (POLLIN | POLLRDNORM);
260	if (revents == 0)
261		selrecord(td, &tfd->tfd_sel);
262	mtx_unlock(&tfd->tfd_lock);
263
264	return (revents);
265}
266
267static void
268filt_timerfddetach(struct knote *kn)
269{
270	struct timerfd *tfd = kn->kn_hook;
271
272	mtx_lock(&tfd->tfd_lock);
273	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
274	mtx_unlock(&tfd->tfd_lock);
275}
276
277static int
278filt_timerfdread(struct knote *kn, long hint)
279{
280	struct timerfd *tfd = kn->kn_hook;
281
282	mtx_assert(&tfd->tfd_lock, MA_OWNED);
283	kn->kn_data = (int64_t)tfd->tfd_count;
284	return (tfd->tfd_count > 0);
285}
286
287static struct filterops timerfd_rfiltops = {
288	.f_isfd = 1,
289	.f_detach = filt_timerfddetach,
290	.f_event = filt_timerfdread,
291};
292
293static int
294timerfd_kqfilter(struct file *fp, struct knote *kn)
295{
296	struct timerfd *tfd = fp->f_data;
297
298	if (kn->kn_filter != EVFILT_READ)
299		return (EINVAL);
300
301	kn->kn_fop = &timerfd_rfiltops;
302	kn->kn_hook = tfd;
303	knlist_add(&tfd->tfd_sel.si_note, kn, 0);
304
305	return (0);
306}
307
308static int
309timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
310{
311	struct timerfd *tfd = fp->f_data;
312
313	bzero(sb, sizeof(*sb));
314	sb->st_nlink = fp->f_count - 1;
315	sb->st_uid = fp->f_cred->cr_uid;
316	sb->st_gid = fp->f_cred->cr_gid;
317	sb->st_blksize = PAGE_SIZE;
318	mtx_lock(&tfd->tfd_lock);
319	sb->st_atim = tfd->tfd_atim;
320	sb->st_mtim = tfd->tfd_mtim;
321	mtx_unlock(&tfd->tfd_lock);
322	sb->st_ctim = sb->st_mtim;
323	sb->st_ino = tfd->tfd_ino;
324	sb->st_birthtim = tfd->tfd_birthtim;
325
326	return (0);
327}
328
329static int
330timerfd_close(struct file *fp, struct thread *td)
331{
332	struct timerfd *tfd = fp->f_data;
333
334	mtx_lock(&timerfd_list_lock);
335	LIST_REMOVE(tfd, entry);
336	mtx_unlock(&timerfd_list_lock);
337
338	callout_drain(&tfd->tfd_callout);
339	seldrain(&tfd->tfd_sel);
340	knlist_destroy(&tfd->tfd_sel.si_note);
341	mtx_destroy(&tfd->tfd_lock);
342	free(tfd, M_TIMERFD);
343	fp->f_ops = &badfileops;
344
345	return (0);
346}
347
348static int
349timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
350    struct filedesc *fdp)
351{
352	struct timerfd *tfd = fp->f_data;
353
354	kif->kf_type = KF_TYPE_TIMERFD;
355	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
356	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
357	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
358
359	return (0);
360}
361
362static struct fileops timerfdops = {
363	.fo_read = timerfd_read,
364	.fo_write = invfo_rdwr,
365	.fo_truncate = invfo_truncate,
366	.fo_ioctl = timerfd_ioctl,
367	.fo_poll = timerfd_poll,
368	.fo_kqfilter = timerfd_kqfilter,
369	.fo_stat = timerfd_stat,
370	.fo_close = timerfd_close,
371	.fo_chmod = invfo_chmod,
372	.fo_chown = invfo_chown,
373	.fo_sendfile = invfo_sendfile,
374	.fo_fill_kinfo = timerfd_fill_kinfo,
375	.fo_cmp = file_kcmp_generic,
376	.fo_flags = DFLAG_PASSABLE,
377};
378
379static void
380timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
381{
382	struct timespec curr_value;
383
384	mtx_assert(&tfd->tfd_lock, MA_OWNED);
385	*old_value = tfd->tfd_time;
386	if (timespecisset(&tfd->tfd_time.it_value)) {
387		nanouptime(&curr_value);
388		timespecsub(&tfd->tfd_time.it_value, &curr_value,
389		    &old_value->it_value);
390	}
391}
392
393static void
394timerfd_expire(void *arg)
395{
396	struct timerfd *tfd = (struct timerfd *)arg;
397	struct timespec uptime;
398
399	++tfd->tfd_count;
400	tfd->tfd_expired = true;
401	if (timespecisset(&tfd->tfd_time.it_interval)) {
402		/* Count missed events. */
403		nanouptime(&uptime);
404		if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
405			timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
406			tfd->tfd_count += tstosbt(uptime) /
407			    tstosbt(tfd->tfd_time.it_interval);
408		}
409		timespecadd(&tfd->tfd_time.it_value,
410		    &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
411		callout_schedule_sbt(&tfd->tfd_callout,
412		    tstosbt(tfd->tfd_time.it_value),
413		    0, C_ABSOLUTE);
414	} else {
415		/* Single shot timer. */
416		callout_deactivate(&tfd->tfd_callout);
417		timespecclear(&tfd->tfd_time.it_value);
418	}
419
420	wakeup(&tfd->tfd_count);
421	selwakeup(&tfd->tfd_sel);
422	KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
423}
424
425int
426kern_timerfd_create(struct thread *td, int clockid, int flags)
427{
428	struct file *fp;
429	struct timerfd *tfd;
430	int error, fd, fflags;
431
432	AUDIT_ARG_VALUE(clockid);
433	AUDIT_ARG_FFLAGS(flags);
434
435	switch (clockid) {
436	case CLOCK_REALTIME:
437		/* FALLTHROUGH */
438	case CLOCK_MONOTONIC:
439		/* FALLTHROUGH */
440	case CLOCK_UPTIME:
441		/*
442		 * CLOCK_BOOTTIME should be added once different from
443		 * CLOCK_UPTIME
444		 */
445		break;
446	default:
447		return (EINVAL);
448	}
449	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
450		return (EINVAL);
451
452	fflags = FREAD;
453	if ((flags & TFD_CLOEXEC) != 0)
454		fflags |= O_CLOEXEC;
455	if ((flags & TFD_NONBLOCK) != 0)
456		fflags |= FNONBLOCK;
457
458	error = falloc(td, &fp, &fd, fflags);
459	if (error != 0)
460		return (error);
461
462	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
463	tfd->tfd_clockid = (clockid_t)clockid;
464	tfd->tfd_flags = flags;
465	tfd->tfd_ino = alloc_unr64(&tfdino_unr);
466	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
467	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
468	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
469	timerfd_getboottime(&tfd->tfd_boottim);
470	getnanotime(&tfd->tfd_birthtim);
471	mtx_lock(&timerfd_list_lock);
472	LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
473	mtx_unlock(&timerfd_list_lock);
474
475	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
476
477	fdrop(fp, td);
478
479	td->td_retval[0] = fd;
480	return (0);
481}
482
483int
484kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
485{
486	struct file *fp;
487	struct timerfd *tfd;
488	int error;
489
490	error = fget(td, fd, &cap_write_rights, &fp);
491	if (error != 0)
492		return (error);
493	if (fp->f_type != DTYPE_TIMERFD) {
494		fdrop(fp, td);
495		return (EINVAL);
496	}
497	tfd = fp->f_data;
498
499	mtx_lock(&tfd->tfd_lock);
500	timerfd_curval(tfd, curr_value);
501	mtx_unlock(&tfd->tfd_lock);
502
503	fdrop(fp, td);
504	return (0);
505}
506
507int
508kern_timerfd_settime(struct thread *td, int fd, int flags,
509    const struct itimerspec *new_value, struct itimerspec *old_value)
510{
511	struct file *fp;
512	struct timerfd *tfd;
513	struct timespec ts;
514	int error = 0;
515
516	if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
517		return (EINVAL);
518	if (!timespecvalid_interval(&new_value->it_value) ||
519	    !timespecvalid_interval(&new_value->it_interval))
520		return (EINVAL);
521
522	error = fget(td, fd, &cap_write_rights, &fp);
523	if (error != 0)
524		return (error);
525	if (fp->f_type != DTYPE_TIMERFD) {
526		fdrop(fp, td);
527		return (EINVAL);
528	}
529	tfd = fp->f_data;
530
531	mtx_lock(&tfd->tfd_lock);
532	getnanotime(&tfd->tfd_mtim);
533	tfd->tfd_timflags = flags;
534
535	/* Store old itimerspec, if applicable. */
536	if (old_value != NULL)
537		timerfd_curval(tfd, old_value);
538
539	/* Set new expiration. */
540	tfd->tfd_time = *new_value;
541	if (timespecisset(&tfd->tfd_time.it_value)) {
542		if ((flags & TFD_TIMER_ABSTIME) == 0) {
543			nanouptime(&ts);
544			timespecadd(&tfd->tfd_time.it_value, &ts,
545			    &tfd->tfd_time.it_value);
546		} else if (tfd->tfd_clockid == CLOCK_REALTIME) {
547			/* ECANCELED if unread jump is pending. */
548			if (tfd->tfd_jumped == TFD_CANCELED)
549				error = ECANCELED;
550			/* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */
551			timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
552			    &tfd->tfd_time.it_value);
553		}
554		callout_reset_sbt(&tfd->tfd_callout,
555		    tstosbt(tfd->tfd_time.it_value),
556		    0, timerfd_expire, tfd, C_ABSOLUTE);
557	} else {
558		callout_stop(&tfd->tfd_callout);
559	}
560	tfd->tfd_count = 0;
561	tfd->tfd_expired = false;
562	tfd->tfd_jumped = TFD_NOJUMP;
563	mtx_unlock(&tfd->tfd_lock);
564
565	fdrop(fp, td);
566	return (error);
567}
568
569int
570sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
571{
572	return (kern_timerfd_create(td, uap->clockid, uap->flags));
573}
574
575int
576sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
577{
578	struct itimerspec curr_value;
579	int error;
580
581	error = kern_timerfd_gettime(td, uap->fd, &curr_value);
582	if (error == 0)
583		error = copyout(&curr_value, uap->curr_value,
584		    sizeof(curr_value));
585
586	return (error);
587}
588
589int
590sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
591{
592	struct itimerspec new_value, old_value;
593	int error;
594
595	error = copyin(uap->new_value, &new_value, sizeof(new_value));
596	if (error != 0)
597		return (error);
598	if (uap->old_value == NULL) {
599		error = kern_timerfd_settime(td, uap->fd, uap->flags,
600		    &new_value, NULL);
601	} else {
602		error = kern_timerfd_settime(td, uap->fd, uap->flags,
603		    &new_value, &old_value);
604		if (error == 0)
605			error = copyout(&old_value, uap->old_value,
606			    sizeof(old_value));
607	}
608	return (error);
609}
610