kern_time.c revision 315658
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/kern/kern_time.c 315658 2017-03-21 01:24:56Z vangyzen $");
34
35#include "opt_ktrace.h"
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/limits.h>
40#include <sys/clock.h>
41#include <sys/lock.h>
42#include <sys/mutex.h>
43#include <sys/sysproto.h>
44#include <sys/eventhandler.h>
45#include <sys/resourcevar.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/sleepqueue.h>
49#include <sys/syscallsubr.h>
50#include <sys/sysctl.h>
51#include <sys/sysent.h>
52#include <sys/priv.h>
53#include <sys/proc.h>
54#include <sys/posix4.h>
55#include <sys/time.h>
56#include <sys/timers.h>
57#include <sys/timetc.h>
58#include <sys/vnode.h>
59#ifdef KTRACE
60#include <sys/ktrace.h>
61#endif
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65
/* Number of entries in posix_clocks[]; valid indices are 0..CLOCK_MONOTONIC. */
#define MAX_CLOCKS			(CLOCK_MONOTONIC + 1)

/*
 * Encoding of a CPU-time clock id: CPUCLOCK_BIT marks the id as a CPU
 * clock, CPUCLOCK_PROCESS_BIT distinguishes a process clock (set) from
 * a thread clock (clear), and the bits selected by CPUCLOCK_ID_MASK
 * carry the pid or tid.
 */
#define CPUCLOCK_BIT			0x80000000
#define CPUCLOCK_PROCESS_BIT		0x40000000
#define CPUCLOCK_ID_MASK		(~(CPUCLOCK_BIT | CPUCLOCK_PROCESS_BIT))
#define MAKE_THREAD_CPUCLOCK(tid)	(CPUCLOCK_BIT | (tid))
#define MAKE_PROCESS_CPUCLOCK(pid)	\
	(CPUCLOCK_BIT | CPUCLOCK_PROCESS_BIT | (pid))
73
74static struct kclock	posix_clocks[MAX_CLOCKS];
75static uma_zone_t	itimer_zone = NULL;
76
77/*
78 * Time of day and interval timer support.
79 *
80 * These routines provide the kernel entry points to get and set
81 * the time-of-day and per-process interval timers.  Subroutines
82 * here provide support for adding and subtracting timeval structures
83 * and decrementing interval timers, optionally reloading the interval
84 * timers when they expire.
85 */
86
87static int	settime(struct thread *, struct timeval *);
88static void	timevalfix(struct timeval *);
89
90static void	itimer_start(void);
91static int	itimer_init(void *, int, int);
92static void	itimer_fini(void *, int);
93static void	itimer_enter(struct itimer *);
94static void	itimer_leave(struct itimer *);
95static struct itimer *itimer_find(struct proc *, int);
96static void	itimers_alloc(struct proc *);
97static void	itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp);
98static void	itimers_event_hook_exit(void *arg, struct proc *p);
99static int	realtimer_create(struct itimer *);
100static int	realtimer_gettime(struct itimer *, struct itimerspec *);
101static int	realtimer_settime(struct itimer *, int,
102			struct itimerspec *, struct itimerspec *);
103static int	realtimer_delete(struct itimer *);
104static void	realtimer_clocktime(clockid_t, struct timespec *);
105static void	realtimer_expire(void *);
106
107int		register_posix_clock(int, struct kclock *);
108void		itimer_fire(struct itimer *it);
109int		itimespecfix(struct timespec *ts);
110
111#define CLOCK_CALL(clock, call, arglist)		\
112	((*posix_clocks[clock].call) arglist)
113
114SYSINIT(posix_timer, SI_SUB_P1003_1B, SI_ORDER_FIRST+4, itimer_start, NULL);
115
116
117static int
118settime(struct thread *td, struct timeval *tv)
119{
120	struct timeval delta, tv1, tv2;
121	static struct timeval maxtime, laststep;
122	struct timespec ts;
123	int s;
124
125	s = splclock();
126	microtime(&tv1);
127	delta = *tv;
128	timevalsub(&delta, &tv1);
129
130	/*
131	 * If the system is secure, we do not allow the time to be
132	 * set to a value earlier than 1 second less than the highest
133	 * time we have yet seen. The worst a miscreant can do in
134	 * this circumstance is "freeze" time. He couldn't go
135	 * back to the past.
136	 *
137	 * We similarly do not allow the clock to be stepped more
138	 * than one second, nor more than once per second. This allows
139	 * a miscreant to make the clock march double-time, but no worse.
140	 */
141	if (securelevel_gt(td->td_ucred, 1) != 0) {
142		if (delta.tv_sec < 0 || delta.tv_usec < 0) {
143			/*
144			 * Update maxtime to latest time we've seen.
145			 */
146			if (tv1.tv_sec > maxtime.tv_sec)
147				maxtime = tv1;
148			tv2 = *tv;
149			timevalsub(&tv2, &maxtime);
150			if (tv2.tv_sec < -1) {
151				tv->tv_sec = maxtime.tv_sec - 1;
152				printf("Time adjustment clamped to -1 second\n");
153			}
154		} else {
155			if (tv1.tv_sec == laststep.tv_sec) {
156				splx(s);
157				return (EPERM);
158			}
159			if (delta.tv_sec > 1) {
160				tv->tv_sec = tv1.tv_sec + 1;
161				printf("Time adjustment clamped to +1 second\n");
162			}
163			laststep = *tv;
164		}
165	}
166
167	ts.tv_sec = tv->tv_sec;
168	ts.tv_nsec = tv->tv_usec * 1000;
169	mtx_lock(&Giant);
170	tc_setclock(&ts);
171	resettodr();
172	mtx_unlock(&Giant);
173	return (0);
174}
175
176#ifndef _SYS_SYSPROTO_H_
177struct clock_getcpuclockid2_args {
178	id_t id;
179	int which,
180	clockid_t *clock_id;
181};
182#endif
183/* ARGSUSED */
184int
185sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap)
186{
187	clockid_t clk_id;
188	int error;
189
190	error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id);
191	if (error == 0)
192		error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t));
193	return (error);
194}
195
196int
197kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
198    clockid_t *clk_id)
199{
200	struct proc *p;
201	pid_t pid;
202	lwpid_t tid;
203	int error;
204
205	switch (which) {
206	case CPUCLOCK_WHICH_PID:
207		if (id != 0) {
208			error = pget(id, PGET_CANSEE | PGET_NOTID, &p);
209			if (error != 0)
210				return (error);
211			PROC_UNLOCK(p);
212			pid = id;
213		} else {
214			pid = td->td_proc->p_pid;
215		}
216		*clk_id = MAKE_PROCESS_CPUCLOCK(pid);
217		return (0);
218	case CPUCLOCK_WHICH_TID:
219		tid = id == 0 ? td->td_tid : id;
220		*clk_id = MAKE_THREAD_CPUCLOCK(tid);
221		return (0);
222	default:
223		return (EINVAL);
224	}
225}
226
227#ifndef _SYS_SYSPROTO_H_
228struct clock_gettime_args {
229	clockid_t clock_id;
230	struct	timespec *tp;
231};
232#endif
233/* ARGSUSED */
234int
235sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap)
236{
237	struct timespec ats;
238	int error;
239
240	error = kern_clock_gettime(td, uap->clock_id, &ats);
241	if (error == 0)
242		error = copyout(&ats, uap->tp, sizeof(ats));
243
244	return (error);
245}
246
247static inline void
248cputick2timespec(uint64_t runtime, struct timespec *ats)
249{
250	runtime = cputick2usec(runtime);
251	ats->tv_sec = runtime / 1000000;
252	ats->tv_nsec = runtime % 1000000 * 1000;
253}
254
255static void
256get_thread_cputime(struct thread *targettd, struct timespec *ats)
257{
258	uint64_t runtime, curtime, switchtime;
259
260	if (targettd == NULL) { /* current thread */
261		critical_enter();
262		switchtime = PCPU_GET(switchtime);
263		curtime = cpu_ticks();
264		runtime = curthread->td_runtime;
265		critical_exit();
266		runtime += curtime - switchtime;
267	} else {
268		thread_lock(targettd);
269		runtime = targettd->td_runtime;
270		thread_unlock(targettd);
271	}
272	cputick2timespec(runtime, ats);
273}
274
275static void
276get_process_cputime(struct proc *targetp, struct timespec *ats)
277{
278	uint64_t runtime;
279	struct rusage ru;
280
281	PROC_STATLOCK(targetp);
282	rufetch(targetp, &ru);
283	runtime = targetp->p_rux.rux_runtime;
284	PROC_STATUNLOCK(targetp);
285	cputick2timespec(runtime, ats);
286}
287
288static int
289get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats)
290{
291	struct proc *p, *p2;
292	struct thread *td2;
293	lwpid_t tid;
294	pid_t pid;
295	int error;
296
297	p = td->td_proc;
298	if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) {
299		tid = clock_id & CPUCLOCK_ID_MASK;
300		td2 = tdfind(tid, p->p_pid);
301		if (td2 == NULL)
302			return (EINVAL);
303		get_thread_cputime(td2, ats);
304		PROC_UNLOCK(td2->td_proc);
305	} else {
306		pid = clock_id & CPUCLOCK_ID_MASK;
307		error = pget(pid, PGET_CANSEE, &p2);
308		if (error != 0)
309			return (EINVAL);
310		get_process_cputime(p2, ats);
311		PROC_UNLOCK(p2);
312	}
313	return (0);
314}
315
316int
317kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats)
318{
319	struct timeval sys, user;
320	struct proc *p;
321
322	p = td->td_proc;
323	switch (clock_id) {
324	case CLOCK_REALTIME:		/* Default to precise. */
325	case CLOCK_REALTIME_PRECISE:
326		nanotime(ats);
327		break;
328	case CLOCK_REALTIME_FAST:
329		getnanotime(ats);
330		break;
331	case CLOCK_VIRTUAL:
332		PROC_LOCK(p);
333		PROC_STATLOCK(p);
334		calcru(p, &user, &sys);
335		PROC_STATUNLOCK(p);
336		PROC_UNLOCK(p);
337		TIMEVAL_TO_TIMESPEC(&user, ats);
338		break;
339	case CLOCK_PROF:
340		PROC_LOCK(p);
341		PROC_STATLOCK(p);
342		calcru(p, &user, &sys);
343		PROC_STATUNLOCK(p);
344		PROC_UNLOCK(p);
345		timevaladd(&user, &sys);
346		TIMEVAL_TO_TIMESPEC(&user, ats);
347		break;
348	case CLOCK_MONOTONIC:		/* Default to precise. */
349	case CLOCK_MONOTONIC_PRECISE:
350	case CLOCK_UPTIME:
351	case CLOCK_UPTIME_PRECISE:
352		nanouptime(ats);
353		break;
354	case CLOCK_UPTIME_FAST:
355	case CLOCK_MONOTONIC_FAST:
356		getnanouptime(ats);
357		break;
358	case CLOCK_SECOND:
359		ats->tv_sec = time_second;
360		ats->tv_nsec = 0;
361		break;
362	case CLOCK_THREAD_CPUTIME_ID:
363		get_thread_cputime(NULL, ats);
364		break;
365	case CLOCK_PROCESS_CPUTIME_ID:
366		PROC_LOCK(p);
367		get_process_cputime(p, ats);
368		PROC_UNLOCK(p);
369		break;
370	default:
371		if ((int)clock_id >= 0)
372			return (EINVAL);
373		return (get_cputime(td, clock_id, ats));
374	}
375	return (0);
376}
377
378#ifndef _SYS_SYSPROTO_H_
379struct clock_settime_args {
380	clockid_t clock_id;
381	const struct	timespec *tp;
382};
383#endif
384/* ARGSUSED */
385int
386sys_clock_settime(struct thread *td, struct clock_settime_args *uap)
387{
388	struct timespec ats;
389	int error;
390
391	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
392		return (error);
393	return (kern_clock_settime(td, uap->clock_id, &ats));
394}
395
396int
397kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats)
398{
399	struct timeval atv;
400	int error;
401
402	if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
403		return (error);
404	if (clock_id != CLOCK_REALTIME)
405		return (EINVAL);
406	if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
407		return (EINVAL);
408	/* XXX Don't convert nsec->usec and back */
409	TIMESPEC_TO_TIMEVAL(&atv, ats);
410	error = settime(td, &atv);
411	return (error);
412}
413
414#ifndef _SYS_SYSPROTO_H_
415struct clock_getres_args {
416	clockid_t clock_id;
417	struct	timespec *tp;
418};
419#endif
420int
421sys_clock_getres(struct thread *td, struct clock_getres_args *uap)
422{
423	struct timespec ts;
424	int error;
425
426	if (uap->tp == NULL)
427		return (0);
428
429	error = kern_clock_getres(td, uap->clock_id, &ts);
430	if (error == 0)
431		error = copyout(&ts, uap->tp, sizeof(ts));
432	return (error);
433}
434
435int
436kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts)
437{
438
439	ts->tv_sec = 0;
440	switch (clock_id) {
441	case CLOCK_REALTIME:
442	case CLOCK_REALTIME_FAST:
443	case CLOCK_REALTIME_PRECISE:
444	case CLOCK_MONOTONIC:
445	case CLOCK_MONOTONIC_FAST:
446	case CLOCK_MONOTONIC_PRECISE:
447	case CLOCK_UPTIME:
448	case CLOCK_UPTIME_FAST:
449	case CLOCK_UPTIME_PRECISE:
450		/*
451		 * Round up the result of the division cheaply by adding 1.
452		 * Rounding up is especially important if rounding down
453		 * would give 0.  Perfect rounding is unimportant.
454		 */
455		ts->tv_nsec = 1000000000 / tc_getfrequency() + 1;
456		break;
457	case CLOCK_VIRTUAL:
458	case CLOCK_PROF:
459		/* Accurately round up here because we can do so cheaply. */
460		ts->tv_nsec = (1000000000 + hz - 1) / hz;
461		break;
462	case CLOCK_SECOND:
463		ts->tv_sec = 1;
464		ts->tv_nsec = 0;
465		break;
466	case CLOCK_THREAD_CPUTIME_ID:
467	case CLOCK_PROCESS_CPUTIME_ID:
468	cputime:
469		/* sync with cputick2usec */
470		ts->tv_nsec = 1000000 / cpu_tickrate();
471		if (ts->tv_nsec == 0)
472			ts->tv_nsec = 1000;
473		break;
474	default:
475		if ((int)clock_id < 0)
476			goto cputime;
477		return (EINVAL);
478	}
479	return (0);
480}
481
482static uint8_t nanowait[MAXCPU];
483
484int
485kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
486{
487	struct timespec ts;
488	sbintime_t sbt, sbtt, prec, tmp;
489	time_t over;
490	int error;
491
492	if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
493		return (EINVAL);
494	if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
495		return (0);
496	ts = *rqt;
497	if (ts.tv_sec > INT32_MAX / 2) {
498		over = ts.tv_sec - INT32_MAX / 2;
499		ts.tv_sec -= over;
500	} else
501		over = 0;
502	tmp = tstosbt(ts);
503	prec = tmp;
504	prec >>= tc_precexp;
505	if (TIMESEL(&sbt, tmp))
506		sbt += tc_tick_sbt;
507	sbt += tmp;
508	error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
509	    sbt, prec, C_ABSOLUTE);
510	if (error != EWOULDBLOCK) {
511		if (error == ERESTART)
512			error = EINTR;
513		TIMESEL(&sbtt, tmp);
514		if (rmt != NULL) {
515			ts = sbttots(sbt - sbtt);
516			ts.tv_sec += over;
517			if (ts.tv_sec < 0)
518				timespecclear(&ts);
519			*rmt = ts;
520		}
521		if (sbtt >= sbt)
522			return (0);
523		return (error);
524	}
525	return (0);
526}
527
528#ifndef _SYS_SYSPROTO_H_
529struct nanosleep_args {
530	struct	timespec *rqtp;
531	struct	timespec *rmtp;
532};
533#endif
534/* ARGSUSED */
535int
536sys_nanosleep(struct thread *td, struct nanosleep_args *uap)
537{
538	struct timespec rmt, rqt;
539	int error;
540
541	error = copyin(uap->rqtp, &rqt, sizeof(rqt));
542	if (error)
543		return (error);
544
545	if (uap->rmtp &&
546	    !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE))
547			return (EFAULT);
548	error = kern_nanosleep(td, &rqt, &rmt);
549	if (error == EINTR && uap->rmtp) {
550		int error2;
551
552		error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
553		if (error2)
554			error = error2;
555	}
556	return (error);
557}
558
559#ifndef _SYS_SYSPROTO_H_
560struct gettimeofday_args {
561	struct	timeval *tp;
562	struct	timezone *tzp;
563};
564#endif
565/* ARGSUSED */
566int
567sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap)
568{
569	struct timeval atv;
570	struct timezone rtz;
571	int error = 0;
572
573	if (uap->tp) {
574		microtime(&atv);
575		error = copyout(&atv, uap->tp, sizeof (atv));
576	}
577	if (error == 0 && uap->tzp != NULL) {
578		rtz.tz_minuteswest = tz_minuteswest;
579		rtz.tz_dsttime = tz_dsttime;
580		error = copyout(&rtz, uap->tzp, sizeof (rtz));
581	}
582	return (error);
583}
584
585#ifndef _SYS_SYSPROTO_H_
586struct settimeofday_args {
587	struct	timeval *tv;
588	struct	timezone *tzp;
589};
590#endif
591/* ARGSUSED */
592int
593sys_settimeofday(struct thread *td, struct settimeofday_args *uap)
594{
595	struct timeval atv, *tvp;
596	struct timezone atz, *tzp;
597	int error;
598
599	if (uap->tv) {
600		error = copyin(uap->tv, &atv, sizeof(atv));
601		if (error)
602			return (error);
603		tvp = &atv;
604	} else
605		tvp = NULL;
606	if (uap->tzp) {
607		error = copyin(uap->tzp, &atz, sizeof(atz));
608		if (error)
609			return (error);
610		tzp = &atz;
611	} else
612		tzp = NULL;
613	return (kern_settimeofday(td, tvp, tzp));
614}
615
616int
617kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp)
618{
619	int error;
620
621	error = priv_check(td, PRIV_SETTIMEOFDAY);
622	if (error)
623		return (error);
624	/* Verify all parameters before changing time. */
625	if (tv) {
626		if (tv->tv_usec < 0 || tv->tv_usec >= 1000000)
627			return (EINVAL);
628		error = settime(td, tv);
629	}
630	if (tzp && error == 0) {
631		tz_minuteswest = tzp->tz_minuteswest;
632		tz_dsttime = tzp->tz_dsttime;
633	}
634	return (error);
635}
636
637/*
638 * Get value of an interval timer.  The process virtual and profiling virtual
639 * time timers are kept in the p_stats area, since they can be swapped out.
640 * These are kept internally in the way they are specified externally: in
641 * time until they expire.
642 *
643 * The real time interval timer is kept in the process table slot for the
644 * process, and its value (it_value) is kept as an absolute time rather than
645 * as a delta, so that it is easy to keep periodic real-time signals from
646 * drifting.
647 *
648 * Virtual time timers are processed in the hardclock() routine of
649 * kern_clock.c.  The real time timer is processed by a timeout routine,
650 * called from the softclock() routine.  Since a callout may be delayed in
651 * real time due to interrupt processing in the system, it is possible for
652 * the real time timeout routine (realitexpire, given below), to be delayed
653 * in real time past when it is supposed to occur.  It does not suffice,
654 * therefore, to reload the real timer .it_value from the real time timers
655 * .it_interval.  Rather, we compute the next time in absolute time the timer
656 * should go off.
657 */
658#ifndef _SYS_SYSPROTO_H_
659struct getitimer_args {
660	u_int	which;
661	struct	itimerval *itv;
662};
663#endif
664int
665sys_getitimer(struct thread *td, struct getitimer_args *uap)
666{
667	struct itimerval aitv;
668	int error;
669
670	error = kern_getitimer(td, uap->which, &aitv);
671	if (error != 0)
672		return (error);
673	return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
674}
675
676int
677kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv)
678{
679	struct proc *p = td->td_proc;
680	struct timeval ctv;
681
682	if (which > ITIMER_PROF)
683		return (EINVAL);
684
685	if (which == ITIMER_REAL) {
686		/*
687		 * Convert from absolute to relative time in .it_value
688		 * part of real time timer.  If time for real time timer
689		 * has passed return 0, else return difference between
690		 * current time and time for the timer to go off.
691		 */
692		PROC_LOCK(p);
693		*aitv = p->p_realtimer;
694		PROC_UNLOCK(p);
695		if (timevalisset(&aitv->it_value)) {
696			microuptime(&ctv);
697			if (timevalcmp(&aitv->it_value, &ctv, <))
698				timevalclear(&aitv->it_value);
699			else
700				timevalsub(&aitv->it_value, &ctv);
701		}
702	} else {
703		PROC_ITIMLOCK(p);
704		*aitv = p->p_stats->p_timer[which];
705		PROC_ITIMUNLOCK(p);
706	}
707#ifdef KTRACE
708	if (KTRPOINT(td, KTR_STRUCT))
709		ktritimerval(aitv);
710#endif
711	return (0);
712}
713
714#ifndef _SYS_SYSPROTO_H_
715struct setitimer_args {
716	u_int	which;
717	struct	itimerval *itv, *oitv;
718};
719#endif
720int
721sys_setitimer(struct thread *td, struct setitimer_args *uap)
722{
723	struct itimerval aitv, oitv;
724	int error;
725
726	if (uap->itv == NULL) {
727		uap->itv = uap->oitv;
728		return (sys_getitimer(td, (struct getitimer_args *)uap));
729	}
730
731	if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval))))
732		return (error);
733	error = kern_setitimer(td, uap->which, &aitv, &oitv);
734	if (error != 0 || uap->oitv == NULL)
735		return (error);
736	return (copyout(&oitv, uap->oitv, sizeof(struct itimerval)));
737}
738
739int
740kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv,
741    struct itimerval *oitv)
742{
743	struct proc *p = td->td_proc;
744	struct timeval ctv;
745	sbintime_t sbt, pr;
746
747	if (aitv == NULL)
748		return (kern_getitimer(td, which, oitv));
749
750	if (which > ITIMER_PROF)
751		return (EINVAL);
752#ifdef KTRACE
753	if (KTRPOINT(td, KTR_STRUCT))
754		ktritimerval(aitv);
755#endif
756	if (itimerfix(&aitv->it_value) ||
757	    aitv->it_value.tv_sec > INT32_MAX / 2)
758		return (EINVAL);
759	if (!timevalisset(&aitv->it_value))
760		timevalclear(&aitv->it_interval);
761	else if (itimerfix(&aitv->it_interval) ||
762	    aitv->it_interval.tv_sec > INT32_MAX / 2)
763		return (EINVAL);
764
765	if (which == ITIMER_REAL) {
766		PROC_LOCK(p);
767		if (timevalisset(&p->p_realtimer.it_value))
768			callout_stop(&p->p_itcallout);
769		microuptime(&ctv);
770		if (timevalisset(&aitv->it_value)) {
771			pr = tvtosbt(aitv->it_value) >> tc_precexp;
772			timevaladd(&aitv->it_value, &ctv);
773			sbt = tvtosbt(aitv->it_value);
774			callout_reset_sbt(&p->p_itcallout, sbt, pr,
775			    realitexpire, p, C_ABSOLUTE);
776		}
777		*oitv = p->p_realtimer;
778		p->p_realtimer = *aitv;
779		PROC_UNLOCK(p);
780		if (timevalisset(&oitv->it_value)) {
781			if (timevalcmp(&oitv->it_value, &ctv, <))
782				timevalclear(&oitv->it_value);
783			else
784				timevalsub(&oitv->it_value, &ctv);
785		}
786	} else {
787		if (aitv->it_interval.tv_sec == 0 &&
788		    aitv->it_interval.tv_usec != 0 &&
789		    aitv->it_interval.tv_usec < tick)
790			aitv->it_interval.tv_usec = tick;
791		if (aitv->it_value.tv_sec == 0 &&
792		    aitv->it_value.tv_usec != 0 &&
793		    aitv->it_value.tv_usec < tick)
794			aitv->it_value.tv_usec = tick;
795		PROC_ITIMLOCK(p);
796		*oitv = p->p_stats->p_timer[which];
797		p->p_stats->p_timer[which] = *aitv;
798		PROC_ITIMUNLOCK(p);
799	}
800#ifdef KTRACE
801	if (KTRPOINT(td, KTR_STRUCT))
802		ktritimerval(oitv);
803#endif
804	return (0);
805}
806
807/*
808 * Real interval timer expired:
809 * send process whose timer expired an alarm signal.
810 * If time is not set up to reload, then just return.
811 * Else compute next time timer should go off which is > current time.
812 * This is where delay in processing this timeout causes multiple
813 * SIGALRM calls to be compressed into one.
814 * tvtohz() always adds 1 to allow for the time until the next clock
815 * interrupt being strictly less than 1 clock tick, but we don't want
816 * that here since we want to appear to be in sync with the clock
817 * interrupt even when we're delayed.
818 */
819void
820realitexpire(void *arg)
821{
822	struct proc *p;
823	struct timeval ctv;
824	sbintime_t isbt;
825
826	p = (struct proc *)arg;
827	kern_psignal(p, SIGALRM);
828	if (!timevalisset(&p->p_realtimer.it_interval)) {
829		timevalclear(&p->p_realtimer.it_value);
830		if (p->p_flag & P_WEXIT)
831			wakeup(&p->p_itcallout);
832		return;
833	}
834	isbt = tvtosbt(p->p_realtimer.it_interval);
835	if (isbt >= sbt_timethreshold)
836		getmicrouptime(&ctv);
837	else
838		microuptime(&ctv);
839	do {
840		timevaladd(&p->p_realtimer.it_value,
841		    &p->p_realtimer.it_interval);
842	} while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=));
843	callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value),
844	    isbt >> tc_precexp, realitexpire, p, C_ABSOLUTE);
845}
846
847/*
848 * Check that a proposed value to load into the .it_value or
849 * .it_interval part of an interval timer is acceptable, and
850 * fix it to have at least minimal value (i.e. if it is less
851 * than the resolution of the clock, round it up.)
852 */
853int
854itimerfix(struct timeval *tv)
855{
856
857	if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
858		return (EINVAL);
859	if (tv->tv_sec == 0 && tv->tv_usec != 0 &&
860	    tv->tv_usec < (u_int)tick / 16)
861		tv->tv_usec = (u_int)tick / 16;
862	return (0);
863}
864
865/*
866 * Decrement an interval timer by a specified number
867 * of microseconds, which must be less than a second,
868 * i.e. < 1000000.  If the timer expires, then reload
869 * it.  In this case, carry over (usec - old value) to
870 * reduce the value reloaded into the timer so that
871 * the timer does not drift.  This routine assumes
872 * that it is called in a context where the timers
873 * on which it is operating cannot change in value.
874 */
875int
876itimerdecr(struct itimerval *itp, int usec)
877{
878
879	if (itp->it_value.tv_usec < usec) {
880		if (itp->it_value.tv_sec == 0) {
881			/* expired, and already in next interval */
882			usec -= itp->it_value.tv_usec;
883			goto expire;
884		}
885		itp->it_value.tv_usec += 1000000;
886		itp->it_value.tv_sec--;
887	}
888	itp->it_value.tv_usec -= usec;
889	usec = 0;
890	if (timevalisset(&itp->it_value))
891		return (1);
892	/* expired, exactly at end of interval */
893expire:
894	if (timevalisset(&itp->it_interval)) {
895		itp->it_value = itp->it_interval;
896		itp->it_value.tv_usec -= usec;
897		if (itp->it_value.tv_usec < 0) {
898			itp->it_value.tv_usec += 1000000;
899			itp->it_value.tv_sec--;
900		}
901	} else
902		itp->it_value.tv_usec = 0;		/* sec is already 0 */
903	return (0);
904}
905
906/*
907 * Add and subtract routines for timevals.
908 * N.B.: subtract routine doesn't deal with
909 * results which are before the beginning,
910 * it just gets very confused in this case.
911 * Caveat emptor.
912 */
913void
914timevaladd(struct timeval *t1, const struct timeval *t2)
915{
916
917	t1->tv_sec += t2->tv_sec;
918	t1->tv_usec += t2->tv_usec;
919	timevalfix(t1);
920}
921
922void
923timevalsub(struct timeval *t1, const struct timeval *t2)
924{
925
926	t1->tv_sec -= t2->tv_sec;
927	t1->tv_usec -= t2->tv_usec;
928	timevalfix(t1);
929}
930
/*
 * Bring tv_usec back towards [0, 1000000) after an add or subtract by
 * moving at most one second between the two fields.  Inputs that are
 * off by more than one second are only corrected by one step.
 */
static void
timevalfix(struct timeval *t1)
{

	if (t1->tv_usec < 0) {
		/* Borrow a second. */
		t1->tv_usec += 1000000;
		t1->tv_sec--;
	} else if (t1->tv_usec >= 1000000) {
		/* Carry a second. */
		t1->tv_usec -= 1000000;
		t1->tv_sec++;
	}
}
944
945/*
946 * ratecheck(): simple time-based rate-limit checking.
947 */
948int
949ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
950{
951	struct timeval tv, delta;
952	int rv = 0;
953
954	getmicrouptime(&tv);		/* NB: 10ms precision */
955	delta = tv;
956	timevalsub(&delta, lasttime);
957
958	/*
959	 * check for 0,0 is so that the message will be seen at least once,
960	 * even if interval is huge.
961	 */
962	if (timevalcmp(&delta, mininterval, >=) ||
963	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
964		*lasttime = tv;
965		rv = 1;
966	}
967
968	return (rv);
969}
970
971/*
972 * ppsratecheck(): packets (or events) per second limitation.
973 *
974 * Return 0 if the limit is to be enforced (e.g. the caller
975 * should drop a packet because of the rate limitation).
976 *
977 * maxpps of 0 always causes zero to be returned.  maxpps of -1
978 * always causes 1 to be returned; this effectively defeats rate
979 * limiting.
980 *
981 * Note that we maintain the struct timeval for compatibility
982 * with other bsd systems.  We reuse the storage and just monitor
983 * clock ticks for minimal overhead.
984 */
985int
986ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
987{
988	int now;
989
990	/*
991	 * Reset the last time and counter if this is the first call
992	 * or more than a second has passed since the last update of
993	 * lasttime.
994	 */
995	now = ticks;
996	if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
997		lasttime->tv_sec = now;
998		*curpps = 1;
999		return (maxpps != 0);
1000	} else {
1001		(*curpps)++;		/* NB: ignore potential overflow */
1002		return (maxpps < 0 || *curpps <= maxpps);
1003	}
1004}
1005
1006static void
1007itimer_start(void)
1008{
1009	struct kclock rt_clock = {
1010		.timer_create  = realtimer_create,
1011		.timer_delete  = realtimer_delete,
1012		.timer_settime = realtimer_settime,
1013		.timer_gettime = realtimer_gettime,
1014		.event_hook    = NULL
1015	};
1016
1017	itimer_zone = uma_zcreate("itimer", sizeof(struct itimer),
1018		NULL, NULL, itimer_init, itimer_fini, UMA_ALIGN_PTR, 0);
1019	register_posix_clock(CLOCK_REALTIME,  &rt_clock);
1020	register_posix_clock(CLOCK_MONOTONIC, &rt_clock);
1021	p31b_setcfg(CTL_P1003_1B_TIMERS, 200112L);
1022	p31b_setcfg(CTL_P1003_1B_DELAYTIMER_MAX, INT_MAX);
1023	p31b_setcfg(CTL_P1003_1B_TIMER_MAX, TIMER_MAX);
1024	EVENTHANDLER_REGISTER(process_exit, itimers_event_hook_exit,
1025		(void *)ITIMER_EV_EXIT, EVENTHANDLER_PRI_ANY);
1026	EVENTHANDLER_REGISTER(process_exec, itimers_event_hook_exec,
1027		(void *)ITIMER_EV_EXEC, EVENTHANDLER_PRI_ANY);
1028}
1029
1030int
1031register_posix_clock(int clockid, struct kclock *clk)
1032{
1033	if ((unsigned)clockid >= MAX_CLOCKS) {
1034		printf("%s: invalid clockid\n", __func__);
1035		return (0);
1036	}
1037	posix_clocks[clockid] = *clk;
1038	return (1);
1039}
1040
1041static int
1042itimer_init(void *mem, int size, int flags)
1043{
1044	struct itimer *it;
1045
1046	it = (struct itimer *)mem;
1047	mtx_init(&it->it_mtx, "itimer lock", NULL, MTX_DEF);
1048	return (0);
1049}
1050
1051static void
1052itimer_fini(void *mem, int size)
1053{
1054	struct itimer *it;
1055
1056	it = (struct itimer *)mem;
1057	mtx_destroy(&it->it_mtx);
1058}
1059
1060static void
1061itimer_enter(struct itimer *it)
1062{
1063
1064	mtx_assert(&it->it_mtx, MA_OWNED);
1065	it->it_usecount++;
1066}
1067
1068static void
1069itimer_leave(struct itimer *it)
1070{
1071
1072	mtx_assert(&it->it_mtx, MA_OWNED);
1073	KASSERT(it->it_usecount > 0, ("invalid it_usecount"));
1074
1075	if (--it->it_usecount == 0 && (it->it_flags & ITF_WANTED) != 0)
1076		wakeup(it);
1077}
1078
1079#ifndef _SYS_SYSPROTO_H_
1080struct ktimer_create_args {
1081	clockid_t clock_id;
1082	struct sigevent * evp;
1083	int * timerid;
1084};
1085#endif
1086int
1087sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap)
1088{
1089	struct sigevent *evp, ev;
1090	int id;
1091	int error;
1092
1093	if (uap->evp == NULL) {
1094		evp = NULL;
1095	} else {
1096		error = copyin(uap->evp, &ev, sizeof(ev));
1097		if (error != 0)
1098			return (error);
1099		evp = &ev;
1100	}
1101	error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
1102	if (error == 0) {
1103		error = copyout(&id, uap->timerid, sizeof(int));
1104		if (error != 0)
1105			kern_ktimer_delete(td, id);
1106	}
1107	return (error);
1108}
1109
1110int
1111kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp,
1112    int *timerid, int preset_id)
1113{
1114	struct proc *p = td->td_proc;
1115	struct itimer *it;
1116	int id;
1117	int error;
1118
1119	if (clock_id < 0 || clock_id >= MAX_CLOCKS)
1120		return (EINVAL);
1121
1122	if (posix_clocks[clock_id].timer_create == NULL)
1123		return (EINVAL);
1124
1125	if (evp != NULL) {
1126		if (evp->sigev_notify != SIGEV_NONE &&
1127		    evp->sigev_notify != SIGEV_SIGNAL &&
1128		    evp->sigev_notify != SIGEV_THREAD_ID)
1129			return (EINVAL);
1130		if ((evp->sigev_notify == SIGEV_SIGNAL ||
1131		     evp->sigev_notify == SIGEV_THREAD_ID) &&
1132			!_SIG_VALID(evp->sigev_signo))
1133			return (EINVAL);
1134	}
1135
1136	if (p->p_itimers == NULL)
1137		itimers_alloc(p);
1138
1139	it = uma_zalloc(itimer_zone, M_WAITOK);
1140	it->it_flags = 0;
1141	it->it_usecount = 0;
1142	it->it_active = 0;
1143	timespecclear(&it->it_time.it_value);
1144	timespecclear(&it->it_time.it_interval);
1145	it->it_overrun = 0;
1146	it->it_overrun_last = 0;
1147	it->it_clockid = clock_id;
1148	it->it_timerid = -1;
1149	it->it_proc = p;
1150	ksiginfo_init(&it->it_ksi);
1151	it->it_ksi.ksi_flags |= KSI_INS | KSI_EXT;
1152	error = CLOCK_CALL(clock_id, timer_create, (it));
1153	if (error != 0)
1154		goto out;
1155
1156	PROC_LOCK(p);
1157	if (preset_id != -1) {
1158		KASSERT(preset_id >= 0 && preset_id < 3, ("invalid preset_id"));
1159		id = preset_id;
1160		if (p->p_itimers->its_timers[id] != NULL) {
1161			PROC_UNLOCK(p);
1162			error = 0;
1163			goto out;
1164		}
1165	} else {
1166		/*
1167		 * Find a free timer slot, skipping those reserved
1168		 * for setitimer().
1169		 */
1170		for (id = 3; id < TIMER_MAX; id++)
1171			if (p->p_itimers->its_timers[id] == NULL)
1172				break;
1173		if (id == TIMER_MAX) {
1174			PROC_UNLOCK(p);
1175			error = EAGAIN;
1176			goto out;
1177		}
1178	}
1179	it->it_timerid = id;
1180	p->p_itimers->its_timers[id] = it;
1181	if (evp != NULL)
1182		it->it_sigev = *evp;
1183	else {
1184		it->it_sigev.sigev_notify = SIGEV_SIGNAL;
1185		switch (clock_id) {
1186		default:
1187		case CLOCK_REALTIME:
1188			it->it_sigev.sigev_signo = SIGALRM;
1189			break;
1190		case CLOCK_VIRTUAL:
1191 			it->it_sigev.sigev_signo = SIGVTALRM;
1192			break;
1193		case CLOCK_PROF:
1194			it->it_sigev.sigev_signo = SIGPROF;
1195			break;
1196		}
1197		it->it_sigev.sigev_value.sival_int = id;
1198	}
1199
1200	if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
1201	    it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
1202		it->it_ksi.ksi_signo = it->it_sigev.sigev_signo;
1203		it->it_ksi.ksi_code = SI_TIMER;
1204		it->it_ksi.ksi_value = it->it_sigev.sigev_value;
1205		it->it_ksi.ksi_timerid = id;
1206	}
1207	PROC_UNLOCK(p);
1208	*timerid = id;
1209	return (0);
1210
1211out:
1212	ITIMER_LOCK(it);
1213	CLOCK_CALL(it->it_clockid, timer_delete, (it));
1214	ITIMER_UNLOCK(it);
1215	uma_zfree(itimer_zone, it);
1216	return (error);
1217}
1218
#ifndef _SYS_SYSPROTO_H_
struct ktimer_delete_args {
	int timerid;
};
#endif
/*
 * System call entry point for timer deletion; forwards the user-supplied
 * timer id to kern_ktimer_delete() and returns its result.
 */
int
sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap)
{
	int rv;

	rv = kern_ktimer_delete(td, uap->timerid);
	return (rv);
}
1230
1231static struct itimer *
1232itimer_find(struct proc *p, int timerid)
1233{
1234	struct itimer *it;
1235
1236	PROC_LOCK_ASSERT(p, MA_OWNED);
1237	if ((p->p_itimers == NULL) ||
1238	    (timerid < 0) || (timerid >= TIMER_MAX) ||
1239	    (it = p->p_itimers->its_timers[timerid]) == NULL) {
1240		return (NULL);
1241	}
1242	ITIMER_LOCK(it);
1243	if ((it->it_flags & ITF_DELETING) != 0) {
1244		ITIMER_UNLOCK(it);
1245		it = NULL;
1246	}
1247	return (it);
1248}
1249
1250int
1251kern_ktimer_delete(struct thread *td, int timerid)
1252{
1253	struct proc *p = td->td_proc;
1254	struct itimer *it;
1255
1256	PROC_LOCK(p);
1257	it = itimer_find(p, timerid);
1258	if (it == NULL) {
1259		PROC_UNLOCK(p);
1260		return (EINVAL);
1261	}
1262	PROC_UNLOCK(p);
1263
1264	it->it_flags |= ITF_DELETING;
1265	while (it->it_usecount > 0) {
1266		it->it_flags |= ITF_WANTED;
1267		msleep(it, &it->it_mtx, PPAUSE, "itimer", 0);
1268	}
1269	it->it_flags &= ~ITF_WANTED;
1270	CLOCK_CALL(it->it_clockid, timer_delete, (it));
1271	ITIMER_UNLOCK(it);
1272
1273	PROC_LOCK(p);
1274	if (KSI_ONQ(&it->it_ksi))
1275		sigqueue_take(&it->it_ksi);
1276	p->p_itimers->its_timers[timerid] = NULL;
1277	PROC_UNLOCK(p);
1278	uma_zfree(itimer_zone, it);
1279	return (0);
1280}
1281
1282#ifndef _SYS_SYSPROTO_H_
1283struct ktimer_settime_args {
1284	int timerid;
1285	int flags;
1286	const struct itimerspec * value;
1287	struct itimerspec * ovalue;
1288};
1289#endif
1290int
1291sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap)
1292{
1293	struct itimerspec val, oval, *ovalp;
1294	int error;
1295
1296	error = copyin(uap->value, &val, sizeof(val));
1297	if (error != 0)
1298		return (error);
1299	ovalp = uap->ovalue != NULL ? &oval : NULL;
1300	error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp);
1301	if (error == 0 && uap->ovalue != NULL)
1302		error = copyout(ovalp, uap->ovalue, sizeof(*ovalp));
1303	return (error);
1304}
1305
1306int
1307kern_ktimer_settime(struct thread *td, int timer_id, int flags,
1308    struct itimerspec *val, struct itimerspec *oval)
1309{
1310	struct proc *p;
1311	struct itimer *it;
1312	int error;
1313
1314	p = td->td_proc;
1315	PROC_LOCK(p);
1316	if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
1317		PROC_UNLOCK(p);
1318		error = EINVAL;
1319	} else {
1320		PROC_UNLOCK(p);
1321		itimer_enter(it);
1322		error = CLOCK_CALL(it->it_clockid, timer_settime, (it,
1323		    flags, val, oval));
1324		itimer_leave(it);
1325		ITIMER_UNLOCK(it);
1326	}
1327	return (error);
1328}
1329
1330#ifndef _SYS_SYSPROTO_H_
1331struct ktimer_gettime_args {
1332	int timerid;
1333	struct itimerspec * value;
1334};
1335#endif
1336int
1337sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap)
1338{
1339	struct itimerspec val;
1340	int error;
1341
1342	error = kern_ktimer_gettime(td, uap->timerid, &val);
1343	if (error == 0)
1344		error = copyout(&val, uap->value, sizeof(val));
1345	return (error);
1346}
1347
1348int
1349kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val)
1350{
1351	struct proc *p;
1352	struct itimer *it;
1353	int error;
1354
1355	p = td->td_proc;
1356	PROC_LOCK(p);
1357	if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
1358		PROC_UNLOCK(p);
1359		error = EINVAL;
1360	} else {
1361		PROC_UNLOCK(p);
1362		itimer_enter(it);
1363		error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val));
1364		itimer_leave(it);
1365		ITIMER_UNLOCK(it);
1366	}
1367	return (error);
1368}
1369
#ifndef _SYS_SYSPROTO_H_
/*
 * Fallback declaration of the argument structure; its tag must match
 * the type taken by sys_ktimer_getoverrun() below.
 */
struct ktimer_getoverrun_args {
	int timerid;
};
#endif
/*
 * System call entry point for reading a timer's overrun count;
 * forwards the user-supplied timer id to kern_ktimer_getoverrun(),
 * which stores the count in td->td_retval[0].
 */
int
sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap)
{

	return (kern_ktimer_getoverrun(td, uap->timerid));
}
1381
1382int
1383kern_ktimer_getoverrun(struct thread *td, int timer_id)
1384{
1385	struct proc *p = td->td_proc;
1386	struct itimer *it;
1387	int error ;
1388
1389	PROC_LOCK(p);
1390	if (timer_id < 3 ||
1391	    (it = itimer_find(p, timer_id)) == NULL) {
1392		PROC_UNLOCK(p);
1393		error = EINVAL;
1394	} else {
1395		td->td_retval[0] = it->it_overrun_last;
1396		ITIMER_UNLOCK(it);
1397		PROC_UNLOCK(p);
1398		error = 0;
1399	}
1400	return (error);
1401}
1402
1403static int
1404realtimer_create(struct itimer *it)
1405{
1406	callout_init_mtx(&it->it_callout, &it->it_mtx, 0);
1407	return (0);
1408}
1409
1410static int
1411realtimer_delete(struct itimer *it)
1412{
1413	mtx_assert(&it->it_mtx, MA_OWNED);
1414
1415	/*
1416	 * clear timer's value and interval to tell realtimer_expire
1417	 * to not rearm the timer.
1418	 */
1419	timespecclear(&it->it_time.it_value);
1420	timespecclear(&it->it_time.it_interval);
1421	ITIMER_UNLOCK(it);
1422	callout_drain(&it->it_callout);
1423	ITIMER_LOCK(it);
1424	return (0);
1425}
1426
1427static int
1428realtimer_gettime(struct itimer *it, struct itimerspec *ovalue)
1429{
1430	struct timespec cts;
1431
1432	mtx_assert(&it->it_mtx, MA_OWNED);
1433
1434	realtimer_clocktime(it->it_clockid, &cts);
1435	*ovalue = it->it_time;
1436	if (ovalue->it_value.tv_sec != 0 || ovalue->it_value.tv_nsec != 0) {
1437		timespecsub(&ovalue->it_value, &cts);
1438		if (ovalue->it_value.tv_sec < 0 ||
1439		    (ovalue->it_value.tv_sec == 0 &&
1440		     ovalue->it_value.tv_nsec == 0)) {
1441			ovalue->it_value.tv_sec  = 0;
1442			ovalue->it_value.tv_nsec = 1;
1443		}
1444	}
1445	return (0);
1446}
1447
1448static int
1449realtimer_settime(struct itimer *it, int flags,
1450	struct itimerspec *value, struct itimerspec *ovalue)
1451{
1452	struct timespec cts, ts;
1453	struct timeval tv;
1454	struct itimerspec val;
1455
1456	mtx_assert(&it->it_mtx, MA_OWNED);
1457
1458	val = *value;
1459	if (itimespecfix(&val.it_value))
1460		return (EINVAL);
1461
1462	if (timespecisset(&val.it_value)) {
1463		if (itimespecfix(&val.it_interval))
1464			return (EINVAL);
1465	} else {
1466		timespecclear(&val.it_interval);
1467	}
1468
1469	if (ovalue != NULL)
1470		realtimer_gettime(it, ovalue);
1471
1472	it->it_time = val;
1473	if (timespecisset(&val.it_value)) {
1474		realtimer_clocktime(it->it_clockid, &cts);
1475		ts = val.it_value;
1476		if ((flags & TIMER_ABSTIME) == 0) {
1477			/* Convert to absolute time. */
1478			timespecadd(&it->it_time.it_value, &cts);
1479		} else {
1480			timespecsub(&ts, &cts);
1481			/*
1482			 * We don't care if ts is negative, tztohz will
1483			 * fix it.
1484			 */
1485		}
1486		TIMESPEC_TO_TIMEVAL(&tv, &ts);
1487		callout_reset(&it->it_callout, tvtohz(&tv),
1488			realtimer_expire, it);
1489	} else {
1490		callout_stop(&it->it_callout);
1491	}
1492
1493	return (0);
1494}
1495
1496static void
1497realtimer_clocktime(clockid_t id, struct timespec *ts)
1498{
1499	if (id == CLOCK_REALTIME)
1500		getnanotime(ts);
1501	else	/* CLOCK_MONOTONIC */
1502		getnanouptime(ts);
1503}
1504
1505int
1506itimer_accept(struct proc *p, int timerid, ksiginfo_t *ksi)
1507{
1508	struct itimer *it;
1509
1510	PROC_LOCK_ASSERT(p, MA_OWNED);
1511	it = itimer_find(p, timerid);
1512	if (it != NULL) {
1513		ksi->ksi_overrun = it->it_overrun;
1514		it->it_overrun_last = it->it_overrun;
1515		it->it_overrun = 0;
1516		ITIMER_UNLOCK(it);
1517		return (0);
1518	}
1519	return (EINVAL);
1520}
1521
1522int
1523itimespecfix(struct timespec *ts)
1524{
1525
1526	if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
1527		return (EINVAL);
1528	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000)
1529		ts->tv_nsec = tick * 1000;
1530	return (0);
1531}
1532
1533/* Timeout callback for realtime timer */
1534static void
1535realtimer_expire(void *arg)
1536{
1537	struct timespec cts, ts;
1538	struct timeval tv;
1539	struct itimer *it;
1540
1541	it = (struct itimer *)arg;
1542
1543	realtimer_clocktime(it->it_clockid, &cts);
1544	/* Only fire if time is reached. */
1545	if (timespeccmp(&cts, &it->it_time.it_value, >=)) {
1546		if (timespecisset(&it->it_time.it_interval)) {
1547			timespecadd(&it->it_time.it_value,
1548				    &it->it_time.it_interval);
1549			while (timespeccmp(&cts, &it->it_time.it_value, >=)) {
1550				if (it->it_overrun < INT_MAX)
1551					it->it_overrun++;
1552				else
1553					it->it_ksi.ksi_errno = ERANGE;
1554				timespecadd(&it->it_time.it_value,
1555					    &it->it_time.it_interval);
1556			}
1557		} else {
1558			/* single shot timer ? */
1559			timespecclear(&it->it_time.it_value);
1560		}
1561		if (timespecisset(&it->it_time.it_value)) {
1562			ts = it->it_time.it_value;
1563			timespecsub(&ts, &cts);
1564			TIMESPEC_TO_TIMEVAL(&tv, &ts);
1565			callout_reset(&it->it_callout, tvtohz(&tv),
1566				 realtimer_expire, it);
1567		}
1568		itimer_enter(it);
1569		ITIMER_UNLOCK(it);
1570		itimer_fire(it);
1571		ITIMER_LOCK(it);
1572		itimer_leave(it);
1573	} else if (timespecisset(&it->it_time.it_value)) {
1574		ts = it->it_time.it_value;
1575		timespecsub(&ts, &cts);
1576		TIMESPEC_TO_TIMEVAL(&tv, &ts);
1577		callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire,
1578 			it);
1579	}
1580}
1581
1582void
1583itimer_fire(struct itimer *it)
1584{
1585	struct proc *p = it->it_proc;
1586	struct thread *td;
1587
1588	if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
1589	    it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
1590		if (sigev_findtd(p, &it->it_sigev, &td) != 0) {
1591			ITIMER_LOCK(it);
1592			timespecclear(&it->it_time.it_value);
1593			timespecclear(&it->it_time.it_interval);
1594			callout_stop(&it->it_callout);
1595			ITIMER_UNLOCK(it);
1596			return;
1597		}
1598		if (!KSI_ONQ(&it->it_ksi)) {
1599			it->it_ksi.ksi_errno = 0;
1600			ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev);
1601			tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi);
1602		} else {
1603			if (it->it_overrun < INT_MAX)
1604				it->it_overrun++;
1605			else
1606				it->it_ksi.ksi_errno = ERANGE;
1607		}
1608		PROC_UNLOCK(p);
1609	}
1610}
1611
1612static void
1613itimers_alloc(struct proc *p)
1614{
1615	struct itimers *its;
1616	int i;
1617
1618	its = malloc(sizeof (struct itimers), M_SUBPROC, M_WAITOK | M_ZERO);
1619	LIST_INIT(&its->its_virtual);
1620	LIST_INIT(&its->its_prof);
1621	TAILQ_INIT(&its->its_worklist);
1622	for (i = 0; i < TIMER_MAX; i++)
1623		its->its_timers[i] = NULL;
1624	PROC_LOCK(p);
1625	if (p->p_itimers == NULL) {
1626		p->p_itimers = its;
1627		PROC_UNLOCK(p);
1628	}
1629	else {
1630		PROC_UNLOCK(p);
1631		free(its, M_SUBPROC);
1632	}
1633}
1634
1635static void
1636itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp __unused)
1637{
1638	itimers_event_hook_exit(arg, p);
1639}
1640
1641/* Clean up timers when some process events are being triggered. */
1642static void
1643itimers_event_hook_exit(void *arg, struct proc *p)
1644{
1645	struct itimers *its;
1646	struct itimer *it;
1647	int event = (int)(intptr_t)arg;
1648	int i;
1649
1650	if (p->p_itimers != NULL) {
1651		its = p->p_itimers;
1652		for (i = 0; i < MAX_CLOCKS; ++i) {
1653			if (posix_clocks[i].event_hook != NULL)
1654				CLOCK_CALL(i, event_hook, (p, i, event));
1655		}
1656		/*
1657		 * According to susv3, XSI interval timers should be inherited
1658		 * by new image.
1659		 */
1660		if (event == ITIMER_EV_EXEC)
1661			i = 3;
1662		else if (event == ITIMER_EV_EXIT)
1663			i = 0;
1664		else
1665			panic("unhandled event");
1666		for (; i < TIMER_MAX; ++i) {
1667			if ((it = its->its_timers[i]) != NULL)
1668				kern_ktimer_delete(curthread, i);
1669		}
1670		if (its->its_timers[0] == NULL &&
1671		    its->its_timers[1] == NULL &&
1672		    its->its_timers[2] == NULL) {
1673			free(its, M_SUBPROC);
1674			p->p_itimers = NULL;
1675		}
1676	}
1677}
1678