kern_time.c revision 331722
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/kern/kern_time.c 331722 2018-03-29 02:50:57Z eadler $");
34
35#include "opt_ktrace.h"
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/limits.h>
40#include <sys/clock.h>
41#include <sys/lock.h>
42#include <sys/mutex.h>
43#include <sys/sysproto.h>
44#include <sys/eventhandler.h>
45#include <sys/resourcevar.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/sleepqueue.h>
49#include <sys/syscallsubr.h>
50#include <sys/sysctl.h>
51#include <sys/sysent.h>
52#include <sys/priv.h>
53#include <sys/proc.h>
54#include <sys/posix4.h>
55#include <sys/time.h>
56#include <sys/timers.h>
57#include <sys/timetc.h>
58#include <sys/vnode.h>
59#ifdef KTRACE
60#include <sys/ktrace.h>
61#endif
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65
/*
 * Size of the posix_clocks[] table, and the bit layout used to encode
 * CPU-time clock ids inside a clockid_t: the top bit marks a CPU-time
 * clock, the next bit selects a process clock (set) or a thread clock
 * (clear), and the remaining bits carry the pid or tid.
 */
#define	MAX_CLOCKS			(CLOCK_MONOTONIC + 1)
#define	CPUCLOCK_BIT			0x80000000
#define	CPUCLOCK_PROCESS_BIT		0x40000000
#define	CPUCLOCK_ID_MASK						\
	(~(CPUCLOCK_BIT | CPUCLOCK_PROCESS_BIT))
#define	MAKE_THREAD_CPUCLOCK(tid)	(CPUCLOCK_BIT | (tid))
#define	MAKE_PROCESS_CPUCLOCK(pid)					\
	(CPUCLOCK_BIT | CPUCLOCK_PROCESS_BIT | (pid))
73
74static struct kclock	posix_clocks[MAX_CLOCKS];
75static uma_zone_t	itimer_zone = NULL;
76
77/*
78 * Time of day and interval timer support.
79 *
80 * These routines provide the kernel entry points to get and set
81 * the time-of-day and per-process interval timers.  Subroutines
82 * here provide support for adding and subtracting timeval structures
83 * and decrementing interval timers, optionally reloading the interval
84 * timers when they expire.
85 */
86
87static int	settime(struct thread *, struct timeval *);
88static void	timevalfix(struct timeval *);
89static int	user_clock_nanosleep(struct thread *td, clockid_t clock_id,
90		    int flags, const struct timespec *ua_rqtp,
91		    struct timespec *ua_rmtp);
92
93static void	itimer_start(void);
94static int	itimer_init(void *, int, int);
95static void	itimer_fini(void *, int);
96static void	itimer_enter(struct itimer *);
97static void	itimer_leave(struct itimer *);
98static struct itimer *itimer_find(struct proc *, int);
99static void	itimers_alloc(struct proc *);
100static void	itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp);
101static void	itimers_event_hook_exit(void *arg, struct proc *p);
102static int	realtimer_create(struct itimer *);
103static int	realtimer_gettime(struct itimer *, struct itimerspec *);
104static int	realtimer_settime(struct itimer *, int,
105			struct itimerspec *, struct itimerspec *);
106static int	realtimer_delete(struct itimer *);
107static void	realtimer_clocktime(clockid_t, struct timespec *);
108static void	realtimer_expire(void *);
109
110int		register_posix_clock(int, struct kclock *);
111void		itimer_fire(struct itimer *it);
112int		itimespecfix(struct timespec *ts);
113
114#define CLOCK_CALL(clock, call, arglist)		\
115	((*posix_clocks[clock].call) arglist)
116
117SYSINIT(posix_timer, SI_SUB_P1003_1B, SI_ORDER_FIRST+4, itimer_start, NULL);
118
119
120static int
121settime(struct thread *td, struct timeval *tv)
122{
123	struct timeval delta, tv1, tv2;
124	static struct timeval maxtime, laststep;
125	struct timespec ts;
126
127	microtime(&tv1);
128	delta = *tv;
129	timevalsub(&delta, &tv1);
130
131	/*
132	 * If the system is secure, we do not allow the time to be
133	 * set to a value earlier than 1 second less than the highest
134	 * time we have yet seen. The worst a miscreant can do in
135	 * this circumstance is "freeze" time. He couldn't go
136	 * back to the past.
137	 *
138	 * We similarly do not allow the clock to be stepped more
139	 * than one second, nor more than once per second. This allows
140	 * a miscreant to make the clock march double-time, but no worse.
141	 */
142	if (securelevel_gt(td->td_ucred, 1) != 0) {
143		if (delta.tv_sec < 0 || delta.tv_usec < 0) {
144			/*
145			 * Update maxtime to latest time we've seen.
146			 */
147			if (tv1.tv_sec > maxtime.tv_sec)
148				maxtime = tv1;
149			tv2 = *tv;
150			timevalsub(&tv2, &maxtime);
151			if (tv2.tv_sec < -1) {
152				tv->tv_sec = maxtime.tv_sec - 1;
153				printf("Time adjustment clamped to -1 second\n");
154			}
155		} else {
156			if (tv1.tv_sec == laststep.tv_sec)
157				return (EPERM);
158			if (delta.tv_sec > 1) {
159				tv->tv_sec = tv1.tv_sec + 1;
160				printf("Time adjustment clamped to +1 second\n");
161			}
162			laststep = *tv;
163		}
164	}
165
166	ts.tv_sec = tv->tv_sec;
167	ts.tv_nsec = tv->tv_usec * 1000;
168	tc_setclock(&ts);
169	resettodr();
170	return (0);
171}
172
173#ifndef _SYS_SYSPROTO_H_
174struct clock_getcpuclockid2_args {
175	id_t id;
176	int which,
177	clockid_t *clock_id;
178};
179#endif
180/* ARGSUSED */
181int
182sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap)
183{
184	clockid_t clk_id;
185	int error;
186
187	error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id);
188	if (error == 0)
189		error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t));
190	return (error);
191}
192
193int
194kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
195    clockid_t *clk_id)
196{
197	struct proc *p;
198	pid_t pid;
199	lwpid_t tid;
200	int error;
201
202	switch (which) {
203	case CPUCLOCK_WHICH_PID:
204		if (id != 0) {
205			error = pget(id, PGET_CANSEE | PGET_NOTID, &p);
206			if (error != 0)
207				return (error);
208			PROC_UNLOCK(p);
209			pid = id;
210		} else {
211			pid = td->td_proc->p_pid;
212		}
213		*clk_id = MAKE_PROCESS_CPUCLOCK(pid);
214		return (0);
215	case CPUCLOCK_WHICH_TID:
216		tid = id == 0 ? td->td_tid : id;
217		*clk_id = MAKE_THREAD_CPUCLOCK(tid);
218		return (0);
219	default:
220		return (EINVAL);
221	}
222}
223
224#ifndef _SYS_SYSPROTO_H_
225struct clock_gettime_args {
226	clockid_t clock_id;
227	struct	timespec *tp;
228};
229#endif
230/* ARGSUSED */
231int
232sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap)
233{
234	struct timespec ats;
235	int error;
236
237	error = kern_clock_gettime(td, uap->clock_id, &ats);
238	if (error == 0)
239		error = copyout(&ats, uap->tp, sizeof(ats));
240
241	return (error);
242}
243
/*
 * Convert a CPU tick count into a timespec.  The count is first turned
 * into microseconds by cputick2usec(), so *ats is only filled in with
 * microsecond granularity.
 */
static inline void
cputick2timespec(uint64_t runtime, struct timespec *ats)
{
	uint64_t usec;

	usec = cputick2usec(runtime);
	ats->tv_sec = usec / 1000000;
	ats->tv_nsec = (usec % 1000000) * 1000;
}
251
252static void
253get_thread_cputime(struct thread *targettd, struct timespec *ats)
254{
255	uint64_t runtime, curtime, switchtime;
256
257	if (targettd == NULL) { /* current thread */
258		critical_enter();
259		switchtime = PCPU_GET(switchtime);
260		curtime = cpu_ticks();
261		runtime = curthread->td_runtime;
262		critical_exit();
263		runtime += curtime - switchtime;
264	} else {
265		thread_lock(targettd);
266		runtime = targettd->td_runtime;
267		thread_unlock(targettd);
268	}
269	cputick2timespec(runtime, ats);
270}
271
272static void
273get_process_cputime(struct proc *targetp, struct timespec *ats)
274{
275	uint64_t runtime;
276	struct rusage ru;
277
278	PROC_STATLOCK(targetp);
279	rufetch(targetp, &ru);
280	runtime = targetp->p_rux.rux_runtime;
281	PROC_STATUNLOCK(targetp);
282	cputick2timespec(runtime, ats);
283}
284
285static int
286get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats)
287{
288	struct proc *p, *p2;
289	struct thread *td2;
290	lwpid_t tid;
291	pid_t pid;
292	int error;
293
294	p = td->td_proc;
295	if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) {
296		tid = clock_id & CPUCLOCK_ID_MASK;
297		td2 = tdfind(tid, p->p_pid);
298		if (td2 == NULL)
299			return (EINVAL);
300		get_thread_cputime(td2, ats);
301		PROC_UNLOCK(td2->td_proc);
302	} else {
303		pid = clock_id & CPUCLOCK_ID_MASK;
304		error = pget(pid, PGET_CANSEE, &p2);
305		if (error != 0)
306			return (EINVAL);
307		get_process_cputime(p2, ats);
308		PROC_UNLOCK(p2);
309	}
310	return (0);
311}
312
313int
314kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats)
315{
316	struct timeval sys, user;
317	struct proc *p;
318
319	p = td->td_proc;
320	switch (clock_id) {
321	case CLOCK_REALTIME:		/* Default to precise. */
322	case CLOCK_REALTIME_PRECISE:
323		nanotime(ats);
324		break;
325	case CLOCK_REALTIME_FAST:
326		getnanotime(ats);
327		break;
328	case CLOCK_VIRTUAL:
329		PROC_LOCK(p);
330		PROC_STATLOCK(p);
331		calcru(p, &user, &sys);
332		PROC_STATUNLOCK(p);
333		PROC_UNLOCK(p);
334		TIMEVAL_TO_TIMESPEC(&user, ats);
335		break;
336	case CLOCK_PROF:
337		PROC_LOCK(p);
338		PROC_STATLOCK(p);
339		calcru(p, &user, &sys);
340		PROC_STATUNLOCK(p);
341		PROC_UNLOCK(p);
342		timevaladd(&user, &sys);
343		TIMEVAL_TO_TIMESPEC(&user, ats);
344		break;
345	case CLOCK_MONOTONIC:		/* Default to precise. */
346	case CLOCK_MONOTONIC_PRECISE:
347	case CLOCK_UPTIME:
348	case CLOCK_UPTIME_PRECISE:
349		nanouptime(ats);
350		break;
351	case CLOCK_UPTIME_FAST:
352	case CLOCK_MONOTONIC_FAST:
353		getnanouptime(ats);
354		break;
355	case CLOCK_SECOND:
356		ats->tv_sec = time_second;
357		ats->tv_nsec = 0;
358		break;
359	case CLOCK_THREAD_CPUTIME_ID:
360		get_thread_cputime(NULL, ats);
361		break;
362	case CLOCK_PROCESS_CPUTIME_ID:
363		PROC_LOCK(p);
364		get_process_cputime(p, ats);
365		PROC_UNLOCK(p);
366		break;
367	default:
368		if ((int)clock_id >= 0)
369			return (EINVAL);
370		return (get_cputime(td, clock_id, ats));
371	}
372	return (0);
373}
374
375#ifndef _SYS_SYSPROTO_H_
376struct clock_settime_args {
377	clockid_t clock_id;
378	const struct	timespec *tp;
379};
380#endif
381/* ARGSUSED */
382int
383sys_clock_settime(struct thread *td, struct clock_settime_args *uap)
384{
385	struct timespec ats;
386	int error;
387
388	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
389		return (error);
390	return (kern_clock_settime(td, uap->clock_id, &ats));
391}
392
393int
394kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats)
395{
396	struct timeval atv;
397	int error;
398
399	if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
400		return (error);
401	if (clock_id != CLOCK_REALTIME)
402		return (EINVAL);
403	if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 ||
404	    ats->tv_sec < 0)
405		return (EINVAL);
406	/* XXX Don't convert nsec->usec and back */
407	TIMESPEC_TO_TIMEVAL(&atv, ats);
408	error = settime(td, &atv);
409	return (error);
410}
411
412#ifndef _SYS_SYSPROTO_H_
413struct clock_getres_args {
414	clockid_t clock_id;
415	struct	timespec *tp;
416};
417#endif
418int
419sys_clock_getres(struct thread *td, struct clock_getres_args *uap)
420{
421	struct timespec ts;
422	int error;
423
424	if (uap->tp == NULL)
425		return (0);
426
427	error = kern_clock_getres(td, uap->clock_id, &ts);
428	if (error == 0)
429		error = copyout(&ts, uap->tp, sizeof(ts));
430	return (error);
431}
432
433int
434kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts)
435{
436
437	ts->tv_sec = 0;
438	switch (clock_id) {
439	case CLOCK_REALTIME:
440	case CLOCK_REALTIME_FAST:
441	case CLOCK_REALTIME_PRECISE:
442	case CLOCK_MONOTONIC:
443	case CLOCK_MONOTONIC_FAST:
444	case CLOCK_MONOTONIC_PRECISE:
445	case CLOCK_UPTIME:
446	case CLOCK_UPTIME_FAST:
447	case CLOCK_UPTIME_PRECISE:
448		/*
449		 * Round up the result of the division cheaply by adding 1.
450		 * Rounding up is especially important if rounding down
451		 * would give 0.  Perfect rounding is unimportant.
452		 */
453		ts->tv_nsec = 1000000000 / tc_getfrequency() + 1;
454		break;
455	case CLOCK_VIRTUAL:
456	case CLOCK_PROF:
457		/* Accurately round up here because we can do so cheaply. */
458		ts->tv_nsec = howmany(1000000000, hz);
459		break;
460	case CLOCK_SECOND:
461		ts->tv_sec = 1;
462		ts->tv_nsec = 0;
463		break;
464	case CLOCK_THREAD_CPUTIME_ID:
465	case CLOCK_PROCESS_CPUTIME_ID:
466	cputime:
467		/* sync with cputick2usec */
468		ts->tv_nsec = 1000000 / cpu_tickrate();
469		if (ts->tv_nsec == 0)
470			ts->tv_nsec = 1000;
471		break;
472	default:
473		if ((int)clock_id < 0)
474			goto cputime;
475		return (EINVAL);
476	}
477	return (0);
478}
479
480int
481kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
482{
483
484	return (kern_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, rqt,
485	    rmt));
486}
487
488static uint8_t nanowait[MAXCPU];
489
490int
491kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
492    const struct timespec *rqt, struct timespec *rmt)
493{
494	struct timespec ts, now;
495	sbintime_t sbt, sbtt, prec, tmp;
496	time_t over;
497	int error;
498	bool is_abs_real;
499
500	if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
501		return (EINVAL);
502	if ((flags & ~TIMER_ABSTIME) != 0)
503		return (EINVAL);
504	switch (clock_id) {
505	case CLOCK_REALTIME:
506	case CLOCK_REALTIME_PRECISE:
507	case CLOCK_REALTIME_FAST:
508	case CLOCK_SECOND:
509		is_abs_real = (flags & TIMER_ABSTIME) != 0;
510		break;
511	case CLOCK_MONOTONIC:
512	case CLOCK_MONOTONIC_PRECISE:
513	case CLOCK_MONOTONIC_FAST:
514	case CLOCK_UPTIME:
515	case CLOCK_UPTIME_PRECISE:
516	case CLOCK_UPTIME_FAST:
517		is_abs_real = false;
518		break;
519	case CLOCK_VIRTUAL:
520	case CLOCK_PROF:
521	case CLOCK_PROCESS_CPUTIME_ID:
522		return (ENOTSUP);
523	case CLOCK_THREAD_CPUTIME_ID:
524	default:
525		return (EINVAL);
526	}
527	do {
528		ts = *rqt;
529		if ((flags & TIMER_ABSTIME) != 0) {
530			if (is_abs_real)
531				td->td_rtcgen =
532				    atomic_load_acq_int(&rtc_generation);
533			error = kern_clock_gettime(td, clock_id, &now);
534			KASSERT(error == 0, ("kern_clock_gettime: %d", error));
535			timespecsub(&ts, &now);
536		}
537		if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
538			error = EWOULDBLOCK;
539			break;
540		}
541		if (ts.tv_sec > INT32_MAX / 2) {
542			over = ts.tv_sec - INT32_MAX / 2;
543			ts.tv_sec -= over;
544		} else
545			over = 0;
546		tmp = tstosbt(ts);
547		prec = tmp;
548		prec >>= tc_precexp;
549		if (TIMESEL(&sbt, tmp))
550			sbt += tc_tick_sbt;
551		sbt += tmp;
552		error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
553		    sbt, prec, C_ABSOLUTE);
554	} while (error == 0 && is_abs_real && td->td_rtcgen == 0);
555	td->td_rtcgen = 0;
556	if (error != EWOULDBLOCK) {
557		if (TIMESEL(&sbtt, tmp))
558			sbtt += tc_tick_sbt;
559		if (sbtt >= sbt)
560			return (0);
561		if (error == ERESTART)
562			error = EINTR;
563		if ((flags & TIMER_ABSTIME) == 0 && rmt != NULL) {
564			ts = sbttots(sbt - sbtt);
565			ts.tv_sec += over;
566			if (ts.tv_sec < 0)
567				timespecclear(&ts);
568			*rmt = ts;
569		}
570		return (error);
571	}
572	return (0);
573}
574
575#ifndef _SYS_SYSPROTO_H_
576struct nanosleep_args {
577	struct	timespec *rqtp;
578	struct	timespec *rmtp;
579};
580#endif
581/* ARGSUSED */
582int
583sys_nanosleep(struct thread *td, struct nanosleep_args *uap)
584{
585
586	return (user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME,
587	    uap->rqtp, uap->rmtp));
588}
589
590#ifndef _SYS_SYSPROTO_H_
591struct clock_nanosleep_args {
592	clockid_t clock_id;
593	int 	  flags;
594	struct	timespec *rqtp;
595	struct	timespec *rmtp;
596};
597#endif
598/* ARGSUSED */
599int
600sys_clock_nanosleep(struct thread *td, struct clock_nanosleep_args *uap)
601{
602	int error;
603
604	error = user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp,
605	    uap->rmtp);
606	return (kern_posix_error(td, error));
607}
608
609static int
610user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
611    const struct timespec *ua_rqtp, struct timespec *ua_rmtp)
612{
613	struct timespec rmt, rqt;
614	int error;
615
616	error = copyin(ua_rqtp, &rqt, sizeof(rqt));
617	if (error)
618		return (error);
619	if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 &&
620	    !useracc(ua_rmtp, sizeof(rmt), VM_PROT_WRITE))
621		return (EFAULT);
622	error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt);
623	if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) {
624		int error2;
625
626		error2 = copyout(&rmt, ua_rmtp, sizeof(rmt));
627		if (error2)
628			error = error2;
629	}
630	return (error);
631}
632
633#ifndef _SYS_SYSPROTO_H_
634struct gettimeofday_args {
635	struct	timeval *tp;
636	struct	timezone *tzp;
637};
638#endif
639/* ARGSUSED */
640int
641sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap)
642{
643	struct timeval atv;
644	struct timezone rtz;
645	int error = 0;
646
647	if (uap->tp) {
648		microtime(&atv);
649		error = copyout(&atv, uap->tp, sizeof (atv));
650	}
651	if (error == 0 && uap->tzp != NULL) {
652		rtz.tz_minuteswest = tz_minuteswest;
653		rtz.tz_dsttime = tz_dsttime;
654		error = copyout(&rtz, uap->tzp, sizeof (rtz));
655	}
656	return (error);
657}
658
659#ifndef _SYS_SYSPROTO_H_
660struct settimeofday_args {
661	struct	timeval *tv;
662	struct	timezone *tzp;
663};
664#endif
665/* ARGSUSED */
666int
667sys_settimeofday(struct thread *td, struct settimeofday_args *uap)
668{
669	struct timeval atv, *tvp;
670	struct timezone atz, *tzp;
671	int error;
672
673	if (uap->tv) {
674		error = copyin(uap->tv, &atv, sizeof(atv));
675		if (error)
676			return (error);
677		tvp = &atv;
678	} else
679		tvp = NULL;
680	if (uap->tzp) {
681		error = copyin(uap->tzp, &atz, sizeof(atz));
682		if (error)
683			return (error);
684		tzp = &atz;
685	} else
686		tzp = NULL;
687	return (kern_settimeofday(td, tvp, tzp));
688}
689
690int
691kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp)
692{
693	int error;
694
695	error = priv_check(td, PRIV_SETTIMEOFDAY);
696	if (error)
697		return (error);
698	/* Verify all parameters before changing time. */
699	if (tv) {
700		if (tv->tv_usec < 0 || tv->tv_usec >= 1000000 ||
701		    tv->tv_sec < 0)
702			return (EINVAL);
703		error = settime(td, tv);
704	}
705	if (tzp && error == 0) {
706		tz_minuteswest = tzp->tz_minuteswest;
707		tz_dsttime = tzp->tz_dsttime;
708	}
709	return (error);
710}
711
712/*
713 * Get value of an interval timer.  The process virtual and profiling virtual
714 * time timers are kept in the p_stats area, since they can be swapped out.
715 * These are kept internally in the way they are specified externally: in
716 * time until they expire.
717 *
718 * The real time interval timer is kept in the process table slot for the
719 * process, and its value (it_value) is kept as an absolute time rather than
720 * as a delta, so that it is easy to keep periodic real-time signals from
721 * drifting.
722 *
723 * Virtual time timers are processed in the hardclock() routine of
724 * kern_clock.c.  The real time timer is processed by a timeout routine,
725 * called from the softclock() routine.  Since a callout may be delayed in
726 * real time due to interrupt processing in the system, it is possible for
727 * the real time timeout routine (realitexpire, given below), to be delayed
728 * in real time past when it is supposed to occur.  It does not suffice,
729 * therefore, to reload the real timer .it_value from the real time timers
730 * .it_interval.  Rather, we compute the next time in absolute time the timer
731 * should go off.
732 */
733#ifndef _SYS_SYSPROTO_H_
734struct getitimer_args {
735	u_int	which;
736	struct	itimerval *itv;
737};
738#endif
739int
740sys_getitimer(struct thread *td, struct getitimer_args *uap)
741{
742	struct itimerval aitv;
743	int error;
744
745	error = kern_getitimer(td, uap->which, &aitv);
746	if (error != 0)
747		return (error);
748	return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
749}
750
751int
752kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv)
753{
754	struct proc *p = td->td_proc;
755	struct timeval ctv;
756
757	if (which > ITIMER_PROF)
758		return (EINVAL);
759
760	if (which == ITIMER_REAL) {
761		/*
762		 * Convert from absolute to relative time in .it_value
763		 * part of real time timer.  If time for real time timer
764		 * has passed return 0, else return difference between
765		 * current time and time for the timer to go off.
766		 */
767		PROC_LOCK(p);
768		*aitv = p->p_realtimer;
769		PROC_UNLOCK(p);
770		if (timevalisset(&aitv->it_value)) {
771			microuptime(&ctv);
772			if (timevalcmp(&aitv->it_value, &ctv, <))
773				timevalclear(&aitv->it_value);
774			else
775				timevalsub(&aitv->it_value, &ctv);
776		}
777	} else {
778		PROC_ITIMLOCK(p);
779		*aitv = p->p_stats->p_timer[which];
780		PROC_ITIMUNLOCK(p);
781	}
782#ifdef KTRACE
783	if (KTRPOINT(td, KTR_STRUCT))
784		ktritimerval(aitv);
785#endif
786	return (0);
787}
788
789#ifndef _SYS_SYSPROTO_H_
790struct setitimer_args {
791	u_int	which;
792	struct	itimerval *itv, *oitv;
793};
794#endif
795int
796sys_setitimer(struct thread *td, struct setitimer_args *uap)
797{
798	struct itimerval aitv, oitv;
799	int error;
800
801	if (uap->itv == NULL) {
802		uap->itv = uap->oitv;
803		return (sys_getitimer(td, (struct getitimer_args *)uap));
804	}
805
806	if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval))))
807		return (error);
808	error = kern_setitimer(td, uap->which, &aitv, &oitv);
809	if (error != 0 || uap->oitv == NULL)
810		return (error);
811	return (copyout(&oitv, uap->oitv, sizeof(struct itimerval)));
812}
813
814int
815kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv,
816    struct itimerval *oitv)
817{
818	struct proc *p = td->td_proc;
819	struct timeval ctv;
820	sbintime_t sbt, pr;
821
822	if (aitv == NULL)
823		return (kern_getitimer(td, which, oitv));
824
825	if (which > ITIMER_PROF)
826		return (EINVAL);
827#ifdef KTRACE
828	if (KTRPOINT(td, KTR_STRUCT))
829		ktritimerval(aitv);
830#endif
831	if (itimerfix(&aitv->it_value) ||
832	    aitv->it_value.tv_sec > INT32_MAX / 2)
833		return (EINVAL);
834	if (!timevalisset(&aitv->it_value))
835		timevalclear(&aitv->it_interval);
836	else if (itimerfix(&aitv->it_interval) ||
837	    aitv->it_interval.tv_sec > INT32_MAX / 2)
838		return (EINVAL);
839
840	if (which == ITIMER_REAL) {
841		PROC_LOCK(p);
842		if (timevalisset(&p->p_realtimer.it_value))
843			callout_stop(&p->p_itcallout);
844		microuptime(&ctv);
845		if (timevalisset(&aitv->it_value)) {
846			pr = tvtosbt(aitv->it_value) >> tc_precexp;
847			timevaladd(&aitv->it_value, &ctv);
848			sbt = tvtosbt(aitv->it_value);
849			callout_reset_sbt(&p->p_itcallout, sbt, pr,
850			    realitexpire, p, C_ABSOLUTE);
851		}
852		*oitv = p->p_realtimer;
853		p->p_realtimer = *aitv;
854		PROC_UNLOCK(p);
855		if (timevalisset(&oitv->it_value)) {
856			if (timevalcmp(&oitv->it_value, &ctv, <))
857				timevalclear(&oitv->it_value);
858			else
859				timevalsub(&oitv->it_value, &ctv);
860		}
861	} else {
862		if (aitv->it_interval.tv_sec == 0 &&
863		    aitv->it_interval.tv_usec != 0 &&
864		    aitv->it_interval.tv_usec < tick)
865			aitv->it_interval.tv_usec = tick;
866		if (aitv->it_value.tv_sec == 0 &&
867		    aitv->it_value.tv_usec != 0 &&
868		    aitv->it_value.tv_usec < tick)
869			aitv->it_value.tv_usec = tick;
870		PROC_ITIMLOCK(p);
871		*oitv = p->p_stats->p_timer[which];
872		p->p_stats->p_timer[which] = *aitv;
873		PROC_ITIMUNLOCK(p);
874	}
875#ifdef KTRACE
876	if (KTRPOINT(td, KTR_STRUCT))
877		ktritimerval(oitv);
878#endif
879	return (0);
880}
881
882/*
883 * Real interval timer expired:
884 * send process whose timer expired an alarm signal.
885 * If time is not set up to reload, then just return.
886 * Else compute next time timer should go off which is > current time.
887 * This is where delay in processing this timeout causes multiple
888 * SIGALRM calls to be compressed into one.
889 * tvtohz() always adds 1 to allow for the time until the next clock
890 * interrupt being strictly less than 1 clock tick, but we don't want
891 * that here since we want to appear to be in sync with the clock
892 * interrupt even when we're delayed.
893 */
894void
895realitexpire(void *arg)
896{
897	struct proc *p;
898	struct timeval ctv;
899	sbintime_t isbt;
900
901	p = (struct proc *)arg;
902	kern_psignal(p, SIGALRM);
903	if (!timevalisset(&p->p_realtimer.it_interval)) {
904		timevalclear(&p->p_realtimer.it_value);
905		if (p->p_flag & P_WEXIT)
906			wakeup(&p->p_itcallout);
907		return;
908	}
909	isbt = tvtosbt(p->p_realtimer.it_interval);
910	if (isbt >= sbt_timethreshold)
911		getmicrouptime(&ctv);
912	else
913		microuptime(&ctv);
914	do {
915		timevaladd(&p->p_realtimer.it_value,
916		    &p->p_realtimer.it_interval);
917	} while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=));
918	callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value),
919	    isbt >> tc_precexp, realitexpire, p, C_ABSOLUTE);
920}
921
922/*
923 * Check that a proposed value to load into the .it_value or
924 * .it_interval part of an interval timer is acceptable, and
925 * fix it to have at least minimal value (i.e. if it is less
926 * than the resolution of the clock, round it up.)
927 */
928int
929itimerfix(struct timeval *tv)
930{
931
932	if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
933		return (EINVAL);
934	if (tv->tv_sec == 0 && tv->tv_usec != 0 &&
935	    tv->tv_usec < (u_int)tick / 16)
936		tv->tv_usec = (u_int)tick / 16;
937	return (0);
938}
939
/*
 * Decrement interval timer *itp by usec microseconds; usec must be less
 * than one second (< 1000000).
 *
 * Returns 1 while the timer still has time left.  Returns 0 when it has
 * expired; in that case it_value is reloaded from it_interval (or left
 * at zero when there is no interval).  Whatever part of usec went beyond
 * the old value is taken off the reloaded value, so that a periodic
 * timer does not drift.
 *
 * The caller must ensure the timer cannot change underneath this
 * routine.
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
	int overshoot;

	if (itp->it_value.tv_usec < usec && itp->it_value.tv_sec == 0) {
		/* expired, and already in next interval */
		overshoot = usec - itp->it_value.tv_usec;
	} else {
		if (itp->it_value.tv_usec < usec) {
			/* Borrow one second to keep tv_usec non-negative. */
			itp->it_value.tv_usec += 1000000;
			itp->it_value.tv_sec--;
		}
		itp->it_value.tv_usec -= usec;
		if (timevalisset(&itp->it_value))
			return (1);
		/* expired, exactly at end of interval */
		overshoot = 0;
	}

	if (!timevalisset(&itp->it_interval)) {
		itp->it_value.tv_usec = 0;		/* sec is already 0 */
		return (0);
	}

	/* Reload, less the time already spent in the new interval. */
	itp->it_value = itp->it_interval;
	itp->it_value.tv_usec -= overshoot;
	if (itp->it_value.tv_usec < 0) {
		itp->it_value.tv_usec += 1000000;
		itp->it_value.tv_sec--;
	}
	return (0);
}
980
981/*
982 * Add and subtract routines for timevals.
983 * N.B.: subtract routine doesn't deal with
984 * results which are before the beginning,
985 * it just gets very confused in this case.
986 * Caveat emptor.
987 */
/*
 * *t1 += *t2.  The fields are added independently and the sum is then
 * passed through timevalfix() to carry tv_usec into tv_sec.
 */
void
timevaladd(struct timeval *t1, const struct timeval *t2)
{

	t1->tv_usec += t2->tv_usec;
	t1->tv_sec += t2->tv_sec;
	timevalfix(t1);
}
996
/*
 * *t1 -= *t2.  The fields are subtracted independently and the result is
 * then passed through timevalfix() to borrow from tv_sec if tv_usec went
 * negative.
 */
void
timevalsub(struct timeval *t1, const struct timeval *t2)
{

	t1->tv_usec -= t2->tv_usec;
	t1->tv_sec -= t2->tv_sec;
	timevalfix(t1);
}
1005
/*
 * Bring tv_usec back into [0, 1000000) by moving at most one second
 * between tv_usec and tv_sec.  That is sufficient after adding or
 * subtracting two timevals whose tv_usec were already in range.
 */
static void
timevalfix(struct timeval *t1)
{

	if (t1->tv_usec < 0) {
		/* Borrow a second. */
		t1->tv_usec += 1000000;
		t1->tv_sec--;
	} else if (t1->tv_usec >= 1000000) {
		/* Carry a second. */
		t1->tv_usec -= 1000000;
		t1->tv_sec++;
	}
}
1019
1020/*
1021 * ratecheck(): simple time-based rate-limit checking.
1022 */
1023int
1024ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
1025{
1026	struct timeval tv, delta;
1027	int rv = 0;
1028
1029	getmicrouptime(&tv);		/* NB: 10ms precision */
1030	delta = tv;
1031	timevalsub(&delta, lasttime);
1032
1033	/*
1034	 * check for 0,0 is so that the message will be seen at least once,
1035	 * even if interval is huge.
1036	 */
1037	if (timevalcmp(&delta, mininterval, >=) ||
1038	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
1039		*lasttime = tv;
1040		rv = 1;
1041	}
1042
1043	return (rv);
1044}
1045
1046/*
1047 * ppsratecheck(): packets (or events) per second limitation.
1048 *
1049 * Return 0 if the limit is to be enforced (e.g. the caller
1050 * should drop a packet because of the rate limitation).
1051 *
1052 * maxpps of 0 always causes zero to be returned.  maxpps of -1
1053 * always causes 1 to be returned; this effectively defeats rate
1054 * limiting.
1055 *
1056 * Note that we maintain the struct timeval for compatibility
1057 * with other bsd systems.  We reuse the storage and just monitor
1058 * clock ticks for minimal overhead.
1059 */
1060int
1061ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
1062{
1063	int now;
1064
1065	/*
1066	 * Reset the last time and counter if this is the first call
1067	 * or more than a second has passed since the last update of
1068	 * lasttime.
1069	 */
1070	now = ticks;
1071	if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
1072		lasttime->tv_sec = now;
1073		*curpps = 1;
1074		return (maxpps != 0);
1075	} else {
1076		(*curpps)++;		/* NB: ignore potential overflow */
1077		return (maxpps < 0 || *curpps <= maxpps);
1078	}
1079}
1080
1081static void
1082itimer_start(void)
1083{
1084	struct kclock rt_clock = {
1085		.timer_create  = realtimer_create,
1086		.timer_delete  = realtimer_delete,
1087		.timer_settime = realtimer_settime,
1088		.timer_gettime = realtimer_gettime,
1089		.event_hook    = NULL
1090	};
1091
1092	itimer_zone = uma_zcreate("itimer", sizeof(struct itimer),
1093		NULL, NULL, itimer_init, itimer_fini, UMA_ALIGN_PTR, 0);
1094	register_posix_clock(CLOCK_REALTIME,  &rt_clock);
1095	register_posix_clock(CLOCK_MONOTONIC, &rt_clock);
1096	p31b_setcfg(CTL_P1003_1B_TIMERS, 200112L);
1097	p31b_setcfg(CTL_P1003_1B_DELAYTIMER_MAX, INT_MAX);
1098	p31b_setcfg(CTL_P1003_1B_TIMER_MAX, TIMER_MAX);
1099	EVENTHANDLER_REGISTER(process_exit, itimers_event_hook_exit,
1100		(void *)ITIMER_EV_EXIT, EVENTHANDLER_PRI_ANY);
1101	EVENTHANDLER_REGISTER(process_exec, itimers_event_hook_exec,
1102		(void *)ITIMER_EV_EXEC, EVENTHANDLER_PRI_ANY);
1103}
1104
1105int
1106register_posix_clock(int clockid, struct kclock *clk)
1107{
1108	if ((unsigned)clockid >= MAX_CLOCKS) {
1109		printf("%s: invalid clockid\n", __func__);
1110		return (0);
1111	}
1112	posix_clocks[clockid] = *clk;
1113	return (1);
1114}
1115
1116static int
1117itimer_init(void *mem, int size, int flags)
1118{
1119	struct itimer *it;
1120
1121	it = (struct itimer *)mem;
1122	mtx_init(&it->it_mtx, "itimer lock", NULL, MTX_DEF);
1123	return (0);
1124}
1125
1126static void
1127itimer_fini(void *mem, int size)
1128{
1129	struct itimer *it;
1130
1131	it = (struct itimer *)mem;
1132	mtx_destroy(&it->it_mtx);
1133}
1134
1135static void
1136itimer_enter(struct itimer *it)
1137{
1138
1139	mtx_assert(&it->it_mtx, MA_OWNED);
1140	it->it_usecount++;
1141}
1142
1143static void
1144itimer_leave(struct itimer *it)
1145{
1146
1147	mtx_assert(&it->it_mtx, MA_OWNED);
1148	KASSERT(it->it_usecount > 0, ("invalid it_usecount"));
1149
1150	if (--it->it_usecount == 0 && (it->it_flags & ITF_WANTED) != 0)
1151		wakeup(it);
1152}
1153
1154#ifndef _SYS_SYSPROTO_H_
1155struct ktimer_create_args {
1156	clockid_t clock_id;
1157	struct sigevent * evp;
1158	int * timerid;
1159};
1160#endif
1161int
1162sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap)
1163{
1164	struct sigevent *evp, ev;
1165	int id;
1166	int error;
1167
1168	if (uap->evp == NULL) {
1169		evp = NULL;
1170	} else {
1171		error = copyin(uap->evp, &ev, sizeof(ev));
1172		if (error != 0)
1173			return (error);
1174		evp = &ev;
1175	}
1176	error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
1177	if (error == 0) {
1178		error = copyout(&id, uap->timerid, sizeof(int));
1179		if (error != 0)
1180			kern_ktimer_delete(td, id);
1181	}
1182	return (error);
1183}
1184
1185int
1186kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp,
1187    int *timerid, int preset_id)
1188{
1189	struct proc *p = td->td_proc;
1190	struct itimer *it;
1191	int id;
1192	int error;
1193
1194	if (clock_id < 0 || clock_id >= MAX_CLOCKS)
1195		return (EINVAL);
1196
1197	if (posix_clocks[clock_id].timer_create == NULL)
1198		return (EINVAL);
1199
1200	if (evp != NULL) {
1201		if (evp->sigev_notify != SIGEV_NONE &&
1202		    evp->sigev_notify != SIGEV_SIGNAL &&
1203		    evp->sigev_notify != SIGEV_THREAD_ID)
1204			return (EINVAL);
1205		if ((evp->sigev_notify == SIGEV_SIGNAL ||
1206		     evp->sigev_notify == SIGEV_THREAD_ID) &&
1207			!_SIG_VALID(evp->sigev_signo))
1208			return (EINVAL);
1209	}
1210
1211	if (p->p_itimers == NULL)
1212		itimers_alloc(p);
1213
1214	it = uma_zalloc(itimer_zone, M_WAITOK);
1215	it->it_flags = 0;
1216	it->it_usecount = 0;
1217	it->it_active = 0;
1218	timespecclear(&it->it_time.it_value);
1219	timespecclear(&it->it_time.it_interval);
1220	it->it_overrun = 0;
1221	it->it_overrun_last = 0;
1222	it->it_clockid = clock_id;
1223	it->it_timerid = -1;
1224	it->it_proc = p;
1225	ksiginfo_init(&it->it_ksi);
1226	it->it_ksi.ksi_flags |= KSI_INS | KSI_EXT;
1227	error = CLOCK_CALL(clock_id, timer_create, (it));
1228	if (error != 0)
1229		goto out;
1230
1231	PROC_LOCK(p);
1232	if (preset_id != -1) {
1233		KASSERT(preset_id >= 0 && preset_id < 3, ("invalid preset_id"));
1234		id = preset_id;
1235		if (p->p_itimers->its_timers[id] != NULL) {
1236			PROC_UNLOCK(p);
1237			error = 0;
1238			goto out;
1239		}
1240	} else {
1241		/*
1242		 * Find a free timer slot, skipping those reserved
1243		 * for setitimer().
1244		 */
1245		for (id = 3; id < TIMER_MAX; id++)
1246			if (p->p_itimers->its_timers[id] == NULL)
1247				break;
1248		if (id == TIMER_MAX) {
1249			PROC_UNLOCK(p);
1250			error = EAGAIN;
1251			goto out;
1252		}
1253	}
1254	it->it_timerid = id;
1255	p->p_itimers->its_timers[id] = it;
1256	if (evp != NULL)
1257		it->it_sigev = *evp;
1258	else {
1259		it->it_sigev.sigev_notify = SIGEV_SIGNAL;
1260		switch (clock_id) {
1261		default:
1262		case CLOCK_REALTIME:
1263			it->it_sigev.sigev_signo = SIGALRM;
1264			break;
1265		case CLOCK_VIRTUAL:
1266 			it->it_sigev.sigev_signo = SIGVTALRM;
1267			break;
1268		case CLOCK_PROF:
1269			it->it_sigev.sigev_signo = SIGPROF;
1270			break;
1271		}
1272		it->it_sigev.sigev_value.sival_int = id;
1273	}
1274
1275	if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
1276	    it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
1277		it->it_ksi.ksi_signo = it->it_sigev.sigev_signo;
1278		it->it_ksi.ksi_code = SI_TIMER;
1279		it->it_ksi.ksi_value = it->it_sigev.sigev_value;
1280		it->it_ksi.ksi_timerid = id;
1281	}
1282	PROC_UNLOCK(p);
1283	*timerid = id;
1284	return (0);
1285
1286out:
1287	ITIMER_LOCK(it);
1288	CLOCK_CALL(it->it_clockid, timer_delete, (it));
1289	ITIMER_UNLOCK(it);
1290	uma_zfree(itimer_zone, it);
1291	return (error);
1292}
1293
#ifndef _SYS_SYSPROTO_H_
struct ktimer_delete_args {
	int timerid;
};
#endif
/*
 * ktimer_delete system call entry point: unpack the argument and defer
 * to kern_ktimer_delete(), returning its result.
 */
int
sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap)
{
	int error;

	error = kern_ktimer_delete(td, uap->timerid);
	return (error);
}
1305
1306static struct itimer *
1307itimer_find(struct proc *p, int timerid)
1308{
1309	struct itimer *it;
1310
1311	PROC_LOCK_ASSERT(p, MA_OWNED);
1312	if ((p->p_itimers == NULL) ||
1313	    (timerid < 0) || (timerid >= TIMER_MAX) ||
1314	    (it = p->p_itimers->its_timers[timerid]) == NULL) {
1315		return (NULL);
1316	}
1317	ITIMER_LOCK(it);
1318	if ((it->it_flags & ITF_DELETING) != 0) {
1319		ITIMER_UNLOCK(it);
1320		it = NULL;
1321	}
1322	return (it);
1323}
1324
1325int
1326kern_ktimer_delete(struct thread *td, int timerid)
1327{
1328	struct proc *p = td->td_proc;
1329	struct itimer *it;
1330
1331	PROC_LOCK(p);
1332	it = itimer_find(p, timerid);
1333	if (it == NULL) {
1334		PROC_UNLOCK(p);
1335		return (EINVAL);
1336	}
1337	PROC_UNLOCK(p);
1338
1339	it->it_flags |= ITF_DELETING;
1340	while (it->it_usecount > 0) {
1341		it->it_flags |= ITF_WANTED;
1342		msleep(it, &it->it_mtx, PPAUSE, "itimer", 0);
1343	}
1344	it->it_flags &= ~ITF_WANTED;
1345	CLOCK_CALL(it->it_clockid, timer_delete, (it));
1346	ITIMER_UNLOCK(it);
1347
1348	PROC_LOCK(p);
1349	if (KSI_ONQ(&it->it_ksi))
1350		sigqueue_take(&it->it_ksi);
1351	p->p_itimers->its_timers[timerid] = NULL;
1352	PROC_UNLOCK(p);
1353	uma_zfree(itimer_zone, it);
1354	return (0);
1355}
1356
1357#ifndef _SYS_SYSPROTO_H_
1358struct ktimer_settime_args {
1359	int timerid;
1360	int flags;
1361	const struct itimerspec * value;
1362	struct itimerspec * ovalue;
1363};
1364#endif
1365int
1366sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap)
1367{
1368	struct itimerspec val, oval, *ovalp;
1369	int error;
1370
1371	error = copyin(uap->value, &val, sizeof(val));
1372	if (error != 0)
1373		return (error);
1374	ovalp = uap->ovalue != NULL ? &oval : NULL;
1375	error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp);
1376	if (error == 0 && uap->ovalue != NULL)
1377		error = copyout(ovalp, uap->ovalue, sizeof(*ovalp));
1378	return (error);
1379}
1380
1381int
1382kern_ktimer_settime(struct thread *td, int timer_id, int flags,
1383    struct itimerspec *val, struct itimerspec *oval)
1384{
1385	struct proc *p;
1386	struct itimer *it;
1387	int error;
1388
1389	p = td->td_proc;
1390	PROC_LOCK(p);
1391	if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
1392		PROC_UNLOCK(p);
1393		error = EINVAL;
1394	} else {
1395		PROC_UNLOCK(p);
1396		itimer_enter(it);
1397		error = CLOCK_CALL(it->it_clockid, timer_settime, (it,
1398		    flags, val, oval));
1399		itimer_leave(it);
1400		ITIMER_UNLOCK(it);
1401	}
1402	return (error);
1403}
1404
1405#ifndef _SYS_SYSPROTO_H_
1406struct ktimer_gettime_args {
1407	int timerid;
1408	struct itimerspec * value;
1409};
1410#endif
1411int
1412sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap)
1413{
1414	struct itimerspec val;
1415	int error;
1416
1417	error = kern_ktimer_gettime(td, uap->timerid, &val);
1418	if (error == 0)
1419		error = copyout(&val, uap->value, sizeof(val));
1420	return (error);
1421}
1422
1423int
1424kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val)
1425{
1426	struct proc *p;
1427	struct itimer *it;
1428	int error;
1429
1430	p = td->td_proc;
1431	PROC_LOCK(p);
1432	if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
1433		PROC_UNLOCK(p);
1434		error = EINVAL;
1435	} else {
1436		PROC_UNLOCK(p);
1437		itimer_enter(it);
1438		error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val));
1439		itimer_leave(it);
1440		ITIMER_UNLOCK(it);
1441	}
1442	return (error);
1443}
1444
#ifndef _SYS_SYSPROTO_H_
/*
 * Fallback argument layout, used only when <sys/sysproto.h> has not
 * supplied one.  The tag must match the type named in the function
 * signature below.
 */
struct ktimer_getoverrun_args {
	int timerid;
};
#endif
/*
 * ktimer_getoverrun system call entry point: unpack the argument and
 * defer to kern_ktimer_getoverrun(), which stores the overrun count in
 * td->td_retval[0] and returns 0, or returns EINVAL for an unknown
 * timer.
 */
int
sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap)
{

	return (kern_ktimer_getoverrun(td, uap->timerid));
}
1456
1457int
1458kern_ktimer_getoverrun(struct thread *td, int timer_id)
1459{
1460	struct proc *p = td->td_proc;
1461	struct itimer *it;
1462	int error ;
1463
1464	PROC_LOCK(p);
1465	if (timer_id < 3 ||
1466	    (it = itimer_find(p, timer_id)) == NULL) {
1467		PROC_UNLOCK(p);
1468		error = EINVAL;
1469	} else {
1470		td->td_retval[0] = it->it_overrun_last;
1471		ITIMER_UNLOCK(it);
1472		PROC_UNLOCK(p);
1473		error = 0;
1474	}
1475	return (error);
1476}
1477
1478static int
1479realtimer_create(struct itimer *it)
1480{
1481	callout_init_mtx(&it->it_callout, &it->it_mtx, 0);
1482	return (0);
1483}
1484
1485static int
1486realtimer_delete(struct itimer *it)
1487{
1488	mtx_assert(&it->it_mtx, MA_OWNED);
1489
1490	/*
1491	 * clear timer's value and interval to tell realtimer_expire
1492	 * to not rearm the timer.
1493	 */
1494	timespecclear(&it->it_time.it_value);
1495	timespecclear(&it->it_time.it_interval);
1496	ITIMER_UNLOCK(it);
1497	callout_drain(&it->it_callout);
1498	ITIMER_LOCK(it);
1499	return (0);
1500}
1501
1502static int
1503realtimer_gettime(struct itimer *it, struct itimerspec *ovalue)
1504{
1505	struct timespec cts;
1506
1507	mtx_assert(&it->it_mtx, MA_OWNED);
1508
1509	realtimer_clocktime(it->it_clockid, &cts);
1510	*ovalue = it->it_time;
1511	if (ovalue->it_value.tv_sec != 0 || ovalue->it_value.tv_nsec != 0) {
1512		timespecsub(&ovalue->it_value, &cts);
1513		if (ovalue->it_value.tv_sec < 0 ||
1514		    (ovalue->it_value.tv_sec == 0 &&
1515		     ovalue->it_value.tv_nsec == 0)) {
1516			ovalue->it_value.tv_sec  = 0;
1517			ovalue->it_value.tv_nsec = 1;
1518		}
1519	}
1520	return (0);
1521}
1522
1523static int
1524realtimer_settime(struct itimer *it, int flags,
1525	struct itimerspec *value, struct itimerspec *ovalue)
1526{
1527	struct timespec cts, ts;
1528	struct timeval tv;
1529	struct itimerspec val;
1530
1531	mtx_assert(&it->it_mtx, MA_OWNED);
1532
1533	val = *value;
1534	if (itimespecfix(&val.it_value))
1535		return (EINVAL);
1536
1537	if (timespecisset(&val.it_value)) {
1538		if (itimespecfix(&val.it_interval))
1539			return (EINVAL);
1540	} else {
1541		timespecclear(&val.it_interval);
1542	}
1543
1544	if (ovalue != NULL)
1545		realtimer_gettime(it, ovalue);
1546
1547	it->it_time = val;
1548	if (timespecisset(&val.it_value)) {
1549		realtimer_clocktime(it->it_clockid, &cts);
1550		ts = val.it_value;
1551		if ((flags & TIMER_ABSTIME) == 0) {
1552			/* Convert to absolute time. */
1553			timespecadd(&it->it_time.it_value, &cts);
1554		} else {
1555			timespecsub(&ts, &cts);
1556			/*
1557			 * We don't care if ts is negative, tztohz will
1558			 * fix it.
1559			 */
1560		}
1561		TIMESPEC_TO_TIMEVAL(&tv, &ts);
1562		callout_reset(&it->it_callout, tvtohz(&tv),
1563			realtimer_expire, it);
1564	} else {
1565		callout_stop(&it->it_callout);
1566	}
1567
1568	return (0);
1569}
1570
1571static void
1572realtimer_clocktime(clockid_t id, struct timespec *ts)
1573{
1574	if (id == CLOCK_REALTIME)
1575		getnanotime(ts);
1576	else	/* CLOCK_MONOTONIC */
1577		getnanouptime(ts);
1578}
1579
1580int
1581itimer_accept(struct proc *p, int timerid, ksiginfo_t *ksi)
1582{
1583	struct itimer *it;
1584
1585	PROC_LOCK_ASSERT(p, MA_OWNED);
1586	it = itimer_find(p, timerid);
1587	if (it != NULL) {
1588		ksi->ksi_overrun = it->it_overrun;
1589		it->it_overrun_last = it->it_overrun;
1590		it->it_overrun = 0;
1591		ITIMER_UNLOCK(it);
1592		return (0);
1593	}
1594	return (EINVAL);
1595}
1596
1597int
1598itimespecfix(struct timespec *ts)
1599{
1600
1601	if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
1602		return (EINVAL);
1603	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000)
1604		ts->tv_nsec = tick * 1000;
1605	return (0);
1606}
1607
1608/* Timeout callback for realtime timer */
1609static void
1610realtimer_expire(void *arg)
1611{
1612	struct timespec cts, ts;
1613	struct timeval tv;
1614	struct itimer *it;
1615
1616	it = (struct itimer *)arg;
1617
1618	realtimer_clocktime(it->it_clockid, &cts);
1619	/* Only fire if time is reached. */
1620	if (timespeccmp(&cts, &it->it_time.it_value, >=)) {
1621		if (timespecisset(&it->it_time.it_interval)) {
1622			timespecadd(&it->it_time.it_value,
1623				    &it->it_time.it_interval);
1624			while (timespeccmp(&cts, &it->it_time.it_value, >=)) {
1625				if (it->it_overrun < INT_MAX)
1626					it->it_overrun++;
1627				else
1628					it->it_ksi.ksi_errno = ERANGE;
1629				timespecadd(&it->it_time.it_value,
1630					    &it->it_time.it_interval);
1631			}
1632		} else {
1633			/* single shot timer ? */
1634			timespecclear(&it->it_time.it_value);
1635		}
1636		if (timespecisset(&it->it_time.it_value)) {
1637			ts = it->it_time.it_value;
1638			timespecsub(&ts, &cts);
1639			TIMESPEC_TO_TIMEVAL(&tv, &ts);
1640			callout_reset(&it->it_callout, tvtohz(&tv),
1641				 realtimer_expire, it);
1642		}
1643		itimer_enter(it);
1644		ITIMER_UNLOCK(it);
1645		itimer_fire(it);
1646		ITIMER_LOCK(it);
1647		itimer_leave(it);
1648	} else if (timespecisset(&it->it_time.it_value)) {
1649		ts = it->it_time.it_value;
1650		timespecsub(&ts, &cts);
1651		TIMESPEC_TO_TIMEVAL(&tv, &ts);
1652		callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire,
1653 			it);
1654	}
1655}
1656
1657void
1658itimer_fire(struct itimer *it)
1659{
1660	struct proc *p = it->it_proc;
1661	struct thread *td;
1662
1663	if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
1664	    it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
1665		if (sigev_findtd(p, &it->it_sigev, &td) != 0) {
1666			ITIMER_LOCK(it);
1667			timespecclear(&it->it_time.it_value);
1668			timespecclear(&it->it_time.it_interval);
1669			callout_stop(&it->it_callout);
1670			ITIMER_UNLOCK(it);
1671			return;
1672		}
1673		if (!KSI_ONQ(&it->it_ksi)) {
1674			it->it_ksi.ksi_errno = 0;
1675			ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev);
1676			tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi);
1677		} else {
1678			if (it->it_overrun < INT_MAX)
1679				it->it_overrun++;
1680			else
1681				it->it_ksi.ksi_errno = ERANGE;
1682		}
1683		PROC_UNLOCK(p);
1684	}
1685}
1686
1687static void
1688itimers_alloc(struct proc *p)
1689{
1690	struct itimers *its;
1691	int i;
1692
1693	its = malloc(sizeof (struct itimers), M_SUBPROC, M_WAITOK | M_ZERO);
1694	LIST_INIT(&its->its_virtual);
1695	LIST_INIT(&its->its_prof);
1696	TAILQ_INIT(&its->its_worklist);
1697	for (i = 0; i < TIMER_MAX; i++)
1698		its->its_timers[i] = NULL;
1699	PROC_LOCK(p);
1700	if (p->p_itimers == NULL) {
1701		p->p_itimers = its;
1702		PROC_UNLOCK(p);
1703	}
1704	else {
1705		PROC_UNLOCK(p);
1706		free(its, M_SUBPROC);
1707	}
1708}
1709
1710static void
1711itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp __unused)
1712{
1713	itimers_event_hook_exit(arg, p);
1714}
1715
1716/* Clean up timers when some process events are being triggered. */
1717static void
1718itimers_event_hook_exit(void *arg, struct proc *p)
1719{
1720	struct itimers *its;
1721	struct itimer *it;
1722	int event = (int)(intptr_t)arg;
1723	int i;
1724
1725	if (p->p_itimers != NULL) {
1726		its = p->p_itimers;
1727		for (i = 0; i < MAX_CLOCKS; ++i) {
1728			if (posix_clocks[i].event_hook != NULL)
1729				CLOCK_CALL(i, event_hook, (p, i, event));
1730		}
1731		/*
1732		 * According to susv3, XSI interval timers should be inherited
1733		 * by new image.
1734		 */
1735		if (event == ITIMER_EV_EXEC)
1736			i = 3;
1737		else if (event == ITIMER_EV_EXIT)
1738			i = 0;
1739		else
1740			panic("unhandled event");
1741		for (; i < TIMER_MAX; ++i) {
1742			if ((it = its->its_timers[i]) != NULL)
1743				kern_ktimer_delete(curthread, i);
1744		}
1745		if (its->its_timers[0] == NULL &&
1746		    its->its_timers[1] == NULL &&
1747		    its->its_timers[2] == NULL) {
1748			free(its, M_SUBPROC);
1749			p->p_itimers = NULL;
1750		}
1751	}
1752}
1753