/* kern_time.c revision 140483 */
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)kern_time.c 8.1 (Berkeley) 6/10/93 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/kern/kern_time.c 140483 2005-01-19 18:09:50Z ps $"); 34 35#include "opt_mac.h" 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/lock.h> 40#include <sys/mutex.h> 41#include <sys/sysproto.h> 42#include <sys/resourcevar.h> 43#include <sys/signalvar.h> 44#include <sys/kernel.h> 45#include <sys/mac.h> 46#include <sys/syscallsubr.h> 47#include <sys/sysent.h> 48#include <sys/proc.h> 49#include <sys/time.h> 50#include <sys/timetc.h> 51#include <sys/vnode.h> 52 53#include <vm/vm.h> 54#include <vm/vm_extern.h> 55 56int tz_minuteswest; 57int tz_dsttime; 58 59/* 60 * Time of day and interval timer support. 61 * 62 * These routines provide the kernel entry points to get and set 63 * the time-of-day and per-process interval timers. Subroutines 64 * here provide support for adding and subtracting timeval structures 65 * and decrementing interval timers, optionally reloading the interval 66 * timers when they expire. 67 */ 68 69static int settime(struct thread *, struct timeval *); 70static void timevalfix(struct timeval *); 71static void no_lease_updatetime(int); 72 73static void 74no_lease_updatetime(deltat) 75 int deltat; 76{ 77} 78 79void (*lease_updatetime)(int) = no_lease_updatetime; 80 81static int 82settime(struct thread *td, struct timeval *tv) 83{ 84 struct timeval delta, tv1, tv2; 85 static struct timeval maxtime, laststep; 86 struct timespec ts; 87 int s; 88 89 s = splclock(); 90 microtime(&tv1); 91 delta = *tv; 92 timevalsub(&delta, &tv1); 93 94 /* 95 * If the system is secure, we do not allow the time to be 96 * set to a value earlier than 1 second less than the highest 97 * time we have yet seen. The worst a miscreant can do in 98 * this circumstance is "freeze" time. He couldn't go 99 * back to the past. 100 * 101 * We similarly do not allow the clock to be stepped more 102 * than one second, nor more than once per second. 
This allows 103 * a miscreant to make the clock march double-time, but no worse. 104 */ 105 if (securelevel_gt(td->td_ucred, 1) != 0) { 106 if (delta.tv_sec < 0 || delta.tv_usec < 0) { 107 /* 108 * Update maxtime to latest time we've seen. 109 */ 110 if (tv1.tv_sec > maxtime.tv_sec) 111 maxtime = tv1; 112 tv2 = *tv; 113 timevalsub(&tv2, &maxtime); 114 if (tv2.tv_sec < -1) { 115 tv->tv_sec = maxtime.tv_sec - 1; 116 printf("Time adjustment clamped to -1 second\n"); 117 } 118 } else { 119 if (tv1.tv_sec == laststep.tv_sec) { 120 splx(s); 121 return (EPERM); 122 } 123 if (delta.tv_sec > 1) { 124 tv->tv_sec = tv1.tv_sec + 1; 125 printf("Time adjustment clamped to +1 second\n"); 126 } 127 laststep = *tv; 128 } 129 } 130 131 ts.tv_sec = tv->tv_sec; 132 ts.tv_nsec = tv->tv_usec * 1000; 133 mtx_lock(&Giant); 134 tc_setclock(&ts); 135 (void) splsoftclock(); 136 lease_updatetime(delta.tv_sec); 137 splx(s); 138 resettodr(); 139 mtx_unlock(&Giant); 140 return (0); 141} 142 143#ifndef _SYS_SYSPROTO_H_ 144struct clock_gettime_args { 145 clockid_t clock_id; 146 struct timespec *tp; 147}; 148#endif 149 150/* 151 * MPSAFE 152 */ 153/* ARGSUSED */ 154int 155clock_gettime(struct thread *td, struct clock_gettime_args *uap) 156{ 157 struct timespec ats; 158 struct timeval sys, user; 159 struct proc *p; 160 161 p = td->td_proc; 162 switch (uap->clock_id) { 163 case CLOCK_REALTIME: 164 nanotime(&ats); 165 break; 166 case CLOCK_VIRTUAL: 167 PROC_LOCK(p); 168 calcru(p, &user, &sys); 169 PROC_UNLOCK(p); 170 TIMEVAL_TO_TIMESPEC(&user, &ats); 171 break; 172 case CLOCK_PROF: 173 PROC_LOCK(p); 174 calcru(p, &user, &sys); 175 PROC_UNLOCK(p); 176 timevaladd(&user, &sys); 177 TIMEVAL_TO_TIMESPEC(&user, &ats); 178 break; 179 case CLOCK_MONOTONIC: 180 nanouptime(&ats); 181 break; 182 default: 183 return (EINVAL); 184 } 185 return (copyout(&ats, uap->tp, sizeof(ats))); 186} 187 188#ifndef _SYS_SYSPROTO_H_ 189struct clock_settime_args { 190 clockid_t clock_id; 191 const struct timespec *tp; 192}; 
193#endif 194 195/* 196 * MPSAFE 197 */ 198/* ARGSUSED */ 199int 200clock_settime(struct thread *td, struct clock_settime_args *uap) 201{ 202 struct timeval atv; 203 struct timespec ats; 204 int error; 205 206#ifdef MAC 207 error = mac_check_system_settime(td->td_ucred); 208 if (error) 209 return (error); 210#endif 211 if ((error = suser(td)) != 0) 212 return (error); 213 if (uap->clock_id != CLOCK_REALTIME) 214 return (EINVAL); 215 if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0) 216 return (error); 217 if (ats.tv_nsec < 0 || ats.tv_nsec >= 1000000000) 218 return (EINVAL); 219 /* XXX Don't convert nsec->usec and back */ 220 TIMESPEC_TO_TIMEVAL(&atv, &ats); 221 error = settime(td, &atv); 222 return (error); 223} 224 225#ifndef _SYS_SYSPROTO_H_ 226struct clock_getres_args { 227 clockid_t clock_id; 228 struct timespec *tp; 229}; 230#endif 231 232int 233clock_getres(struct thread *td, struct clock_getres_args *uap) 234{ 235 struct timespec ts; 236 237 ts.tv_sec = 0; 238 switch (uap->clock_id) { 239 case CLOCK_REALTIME: 240 case CLOCK_MONOTONIC: 241 /* 242 * Round up the result of the division cheaply by adding 1. 243 * Rounding up is especially important if rounding down 244 * would give 0. Perfect rounding is unimportant. 245 */ 246 ts.tv_nsec = 1000000000 / tc_getfrequency() + 1; 247 break; 248 case CLOCK_VIRTUAL: 249 case CLOCK_PROF: 250 /* Accurately round up here because we can do so cheaply. 
*/ 251 ts.tv_nsec = (1000000000 + hz - 1) / hz; 252 break; 253 default: 254 return (EINVAL); 255 } 256 if (uap->tp == NULL) 257 return (0); 258 return (copyout(&ts, uap->tp, sizeof(ts))); 259} 260 261static int nanowait; 262 263int 264kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) 265{ 266 struct timespec ts, ts2, ts3; 267 struct timeval tv; 268 int error; 269 270 if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) 271 return (EINVAL); 272 if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) 273 return (0); 274 getnanouptime(&ts); 275 timespecadd(&ts, rqt); 276 TIMESPEC_TO_TIMEVAL(&tv, rqt); 277 for (;;) { 278 error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp", 279 tvtohz(&tv)); 280 getnanouptime(&ts2); 281 if (error != EWOULDBLOCK) { 282 if (error == ERESTART) 283 error = EINTR; 284 if (rmt != NULL) { 285 timespecsub(&ts, &ts2); 286 if (ts.tv_sec < 0) 287 timespecclear(&ts); 288 *rmt = ts; 289 } 290 return (error); 291 } 292 if (timespeccmp(&ts2, &ts, >=)) 293 return (0); 294 ts3 = ts; 295 timespecsub(&ts3, &ts2); 296 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 297 } 298} 299 300#ifndef _SYS_SYSPROTO_H_ 301struct nanosleep_args { 302 struct timespec *rqtp; 303 struct timespec *rmtp; 304}; 305#endif 306 307/* 308 * MPSAFE 309 */ 310/* ARGSUSED */ 311int 312nanosleep(struct thread *td, struct nanosleep_args *uap) 313{ 314 struct timespec rmt, rqt; 315 int error; 316 317 error = copyin(uap->rqtp, &rqt, sizeof(rqt)); 318 if (error) 319 return (error); 320 321 if (uap->rmtp && 322 !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) 323 return (EFAULT); 324 error = kern_nanosleep(td, &rqt, &rmt); 325 if (error && uap->rmtp) { 326 int error2; 327 328 error2 = copyout(&rmt, uap->rmtp, sizeof(rmt)); 329 if (error2) 330 error = error2; 331 } 332 return (error); 333} 334 335#ifndef _SYS_SYSPROTO_H_ 336struct gettimeofday_args { 337 struct timeval *tp; 338 struct timezone *tzp; 339}; 340#endif 341/* 342 * MPSAFE 343 */ 344/* ARGSUSED */ 
345int 346gettimeofday(struct thread *td, struct gettimeofday_args *uap) 347{ 348 struct timeval atv; 349 struct timezone rtz; 350 int error = 0; 351 352 if (uap->tp) { 353 microtime(&atv); 354 error = copyout(&atv, uap->tp, sizeof (atv)); 355 } 356 if (error == 0 && uap->tzp != NULL) { 357 rtz.tz_minuteswest = tz_minuteswest; 358 rtz.tz_dsttime = tz_dsttime; 359 error = copyout(&rtz, uap->tzp, sizeof (rtz)); 360 } 361 return (error); 362} 363 364#ifndef _SYS_SYSPROTO_H_ 365struct settimeofday_args { 366 struct timeval *tv; 367 struct timezone *tzp; 368}; 369#endif 370/* 371 * MPSAFE 372 */ 373/* ARGSUSED */ 374int 375settimeofday(struct thread *td, struct settimeofday_args *uap) 376{ 377 struct timeval atv; 378 struct timezone atz; 379 int error = 0; 380 381#ifdef MAC 382 error = mac_check_system_settime(td->td_ucred); 383 if (error) 384 return (error); 385#endif 386 if ((error = suser(td))) 387 return (error); 388 /* Verify all parameters before changing time. */ 389 if (uap->tv) { 390 if ((error = copyin(uap->tv, &atv, sizeof(atv)))) 391 return (error); 392 if (atv.tv_usec < 0 || atv.tv_usec >= 1000000) 393 return (EINVAL); 394 } 395 if (uap->tzp && 396 (error = copyin(uap->tzp, &atz, sizeof(atz)))) 397 return (error); 398 399 if (uap->tv && (error = settime(td, &atv))) 400 return (error); 401 if (uap->tzp) { 402 tz_minuteswest = atz.tz_minuteswest; 403 tz_dsttime = atz.tz_dsttime; 404 } 405 return (error); 406} 407/* 408 * Get value of an interval timer. The process virtual and 409 * profiling virtual time timers are kept in the p_stats area, since 410 * they can be swapped out. These are kept internally in the 411 * way they are specified externally: in time until they expire. 412 * 413 * The real time interval timer is kept in the process table slot 414 * for the process, and its value (it_value) is kept as an 415 * absolute time rather than as a delta, so that it is easy to keep 416 * periodic real-time signals from drifting. 
417 * 418 * Virtual time timers are processed in the hardclock() routine of 419 * kern_clock.c. The real time timer is processed by a timeout 420 * routine, called from the softclock() routine. Since a callout 421 * may be delayed in real time due to interrupt processing in the system, 422 * it is possible for the real time timeout routine (realitexpire, given below), 423 * to be delayed in real time past when it is supposed to occur. It 424 * does not suffice, therefore, to reload the real timer .it_value from the 425 * real time timers .it_interval. Rather, we compute the next time in 426 * absolute time the timer should go off. 427 */ 428#ifndef _SYS_SYSPROTO_H_ 429struct getitimer_args { 430 u_int which; 431 struct itimerval *itv; 432}; 433#endif 434/* 435 * MPSAFE 436 */ 437int 438getitimer(struct thread *td, struct getitimer_args *uap) 439{ 440 struct proc *p = td->td_proc; 441 struct timeval ctv; 442 struct itimerval aitv; 443 444 if (uap->which > ITIMER_PROF) 445 return (EINVAL); 446 447 if (uap->which == ITIMER_REAL) { 448 /* 449 * Convert from absolute to relative time in .it_value 450 * part of real time timer. If time for real time timer 451 * has passed return 0, else return difference between 452 * current time and time for the timer to go off. 
453 */ 454 PROC_LOCK(p); 455 aitv = p->p_realtimer; 456 PROC_UNLOCK(p); 457 if (timevalisset(&aitv.it_value)) { 458 getmicrouptime(&ctv); 459 if (timevalcmp(&aitv.it_value, &ctv, <)) 460 timevalclear(&aitv.it_value); 461 else 462 timevalsub(&aitv.it_value, &ctv); 463 } 464 } else { 465 mtx_lock_spin(&sched_lock); 466 aitv = p->p_stats->p_timer[uap->which]; 467 mtx_unlock_spin(&sched_lock); 468 } 469 return (copyout(&aitv, uap->itv, sizeof (struct itimerval))); 470} 471 472#ifndef _SYS_SYSPROTO_H_ 473struct setitimer_args { 474 u_int which; 475 struct itimerval *itv, *oitv; 476}; 477#endif 478/* 479 * MPSAFE 480 */ 481int 482setitimer(struct thread *td, struct setitimer_args *uap) 483{ 484 struct proc *p = td->td_proc; 485 struct itimerval aitv, oitv; 486 struct timeval ctv; 487 int error; 488 489 if (uap->itv == NULL) { 490 uap->itv = uap->oitv; 491 return (getitimer(td, (struct getitimer_args *)uap)); 492 } 493 494 if (uap->which > ITIMER_PROF) 495 return (EINVAL); 496 if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval)))) 497 return (error); 498 if (itimerfix(&aitv.it_value)) 499 return (EINVAL); 500 if (!timevalisset(&aitv.it_value)) 501 timevalclear(&aitv.it_interval); 502 else if (itimerfix(&aitv.it_interval)) 503 return (EINVAL); 504 505 if (uap->which == ITIMER_REAL) { 506 PROC_LOCK(p); 507 if (timevalisset(&p->p_realtimer.it_value)) 508 callout_stop(&p->p_itcallout); 509 getmicrouptime(&ctv); 510 if (timevalisset(&aitv.it_value)) { 511 callout_reset(&p->p_itcallout, tvtohz(&aitv.it_value), 512 realitexpire, p); 513 timevaladd(&aitv.it_value, &ctv); 514 } 515 oitv = p->p_realtimer; 516 p->p_realtimer = aitv; 517 PROC_UNLOCK(p); 518 if (timevalisset(&oitv.it_value)) { 519 if (timevalcmp(&oitv.it_value, &ctv, <)) 520 timevalclear(&oitv.it_value); 521 else 522 timevalsub(&oitv.it_value, &ctv); 523 } 524 } else { 525 mtx_lock_spin(&sched_lock); 526 oitv = p->p_stats->p_timer[uap->which]; 527 p->p_stats->p_timer[uap->which] = aitv; 528 
mtx_unlock_spin(&sched_lock); 529 } 530 if (uap->oitv == NULL) 531 return (0); 532 return (copyout(&oitv, uap->oitv, sizeof(struct itimerval))); 533} 534 535/* 536 * Real interval timer expired: 537 * send process whose timer expired an alarm signal. 538 * If time is not set up to reload, then just return. 539 * Else compute next time timer should go off which is > current time. 540 * This is where delay in processing this timeout causes multiple 541 * SIGALRM calls to be compressed into one. 542 * tvtohz() always adds 1 to allow for the time until the next clock 543 * interrupt being strictly less than 1 clock tick, but we don't want 544 * that here since we want to appear to be in sync with the clock 545 * interrupt even when we're delayed. 546 */ 547void 548realitexpire(void *arg) 549{ 550 struct proc *p; 551 struct timeval ctv, ntv; 552 553 p = (struct proc *)arg; 554 PROC_LOCK(p); 555 psignal(p, SIGALRM); 556 if (!timevalisset(&p->p_realtimer.it_interval)) { 557 timevalclear(&p->p_realtimer.it_value); 558 if (p->p_flag & P_WEXIT) 559 wakeup(&p->p_itcallout); 560 PROC_UNLOCK(p); 561 return; 562 } 563 for (;;) { 564 timevaladd(&p->p_realtimer.it_value, 565 &p->p_realtimer.it_interval); 566 getmicrouptime(&ctv); 567 if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) { 568 ntv = p->p_realtimer.it_value; 569 timevalsub(&ntv, &ctv); 570 callout_reset(&p->p_itcallout, tvtohz(&ntv) - 1, 571 realitexpire, p); 572 PROC_UNLOCK(p); 573 return; 574 } 575 } 576 /*NOTREACHED*/ 577} 578 579/* 580 * Check that a proposed value to load into the .it_value or 581 * .it_interval part of an interval timer is acceptable, and 582 * fix it to have at least minimal value (i.e. if it is less 583 * than the resolution of the clock, round it up.) 
584 */ 585int 586itimerfix(struct timeval *tv) 587{ 588 589 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || 590 tv->tv_usec < 0 || tv->tv_usec >= 1000000) 591 return (EINVAL); 592 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick) 593 tv->tv_usec = tick; 594 return (0); 595} 596 597/* 598 * Decrement an interval timer by a specified number 599 * of microseconds, which must be less than a second, 600 * i.e. < 1000000. If the timer expires, then reload 601 * it. In this case, carry over (usec - old value) to 602 * reduce the value reloaded into the timer so that 603 * the timer does not drift. This routine assumes 604 * that it is called in a context where the timers 605 * on which it is operating cannot change in value. 606 */ 607int 608itimerdecr(struct itimerval *itp, int usec) 609{ 610 611 if (itp->it_value.tv_usec < usec) { 612 if (itp->it_value.tv_sec == 0) { 613 /* expired, and already in next interval */ 614 usec -= itp->it_value.tv_usec; 615 goto expire; 616 } 617 itp->it_value.tv_usec += 1000000; 618 itp->it_value.tv_sec--; 619 } 620 itp->it_value.tv_usec -= usec; 621 usec = 0; 622 if (timevalisset(&itp->it_value)) 623 return (1); 624 /* expired, exactly at end of interval */ 625expire: 626 if (timevalisset(&itp->it_interval)) { 627 itp->it_value = itp->it_interval; 628 itp->it_value.tv_usec -= usec; 629 if (itp->it_value.tv_usec < 0) { 630 itp->it_value.tv_usec += 1000000; 631 itp->it_value.tv_sec--; 632 } 633 } else 634 itp->it_value.tv_usec = 0; /* sec is already 0 */ 635 return (0); 636} 637 638/* 639 * Add and subtract routines for timevals. 640 * N.B.: subtract routine doesn't deal with 641 * results which are before the beginning, 642 * it just gets very confused in this case. 643 * Caveat emptor. 
644 */ 645void 646timevaladd(struct timeval *t1, const struct timeval *t2) 647{ 648 649 t1->tv_sec += t2->tv_sec; 650 t1->tv_usec += t2->tv_usec; 651 timevalfix(t1); 652} 653 654void 655timevalsub(struct timeval *t1, const struct timeval *t2) 656{ 657 658 t1->tv_sec -= t2->tv_sec; 659 t1->tv_usec -= t2->tv_usec; 660 timevalfix(t1); 661} 662 663static void 664timevalfix(struct timeval *t1) 665{ 666 667 if (t1->tv_usec < 0) { 668 t1->tv_sec--; 669 t1->tv_usec += 1000000; 670 } 671 if (t1->tv_usec >= 1000000) { 672 t1->tv_sec++; 673 t1->tv_usec -= 1000000; 674 } 675} 676 677/* 678 * ratecheck(): simple time-based rate-limit checking. 679 */ 680int 681ratecheck(struct timeval *lasttime, const struct timeval *mininterval) 682{ 683 struct timeval tv, delta; 684 int rv = 0; 685 686 getmicrouptime(&tv); /* NB: 10ms precision */ 687 delta = tv; 688 timevalsub(&delta, lasttime); 689 690 /* 691 * check for 0,0 is so that the message will be seen at least once, 692 * even if interval is huge. 693 */ 694 if (timevalcmp(&delta, mininterval, >=) || 695 (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { 696 *lasttime = tv; 697 rv = 1; 698 } 699 700 return (rv); 701} 702 703/* 704 * ppsratecheck(): packets (or events) per second limitation. 705 * 706 * Return 0 if the limit is to be enforced (e.g. the caller 707 * should drop a packet because of the rate limitation). 708 * 709 * maxpps of 0 always causes zero to be returned. maxpps of -1 710 * always causes 1 to be returned; this effectively defeats rate 711 * limiting. 712 * 713 * Note that we maintain the struct timeval for compatibility 714 * with other bsd systems. We reuse the storage and just monitor 715 * clock ticks for minimal overhead. 716 */ 717int 718ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) 719{ 720 int now; 721 722 /* 723 * Reset the last time and counter if this is the first call 724 * or more than a second has passed since the last update of 725 * lasttime. 
726 */ 727 now = ticks; 728 if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) { 729 lasttime->tv_sec = now; 730 *curpps = 1; 731 return (maxpps != 0); 732 } else { 733 (*curpps)++; /* NB: ignore potential overflow */ 734 return (maxpps < 0 || *curpps < maxpps); 735 } 736} 737