kern_acct.c revision 162370
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Copyright (c) 1994 Christopher G. Demetriou
11 * Copyright (c) 2005 Robert N. M. Watson
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	@(#)kern_acct.c	8.1 (Berkeley) 6/14/93
42 */
43
44#include <sys/cdefs.h>
45__FBSDID("$FreeBSD: head/sys/kern/kern_acct.c 162370 2006-09-17 11:00:36Z rwatson $");
46
47#include "opt_mac.h"
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/acct.h>
52#include <sys/fcntl.h>
53#include <sys/kernel.h>
54#include <sys/kthread.h>
55#include <sys/lock.h>
56#include <sys/mac.h>
57#include <sys/mount.h>
58#include <sys/mutex.h>
59#include <sys/namei.h>
60#include <sys/proc.h>
61#include <sys/resourcevar.h>
62#include <sys/sched.h>
63#include <sys/sx.h>
64#include <sys/sysctl.h>
65#include <sys/sysent.h>
66#include <sys/syslog.h>
67#include <sys/sysproto.h>
68#include <sys/tty.h>
69#include <sys/vnode.h>
70
71/*
72 * The routines implemented in this file are described in:
73 *      Leffler, et al.: The Design and Implementation of the 4.3BSD
 *	    UNIX Operating System (Addison-Wesley, 1989)
75 * on pages 62-63.
76 *
77 * Arguably, to simplify accounting operations, this mechanism should
78 * be replaced by one in which an accounting log file (similar to /dev/klog)
79 * is read by a user process, etc.  However, that has its own problems.
80 */
81
82/*
83 * Internal accounting functions.
84 * The former's operation is described in Leffler, et al., and the latter
85 * was provided by UCB with the 4.4BSD-Lite release
86 */
87static comp_t	encode_comp_t(u_long, u_long);
88static void	acctwatch(void);
89static void	acct_thread(void *);
90static int	acct_disable(struct thread *);
91
92/*
93 * Accounting vnode pointer, saved vnode pointer, and flags for each.
94 * acct_sx protects against changes to the active vnode and credentials
95 * while accounting records are being committed to disk.
96 */
97static int		 acct_configured;
98static int		 acct_suspended;
99static struct vnode	*acct_vp;
100static struct ucred	*acct_cred;
101static int		 acct_flags;
102static struct sx	 acct_sx;
103
104SX_SYSINIT(acct, &acct_sx, "acct_sx");
105
106/*
107 * State of the accounting kthread.
108 */
109static int		 acct_state;
110
111#define	ACCT_RUNNING	1	/* Accounting kthread is running. */
112#define	ACCT_EXITREQ	2	/* Accounting kthread should exit. */
113
114/*
115 * Values associated with enabling and disabling accounting
116 */
117static int acctsuspend = 2;	/* stop accounting when < 2% free space left */
118SYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW,
119	&acctsuspend, 0, "percentage of free disk space below which accounting stops");
120
121static int acctresume = 4;	/* resume when free space risen to > 4% */
122SYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW,
123	&acctresume, 0, "percentage of free disk space above which accounting resumes");
124
125static int acctchkfreq = 15;	/* frequency (in seconds) to check space */
126
127static int
128sysctl_acct_chkfreq(SYSCTL_HANDLER_ARGS)
129{
130	int error, value;
131
132	/* Write out the old value. */
133	error = SYSCTL_OUT(req, &acctchkfreq, sizeof(int));
134	if (error || req->newptr == NULL)
135		return (error);
136
137	/* Read in and verify the new value. */
138	error = SYSCTL_IN(req, &value, sizeof(int));
139	if (error)
140		return (error);
141	if (value <= 0)
142		return (EINVAL);
143	acctchkfreq = value;
144	return (0);
145}
146SYSCTL_PROC(_kern, OID_AUTO, acct_chkfreq, CTLTYPE_INT|CTLFLAG_RW,
147    &acctchkfreq, 0, sysctl_acct_chkfreq, "I",
148    "frequency for checking the free space");
149
150SYSCTL_INT(_kern, OID_AUTO, acct_configured, CTLFLAG_RD, &acct_configured, 0,
151	"Accounting configured or not");
152
153SYSCTL_INT(_kern, OID_AUTO, acct_suspended, CTLFLAG_RD, &acct_suspended, 0,
154	"Accounting suspended or not");
155
156/*
157 * Accounting system call.  Written based on the specification and
158 * previous implementation done by Mark Tinguely.
159 *
160 * MPSAFE
161 */
162int
163acct(struct thread *td, struct acct_args *uap)
164{
165	struct nameidata nd;
166	int error, flags, vfslocked;
167
168	/* Make sure that the caller is root. */
169	error = suser(td);
170	if (error)
171		return (error);
172
173	/*
174	 * If accounting is to be started to a file, open that file for
175	 * appending and make sure it's a 'normal'.
176	 */
177	if (uap->path != NULL) {
178		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1,
179		    UIO_USERSPACE, uap->path, td);
180		flags = FWRITE | O_APPEND;
181		error = vn_open(&nd, &flags, 0, -1);
182		if (error)
183			return (error);
184		vfslocked = NDHASGIANT(&nd);
185		NDFREE(&nd, NDF_ONLY_PNBUF);
186#ifdef MAC
187		error = mac_check_system_acct(td->td_ucred, nd.ni_vp);
188		if (error) {
189			VOP_UNLOCK(nd.ni_vp, 0, td);
190			vn_close(nd.ni_vp, flags, td->td_ucred, td);
191			VFS_UNLOCK_GIANT(vfslocked);
192			return (error);
193		}
194#endif
195		VOP_UNLOCK(nd.ni_vp, 0, td);
196		if (nd.ni_vp->v_type != VREG) {
197			vn_close(nd.ni_vp, flags, td->td_ucred, td);
198			VFS_UNLOCK_GIANT(vfslocked);
199			return (EACCES);
200		}
201		VFS_UNLOCK_GIANT(vfslocked);
202#ifdef MAC
203	} else {
204		error = mac_check_system_acct(td->td_ucred, NULL);
205		if (error)
206			return (error);
207#endif
208	}
209
210	/*
211	 * Disallow concurrent access to the accounting vnode while we swap
212	 * it out, in order to prevent access after close.
213	 */
214	sx_xlock(&acct_sx);
215
216	/*
217	 * If accounting was previously enabled, kill the old space-watcher,
218	 * close the file, and (if no new file was specified, leave).  Reset
219	 * the suspended state regardless of whether accounting remains
220	 * enabled.
221	 */
222	acct_suspended = 0;
223	if (acct_vp != NULL) {
224		vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
225		error = acct_disable(td);
226		VFS_UNLOCK_GIANT(vfslocked);
227	}
228	if (uap->path == NULL) {
229		if (acct_state & ACCT_RUNNING) {
230			acct_state |= ACCT_EXITREQ;
231			wakeup(&acct_state);
232		}
233		sx_xunlock(&acct_sx);
234		return (error);
235	}
236
237	/*
238	 * Save the new accounting file vnode, and schedule the new
239	 * free space watcher.
240	 */
241	acct_vp = nd.ni_vp;
242	acct_cred = crhold(td->td_ucred);
243	acct_flags = flags;
244	if (acct_state & ACCT_RUNNING)
245		acct_state &= ~ACCT_EXITREQ;
246	else {
247		/*
248		 * Try to start up an accounting kthread.  We may start more
249		 * than one, but if so the extras will commit suicide as
250		 * soon as they start up.
251		 */
252		error = kthread_create(acct_thread, NULL, NULL, 0, 0,
253		    "accounting");
254		if (error) {
255			vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
256			(void) vn_close(acct_vp, acct_flags, acct_cred, td);
257			VFS_UNLOCK_GIANT(vfslocked);
258			crfree(acct_cred);
259			acct_configured = 0;
260			acct_vp = NULL;
261			acct_cred = NULL;
262			acct_flags = 0;
263			sx_xunlock(&acct_sx);
264			log(LOG_NOTICE, "Unable to start accounting thread\n");
265			return (error);
266		}
267	}
268	acct_configured = 1;
269	sx_xunlock(&acct_sx);
270	log(LOG_NOTICE, "Accounting enabled\n");
271	return (error);
272}
273
274/*
275 * Disable currently in-progress accounting by closing the vnode, dropping
276 * our reference to the credential, and clearing the vnode's flags.
277 */
278static int
279acct_disable(struct thread *td)
280{
281	int error;
282
283	sx_assert(&acct_sx, SX_XLOCKED);
284	error = vn_close(acct_vp, acct_flags, acct_cred, td);
285	crfree(acct_cred);
286	acct_configured = 0;
287	acct_vp = NULL;
288	acct_cred = NULL;
289	acct_flags = 0;
290	log(LOG_NOTICE, "Accounting disabled\n");
291	return (error);
292}
293
294/*
295 * Write out process accounting information, on process exit.
296 * Data to be written out is specified in Leffler, et al.
297 * and are enumerated below.  (They're also noted in the system
298 * "acct.h" header file.)
299 */
300int
301acct_process(struct thread *td)
302{
303	struct acct acct;
304	struct timeval ut, st, tmp;
305	struct plimit *newlim, *oldlim;
306	struct proc *p;
307	struct rusage *r;
308	int t, ret, vfslocked;
309
310	/*
311	 * Lockless check of accounting condition before doing the hard
312	 * work.
313	 */
314	if (acct_vp == NULL || acct_suspended)
315		return (0);
316
317	sx_slock(&acct_sx);
318
319	/*
320	 * If accounting isn't enabled, don't bother.  Have to check again
321	 * once we own the lock in case we raced with disabling of accounting
322	 * by another thread.
323	 */
324	if (acct_vp == NULL || acct_suspended) {
325		sx_sunlock(&acct_sx);
326		return (0);
327	}
328
329	p = td->td_proc;
330
331	/*
332	 * Get process accounting information.
333	 */
334
335	PROC_LOCK(p);
336	/* (1) The name of the command that ran */
337	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
338
339	/* (2) The amount of user and system time that was used */
340	calcru(p, &ut, &st);
341	acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec);
342	acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec);
343
344	/* (3) The elapsed time the command ran (and its starting time) */
345	tmp = boottime;
346	timevaladd(&tmp, &p->p_stats->p_start);
347	acct.ac_btime = tmp.tv_sec;
348	microuptime(&tmp);
349	timevalsub(&tmp, &p->p_stats->p_start);
350	acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec);
351
352	/* (4) The average amount of memory used */
353	r = &p->p_stats->p_ru;
354	tmp = ut;
355	timevaladd(&tmp, &st);
356	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
357	if (t)
358		acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t;
359	else
360		acct.ac_mem = 0;
361
362	/* (5) The number of disk I/O operations done */
363	acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0);
364
365	/* (6) The UID and GID of the process */
366	acct.ac_uid = p->p_ucred->cr_ruid;
367	acct.ac_gid = p->p_ucred->cr_rgid;
368
369	/* (7) The terminal from which the process was started */
370	SESS_LOCK(p->p_session);
371	if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp)
372		acct.ac_tty = dev2udev(p->p_pgrp->pg_session->s_ttyp->t_dev);
373	else
374		acct.ac_tty = NODEV;
375	SESS_UNLOCK(p->p_session);
376
377	/* (8) The boolean flags that tell how the process terminated, etc. */
378	acct.ac_flag = p->p_acflag;
379	PROC_UNLOCK(p);
380
381	/*
382	 * Eliminate any file size rlimit.
383	 */
384	newlim = lim_alloc();
385	PROC_LOCK(p);
386	oldlim = p->p_limit;
387	lim_copy(newlim, oldlim);
388	newlim->pl_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
389	p->p_limit = newlim;
390	PROC_UNLOCK(p);
391	lim_free(oldlim);
392
393	/*
394	 * Write the accounting information to the file.
395	 */
396	vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
397	VOP_LEASE(acct_vp, td, acct_cred, LEASE_WRITE);
398	ret = vn_rdwr(UIO_WRITE, acct_vp, (caddr_t)&acct, sizeof (acct),
399	    (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, acct_cred, NOCRED,
400	    (int *)0, td);
401	VFS_UNLOCK_GIANT(vfslocked);
402	sx_sunlock(&acct_sx);
403	return (ret);
404}
405
/*
 * Encode_comp_t converts from ticks in seconds and microseconds
 * to ticks in 1/AHZ seconds.  The encoding is described in
 * Leffler, et al., on page 63.
 *
 * The result packs a base 8 exponent (EXPSIZE bits) above a MANTSIZE bit
 * mantissa, i.e. it stands for mantissa * 8^exponent.  Bits shifted out
 * while normalizing are used to round the mantissa up.  Inputs too large
 * to be represented saturate at COMP_T_MAX instead of wrapping around.
 */

#define	MANTSIZE	13			/* 13 bit mantissa. */
#define	EXPSIZE		3			/* Base 8 (3 bit) exponent. */
#define	MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */
#define	COMP_T_MAXEXP	((1 << EXPSIZE) - 1)	/* Maximum exponent value. */
/* Largest encodable value: all exponent and all mantissa bits set. */
#define	COMP_T_MAX	((COMP_T_MAXEXP << MANTSIZE) | MAXFRACT)

static comp_t
encode_comp_t(u_long s, u_long us)
{
	uint64_t frac, ticks;
	int exp, rnd;

	/*
	 * Convert to 1/AHZ units using 64 bit arithmetic.  If even that
	 * would wrap, the value is far beyond what a comp_t can hold, so
	 * saturate right away.
	 */
	frac = us / (1000000 / AHZ);	/* Maximize precision. */
	if (s > (~(uint64_t)0 - frac) / AHZ)
		return (COMP_T_MAX);
	ticks = (uint64_t)s * AHZ + frac;

	exp = 0;
	rnd = 0;
	while (ticks > MAXFRACT) {
		rnd = ticks & (1 << (EXPSIZE - 1));	/* Round up? */
		ticks >>= EXPSIZE;	/* Base 8 exponent == 3 bit shift. */
		exp++;
	}

	/* If we need to round up, do it (and handle overflow correctly). */
	if (rnd && (++ticks > MAXFRACT)) {
		ticks >>= EXPSIZE;
		exp++;
	}

	/*
	 * The exponent field is only EXPSIZE bits wide.  A larger exponent
	 * would spill past the top of comp_t and be truncated, yielding a
	 * small bogus value, so report the maximum instead.
	 */
	if (exp > COMP_T_MAXEXP)
		return (COMP_T_MAX);

	/* Clean it up and polish it off. */
	exp <<= MANTSIZE;		/* Shift the exponent into place */
	exp += ticks;			/* and add on the mantissa. */
	return (exp);
}
443
444/*
445 * Periodically check the filesystem to see if accounting
446 * should be turned on or off.  Beware the case where the vnode
447 * has been vgone()'d out from underneath us, e.g. when the file
448 * system containing the accounting file has been forcibly unmounted.
449 */
450/* ARGSUSED */
451static void
452acctwatch(void)
453{
454	struct statfs sb;
455	int vfslocked;
456
457	sx_assert(&acct_sx, SX_XLOCKED);
458
459	/*
460	 * If accounting was disabled before our kthread was scheduled,
461	 * then acct_vp might be NULL.  If so, just ask our kthread to
462	 * exit and return.
463	 */
464	if (acct_vp == NULL) {
465		acct_state |= ACCT_EXITREQ;
466		return;
467	}
468
469	/*
470	 * If our vnode is no longer valid, tear it down and signal the
471	 * accounting thread to die.
472	 */
473	vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
474	if (acct_vp->v_type == VBAD) {
475		(void) acct_disable(NULL);
476		VFS_UNLOCK_GIANT(vfslocked);
477		acct_state |= ACCT_EXITREQ;
478		return;
479	}
480
481	/*
482	 * Stopping here is better than continuing, maybe it will be VBAD
483	 * next time around.
484	 */
485	if (VFS_STATFS(acct_vp->v_mount, &sb, curthread) < 0) {
486		VFS_UNLOCK_GIANT(vfslocked);
487		return;
488	}
489	VFS_UNLOCK_GIANT(vfslocked);
490	if (acct_suspended) {
491		if (sb.f_bavail > (int64_t)(acctresume * sb.f_blocks /
492		    100)) {
493			acct_suspended = 0;
494			log(LOG_NOTICE, "Accounting resumed\n");
495		}
496	} else {
497		if (sb.f_bavail <= (int64_t)(acctsuspend * sb.f_blocks /
498		    100)) {
499			acct_suspended = 1;
500			log(LOG_NOTICE, "Accounting suspended\n");
501		}
502	}
503}
504
505/*
506 * The main loop for the dedicated kernel thread that periodically calls
507 * acctwatch().
508 */
509static void
510acct_thread(void *dummy)
511{
512	u_char pri;
513
514	/* This is a low-priority kernel thread. */
515	pri = PRI_MAX_KERN;
516	mtx_lock_spin(&sched_lock);
517	sched_prio(curthread, pri);
518	mtx_unlock_spin(&sched_lock);
519
520	/* If another accounting kthread is already running, just die. */
521	sx_xlock(&acct_sx);
522	if (acct_state & ACCT_RUNNING) {
523		sx_xunlock(&acct_sx);
524		kthread_exit(0);
525	}
526	acct_state |= ACCT_RUNNING;
527
528	/* Loop until we are asked to exit. */
529	while (!(acct_state & ACCT_EXITREQ)) {
530
531		/* Perform our periodic checks. */
532		acctwatch();
533
534		/*
535		 * We check this flag again before sleeping since the
536		 * acctwatch() might have shut down accounting and asked us
537		 * to exit.
538		 */
539		if (!(acct_state & ACCT_EXITREQ)) {
540			sx_xunlock(&acct_sx);
541			tsleep(&acct_state, pri, "-", acctchkfreq * hz);
542			sx_xlock(&acct_sx);
543		}
544	}
545
546	/*
547	 * Acknowledge the exit request and shutdown.  We clear both the
548	 * exit request and running flags.
549	 */
550	acct_state = 0;
551	sx_xunlock(&acct_sx);
552	kthread_exit(0);
553}
554