kern_rctl.c revision 284665
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: stable/10/sys/kern/kern_rctl.c 284665 2015-06-21 06:28:26Z trasz $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rctl.c 284665 2015-06-21 06:28:26Z trasz $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
/*
 * NOTE(review): the HRF_* values are not referenced in the visible part
 * of this file -- confirm whether they are still needed.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Buffer sizes, in bytes. */
#define	RCTL_DEFAULT_BUFSIZE	4096	/* Default for rctl_get_rules(2). */
#define	RCTL_MAX_INBUFLEN	4096
#define	RCTL_LOG_BUFSIZE	128	/* Rule text built by rctl_enforce(). */

/* Upper bound on the margin rctl_pcpu_available() subtracts. */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
80
81/*
82 * 'rctl_rule_link' connects a rule with every racct it's related to.
83 * For example, rule 'user:X:openfiles:deny=N/process' is linked
84 * with uidinfo for user X, and to each process of that user.
85 */
86struct rctl_rule_link {
87	LIST_ENTRY(rctl_rule_link)	rrl_next;
88	struct rctl_rule		*rrl_rule;
89	int				rrl_exceeded;
90};
91
/*
 * One name/value pair of a lookup table.  Tables are arrays of these,
 * terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* Textual name. */
	int		d_value;	/* Numeric value the name maps to. */
};
96
97static struct dict subjectnames[] = {
98	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
99	{ "user", RCTL_SUBJECT_TYPE_USER },
100	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
101	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
102	{ NULL, -1 }};
103
104static struct dict resourcenames[] = {
105	{ "cputime", RACCT_CPU },
106	{ "datasize", RACCT_DATA },
107	{ "stacksize", RACCT_STACK },
108	{ "coredumpsize", RACCT_CORE },
109	{ "memoryuse", RACCT_RSS },
110	{ "memorylocked", RACCT_MEMLOCK },
111	{ "maxproc", RACCT_NPROC },
112	{ "openfiles", RACCT_NOFILE },
113	{ "vmemoryuse", RACCT_VMEM },
114	{ "pseudoterminals", RACCT_NPTS },
115	{ "swapuse", RACCT_SWAP },
116	{ "nthr", RACCT_NTHR },
117	{ "msgqqueued", RACCT_MSGQQUEUED },
118	{ "msgqsize", RACCT_MSGQSIZE },
119	{ "nmsgq", RACCT_NMSGQ },
120	{ "nsem", RACCT_NSEM },
121	{ "nsemop", RACCT_NSEMOP },
122	{ "nshm", RACCT_NSHM },
123	{ "shmsize", RACCT_SHMSIZE },
124	{ "wallclock", RACCT_WALLCLOCK },
125	{ "pcpu", RACCT_PCTCPU },
126	{ NULL, -1 }};
127
128static struct dict actionnames[] = {
129	{ "sighup", RCTL_ACTION_SIGHUP },
130	{ "sigint", RCTL_ACTION_SIGINT },
131	{ "sigquit", RCTL_ACTION_SIGQUIT },
132	{ "sigill", RCTL_ACTION_SIGILL },
133	{ "sigtrap", RCTL_ACTION_SIGTRAP },
134	{ "sigabrt", RCTL_ACTION_SIGABRT },
135	{ "sigemt", RCTL_ACTION_SIGEMT },
136	{ "sigfpe", RCTL_ACTION_SIGFPE },
137	{ "sigkill", RCTL_ACTION_SIGKILL },
138	{ "sigbus", RCTL_ACTION_SIGBUS },
139	{ "sigsegv", RCTL_ACTION_SIGSEGV },
140	{ "sigsys", RCTL_ACTION_SIGSYS },
141	{ "sigpipe", RCTL_ACTION_SIGPIPE },
142	{ "sigalrm", RCTL_ACTION_SIGALRM },
143	{ "sigterm", RCTL_ACTION_SIGTERM },
144	{ "sigurg", RCTL_ACTION_SIGURG },
145	{ "sigstop", RCTL_ACTION_SIGSTOP },
146	{ "sigtstp", RCTL_ACTION_SIGTSTP },
147	{ "sigchld", RCTL_ACTION_SIGCHLD },
148	{ "sigttin", RCTL_ACTION_SIGTTIN },
149	{ "sigttou", RCTL_ACTION_SIGTTOU },
150	{ "sigio", RCTL_ACTION_SIGIO },
151	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
152	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
153	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
154	{ "sigprof", RCTL_ACTION_SIGPROF },
155	{ "sigwinch", RCTL_ACTION_SIGWINCH },
156	{ "siginfo", RCTL_ACTION_SIGINFO },
157	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
158	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
159	{ "sigthr", RCTL_ACTION_SIGTHR },
160	{ "deny", RCTL_ACTION_DENY },
161	{ "log", RCTL_ACTION_LOG },
162	{ "devctl", RCTL_ACTION_DEVCTL },
163	{ NULL, -1 }};
164
165static void rctl_init(void);
166SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
167
168static uma_zone_t rctl_rule_link_zone;
169static uma_zone_t rctl_rule_zone;
170static struct rwlock rctl_lock;
171RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
172
173static int rctl_rule_fully_specified(const struct rctl_rule *rule);
174static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
175
176static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
177
178static const char *
179rctl_subject_type_name(int subject)
180{
181	int i;
182
183	for (i = 0; subjectnames[i].d_name != NULL; i++) {
184		if (subjectnames[i].d_value == subject)
185			return (subjectnames[i].d_name);
186	}
187
188	panic("rctl_subject_type_name: unknown subject type %d", subject);
189}
190
191static const char *
192rctl_action_name(int action)
193{
194	int i;
195
196	for (i = 0; actionnames[i].d_name != NULL; i++) {
197		if (actionnames[i].d_value == action)
198			return (actionnames[i].d_name);
199	}
200
201	panic("rctl_action_name: unknown action %d", action);
202}
203
204const char *
205rctl_resource_name(int resource)
206{
207	int i;
208
209	for (i = 0; resourcenames[i].d_name != NULL; i++) {
210		if (resourcenames[i].d_value == resource)
211			return (resourcenames[i].d_name);
212	}
213
214	panic("rctl_resource_name: unknown resource %d", resource);
215}
216
217/*
218 * Return the amount of resource that can be allocated by 'p' before
219 * hitting 'rule'.
220 */
221static int64_t
222rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
223{
224	int resource;
225	int64_t available = INT64_MAX;
226	struct ucred *cred = p->p_ucred;
227
228	ASSERT_RACCT_ENABLED();
229	rw_assert(&rctl_lock, RA_LOCKED);
230
231	resource = rule->rr_resource;
232	switch (rule->rr_per) {
233	case RCTL_SUBJECT_TYPE_PROCESS:
234		available = rule->rr_amount -
235		    p->p_racct->r_resources[resource];
236		break;
237	case RCTL_SUBJECT_TYPE_USER:
238		available = rule->rr_amount -
239		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
240		break;
241	case RCTL_SUBJECT_TYPE_LOGINCLASS:
242		available = rule->rr_amount -
243		    cred->cr_loginclass->lc_racct->r_resources[resource];
244		break;
245	case RCTL_SUBJECT_TYPE_JAIL:
246		available = rule->rr_amount -
247		    cred->cr_prison->pr_prison_racct->prr_racct->
248		        r_resources[resource];
249		break;
250	default:
251		panic("rctl_compute_available: unknown per %d",
252		    rule->rr_per);
253	}
254
255	return (available);
256}
257
258/*
259 * Return non-zero if allocating 'amount' by proc 'p' would exceed
260 * resource limit specified by 'rule'.
261 */
262static int
263rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
264    int64_t amount)
265{
266	int64_t available;
267
268	ASSERT_RACCT_ENABLED();
269
270	rw_assert(&rctl_lock, RA_LOCKED);
271
272	available = rctl_available_resource(p, rule);
273	if (available >= amount)
274		return (0);
275
276	return (1);
277}
278
279/*
280 * Special version of rctl_available() function for the %cpu resource.
281 * We slightly cheat here and return less than we normally would.
282 */
283int64_t
284rctl_pcpu_available(const struct proc *p) {
285	struct rctl_rule *rule;
286	struct rctl_rule_link *link;
287	int64_t available, minavailable, limit;
288
289	ASSERT_RACCT_ENABLED();
290
291	minavailable = INT64_MAX;
292	limit = 0;
293
294	rw_rlock(&rctl_lock);
295
296	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
297		rule = link->rrl_rule;
298		if (rule->rr_resource != RACCT_PCTCPU)
299			continue;
300		if (rule->rr_action != RCTL_ACTION_DENY)
301			continue;
302		available = rctl_available_resource(p, rule);
303		if (available < minavailable) {
304			minavailable = available;
305			limit = rule->rr_amount;
306		}
307	}
308
309	rw_runlock(&rctl_lock);
310
311	/*
312	 * Return slightly less than actual value of the available
313	 * %cpu resource.  This makes %cpu throttling more agressive
314	 * and lets us act sooner than the limits are already exceeded.
315	 */
316	if (limit != 0) {
317		if (limit > 2 * RCTL_PCPU_SHIFT)
318			minavailable -= RCTL_PCPU_SHIFT;
319		else
320			minavailable -= (limit / 2);
321	}
322
323	return (minavailable);
324}
325
326/*
327 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
328 * to what it keeps allocated now.  Returns non-zero if the allocation should
329 * be denied, 0 otherwise.
330 */
331int
332rctl_enforce(struct proc *p, int resource, uint64_t amount)
333{
334	struct rctl_rule *rule;
335	struct rctl_rule_link *link;
336	struct sbuf sb;
337	int should_deny = 0;
338	char *buf;
339	static int curtime = 0;
340	static struct timeval lasttime;
341
342	ASSERT_RACCT_ENABLED();
343
344	rw_rlock(&rctl_lock);
345
346	/*
347	 * There may be more than one matching rule; go through all of them.
348	 * Denial should be done last, after logging and sending signals.
349	 */
350	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
351		rule = link->rrl_rule;
352		if (rule->rr_resource != resource)
353			continue;
354		if (!rctl_would_exceed(p, rule, amount)) {
355			link->rrl_exceeded = 0;
356			continue;
357		}
358
359		switch (rule->rr_action) {
360		case RCTL_ACTION_DENY:
361			should_deny = 1;
362			continue;
363		case RCTL_ACTION_LOG:
364			/*
365			 * If rrl_exceeded != 0, it means we've already
366			 * logged a warning for this process.
367			 */
368			if (link->rrl_exceeded != 0)
369				continue;
370
371			/*
372			 * If the process state is not fully initialized yet,
373			 * we can't access most of the required fields, e.g.
374			 * p->p_comm.  This happens when called from fork1().
375			 * Ignore this rule for now; it will be processed just
376			 * after fork, when called from racct_proc_fork_done().
377			 */
378			if (p->p_state != PRS_NORMAL)
379				continue;
380
381			if (!ppsratecheck(&lasttime, &curtime, 10))
382				continue;
383
384			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
385			if (buf == NULL) {
386				printf("rctl_enforce: out of memory\n");
387				continue;
388			}
389			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
390			rctl_rule_to_sbuf(&sb, rule);
391			sbuf_finish(&sb);
392			printf("rctl: rule \"%s\" matched by pid %d "
393			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
394			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
395			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
396			sbuf_delete(&sb);
397			free(buf, M_RCTL);
398			link->rrl_exceeded = 1;
399			continue;
400		case RCTL_ACTION_DEVCTL:
401			if (link->rrl_exceeded != 0)
402				continue;
403
404			if (p->p_state != PRS_NORMAL)
405				continue;
406
407			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
408			if (buf == NULL) {
409				printf("rctl_enforce: out of memory\n");
410				continue;
411			}
412			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
413			sbuf_printf(&sb, "rule=");
414			rctl_rule_to_sbuf(&sb, rule);
415			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
416			    p->p_pid, p->p_ucred->cr_ruid,
417			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
418			sbuf_finish(&sb);
419			devctl_notify_f("RCTL", "rule", "matched",
420			    sbuf_data(&sb), M_NOWAIT);
421			sbuf_delete(&sb);
422			free(buf, M_RCTL);
423			link->rrl_exceeded = 1;
424			continue;
425		default:
426			if (link->rrl_exceeded != 0)
427				continue;
428
429			if (p->p_state != PRS_NORMAL)
430				continue;
431
432			KASSERT(rule->rr_action > 0 &&
433			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
434			    ("rctl_enforce: unknown action %d",
435			     rule->rr_action));
436
437			/*
438			 * We're using the fact that RCTL_ACTION_SIG* values
439			 * are equal to their counterparts from sys/signal.h.
440			 */
441			kern_psignal(p, rule->rr_action);
442			link->rrl_exceeded = 1;
443			continue;
444		}
445	}
446
447	rw_runlock(&rctl_lock);
448
449	if (should_deny) {
450		/*
451		 * Return fake error code; the caller should change it
452		 * into one proper for the situation - EFSIZ, ENOMEM etc.
453		 */
454		return (EDOOFUS);
455	}
456
457	return (0);
458}
459
460uint64_t
461rctl_get_limit(struct proc *p, int resource)
462{
463	struct rctl_rule *rule;
464	struct rctl_rule_link *link;
465	uint64_t amount = UINT64_MAX;
466
467	ASSERT_RACCT_ENABLED();
468
469	rw_rlock(&rctl_lock);
470
471	/*
472	 * There may be more than one matching rule; go through all of them.
473	 * Denial should be done last, after logging and sending signals.
474	 */
475	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
476		rule = link->rrl_rule;
477		if (rule->rr_resource != resource)
478			continue;
479		if (rule->rr_action != RCTL_ACTION_DENY)
480			continue;
481		if (rule->rr_amount < amount)
482			amount = rule->rr_amount;
483	}
484
485	rw_runlock(&rctl_lock);
486
487	return (amount);
488}
489
490uint64_t
491rctl_get_available(struct proc *p, int resource)
492{
493	struct rctl_rule *rule;
494	struct rctl_rule_link *link;
495	int64_t available, minavailable, allocated;
496
497	minavailable = INT64_MAX;
498
499	ASSERT_RACCT_ENABLED();
500
501	rw_rlock(&rctl_lock);
502
503	/*
504	 * There may be more than one matching rule; go through all of them.
505	 * Denial should be done last, after logging and sending signals.
506	 */
507	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
508		rule = link->rrl_rule;
509		if (rule->rr_resource != resource)
510			continue;
511		if (rule->rr_action != RCTL_ACTION_DENY)
512			continue;
513		available = rctl_available_resource(p, rule);
514		if (available < minavailable)
515			minavailable = available;
516	}
517
518	rw_runlock(&rctl_lock);
519
520	/*
521	 * XXX: Think about this _hard_.
522	 */
523	allocated = p->p_racct->r_resources[resource];
524	if (minavailable < INT64_MAX - allocated)
525		minavailable += allocated;
526	if (minavailable < 0)
527		minavailable = 0;
528	return (minavailable);
529}
530
531static int
532rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
533{
534
535	ASSERT_RACCT_ENABLED();
536
537	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
538		if (rule->rr_subject_type != filter->rr_subject_type)
539			return (0);
540
541		switch (filter->rr_subject_type) {
542		case RCTL_SUBJECT_TYPE_PROCESS:
543			if (filter->rr_subject.rs_proc != NULL &&
544			    rule->rr_subject.rs_proc !=
545			    filter->rr_subject.rs_proc)
546				return (0);
547			break;
548		case RCTL_SUBJECT_TYPE_USER:
549			if (filter->rr_subject.rs_uip != NULL &&
550			    rule->rr_subject.rs_uip !=
551			    filter->rr_subject.rs_uip)
552				return (0);
553			break;
554		case RCTL_SUBJECT_TYPE_LOGINCLASS:
555			if (filter->rr_subject.rs_loginclass != NULL &&
556			    rule->rr_subject.rs_loginclass !=
557			    filter->rr_subject.rs_loginclass)
558				return (0);
559			break;
560		case RCTL_SUBJECT_TYPE_JAIL:
561			if (filter->rr_subject.rs_prison_racct != NULL &&
562			    rule->rr_subject.rs_prison_racct !=
563			    filter->rr_subject.rs_prison_racct)
564				return (0);
565			break;
566		default:
567			panic("rctl_rule_matches: unknown subject type %d",
568			    filter->rr_subject_type);
569		}
570	}
571
572	if (filter->rr_resource != RACCT_UNDEFINED) {
573		if (rule->rr_resource != filter->rr_resource)
574			return (0);
575	}
576
577	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
578		if (rule->rr_action != filter->rr_action)
579			return (0);
580	}
581
582	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
583		if (rule->rr_amount != filter->rr_amount)
584			return (0);
585	}
586
587	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
588		if (rule->rr_per != filter->rr_per)
589			return (0);
590	}
591
592	return (1);
593}
594
595static int
596str2value(const char *str, int *value, struct dict *table)
597{
598	int i;
599
600	if (value == NULL)
601		return (EINVAL);
602
603	for (i = 0; table[i].d_name != NULL; i++) {
604		if (strcasecmp(table[i].d_name, str) == 0) {
605			*value =  table[i].d_value;
606			return (0);
607		}
608	}
609
610	return (EINVAL);
611}
612
613static int
614str2id(const char *str, id_t *value)
615{
616	char *end;
617
618	if (str == NULL)
619		return (EINVAL);
620
621	*value = strtoul(str, &end, 10);
622	if ((size_t)(end - str) != strlen(str))
623		return (EINVAL);
624
625	return (0);
626}
627
/*
 * Parse 'str' as a non-negative decimal number and store it in '*value'.
 *
 * Returns 0 on success.  Returns EINVAL if 'str' is NULL, contains no
 * digits, or has characters left over after the number.  Returns ERANGE
 * if the parsed value is negative once converted to int64_t; this covers
 * both an explicit minus sign (strtoul() accepts one and negates in
 * unsigned arithmetic) and, where unsigned long is 64 bits wide, values
 * above INT64_MAX.  '*value' is only written on success.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;
	int64_t parsed;

	if (str == NULL)
		return (EINVAL);

	parsed = strtoul(str, &end, 10);

	/* Require at least one converted character and no trailing junk. */
	if (end == str || *end != '\0')
		return (EINVAL);

	/*
	 * Amounts are never negative; in particular "-1" must not be
	 * accepted, since a parsed -1 would be indistinguishable from
	 * an unspecified amount.
	 */
	if (parsed < 0)
		return (ERANGE);

	*value = parsed;
	return (0);
}
642
643/*
644 * Connect the rule to the racct, increasing refcount for the rule.
645 */
646static void
647rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
648{
649	struct rctl_rule_link *link;
650
651	ASSERT_RACCT_ENABLED();
652	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
653
654	rctl_rule_acquire(rule);
655	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
656	link->rrl_rule = rule;
657	link->rrl_exceeded = 0;
658
659	rw_wlock(&rctl_lock);
660	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
661	rw_wunlock(&rctl_lock);
662}
663
664static int
665rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
666{
667	struct rctl_rule_link *link;
668
669	ASSERT_RACCT_ENABLED();
670	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
671	rw_assert(&rctl_lock, RA_WLOCKED);
672
673	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
674	if (link == NULL)
675		return (ENOMEM);
676	rctl_rule_acquire(rule);
677	link->rrl_rule = rule;
678	link->rrl_exceeded = 0;
679
680	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
681	return (0);
682}
683
684/*
685 * Remove limits for a rules matching the filter and release
686 * the refcounts for the rules, possibly freeing them.  Returns
687 * the number of limit structures removed.
688 */
689static int
690rctl_racct_remove_rules(struct racct *racct,
691    const struct rctl_rule *filter)
692{
693	int removed = 0;
694	struct rctl_rule_link *link, *linktmp;
695
696	ASSERT_RACCT_ENABLED();
697	rw_assert(&rctl_lock, RA_WLOCKED);
698
699	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
700		if (!rctl_rule_matches(link->rrl_rule, filter))
701			continue;
702
703		LIST_REMOVE(link, rrl_next);
704		rctl_rule_release(link->rrl_rule);
705		uma_zfree(rctl_rule_link_zone, link);
706		removed++;
707	}
708	return (removed);
709}
710
711static void
712rctl_rule_acquire_subject(struct rctl_rule *rule)
713{
714
715	ASSERT_RACCT_ENABLED();
716
717	switch (rule->rr_subject_type) {
718	case RCTL_SUBJECT_TYPE_UNDEFINED:
719	case RCTL_SUBJECT_TYPE_PROCESS:
720		break;
721	case RCTL_SUBJECT_TYPE_JAIL:
722		if (rule->rr_subject.rs_prison_racct != NULL)
723			prison_racct_hold(rule->rr_subject.rs_prison_racct);
724		break;
725	case RCTL_SUBJECT_TYPE_USER:
726		if (rule->rr_subject.rs_uip != NULL)
727			uihold(rule->rr_subject.rs_uip);
728		break;
729	case RCTL_SUBJECT_TYPE_LOGINCLASS:
730		if (rule->rr_subject.rs_loginclass != NULL)
731			loginclass_hold(rule->rr_subject.rs_loginclass);
732		break;
733	default:
734		panic("rctl_rule_acquire_subject: unknown subject type %d",
735		    rule->rr_subject_type);
736	}
737}
738
739static void
740rctl_rule_release_subject(struct rctl_rule *rule)
741{
742
743	ASSERT_RACCT_ENABLED();
744
745	switch (rule->rr_subject_type) {
746	case RCTL_SUBJECT_TYPE_UNDEFINED:
747	case RCTL_SUBJECT_TYPE_PROCESS:
748		break;
749	case RCTL_SUBJECT_TYPE_JAIL:
750		if (rule->rr_subject.rs_prison_racct != NULL)
751			prison_racct_free(rule->rr_subject.rs_prison_racct);
752		break;
753	case RCTL_SUBJECT_TYPE_USER:
754		if (rule->rr_subject.rs_uip != NULL)
755			uifree(rule->rr_subject.rs_uip);
756		break;
757	case RCTL_SUBJECT_TYPE_LOGINCLASS:
758		if (rule->rr_subject.rs_loginclass != NULL)
759			loginclass_free(rule->rr_subject.rs_loginclass);
760		break;
761	default:
762		panic("rctl_rule_release_subject: unknown subject type %d",
763		    rule->rr_subject_type);
764	}
765}
766
767struct rctl_rule *
768rctl_rule_alloc(int flags)
769{
770	struct rctl_rule *rule;
771
772	ASSERT_RACCT_ENABLED();
773
774	rule = uma_zalloc(rctl_rule_zone, flags);
775	if (rule == NULL)
776		return (NULL);
777	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
778	rule->rr_subject.rs_proc = NULL;
779	rule->rr_subject.rs_uip = NULL;
780	rule->rr_subject.rs_loginclass = NULL;
781	rule->rr_subject.rs_prison_racct = NULL;
782	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
783	rule->rr_resource = RACCT_UNDEFINED;
784	rule->rr_action = RCTL_ACTION_UNDEFINED;
785	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
786	refcount_init(&rule->rr_refcount, 1);
787
788	return (rule);
789}
790
791struct rctl_rule *
792rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
793{
794	struct rctl_rule *copy;
795
796	ASSERT_RACCT_ENABLED();
797
798	copy = uma_zalloc(rctl_rule_zone, flags);
799	if (copy == NULL)
800		return (NULL);
801	copy->rr_subject_type = rule->rr_subject_type;
802	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
803	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
804	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
805	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
806	copy->rr_per = rule->rr_per;
807	copy->rr_resource = rule->rr_resource;
808	copy->rr_action = rule->rr_action;
809	copy->rr_amount = rule->rr_amount;
810	refcount_init(&copy->rr_refcount, 1);
811	rctl_rule_acquire_subject(copy);
812
813	return (copy);
814}
815
816void
817rctl_rule_acquire(struct rctl_rule *rule)
818{
819
820	ASSERT_RACCT_ENABLED();
821	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
822
823	refcount_acquire(&rule->rr_refcount);
824}
825
826static void
827rctl_rule_free(void *context, int pending)
828{
829	struct rctl_rule *rule;
830
831	rule = (struct rctl_rule *)context;
832
833	ASSERT_RACCT_ENABLED();
834	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
835
836	/*
837	 * We don't need locking here; rule is guaranteed to be inaccessible.
838	 */
839
840	rctl_rule_release_subject(rule);
841	uma_zfree(rctl_rule_zone, rule);
842}
843
844void
845rctl_rule_release(struct rctl_rule *rule)
846{
847
848	ASSERT_RACCT_ENABLED();
849	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
850
851	if (refcount_release(&rule->rr_refcount)) {
852		/*
853		 * rctl_rule_release() is often called when iterating
854		 * over all the uidinfo structures in the system,
855		 * holding uihashtbl_lock.  Since rctl_rule_free()
856		 * might end up calling uifree(), this would lead
857		 * to lock recursion.  Use taskqueue to avoid this.
858		 */
859		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
860		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
861	}
862}
863
864static int
865rctl_rule_fully_specified(const struct rctl_rule *rule)
866{
867
868	ASSERT_RACCT_ENABLED();
869
870	switch (rule->rr_subject_type) {
871	case RCTL_SUBJECT_TYPE_UNDEFINED:
872		return (0);
873	case RCTL_SUBJECT_TYPE_PROCESS:
874		if (rule->rr_subject.rs_proc == NULL)
875			return (0);
876		break;
877	case RCTL_SUBJECT_TYPE_USER:
878		if (rule->rr_subject.rs_uip == NULL)
879			return (0);
880		break;
881	case RCTL_SUBJECT_TYPE_LOGINCLASS:
882		if (rule->rr_subject.rs_loginclass == NULL)
883			return (0);
884		break;
885	case RCTL_SUBJECT_TYPE_JAIL:
886		if (rule->rr_subject.rs_prison_racct == NULL)
887			return (0);
888		break;
889	default:
890		panic("rctl_rule_fully_specified: unknown subject type %d",
891		    rule->rr_subject_type);
892	}
893	if (rule->rr_resource == RACCT_UNDEFINED)
894		return (0);
895	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
896		return (0);
897	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
898		return (0);
899	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
900		return (0);
901
902	return (1);
903}
904
905static int
906rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
907{
908	int error = 0;
909	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
910	     *amountstr, *perstr;
911	struct rctl_rule *rule;
912	id_t id;
913
914	ASSERT_RACCT_ENABLED();
915
916	rule = rctl_rule_alloc(M_WAITOK);
917
918	subjectstr = strsep(&rulestr, ":");
919	subject_idstr = strsep(&rulestr, ":");
920	resourcestr = strsep(&rulestr, ":");
921	actionstr = strsep(&rulestr, "=/");
922	amountstr = strsep(&rulestr, "/");
923	perstr = rulestr;
924
925	if (subjectstr == NULL || subjectstr[0] == '\0')
926		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
927	else {
928		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
929		if (error != 0)
930			goto out;
931	}
932
933	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
934		rule->rr_subject.rs_proc = NULL;
935		rule->rr_subject.rs_uip = NULL;
936		rule->rr_subject.rs_loginclass = NULL;
937		rule->rr_subject.rs_prison_racct = NULL;
938	} else {
939		switch (rule->rr_subject_type) {
940		case RCTL_SUBJECT_TYPE_UNDEFINED:
941			error = EINVAL;
942			goto out;
943		case RCTL_SUBJECT_TYPE_PROCESS:
944			error = str2id(subject_idstr, &id);
945			if (error != 0)
946				goto out;
947			sx_assert(&allproc_lock, SA_LOCKED);
948			rule->rr_subject.rs_proc = pfind(id);
949			if (rule->rr_subject.rs_proc == NULL) {
950				error = ESRCH;
951				goto out;
952			}
953			PROC_UNLOCK(rule->rr_subject.rs_proc);
954			break;
955		case RCTL_SUBJECT_TYPE_USER:
956			error = str2id(subject_idstr, &id);
957			if (error != 0)
958				goto out;
959			rule->rr_subject.rs_uip = uifind(id);
960			break;
961		case RCTL_SUBJECT_TYPE_LOGINCLASS:
962			rule->rr_subject.rs_loginclass =
963			    loginclass_find(subject_idstr);
964			if (rule->rr_subject.rs_loginclass == NULL) {
965				error = ENAMETOOLONG;
966				goto out;
967			}
968			break;
969		case RCTL_SUBJECT_TYPE_JAIL:
970			rule->rr_subject.rs_prison_racct =
971			    prison_racct_find(subject_idstr);
972			if (rule->rr_subject.rs_prison_racct == NULL) {
973				error = ENAMETOOLONG;
974				goto out;
975			}
976			break;
977               default:
978                       panic("rctl_string_to_rule: unknown subject type %d",
979                           rule->rr_subject_type);
980               }
981	}
982
983	if (resourcestr == NULL || resourcestr[0] == '\0')
984		rule->rr_resource = RACCT_UNDEFINED;
985	else {
986		error = str2value(resourcestr, &rule->rr_resource,
987		    resourcenames);
988		if (error != 0)
989			goto out;
990	}
991
992	if (actionstr == NULL || actionstr[0] == '\0')
993		rule->rr_action = RCTL_ACTION_UNDEFINED;
994	else {
995		error = str2value(actionstr, &rule->rr_action, actionnames);
996		if (error != 0)
997			goto out;
998	}
999
1000	if (amountstr == NULL || amountstr[0] == '\0')
1001		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1002	else {
1003		error = str2int64(amountstr, &rule->rr_amount);
1004		if (error != 0)
1005			goto out;
1006		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
1007			rule->rr_amount *= 1000000;
1008	}
1009
1010	if (perstr == NULL || perstr[0] == '\0')
1011		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1012	else {
1013		error = str2value(perstr, &rule->rr_per, subjectnames);
1014		if (error != 0)
1015			goto out;
1016	}
1017
1018out:
1019	if (error == 0)
1020		*rulep = rule;
1021	else
1022		rctl_rule_release(rule);
1023
1024	return (error);
1025}
1026
1027/*
1028 * Link a rule with all the subjects it applies to.
1029 */
1030int
1031rctl_rule_add(struct rctl_rule *rule)
1032{
1033	struct proc *p;
1034	struct ucred *cred;
1035	struct uidinfo *uip;
1036	struct prison *pr;
1037	struct prison_racct *prr;
1038	struct loginclass *lc;
1039	struct rctl_rule *rule2;
1040	int match;
1041
1042	ASSERT_RACCT_ENABLED();
1043	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1044
1045	/*
1046	 * Some rules just don't make sense.  Note that the one below
1047	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
1048	 * for example, is not deniable in the racct sense, but the
1049	 * limit is enforced in a different way, so "deny" rules for %CPU
1050	 * do make sense.
1051	 */
1052	if (rule->rr_action == RCTL_ACTION_DENY &&
1053	    (rule->rr_resource == RACCT_CPU ||
1054	    rule->rr_resource == RACCT_WALLCLOCK))
1055		return (EOPNOTSUPP);
1056
1057	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1058	    RACCT_IS_SLOPPY(rule->rr_resource))
1059		return (EOPNOTSUPP);
1060
1061	/*
1062	 * Make sure there are no duplicated rules.  Also, for the "deny"
1063	 * rules, remove ones differing only by "amount".
1064	 */
1065	if (rule->rr_action == RCTL_ACTION_DENY) {
1066		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1067		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1068		rctl_rule_remove(rule2);
1069		rctl_rule_release(rule2);
1070	} else
1071		rctl_rule_remove(rule);
1072
1073	switch (rule->rr_subject_type) {
1074	case RCTL_SUBJECT_TYPE_PROCESS:
1075		p = rule->rr_subject.rs_proc;
1076		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1077
1078		rctl_racct_add_rule(p->p_racct, rule);
1079		/*
1080		 * In case of per-process rule, we don't have anything more
1081		 * to do.
1082		 */
1083		return (0);
1084
1085	case RCTL_SUBJECT_TYPE_USER:
1086		uip = rule->rr_subject.rs_uip;
1087		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1088		rctl_racct_add_rule(uip->ui_racct, rule);
1089		break;
1090
1091	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1092		lc = rule->rr_subject.rs_loginclass;
1093		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1094		rctl_racct_add_rule(lc->lc_racct, rule);
1095		break;
1096
1097	case RCTL_SUBJECT_TYPE_JAIL:
1098		prr = rule->rr_subject.rs_prison_racct;
1099		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1100		rctl_racct_add_rule(prr->prr_racct, rule);
1101		break;
1102
1103	default:
1104		panic("rctl_rule_add: unknown subject type %d",
1105		    rule->rr_subject_type);
1106	}
1107
1108	/*
1109	 * Now go through all the processes and add the new rule to the ones
1110	 * it applies to.
1111	 */
1112	sx_assert(&allproc_lock, SA_LOCKED);
1113	FOREACH_PROC_IN_SYSTEM(p) {
1114		cred = p->p_ucred;
1115		switch (rule->rr_subject_type) {
1116		case RCTL_SUBJECT_TYPE_USER:
1117			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1118			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1119				break;
1120			continue;
1121		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1122			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1123				break;
1124			continue;
1125		case RCTL_SUBJECT_TYPE_JAIL:
1126			match = 0;
1127			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1128				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1129					match = 1;
1130					break;
1131				}
1132			}
1133			if (match)
1134				break;
1135			continue;
1136		default:
1137			panic("rctl_rule_add: unknown subject type %d",
1138			    rule->rr_subject_type);
1139		}
1140
1141		rctl_racct_add_rule(p->p_racct, rule);
1142	}
1143
1144	return (0);
1145}
1146
1147static void
1148rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1149{
1150	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1151	int found = 0;
1152
1153	ASSERT_RACCT_ENABLED();
1154
1155	rw_wlock(&rctl_lock);
1156	found += rctl_racct_remove_rules(racct, filter);
1157	rw_wunlock(&rctl_lock);
1158
1159	*((int *)arg3) += found;
1160}
1161
1162/*
1163 * Remove all rules that match the filter.
1164 */
1165int
1166rctl_rule_remove(struct rctl_rule *filter)
1167{
1168	int found = 0;
1169	struct proc *p;
1170
1171	ASSERT_RACCT_ENABLED();
1172
1173	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1174	    filter->rr_subject.rs_proc != NULL) {
1175		p = filter->rr_subject.rs_proc;
1176		rw_wlock(&rctl_lock);
1177		found = rctl_racct_remove_rules(p->p_racct, filter);
1178		rw_wunlock(&rctl_lock);
1179		if (found)
1180			return (0);
1181		return (ESRCH);
1182	}
1183
1184	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1185	    (void *)&found);
1186	ui_racct_foreach(rctl_rule_remove_callback, filter,
1187	    (void *)&found);
1188	prison_racct_foreach(rctl_rule_remove_callback, filter,
1189	    (void *)&found);
1190
1191	sx_assert(&allproc_lock, SA_LOCKED);
1192	rw_wlock(&rctl_lock);
1193	FOREACH_PROC_IN_SYSTEM(p) {
1194		found += rctl_racct_remove_rules(p->p_racct, filter);
1195	}
1196	rw_wunlock(&rctl_lock);
1197
1198	if (found)
1199		return (0);
1200	return (ESRCH);
1201}
1202
1203/*
1204 * Appends a rule to the sbuf.
1205 */
1206static void
1207rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1208{
1209	int64_t amount;
1210
1211	ASSERT_RACCT_ENABLED();
1212
1213	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1214
1215	switch (rule->rr_subject_type) {
1216	case RCTL_SUBJECT_TYPE_PROCESS:
1217		if (rule->rr_subject.rs_proc == NULL)
1218			sbuf_printf(sb, ":");
1219		else
1220			sbuf_printf(sb, "%d:",
1221			    rule->rr_subject.rs_proc->p_pid);
1222		break;
1223	case RCTL_SUBJECT_TYPE_USER:
1224		if (rule->rr_subject.rs_uip == NULL)
1225			sbuf_printf(sb, ":");
1226		else
1227			sbuf_printf(sb, "%d:",
1228			    rule->rr_subject.rs_uip->ui_uid);
1229		break;
1230	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1231		if (rule->rr_subject.rs_loginclass == NULL)
1232			sbuf_printf(sb, ":");
1233		else
1234			sbuf_printf(sb, "%s:",
1235			    rule->rr_subject.rs_loginclass->lc_name);
1236		break;
1237	case RCTL_SUBJECT_TYPE_JAIL:
1238		if (rule->rr_subject.rs_prison_racct == NULL)
1239			sbuf_printf(sb, ":");
1240		else
1241			sbuf_printf(sb, "%s:",
1242			    rule->rr_subject.rs_prison_racct->prr_name);
1243		break;
1244	default:
1245		panic("rctl_rule_to_sbuf: unknown subject type %d",
1246		    rule->rr_subject_type);
1247	}
1248
1249	amount = rule->rr_amount;
1250	if (amount != RCTL_AMOUNT_UNDEFINED &&
1251	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1252		amount /= 1000000;
1253
1254	sbuf_printf(sb, "%s:%s=%jd",
1255	    rctl_resource_name(rule->rr_resource),
1256	    rctl_action_name(rule->rr_action),
1257	    amount);
1258
1259	if (rule->rr_per != rule->rr_subject_type)
1260		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1261}
1262
1263/*
1264 * Routine used by RCTL syscalls to read in input string.
1265 */
1266static int
1267rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1268{
1269	int error;
1270	char *str;
1271
1272	ASSERT_RACCT_ENABLED();
1273
1274	if (inbuflen <= 0)
1275		return (EINVAL);
1276	if (inbuflen > RCTL_MAX_INBUFLEN)
1277		return (E2BIG);
1278
1279	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1280	error = copyinstr(inbufp, str, inbuflen, NULL);
1281	if (error != 0) {
1282		free(str, M_RCTL);
1283		return (error);
1284	}
1285
1286	*inputstr = str;
1287
1288	return (0);
1289}
1290
1291/*
1292 * Routine used by RCTL syscalls to write out output string.
1293 */
1294static int
1295rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1296{
1297	int error;
1298
1299	ASSERT_RACCT_ENABLED();
1300
1301	if (outputsbuf == NULL)
1302		return (0);
1303
1304	sbuf_finish(outputsbuf);
1305	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1306		sbuf_delete(outputsbuf);
1307		return (ERANGE);
1308	}
1309	error = copyout(sbuf_data(outputsbuf), outbufp,
1310	    sbuf_len(outputsbuf) + 1);
1311	sbuf_delete(outputsbuf);
1312	return (error);
1313}
1314
1315static struct sbuf *
1316rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1317{
1318	int i;
1319	int64_t amount;
1320	struct sbuf *sb;
1321
1322	ASSERT_RACCT_ENABLED();
1323
1324	sb = sbuf_new_auto();
1325	for (i = 0; i <= RACCT_MAX; i++) {
1326		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1327			continue;
1328		amount = racct->r_resources[i];
1329		if (RACCT_IS_IN_MILLIONS(i))
1330			amount /= 1000000;
1331		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1332	}
1333	sbuf_setpos(sb, sbuf_len(sb) - 1);
1334	return (sb);
1335}
1336
1337int
1338sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1339{
1340	int error;
1341	char *inputstr;
1342	struct rctl_rule *filter;
1343	struct sbuf *outputsbuf = NULL;
1344	struct proc *p;
1345	struct uidinfo *uip;
1346	struct loginclass *lc;
1347	struct prison_racct *prr;
1348
1349	if (!racct_enable)
1350		return (ENOSYS);
1351
1352	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1353	if (error != 0)
1354		return (error);
1355
1356	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1357	if (error != 0)
1358		return (error);
1359
1360	sx_slock(&allproc_lock);
1361	error = rctl_string_to_rule(inputstr, &filter);
1362	free(inputstr, M_RCTL);
1363	if (error != 0) {
1364		sx_sunlock(&allproc_lock);
1365		return (error);
1366	}
1367
1368	switch (filter->rr_subject_type) {
1369	case RCTL_SUBJECT_TYPE_PROCESS:
1370		p = filter->rr_subject.rs_proc;
1371		if (p == NULL) {
1372			error = EINVAL;
1373			goto out;
1374		}
1375		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1376		break;
1377	case RCTL_SUBJECT_TYPE_USER:
1378		uip = filter->rr_subject.rs_uip;
1379		if (uip == NULL) {
1380			error = EINVAL;
1381			goto out;
1382		}
1383		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1384		break;
1385	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1386		lc = filter->rr_subject.rs_loginclass;
1387		if (lc == NULL) {
1388			error = EINVAL;
1389			goto out;
1390		}
1391		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1392		break;
1393	case RCTL_SUBJECT_TYPE_JAIL:
1394		prr = filter->rr_subject.rs_prison_racct;
1395		if (prr == NULL) {
1396			error = EINVAL;
1397			goto out;
1398		}
1399		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1400		break;
1401	default:
1402		error = EINVAL;
1403	}
1404out:
1405	rctl_rule_release(filter);
1406	sx_sunlock(&allproc_lock);
1407	if (error != 0)
1408		return (error);
1409
1410	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1411
1412	return (error);
1413}
1414
1415static void
1416rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1417{
1418	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1419	struct rctl_rule_link *link;
1420	struct sbuf *sb = (struct sbuf *)arg3;
1421
1422	ASSERT_RACCT_ENABLED();
1423
1424	rw_rlock(&rctl_lock);
1425	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1426		if (!rctl_rule_matches(link->rrl_rule, filter))
1427			continue;
1428		rctl_rule_to_sbuf(sb, link->rrl_rule);
1429		sbuf_printf(sb, ",");
1430	}
1431	rw_runlock(&rctl_lock);
1432}
1433
1434int
1435sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1436{
1437	int error;
1438	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1439	char *inputstr, *buf;
1440	struct sbuf *sb;
1441	struct rctl_rule *filter;
1442	struct rctl_rule_link *link;
1443	struct proc *p;
1444
1445	if (!racct_enable)
1446		return (ENOSYS);
1447
1448	error = priv_check(td, PRIV_RCTL_GET_RULES);
1449	if (error != 0)
1450		return (error);
1451
1452	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1453	if (error != 0)
1454		return (error);
1455
1456	sx_slock(&allproc_lock);
1457	error = rctl_string_to_rule(inputstr, &filter);
1458	free(inputstr, M_RCTL);
1459	if (error != 0) {
1460		sx_sunlock(&allproc_lock);
1461		return (error);
1462	}
1463
1464again:
1465	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1466	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1467	KASSERT(sb != NULL, ("sbuf_new failed"));
1468
1469	sx_assert(&allproc_lock, SA_LOCKED);
1470	FOREACH_PROC_IN_SYSTEM(p) {
1471		rw_rlock(&rctl_lock);
1472		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1473			/*
1474			 * Non-process rules will be added to the buffer later.
1475			 * Adding them here would result in duplicated output.
1476			 */
1477			if (link->rrl_rule->rr_subject_type !=
1478			    RCTL_SUBJECT_TYPE_PROCESS)
1479				continue;
1480			if (!rctl_rule_matches(link->rrl_rule, filter))
1481				continue;
1482			rctl_rule_to_sbuf(sb, link->rrl_rule);
1483			sbuf_printf(sb, ",");
1484		}
1485		rw_runlock(&rctl_lock);
1486	}
1487
1488	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1489	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1490	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1491	if (sbuf_error(sb) == ENOMEM) {
1492		sbuf_delete(sb);
1493		free(buf, M_RCTL);
1494		bufsize *= 4;
1495		goto again;
1496	}
1497
1498	/*
1499	 * Remove trailing ",".
1500	 */
1501	if (sbuf_len(sb) > 0)
1502		sbuf_setpos(sb, sbuf_len(sb) - 1);
1503
1504	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1505
1506	rctl_rule_release(filter);
1507	sx_sunlock(&allproc_lock);
1508	free(buf, M_RCTL);
1509	return (error);
1510}
1511
1512int
1513sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1514{
1515	int error;
1516	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1517	char *inputstr, *buf;
1518	struct sbuf *sb;
1519	struct rctl_rule *filter;
1520	struct rctl_rule_link *link;
1521
1522	if (!racct_enable)
1523		return (ENOSYS);
1524
1525	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1526	if (error != 0)
1527		return (error);
1528
1529	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1530	if (error != 0)
1531		return (error);
1532
1533	sx_slock(&allproc_lock);
1534	error = rctl_string_to_rule(inputstr, &filter);
1535	free(inputstr, M_RCTL);
1536	if (error != 0) {
1537		sx_sunlock(&allproc_lock);
1538		return (error);
1539	}
1540
1541	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1542		rctl_rule_release(filter);
1543		sx_sunlock(&allproc_lock);
1544		return (EINVAL);
1545	}
1546	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1547		rctl_rule_release(filter);
1548		sx_sunlock(&allproc_lock);
1549		return (EOPNOTSUPP);
1550	}
1551	if (filter->rr_subject.rs_proc == NULL) {
1552		rctl_rule_release(filter);
1553		sx_sunlock(&allproc_lock);
1554		return (EINVAL);
1555	}
1556
1557again:
1558	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1559	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1560	KASSERT(sb != NULL, ("sbuf_new failed"));
1561
1562	rw_rlock(&rctl_lock);
1563	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1564	    rrl_next) {
1565		rctl_rule_to_sbuf(sb, link->rrl_rule);
1566		sbuf_printf(sb, ",");
1567	}
1568	rw_runlock(&rctl_lock);
1569	if (sbuf_error(sb) == ENOMEM) {
1570		sbuf_delete(sb);
1571		free(buf, M_RCTL);
1572		bufsize *= 4;
1573		goto again;
1574	}
1575
1576	/*
1577	 * Remove trailing ",".
1578	 */
1579	if (sbuf_len(sb) > 0)
1580		sbuf_setpos(sb, sbuf_len(sb) - 1);
1581
1582	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1583	rctl_rule_release(filter);
1584	sx_sunlock(&allproc_lock);
1585	free(buf, M_RCTL);
1586	return (error);
1587}
1588
1589int
1590sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1591{
1592	int error;
1593	struct rctl_rule *rule;
1594	char *inputstr;
1595
1596	if (!racct_enable)
1597		return (ENOSYS);
1598
1599	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1600	if (error != 0)
1601		return (error);
1602
1603	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1604	if (error != 0)
1605		return (error);
1606
1607	sx_slock(&allproc_lock);
1608	error = rctl_string_to_rule(inputstr, &rule);
1609	free(inputstr, M_RCTL);
1610	if (error != 0) {
1611		sx_sunlock(&allproc_lock);
1612		return (error);
1613	}
1614	/*
1615	 * The 'per' part of a rule is optional.
1616	 */
1617	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1618	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1619		rule->rr_per = rule->rr_subject_type;
1620
1621	if (!rctl_rule_fully_specified(rule)) {
1622		error = EINVAL;
1623		goto out;
1624	}
1625
1626	error = rctl_rule_add(rule);
1627
1628out:
1629	rctl_rule_release(rule);
1630	sx_sunlock(&allproc_lock);
1631	return (error);
1632}
1633
1634int
1635sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1636{
1637	int error;
1638	struct rctl_rule *filter;
1639	char *inputstr;
1640
1641	if (!racct_enable)
1642		return (ENOSYS);
1643
1644	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1645	if (error != 0)
1646		return (error);
1647
1648	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1649	if (error != 0)
1650		return (error);
1651
1652	sx_slock(&allproc_lock);
1653	error = rctl_string_to_rule(inputstr, &filter);
1654	free(inputstr, M_RCTL);
1655	if (error != 0) {
1656		sx_sunlock(&allproc_lock);
1657		return (error);
1658	}
1659
1660	error = rctl_rule_remove(filter);
1661	rctl_rule_release(filter);
1662	sx_sunlock(&allproc_lock);
1663
1664	return (error);
1665}
1666
1667/*
1668 * Update RCTL rule list after credential change.
1669 */
1670void
1671rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1672{
1673	int rulecnt, i;
1674	struct rctl_rule_link *link, *newlink;
1675	struct uidinfo *newuip;
1676	struct loginclass *newlc;
1677	struct prison_racct *newprr;
1678	LIST_HEAD(, rctl_rule_link) newrules;
1679
1680	ASSERT_RACCT_ENABLED();
1681
1682	newuip = newcred->cr_ruidinfo;
1683	newlc = newcred->cr_loginclass;
1684	newprr = newcred->cr_prison->pr_prison_racct;
1685
1686	LIST_INIT(&newrules);
1687
1688again:
1689	/*
1690	 * First, count the rules that apply to the process with new
1691	 * credentials.
1692	 */
1693	rulecnt = 0;
1694	rw_rlock(&rctl_lock);
1695	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1696		if (link->rrl_rule->rr_subject_type ==
1697		    RCTL_SUBJECT_TYPE_PROCESS)
1698			rulecnt++;
1699	}
1700	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1701		rulecnt++;
1702	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1703		rulecnt++;
1704	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1705		rulecnt++;
1706	rw_runlock(&rctl_lock);
1707
1708	/*
1709	 * Create temporary list.  We've dropped the rctl_lock in order
1710	 * to use M_WAITOK.
1711	 */
1712	for (i = 0; i < rulecnt; i++) {
1713		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1714		newlink->rrl_rule = NULL;
1715		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1716	}
1717
1718	newlink = LIST_FIRST(&newrules);
1719
1720	/*
1721	 * Assign rules to the newly allocated list entries.
1722	 */
1723	rw_wlock(&rctl_lock);
1724	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1725		if (link->rrl_rule->rr_subject_type ==
1726		    RCTL_SUBJECT_TYPE_PROCESS) {
1727			if (newlink == NULL)
1728				goto goaround;
1729			rctl_rule_acquire(link->rrl_rule);
1730			newlink->rrl_rule = link->rrl_rule;
1731			newlink = LIST_NEXT(newlink, rrl_next);
1732			rulecnt--;
1733		}
1734	}
1735
1736	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1737		if (newlink == NULL)
1738			goto goaround;
1739		rctl_rule_acquire(link->rrl_rule);
1740		newlink->rrl_rule = link->rrl_rule;
1741		newlink = LIST_NEXT(newlink, rrl_next);
1742		rulecnt--;
1743	}
1744
1745	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1746		if (newlink == NULL)
1747			goto goaround;
1748		rctl_rule_acquire(link->rrl_rule);
1749		newlink->rrl_rule = link->rrl_rule;
1750		newlink = LIST_NEXT(newlink, rrl_next);
1751		rulecnt--;
1752	}
1753
1754	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1755		if (newlink == NULL)
1756			goto goaround;
1757		rctl_rule_acquire(link->rrl_rule);
1758		newlink->rrl_rule = link->rrl_rule;
1759		newlink = LIST_NEXT(newlink, rrl_next);
1760		rulecnt--;
1761	}
1762
1763	if (rulecnt == 0) {
1764		/*
1765		 * Free the old rule list.
1766		 */
1767		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1768			link = LIST_FIRST(&p->p_racct->r_rule_links);
1769			LIST_REMOVE(link, rrl_next);
1770			rctl_rule_release(link->rrl_rule);
1771			uma_zfree(rctl_rule_link_zone, link);
1772		}
1773
1774		/*
1775		 * Replace lists and we're done.
1776		 *
1777		 * XXX: Is there any way to switch list heads instead
1778		 *      of iterating here?
1779		 */
1780		while (!LIST_EMPTY(&newrules)) {
1781			newlink = LIST_FIRST(&newrules);
1782			LIST_REMOVE(newlink, rrl_next);
1783			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1784			    newlink, rrl_next);
1785		}
1786
1787		rw_wunlock(&rctl_lock);
1788
1789		return;
1790	}
1791
1792goaround:
1793	rw_wunlock(&rctl_lock);
1794
1795	/*
1796	 * Rule list changed while we were not holding the rctl_lock.
1797	 * Free the new list and try again.
1798	 */
1799	while (!LIST_EMPTY(&newrules)) {
1800		newlink = LIST_FIRST(&newrules);
1801		LIST_REMOVE(newlink, rrl_next);
1802		if (newlink->rrl_rule != NULL)
1803			rctl_rule_release(newlink->rrl_rule);
1804		uma_zfree(rctl_rule_link_zone, newlink);
1805	}
1806
1807	goto again;
1808}
1809
1810/*
1811 * Assign RCTL rules to the newly created process.
1812 */
1813int
1814rctl_proc_fork(struct proc *parent, struct proc *child)
1815{
1816	int error;
1817	struct rctl_rule_link *link;
1818	struct rctl_rule *rule;
1819
1820	LIST_INIT(&child->p_racct->r_rule_links);
1821
1822	ASSERT_RACCT_ENABLED();
1823	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1824
1825	rw_wlock(&rctl_lock);
1826
1827	/*
1828	 * Go through limits applicable to the parent and assign them
1829	 * to the child.  Rules with 'process' subject have to be duplicated
1830	 * in order to make their rr_subject point to the new process.
1831	 */
1832	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1833		if (link->rrl_rule->rr_subject_type ==
1834		    RCTL_SUBJECT_TYPE_PROCESS) {
1835			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1836			if (rule == NULL)
1837				goto fail;
1838			KASSERT(rule->rr_subject.rs_proc == parent,
1839			    ("rule->rr_subject.rs_proc != parent"));
1840			rule->rr_subject.rs_proc = child;
1841			error = rctl_racct_add_rule_locked(child->p_racct,
1842			    rule);
1843			rctl_rule_release(rule);
1844			if (error != 0)
1845				goto fail;
1846		} else {
1847			error = rctl_racct_add_rule_locked(child->p_racct,
1848			    link->rrl_rule);
1849			if (error != 0)
1850				goto fail;
1851		}
1852	}
1853
1854	rw_wunlock(&rctl_lock);
1855	return (0);
1856
1857fail:
1858	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1859		link = LIST_FIRST(&child->p_racct->r_rule_links);
1860		LIST_REMOVE(link, rrl_next);
1861		rctl_rule_release(link->rrl_rule);
1862		uma_zfree(rctl_rule_link_zone, link);
1863	}
1864	rw_wunlock(&rctl_lock);
1865	return (EAGAIN);
1866}
1867
1868/*
1869 * Release rules attached to the racct.
1870 */
1871void
1872rctl_racct_release(struct racct *racct)
1873{
1874	struct rctl_rule_link *link;
1875
1876	ASSERT_RACCT_ENABLED();
1877
1878	rw_wlock(&rctl_lock);
1879	while (!LIST_EMPTY(&racct->r_rule_links)) {
1880		link = LIST_FIRST(&racct->r_rule_links);
1881		LIST_REMOVE(link, rrl_next);
1882		rctl_rule_release(link->rrl_rule);
1883		uma_zfree(rctl_rule_link_zone, link);
1884	}
1885	rw_wunlock(&rctl_lock);
1886}
1887
1888static void
1889rctl_init(void)
1890{
1891
1892	if (!racct_enable)
1893		return;
1894
1895	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1896	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1897	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1898	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1899	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1900}
1901
1902#else /* !RCTL */
1903
1904int
1905sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1906{
1907
1908	return (ENOSYS);
1909}
1910
1911int
1912sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1913{
1914
1915	return (ENOSYS);
1916}
1917
1918int
1919sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1920{
1921
1922	return (ENOSYS);
1923}
1924
1925int
1926sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1927{
1928
1929	return (ENOSYS);
1930}
1931
1932int
1933sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1934{
1935
1936	return (ENOSYS);
1937}
1938
1939#endif /* !RCTL */
1940