kern_rctl.c revision 225371
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/kern_rctl.c 225371 2011-09-04 05:04:34Z trasz $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_rctl.c 225371 2011-09-04 05:04:34Z trasz $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
/* FEATURE() entry for "rctl"; the second argument is its description. */
FEATURE(rctl, "Resource Limits");

/* HRF_* flag values; not referenced in the part of the file shown here. */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Size of the fixed buffer rctl_enforce() formats log/devctl text into. */
#define	RCTL_LOG_BUFSIZE	128
77
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link refers to; the link holds a reference on it. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set to 1 by rctl_enforce() once a log/devctl/signal action has
	 * fired for this link, and reset to 0 when the rule is no longer
	 * exceeded, so the action is not repeated on every check.
	 */
	int				rrl_exceeded;
};
88
/*
 * One entry of a name <-> value translation table.  The tables below
 * are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name used in rule strings */
	int		d_value;	/* corresponding numeric constant */
};
93
/*
 * Subject type names, used both for the rule subject and for the
 * "per" part of a rule.  Terminated by a { NULL, -1 } sentinel.
 */
static struct dict subjectnames[] = {
	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
	{ "user", RCTL_SUBJECT_TYPE_USER },
	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
	{ NULL, -1 }};
100
/*
 * Resource names accepted in rule strings, mapped to RACCT_* resource
 * identifiers.  Terminated by a { NULL, -1 } sentinel.
 */
static struct dict resourcenames[] = {
	{ "cputime", RACCT_CPU },
	{ "datasize", RACCT_DATA },
	{ "stacksize", RACCT_STACK },
	{ "coredumpsize", RACCT_CORE },
	{ "memoryuse", RACCT_RSS },
	{ "memorylocked", RACCT_MEMLOCK },
	{ "maxproc", RACCT_NPROC },
	{ "openfiles", RACCT_NOFILE },
	{ "vmemoryuse", RACCT_VMEM },
	{ "pseudoterminals", RACCT_NPTS },
	{ "swapuse", RACCT_SWAP },
	{ "nthr", RACCT_NTHR },
	{ "msgqqueued", RACCT_MSGQQUEUED },
	{ "msgqsize", RACCT_MSGQSIZE },
	{ "nmsgq", RACCT_NMSGQ },
	{ "nsem", RACCT_NSEM },
	{ "nsemop", RACCT_NSEMOP },
	{ "nshm", RACCT_NSHM },
	{ "shmsize", RACCT_SHMSIZE },
	{ "wallclock", RACCT_WALLCLOCK },
	{ NULL, -1 }};
123
/*
 * Action names accepted in rule strings, mapped to RCTL_ACTION_*
 * constants: one entry per signal, plus "deny", "log" and "devctl".
 * Terminated by a { NULL, -1 } sentinel.
 */
static struct dict actionnames[] = {
	{ "sighup", RCTL_ACTION_SIGHUP },
	{ "sigint", RCTL_ACTION_SIGINT },
	{ "sigquit", RCTL_ACTION_SIGQUIT },
	{ "sigill", RCTL_ACTION_SIGILL },
	{ "sigtrap", RCTL_ACTION_SIGTRAP },
	{ "sigabrt", RCTL_ACTION_SIGABRT },
	{ "sigemt", RCTL_ACTION_SIGEMT },
	{ "sigfpe", RCTL_ACTION_SIGFPE },
	{ "sigkill", RCTL_ACTION_SIGKILL },
	{ "sigbus", RCTL_ACTION_SIGBUS },
	{ "sigsegv", RCTL_ACTION_SIGSEGV },
	{ "sigsys", RCTL_ACTION_SIGSYS },
	{ "sigpipe", RCTL_ACTION_SIGPIPE },
	{ "sigalrm", RCTL_ACTION_SIGALRM },
	{ "sigterm", RCTL_ACTION_SIGTERM },
	{ "sigurg", RCTL_ACTION_SIGURG },
	{ "sigstop", RCTL_ACTION_SIGSTOP },
	{ "sigtstp", RCTL_ACTION_SIGTSTP },
	{ "sigchld", RCTL_ACTION_SIGCHLD },
	{ "sigttin", RCTL_ACTION_SIGTTIN },
	{ "sigttou", RCTL_ACTION_SIGTTOU },
	{ "sigio", RCTL_ACTION_SIGIO },
	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
	{ "sigprof", RCTL_ACTION_SIGPROF },
	{ "sigwinch", RCTL_ACTION_SIGWINCH },
	{ "siginfo", RCTL_ACTION_SIGINFO },
	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
	{ "sigthr", RCTL_ACTION_SIGTHR },
	{ "deny", RCTL_ACTION_DENY },
	{ "log", RCTL_ACTION_LOG },
	{ "devctl", RCTL_ACTION_DEVCTL },
	{ NULL, -1 }};
160
/* Initialization routine, registered to run at SI_SUB_RACCT/SI_ORDER_FIRST. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones that struct rctl_rule_link and struct rctl_rule come from. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/*
 * Read/write lock taken around every traversal or modification of
 * a racct's r_rule_links list in this file.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers defined further down. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type used for the temporary buffers allocated in this file. */
MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173
174static const char *
175rctl_subject_type_name(int subject)
176{
177	int i;
178
179	for (i = 0; subjectnames[i].d_name != NULL; i++) {
180		if (subjectnames[i].d_value == subject)
181			return (subjectnames[i].d_name);
182	}
183
184	panic("rctl_subject_type_name: unknown subject type %d", subject);
185}
186
187static const char *
188rctl_action_name(int action)
189{
190	int i;
191
192	for (i = 0; actionnames[i].d_name != NULL; i++) {
193		if (actionnames[i].d_value == action)
194			return (actionnames[i].d_name);
195	}
196
197	panic("rctl_action_name: unknown action %d", action);
198}
199
200const char *
201rctl_resource_name(int resource)
202{
203	int i;
204
205	for (i = 0; resourcenames[i].d_name != NULL; i++) {
206		if (resourcenames[i].d_value == resource)
207			return (resourcenames[i].d_name);
208	}
209
210	panic("rctl_resource_name: unknown resource %d", resource);
211}
212
213/*
214 * Return the amount of resource that can be allocated by 'p' before
215 * hitting 'rule'.
216 */
217static int64_t
218rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219{
220	int resource;
221	int64_t available = INT64_MAX;
222	struct ucred *cred = p->p_ucred;
223
224	rw_assert(&rctl_lock, RA_LOCKED);
225
226	resource = rule->rr_resource;
227	switch (rule->rr_per) {
228	case RCTL_SUBJECT_TYPE_PROCESS:
229		available = rule->rr_amount -
230		    p->p_racct->r_resources[resource];
231		break;
232	case RCTL_SUBJECT_TYPE_USER:
233		available = rule->rr_amount -
234		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
235		break;
236	case RCTL_SUBJECT_TYPE_LOGINCLASS:
237		available = rule->rr_amount -
238		    cred->cr_loginclass->lc_racct->r_resources[resource];
239		break;
240	case RCTL_SUBJECT_TYPE_JAIL:
241		available = rule->rr_amount -
242		    cred->cr_prison->pr_prison_racct->prr_racct->
243		        r_resources[resource];
244		break;
245	default:
246		panic("rctl_compute_available: unknown per %d",
247		    rule->rr_per);
248	}
249
250	return (available);
251}
252
253/*
254 * Return non-zero if allocating 'amount' by proc 'p' would exceed
255 * resource limit specified by 'rule'.
256 */
257static int
258rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259    int64_t amount)
260{
261	int64_t available;
262
263	rw_assert(&rctl_lock, RA_LOCKED);
264
265	available = rctl_available_resource(p, rule);
266	if (available >= amount)
267		return (0);
268
269	return (1);
270}
271
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the process racct that concerns 'resource' and
 * would be exceeded has its action carried out: "log" prints a message,
 * "devctl" sends a devctl notification, signal actions signal the
 * process, and "deny" makes the function return EDOOFUS.  The non-deny
 * actions fire once per link; rrl_exceeded stays set until the rule is
 * found not to be exceeded again.
 *
 * NOTE(review): 'amount' is uint64_t here but rctl_would_exceed() takes
 * an int64_t, so very large values change sign on the way - confirm
 * that callers never pass such values.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
	struct rctl_rule *rule;
	struct rctl_rule_link *link;
	struct sbuf sb;
	int should_deny = 0;
	char *buf;
	/* State for ppsratecheck(); static, so shared by all callers. */
	static int curtime = 0;
	static struct timeval lasttime;

	rw_rlock(&rctl_lock);

	/*
	 * There may be more than one matching rule; go through all of them.
	 * Denial should be done last, after logging and sending signals.
	 */
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		rule = link->rrl_rule;
		if (rule->rr_resource != resource)
			continue;
		if (!rctl_would_exceed(p, rule, amount)) {
			/* Back under the limit: re-arm the one-shot actions. */
			link->rrl_exceeded = 0;
			continue;
		}

		/*
		 * Note that every 'continue' below continues the enclosing
		 * LIST_FOREACH loop, not the switch.
		 */
		switch (rule->rr_action) {
		case RCTL_ACTION_DENY:
			should_deny = 1;
			continue;
		case RCTL_ACTION_LOG:
			/*
			 * If rrl_exceeded != 0, it means we've already
			 * logged a warning for this process.
			 */
			if (link->rrl_exceeded != 0)
				continue;

			/* Rate-limit the log messages (limit argument 10). */
			if (!ppsratecheck(&lasttime, &curtime, 10))
				continue;

			/* M_NOWAIT: we are holding rctl_lock here. */
			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
			if (buf == NULL) {
				printf("rctl_enforce: out of memory\n");
				continue;
			}
			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
			rctl_rule_to_sbuf(&sb, rule);
			sbuf_finish(&sb);
			printf("rctl: rule \"%s\" matched by pid %d "
			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
			sbuf_delete(&sb);
			free(buf, M_RCTL);
			link->rrl_exceeded = 1;
			continue;
		case RCTL_ACTION_DEVCTL:
			if (link->rrl_exceeded != 0)
				continue;

			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
			if (buf == NULL) {
				printf("rctl_enforce: out of memory\n");
				continue;
			}
			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
			sbuf_printf(&sb, "rule=");
			rctl_rule_to_sbuf(&sb, rule);
			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
			    p->p_pid, p->p_ucred->cr_ruid,
			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
			sbuf_finish(&sb);
			devctl_notify_f("RCTL", "rule", "matched",
			    sbuf_data(&sb), M_NOWAIT);
			sbuf_delete(&sb);
			free(buf, M_RCTL);
			link->rrl_exceeded = 1;
			continue;
		default:
			/* Everything else is expected to be a signal action. */
			if (link->rrl_exceeded != 0)
				continue;

			KASSERT(rule->rr_action > 0 &&
			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
			    ("rctl_enforce: unknown action %d",
			     rule->rr_action));

			/*
			 * We're using the fact that RCTL_ACTION_SIG* values
			 * are equal to their counterparts from sys/signal.h.
			 */
			psignal(p, rule->rr_action);
			link->rrl_exceeded = 1;
			continue;
		}
	}

	rw_runlock(&rctl_lock);

	if (should_deny) {
		/*
		 * Return fake error code; the caller should change it
		 * into one proper for the situation - EFBIG, ENOMEM etc.
		 */
		return (EDOOFUS);
	}

	return (0);
}
387
388uint64_t
389rctl_get_limit(struct proc *p, int resource)
390{
391	struct rctl_rule *rule;
392	struct rctl_rule_link *link;
393	uint64_t amount = UINT64_MAX;
394
395	rw_rlock(&rctl_lock);
396
397	/*
398	 * There may be more than one matching rule; go through all of them.
399	 * Denial should be done last, after logging and sending signals.
400	 */
401	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
402		rule = link->rrl_rule;
403		if (rule->rr_resource != resource)
404			continue;
405		if (rule->rr_action != RCTL_ACTION_DENY)
406			continue;
407		if (rule->rr_amount < amount)
408			amount = rule->rr_amount;
409	}
410
411	rw_runlock(&rctl_lock);
412
413	return (amount);
414}
415
416uint64_t
417rctl_get_available(struct proc *p, int resource)
418{
419	struct rctl_rule *rule;
420	struct rctl_rule_link *link;
421	int64_t available, minavailable, allocated;
422
423	minavailable = INT64_MAX;
424
425	rw_rlock(&rctl_lock);
426
427	/*
428	 * There may be more than one matching rule; go through all of them.
429	 * Denial should be done last, after logging and sending signals.
430	 */
431	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
432		rule = link->rrl_rule;
433		if (rule->rr_resource != resource)
434			continue;
435		if (rule->rr_action != RCTL_ACTION_DENY)
436			continue;
437		available = rctl_available_resource(p, rule);
438		if (available < minavailable)
439			minavailable = available;
440	}
441
442	rw_runlock(&rctl_lock);
443
444	/*
445	 * XXX: Think about this _hard_.
446	 */
447	allocated = p->p_racct->r_resources[resource];
448	if (minavailable < INT64_MAX - allocated)
449		minavailable += allocated;
450	if (minavailable < 0)
451		minavailable = 0;
452	return (minavailable);
453}
454
455static int
456rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
457{
458
459	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
460		if (rule->rr_subject_type != filter->rr_subject_type)
461			return (0);
462
463		switch (filter->rr_subject_type) {
464		case RCTL_SUBJECT_TYPE_PROCESS:
465			if (filter->rr_subject.rs_proc != NULL &&
466			    rule->rr_subject.rs_proc !=
467			    filter->rr_subject.rs_proc)
468				return (0);
469			break;
470		case RCTL_SUBJECT_TYPE_USER:
471			if (filter->rr_subject.rs_uip != NULL &&
472			    rule->rr_subject.rs_uip !=
473			    filter->rr_subject.rs_uip)
474				return (0);
475			break;
476		case RCTL_SUBJECT_TYPE_LOGINCLASS:
477			if (filter->rr_subject.rs_loginclass != NULL &&
478			    rule->rr_subject.rs_loginclass !=
479			    filter->rr_subject.rs_loginclass)
480				return (0);
481			break;
482		case RCTL_SUBJECT_TYPE_JAIL:
483			if (filter->rr_subject.rs_prison_racct != NULL &&
484			    rule->rr_subject.rs_prison_racct !=
485			    filter->rr_subject.rs_prison_racct)
486				return (0);
487			break;
488		default:
489			panic("rctl_rule_matches: unknown subject type %d",
490			    filter->rr_subject_type);
491		}
492	}
493
494	if (filter->rr_resource != RACCT_UNDEFINED) {
495		if (rule->rr_resource != filter->rr_resource)
496			return (0);
497	}
498
499	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
500		if (rule->rr_action != filter->rr_action)
501			return (0);
502	}
503
504	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
505		if (rule->rr_amount != filter->rr_amount)
506			return (0);
507	}
508
509	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
510		if (rule->rr_per != filter->rr_per)
511			return (0);
512	}
513
514	return (1);
515}
516
517static int
518str2value(const char *str, int *value, struct dict *table)
519{
520	int i;
521
522	if (value == NULL)
523		return (EINVAL);
524
525	for (i = 0; table[i].d_name != NULL; i++) {
526		if (strcasecmp(table[i].d_name, str) == 0) {
527			*value =  table[i].d_value;
528			return (0);
529		}
530	}
531
532	return (EINVAL);
533}
534
535static int
536str2id(const char *str, id_t *value)
537{
538	char *end;
539
540	if (str == NULL)
541		return (EINVAL);
542
543	*value = strtoul(str, &end, 10);
544	if ((size_t)(end - str) != strlen(str))
545		return (EINVAL);
546
547	return (0);
548}
549
/*
 * Parse the decimal number in 'str' into '*value'.
 *
 * Returns 0 on success.  Returns EINVAL when 'str' is NULL, contains
 * no digits at all (e.g. the empty string), or has trailing characters
 * after the number.  Returns ERANGE when the parsed value does not fit
 * into a non-negative int64_t; this also covers inputs such as "-1",
 * which strtoul() converts to a huge unsigned value that would
 * otherwise be stored as a negative amount.
 *
 * '*value' is only written on success.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;
	unsigned long parsed;

	if (str == NULL)
		return (EINVAL);

	parsed = strtoul(str, &end, 10);
	/* end == str: nothing was converted; *end != '\0': trailing junk. */
	if (end == str || *end != '\0')
		return (EINVAL);

	/* Compare in 64-bit unsigned space so this is correct on ILP32 too. */
	if ((uint64_t)parsed > (uint64_t)INT64_MAX)
		return (ERANGE);

	*value = (int64_t)parsed;

	return (0);
}
564
565/*
566 * Connect the rule to the racct, increasing refcount for the rule.
567 */
568static void
569rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
570{
571	struct rctl_rule_link *link;
572
573	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
574
575	rctl_rule_acquire(rule);
576	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
577	link->rrl_rule = rule;
578	link->rrl_exceeded = 0;
579
580	rw_wlock(&rctl_lock);
581	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
582	rw_wunlock(&rctl_lock);
583}
584
585static int
586rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
587{
588	struct rctl_rule_link *link;
589
590	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591	rw_assert(&rctl_lock, RA_WLOCKED);
592
593	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
594	if (link == NULL)
595		return (ENOMEM);
596	rctl_rule_acquire(rule);
597	link->rrl_rule = rule;
598	link->rrl_exceeded = 0;
599
600	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
601	return (0);
602}
603
604/*
605 * Remove limits for a rules matching the filter and release
606 * the refcounts for the rules, possibly freeing them.  Returns
607 * the number of limit structures removed.
608 */
609static int
610rctl_racct_remove_rules(struct racct *racct,
611    const struct rctl_rule *filter)
612{
613	int removed = 0;
614	struct rctl_rule_link *link, *linktmp;
615
616	rw_assert(&rctl_lock, RA_WLOCKED);
617
618	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
619		if (!rctl_rule_matches(link->rrl_rule, filter))
620			continue;
621
622		LIST_REMOVE(link, rrl_next);
623		rctl_rule_release(link->rrl_rule);
624		uma_zfree(rctl_rule_link_zone, link);
625		removed++;
626	}
627	return (removed);
628}
629
630static void
631rctl_rule_acquire_subject(struct rctl_rule *rule)
632{
633
634	switch (rule->rr_subject_type) {
635	case RCTL_SUBJECT_TYPE_UNDEFINED:
636	case RCTL_SUBJECT_TYPE_PROCESS:
637		break;
638	case RCTL_SUBJECT_TYPE_JAIL:
639		if (rule->rr_subject.rs_prison_racct != NULL)
640			prison_racct_hold(rule->rr_subject.rs_prison_racct);
641		break;
642	case RCTL_SUBJECT_TYPE_USER:
643		if (rule->rr_subject.rs_uip != NULL)
644			uihold(rule->rr_subject.rs_uip);
645		break;
646	case RCTL_SUBJECT_TYPE_LOGINCLASS:
647		if (rule->rr_subject.rs_loginclass != NULL)
648			loginclass_hold(rule->rr_subject.rs_loginclass);
649		break;
650	default:
651		panic("rctl_rule_acquire_subject: unknown subject type %d",
652		    rule->rr_subject_type);
653	}
654}
655
656static void
657rctl_rule_release_subject(struct rctl_rule *rule)
658{
659
660	switch (rule->rr_subject_type) {
661	case RCTL_SUBJECT_TYPE_UNDEFINED:
662	case RCTL_SUBJECT_TYPE_PROCESS:
663		break;
664	case RCTL_SUBJECT_TYPE_JAIL:
665		if (rule->rr_subject.rs_prison_racct != NULL)
666			prison_racct_free(rule->rr_subject.rs_prison_racct);
667		break;
668	case RCTL_SUBJECT_TYPE_USER:
669		if (rule->rr_subject.rs_uip != NULL)
670			uifree(rule->rr_subject.rs_uip);
671		break;
672	case RCTL_SUBJECT_TYPE_LOGINCLASS:
673		if (rule->rr_subject.rs_loginclass != NULL)
674			loginclass_free(rule->rr_subject.rs_loginclass);
675		break;
676	default:
677		panic("rctl_rule_release_subject: unknown subject type %d",
678		    rule->rr_subject_type);
679	}
680}
681
682struct rctl_rule *
683rctl_rule_alloc(int flags)
684{
685	struct rctl_rule *rule;
686
687	rule = uma_zalloc(rctl_rule_zone, flags);
688	if (rule == NULL)
689		return (NULL);
690	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
691	rule->rr_subject.rs_proc = NULL;
692	rule->rr_subject.rs_uip = NULL;
693	rule->rr_subject.rs_loginclass = NULL;
694	rule->rr_subject.rs_prison_racct = NULL;
695	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
696	rule->rr_resource = RACCT_UNDEFINED;
697	rule->rr_action = RCTL_ACTION_UNDEFINED;
698	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
699	refcount_init(&rule->rr_refcount, 1);
700
701	return (rule);
702}
703
704struct rctl_rule *
705rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
706{
707	struct rctl_rule *copy;
708
709	copy = uma_zalloc(rctl_rule_zone, flags);
710	if (copy == NULL)
711		return (NULL);
712	copy->rr_subject_type = rule->rr_subject_type;
713	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
714	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
715	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
716	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
717	copy->rr_per = rule->rr_per;
718	copy->rr_resource = rule->rr_resource;
719	copy->rr_action = rule->rr_action;
720	copy->rr_amount = rule->rr_amount;
721	refcount_init(&copy->rr_refcount, 1);
722	rctl_rule_acquire_subject(copy);
723
724	return (copy);
725}
726
/*
 * Take an additional reference on 'rule'.  The rule must already have
 * at least one reference (asserted under INVARIANTS via KASSERT).
 */
void
rctl_rule_acquire(struct rctl_rule *rule)
{

	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));

	refcount_acquire(&rule->rr_refcount);
}
735
736static void
737rctl_rule_free(void *context, int pending)
738{
739	struct rctl_rule *rule;
740
741	rule = (struct rctl_rule *)context;
742
743	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
744
745	/*
746	 * We don't need locking here; rule is guaranteed to be inaccessible.
747	 */
748
749	rctl_rule_release_subject(rule);
750	uma_zfree(rctl_rule_zone, rule);
751}
752
753void
754rctl_rule_release(struct rctl_rule *rule)
755{
756
757	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
758
759	if (refcount_release(&rule->rr_refcount)) {
760		/*
761		 * rctl_rule_release() is often called when iterating
762		 * over all the uidinfo structures in the system,
763		 * holding uihashtbl_lock.  Since rctl_rule_free()
764		 * might end up calling uifree(), this would lead
765		 * to lock recursion.  Use taskqueue to avoid this.
766		 */
767		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
768		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
769	}
770}
771
772static int
773rctl_rule_fully_specified(const struct rctl_rule *rule)
774{
775
776	switch (rule->rr_subject_type) {
777	case RCTL_SUBJECT_TYPE_UNDEFINED:
778		return (0);
779	case RCTL_SUBJECT_TYPE_PROCESS:
780		if (rule->rr_subject.rs_proc == NULL)
781			return (0);
782		break;
783	case RCTL_SUBJECT_TYPE_USER:
784		if (rule->rr_subject.rs_uip == NULL)
785			return (0);
786		break;
787	case RCTL_SUBJECT_TYPE_LOGINCLASS:
788		if (rule->rr_subject.rs_loginclass == NULL)
789			return (0);
790		break;
791	case RCTL_SUBJECT_TYPE_JAIL:
792		if (rule->rr_subject.rs_prison_racct == NULL)
793			return (0);
794		break;
795	default:
796		panic("rctl_rule_fully_specified: unknown subject type %d",
797		    rule->rr_subject_type);
798	}
799	if (rule->rr_resource == RACCT_UNDEFINED)
800		return (0);
801	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
802		return (0);
803	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
804		return (0);
805	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
806		return (0);
807
808	return (1);
809}
810
811static int
812rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
813{
814	int error = 0;
815	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
816	     *amountstr, *perstr;
817	struct rctl_rule *rule;
818	id_t id;
819
820	rule = rctl_rule_alloc(M_WAITOK);
821
822	subjectstr = strsep(&rulestr, ":");
823	subject_idstr = strsep(&rulestr, ":");
824	resourcestr = strsep(&rulestr, ":");
825	actionstr = strsep(&rulestr, "=/");
826	amountstr = strsep(&rulestr, "/");
827	perstr = rulestr;
828
829	if (subjectstr == NULL || subjectstr[0] == '\0')
830		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
831	else {
832		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
833		if (error != 0)
834			goto out;
835	}
836
837	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
838		rule->rr_subject.rs_proc = NULL;
839		rule->rr_subject.rs_uip = NULL;
840		rule->rr_subject.rs_loginclass = NULL;
841		rule->rr_subject.rs_prison_racct = NULL;
842	} else {
843		switch (rule->rr_subject_type) {
844		case RCTL_SUBJECT_TYPE_UNDEFINED:
845			error = EINVAL;
846			goto out;
847		case RCTL_SUBJECT_TYPE_PROCESS:
848			error = str2id(subject_idstr, &id);
849			if (error != 0)
850				goto out;
851			sx_assert(&allproc_lock, SA_LOCKED);
852			rule->rr_subject.rs_proc = pfind(id);
853			if (rule->rr_subject.rs_proc == NULL) {
854				error = ESRCH;
855				goto out;
856			}
857			PROC_UNLOCK(rule->rr_subject.rs_proc);
858			break;
859		case RCTL_SUBJECT_TYPE_USER:
860			error = str2id(subject_idstr, &id);
861			if (error != 0)
862				goto out;
863			rule->rr_subject.rs_uip = uifind(id);
864			break;
865		case RCTL_SUBJECT_TYPE_LOGINCLASS:
866			rule->rr_subject.rs_loginclass =
867			    loginclass_find(subject_idstr);
868			if (rule->rr_subject.rs_loginclass == NULL) {
869				error = ENAMETOOLONG;
870				goto out;
871			}
872			break;
873		case RCTL_SUBJECT_TYPE_JAIL:
874			rule->rr_subject.rs_prison_racct =
875			    prison_racct_find(subject_idstr);
876			if (rule->rr_subject.rs_prison_racct == NULL) {
877				error = ENAMETOOLONG;
878				goto out;
879			}
880			break;
881               default:
882                       panic("rctl_string_to_rule: unknown subject type %d",
883                           rule->rr_subject_type);
884               }
885	}
886
887	if (resourcestr == NULL || resourcestr[0] == '\0')
888		rule->rr_resource = RACCT_UNDEFINED;
889	else {
890		error = str2value(resourcestr, &rule->rr_resource,
891		    resourcenames);
892		if (error != 0)
893			goto out;
894	}
895
896	if (actionstr == NULL || actionstr[0] == '\0')
897		rule->rr_action = RCTL_ACTION_UNDEFINED;
898	else {
899		error = str2value(actionstr, &rule->rr_action, actionnames);
900		if (error != 0)
901			goto out;
902	}
903
904	if (amountstr == NULL || amountstr[0] == '\0')
905		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
906	else {
907		error = str2int64(amountstr, &rule->rr_amount);
908		if (error != 0)
909			goto out;
910		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
911			rule->rr_amount *= 1000000;
912	}
913
914	if (perstr == NULL || perstr[0] == '\0')
915		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
916	else {
917		error = str2value(perstr, &rule->rr_per, subjectnames);
918		if (error != 0)
919			goto out;
920	}
921
922out:
923	if (error == 0)
924		*rulep = rule;
925	else
926		rctl_rule_release(rule);
927
928	return (error);
929}
930
/*
 * Link a rule with all the subjects it applies to.
 *
 * 'rule' must be fully specified.  Existing rules that duplicate it
 * (or, for "deny" rules, differ from it only by amount) are removed
 * first.  The rule is then linked to the racct of its subject and,
 * for user, login class and jail subjects, to the racct of every
 * non-system process the subject covers.
 *
 * Returns 0 on success, EOPNOTSUPP for rule combinations that are
 * rejected below, or EPERM for a per-process rule on a P_SYSTEM
 * process.  The allproc_lock must be held when the process list is
 * walked.
 *
 * NOTE(review): the EPERM return happens after the matching old rules
 * have already been removed - confirm this is intended.
 */
int
rctl_rule_add(struct rctl_rule *rule)
{
	struct proc *p;
	struct ucred *cred;
	struct uidinfo *uip;
	struct prison *pr;
	struct prison_racct *prr;
	struct loginclass *lc;
	struct rctl_rule *rule2;
	int match;

	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));

	/*
	 * Some rules just don't make sense.  Note that the one below
	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
	 * for example, is not deniable in the racct sense, but the
	 * limit is enforced in a different way, so "deny" rules for %CPU
	 * do make sense.
	 */
	if (rule->rr_action == RCTL_ACTION_DENY &&
	    (rule->rr_resource == RACCT_CPU ||
	    rule->rr_resource == RACCT_WALLCLOCK))
		return (EOPNOTSUPP);

	/* Per-process limits on "sloppy" resources are not accepted. */
	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
	    RACCT_IS_SLOPPY(rule->rr_resource))
		return (EOPNOTSUPP);

	/*
	 * Make sure there are no duplicated rules.  Also, for the "deny"
	 * rules, remove ones differing only by "amount".
	 */
	if (rule->rr_action == RCTL_ACTION_DENY) {
		/* A copy with a wildcard amount serves as the filter. */
		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
		rctl_rule_remove(rule2);
		rctl_rule_release(rule2);
	} else
		rctl_rule_remove(rule);

	switch (rule->rr_subject_type) {
	case RCTL_SUBJECT_TYPE_PROCESS:
		p = rule->rr_subject.rs_proc;
		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
		/*
		 * No resource limits for system processes.
		 */
		if (p->p_flag & P_SYSTEM)
			return (EPERM);

		rctl_racct_add_rule(p->p_racct, rule);
		/*
		 * In case of per-process rule, we don't have anything more
		 * to do.
		 */
		return (0);

	case RCTL_SUBJECT_TYPE_USER:
		uip = rule->rr_subject.rs_uip;
		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
		rctl_racct_add_rule(uip->ui_racct, rule);
		break;

	case RCTL_SUBJECT_TYPE_LOGINCLASS:
		lc = rule->rr_subject.rs_loginclass;
		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
		rctl_racct_add_rule(lc->lc_racct, rule);
		break;

	case RCTL_SUBJECT_TYPE_JAIL:
		prr = rule->rr_subject.rs_prison_racct;
		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
		rctl_racct_add_rule(prr->prr_racct, rule);
		break;

	default:
		panic("rctl_rule_add: unknown subject type %d",
		    rule->rr_subject_type);
	}

	/*
	 * Now go through all the processes and add the new rule to the ones
	 * it applies to.
	 */
	sx_assert(&allproc_lock, SA_LOCKED);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_flag & P_SYSTEM)
			continue;
		cred = p->p_ucred;
		/*
		 * In this switch 'break' means "the process is covered,
		 * fall out and link the rule", while 'continue' skips to
		 * the next process.
		 */
		switch (rule->rr_subject_type) {
		case RCTL_SUBJECT_TYPE_USER:
			/* Either the effective or the real uid may match. */
			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
				break;
			continue;
		case RCTL_SUBJECT_TYPE_LOGINCLASS:
			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
				break;
			continue;
		case RCTL_SUBJECT_TYPE_JAIL:
			/* Check the process' jail and all of its parents. */
			match = 0;
			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
					match = 1;
					break;
				}
			}
			if (match)
				break;
			continue;
		default:
			panic("rctl_rule_add: unknown subject type %d",
			    rule->rr_subject_type);
		}

		rctl_racct_add_rule(p->p_racct, rule);
	}

	return (0);
}
1056
1057static void
1058rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1059{
1060	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1061	int found = 0;
1062
1063	rw_wlock(&rctl_lock);
1064	found += rctl_racct_remove_rules(racct, filter);
1065	rw_wunlock(&rctl_lock);
1066
1067	*((int *)arg3) += found;
1068}
1069
1070/*
1071 * Remove all rules that match the filter.
1072 */
1073int
1074rctl_rule_remove(struct rctl_rule *filter)
1075{
1076	int found = 0;
1077	struct proc *p;
1078
1079	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1080	    filter->rr_subject.rs_proc != NULL) {
1081		p = filter->rr_subject.rs_proc;
1082		rw_wlock(&rctl_lock);
1083		found = rctl_racct_remove_rules(p->p_racct, filter);
1084		rw_wunlock(&rctl_lock);
1085		if (found)
1086			return (0);
1087		return (ESRCH);
1088	}
1089
1090	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1091	    (void *)&found);
1092	ui_racct_foreach(rctl_rule_remove_callback, filter,
1093	    (void *)&found);
1094	prison_racct_foreach(rctl_rule_remove_callback, filter,
1095	    (void *)&found);
1096
1097	sx_assert(&allproc_lock, SA_LOCKED);
1098	rw_wlock(&rctl_lock);
1099	FOREACH_PROC_IN_SYSTEM(p) {
1100		found += rctl_racct_remove_rules(p->p_racct, filter);
1101	}
1102	rw_wunlock(&rctl_lock);
1103
1104	if (found)
1105		return (0);
1106	return (ESRCH);
1107}
1108
1109/*
1110 * Appends a rule to the sbuf.
1111 */
1112static void
1113rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1114{
1115	int64_t amount;
1116
1117	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1118
1119	switch (rule->rr_subject_type) {
1120	case RCTL_SUBJECT_TYPE_PROCESS:
1121		if (rule->rr_subject.rs_proc == NULL)
1122			sbuf_printf(sb, ":");
1123		else
1124			sbuf_printf(sb, "%d:",
1125			    rule->rr_subject.rs_proc->p_pid);
1126		break;
1127	case RCTL_SUBJECT_TYPE_USER:
1128		if (rule->rr_subject.rs_uip == NULL)
1129			sbuf_printf(sb, ":");
1130		else
1131			sbuf_printf(sb, "%d:",
1132			    rule->rr_subject.rs_uip->ui_uid);
1133		break;
1134	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1135		if (rule->rr_subject.rs_loginclass == NULL)
1136			sbuf_printf(sb, ":");
1137		else
1138			sbuf_printf(sb, "%s:",
1139			    rule->rr_subject.rs_loginclass->lc_name);
1140		break;
1141	case RCTL_SUBJECT_TYPE_JAIL:
1142		if (rule->rr_subject.rs_prison_racct == NULL)
1143			sbuf_printf(sb, ":");
1144		else
1145			sbuf_printf(sb, "%s:",
1146			    rule->rr_subject.rs_prison_racct->prr_name);
1147		break;
1148	default:
1149		panic("rctl_rule_to_sbuf: unknown subject type %d",
1150		    rule->rr_subject_type);
1151	}
1152
1153	amount = rule->rr_amount;
1154	if (amount != RCTL_AMOUNT_UNDEFINED &&
1155	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1156		amount /= 1000000;
1157
1158	sbuf_printf(sb, "%s:%s=%jd",
1159	    rctl_resource_name(rule->rr_resource),
1160	    rctl_action_name(rule->rr_action),
1161	    amount);
1162
1163	if (rule->rr_per != rule->rr_subject_type)
1164		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1165}
1166
1167/*
1168 * Routine used by RCTL syscalls to read in input string.
1169 */
1170static int
1171rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1172{
1173	int error;
1174	char *str;
1175
1176	if (inbuflen <= 0)
1177		return (EINVAL);
1178
1179	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1180	error = copyinstr(inbufp, str, inbuflen, NULL);
1181	if (error != 0) {
1182		free(str, M_RCTL);
1183		return (error);
1184	}
1185
1186	*inputstr = str;
1187
1188	return (0);
1189}
1190
/*
 * Routine used by RCTL syscalls to write out output string.
 *
 * Finishes 'outputsbuf' and copies its contents, including the
 * terminating NUL, to the user buffer 'outbufp'.  The sbuf is always
 * deleted before returning, so the caller must not touch it afterwards.
 *
 * Returns 0 when 'outputsbuf' is NULL (nothing to write), ERANGE when
 * 'outbuflen' is too small to hold the string, and otherwise the result
 * of copyout().
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	int error;

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);
	if (outbuflen >= sbuf_len(outputsbuf) + 1)
		error = copyout(sbuf_data(outputsbuf), outbufp,
		    sbuf_len(outputsbuf) + 1);
	else
		error = ERANGE;

	sbuf_delete(outputsbuf);
	return (error);
}
1212
1213static struct sbuf *
1214rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1215{
1216	int i;
1217	int64_t amount;
1218	struct sbuf *sb;
1219
1220	sb = sbuf_new_auto();
1221	for (i = 0; i <= RACCT_MAX; i++) {
1222		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1223			continue;
1224		amount = racct->r_resources[i];
1225		if (RACCT_IS_IN_MILLIONS(i))
1226			amount /= 1000000;
1227		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1228	}
1229	sbuf_setpos(sb, sbuf_len(sb) - 1);
1230	return (sb);
1231}
1232
1233int
1234rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1235{
1236	int error;
1237	char *inputstr;
1238	struct rctl_rule *filter;
1239	struct sbuf *outputsbuf = NULL;
1240	struct proc *p;
1241	struct uidinfo *uip;
1242	struct loginclass *lc;
1243	struct prison_racct *prr;
1244
1245	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1246	if (error != 0)
1247		return (error);
1248
1249	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1250	if (error != 0)
1251		return (error);
1252
1253	sx_slock(&allproc_lock);
1254	error = rctl_string_to_rule(inputstr, &filter);
1255	free(inputstr, M_RCTL);
1256	if (error != 0) {
1257		sx_sunlock(&allproc_lock);
1258		return (error);
1259	}
1260
1261	switch (filter->rr_subject_type) {
1262	case RCTL_SUBJECT_TYPE_PROCESS:
1263		p = filter->rr_subject.rs_proc;
1264		if (p == NULL) {
1265			error = EINVAL;
1266			goto out;
1267		}
1268		if (p->p_flag & P_SYSTEM) {
1269			error = EINVAL;
1270			goto out;
1271		}
1272		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1273		break;
1274	case RCTL_SUBJECT_TYPE_USER:
1275		uip = filter->rr_subject.rs_uip;
1276		if (uip == NULL) {
1277			error = EINVAL;
1278			goto out;
1279		}
1280		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1281		break;
1282	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1283		lc = filter->rr_subject.rs_loginclass;
1284		if (lc == NULL) {
1285			error = EINVAL;
1286			goto out;
1287		}
1288		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1289		break;
1290	case RCTL_SUBJECT_TYPE_JAIL:
1291		prr = filter->rr_subject.rs_prison_racct;
1292		if (prr == NULL) {
1293			error = EINVAL;
1294			goto out;
1295		}
1296		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1297		break;
1298	default:
1299		error = EINVAL;
1300	}
1301out:
1302	rctl_rule_release(filter);
1303	sx_sunlock(&allproc_lock);
1304	if (error != 0)
1305		return (error);
1306
1307	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1308
1309	return (error);
1310}
1311
1312static void
1313rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1314{
1315	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1316	struct rctl_rule_link *link;
1317	struct sbuf *sb = (struct sbuf *)arg3;
1318
1319	rw_rlock(&rctl_lock);
1320	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1321		if (!rctl_rule_matches(link->rrl_rule, filter))
1322			continue;
1323		rctl_rule_to_sbuf(sb, link->rrl_rule);
1324		sbuf_printf(sb, ",");
1325	}
1326	rw_runlock(&rctl_lock);
1327}
1328
1329int
1330rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1331{
1332	int error;
1333	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1334	char *inputstr, *buf;
1335	struct sbuf *sb;
1336	struct rctl_rule *filter;
1337	struct rctl_rule_link *link;
1338	struct proc *p;
1339
1340	error = priv_check(td, PRIV_RCTL_GET_RULES);
1341	if (error != 0)
1342		return (error);
1343
1344	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1345	if (error != 0)
1346		return (error);
1347
1348	sx_slock(&allproc_lock);
1349	error = rctl_string_to_rule(inputstr, &filter);
1350	free(inputstr, M_RCTL);
1351	if (error != 0) {
1352		sx_sunlock(&allproc_lock);
1353		return (error);
1354	}
1355
1356again:
1357	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1358	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1359	KASSERT(sb != NULL, ("sbuf_new failed"));
1360
1361	sx_assert(&allproc_lock, SA_LOCKED);
1362	FOREACH_PROC_IN_SYSTEM(p) {
1363		rw_rlock(&rctl_lock);
1364		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1365			/*
1366			 * Non-process rules will be added to the buffer later.
1367			 * Adding them here would result in duplicated output.
1368			 */
1369			if (link->rrl_rule->rr_subject_type !=
1370			    RCTL_SUBJECT_TYPE_PROCESS)
1371				continue;
1372			if (!rctl_rule_matches(link->rrl_rule, filter))
1373				continue;
1374			rctl_rule_to_sbuf(sb, link->rrl_rule);
1375			sbuf_printf(sb, ",");
1376		}
1377		rw_runlock(&rctl_lock);
1378	}
1379
1380	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1381	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1382	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1383	if (sbuf_error(sb) == ENOMEM) {
1384		sbuf_delete(sb);
1385		free(buf, M_RCTL);
1386		bufsize *= 4;
1387		goto again;
1388	}
1389
1390	/*
1391	 * Remove trailing ",".
1392	 */
1393	if (sbuf_len(sb) > 0)
1394		sbuf_setpos(sb, sbuf_len(sb) - 1);
1395
1396	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1397
1398	rctl_rule_release(filter);
1399	sx_sunlock(&allproc_lock);
1400	free(buf, M_RCTL);
1401	return (error);
1402}
1403
1404int
1405rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1406{
1407	int error;
1408	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1409	char *inputstr, *buf;
1410	struct sbuf *sb;
1411	struct rctl_rule *filter;
1412	struct rctl_rule_link *link;
1413
1414	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1415	if (error != 0)
1416		return (error);
1417
1418	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1419	if (error != 0)
1420		return (error);
1421
1422	sx_slock(&allproc_lock);
1423	error = rctl_string_to_rule(inputstr, &filter);
1424	free(inputstr, M_RCTL);
1425	if (error != 0) {
1426		sx_sunlock(&allproc_lock);
1427		return (error);
1428	}
1429
1430	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1431		rctl_rule_release(filter);
1432		sx_sunlock(&allproc_lock);
1433		return (EINVAL);
1434	}
1435	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1436		rctl_rule_release(filter);
1437		sx_sunlock(&allproc_lock);
1438		return (EOPNOTSUPP);
1439	}
1440	if (filter->rr_subject.rs_proc == NULL) {
1441		rctl_rule_release(filter);
1442		sx_sunlock(&allproc_lock);
1443		return (EINVAL);
1444	}
1445
1446again:
1447	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1448	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1449	KASSERT(sb != NULL, ("sbuf_new failed"));
1450
1451	rw_rlock(&rctl_lock);
1452	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1453	    rrl_next) {
1454		rctl_rule_to_sbuf(sb, link->rrl_rule);
1455		sbuf_printf(sb, ",");
1456	}
1457	rw_runlock(&rctl_lock);
1458	if (sbuf_error(sb) == ENOMEM) {
1459		sbuf_delete(sb);
1460		free(buf, M_RCTL);
1461		bufsize *= 4;
1462		goto again;
1463	}
1464
1465	/*
1466	 * Remove trailing ",".
1467	 */
1468	if (sbuf_len(sb) > 0)
1469		sbuf_setpos(sb, sbuf_len(sb) - 1);
1470
1471	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1472	rctl_rule_release(filter);
1473	sx_sunlock(&allproc_lock);
1474	free(buf, M_RCTL);
1475	return (error);
1476}
1477
1478int
1479rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1480{
1481	int error;
1482	struct rctl_rule *rule;
1483	char *inputstr;
1484
1485	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1486	if (error != 0)
1487		return (error);
1488
1489	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1490	if (error != 0)
1491		return (error);
1492
1493	sx_slock(&allproc_lock);
1494	error = rctl_string_to_rule(inputstr, &rule);
1495	free(inputstr, M_RCTL);
1496	if (error != 0) {
1497		sx_sunlock(&allproc_lock);
1498		return (error);
1499	}
1500	/*
1501	 * The 'per' part of a rule is optional.
1502	 */
1503	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1504	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1505		rule->rr_per = rule->rr_subject_type;
1506
1507	if (!rctl_rule_fully_specified(rule)) {
1508		error = EINVAL;
1509		goto out;
1510	}
1511
1512	error = rctl_rule_add(rule);
1513
1514out:
1515	rctl_rule_release(rule);
1516	sx_sunlock(&allproc_lock);
1517	return (error);
1518}
1519
1520int
1521rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1522{
1523	int error;
1524	struct rctl_rule *filter;
1525	char *inputstr;
1526
1527	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1528	if (error != 0)
1529		return (error);
1530
1531	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1532	if (error != 0)
1533		return (error);
1534
1535	sx_slock(&allproc_lock);
1536	error = rctl_string_to_rule(inputstr, &filter);
1537	free(inputstr, M_RCTL);
1538	if (error != 0) {
1539		sx_sunlock(&allproc_lock);
1540		return (error);
1541	}
1542
1543	error = rctl_rule_remove(filter);
1544	rctl_rule_release(filter);
1545	sx_sunlock(&allproc_lock);
1546
1547	return (error);
1548}
1549
1550/*
1551 * Update RCTL rule list after credential change.
1552 */
1553void
1554rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1555{
1556	int rulecnt, i;
1557	struct rctl_rule_link *link, *newlink;
1558	struct uidinfo *newuip;
1559	struct loginclass *newlc;
1560	struct prison_racct *newprr;
1561	LIST_HEAD(, rctl_rule_link) newrules;
1562
1563	newuip = newcred->cr_ruidinfo;
1564	newlc = newcred->cr_loginclass;
1565	newprr = newcred->cr_prison->pr_prison_racct;
1566
1567	LIST_INIT(&newrules);
1568
1569again:
1570	/*
1571	 * First, count the rules that apply to the process with new
1572	 * credentials.
1573	 */
1574	rulecnt = 0;
1575	rw_rlock(&rctl_lock);
1576	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1577		if (link->rrl_rule->rr_subject_type ==
1578		    RCTL_SUBJECT_TYPE_PROCESS)
1579			rulecnt++;
1580	}
1581	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1582		rulecnt++;
1583	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1584		rulecnt++;
1585	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1586		rulecnt++;
1587	rw_runlock(&rctl_lock);
1588
1589	/*
1590	 * Create temporary list.  We've dropped the rctl_lock in order
1591	 * to use M_WAITOK.
1592	 */
1593	for (i = 0; i < rulecnt; i++) {
1594		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1595		newlink->rrl_rule = NULL;
1596		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1597	}
1598
1599	newlink = LIST_FIRST(&newrules);
1600
1601	/*
1602	 * Assign rules to the newly allocated list entries.
1603	 */
1604	rw_wlock(&rctl_lock);
1605	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1606		if (link->rrl_rule->rr_subject_type ==
1607		    RCTL_SUBJECT_TYPE_PROCESS) {
1608			if (newlink == NULL)
1609				goto goaround;
1610			rctl_rule_acquire(link->rrl_rule);
1611			newlink->rrl_rule = link->rrl_rule;
1612			newlink = LIST_NEXT(newlink, rrl_next);
1613			rulecnt--;
1614		}
1615	}
1616
1617	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1618		if (newlink == NULL)
1619			goto goaround;
1620		rctl_rule_acquire(link->rrl_rule);
1621		newlink->rrl_rule = link->rrl_rule;
1622		newlink = LIST_NEXT(newlink, rrl_next);
1623		rulecnt--;
1624	}
1625
1626	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1627		if (newlink == NULL)
1628			goto goaround;
1629		rctl_rule_acquire(link->rrl_rule);
1630		newlink->rrl_rule = link->rrl_rule;
1631		newlink = LIST_NEXT(newlink, rrl_next);
1632		rulecnt--;
1633	}
1634
1635	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1636		if (newlink == NULL)
1637			goto goaround;
1638		rctl_rule_acquire(link->rrl_rule);
1639		newlink->rrl_rule = link->rrl_rule;
1640		newlink = LIST_NEXT(newlink, rrl_next);
1641		rulecnt--;
1642	}
1643
1644	if (rulecnt == 0) {
1645		/*
1646		 * Free the old rule list.
1647		 */
1648		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1649			link = LIST_FIRST(&p->p_racct->r_rule_links);
1650			LIST_REMOVE(link, rrl_next);
1651			rctl_rule_release(link->rrl_rule);
1652			uma_zfree(rctl_rule_link_zone, link);
1653		}
1654
1655		/*
1656		 * Replace lists and we're done.
1657		 *
1658		 * XXX: Is there any way to switch list heads instead
1659		 *      of iterating here?
1660		 */
1661		while (!LIST_EMPTY(&newrules)) {
1662			newlink = LIST_FIRST(&newrules);
1663			LIST_REMOVE(newlink, rrl_next);
1664			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1665			    newlink, rrl_next);
1666		}
1667
1668		rw_wunlock(&rctl_lock);
1669
1670		return;
1671	}
1672
1673goaround:
1674	rw_wunlock(&rctl_lock);
1675
1676	/*
1677	 * Rule list changed while we were not holding the rctl_lock.
1678	 * Free the new list and try again.
1679	 */
1680	while (!LIST_EMPTY(&newrules)) {
1681		newlink = LIST_FIRST(&newrules);
1682		LIST_REMOVE(newlink, rrl_next);
1683		if (newlink->rrl_rule != NULL)
1684			rctl_rule_release(newlink->rrl_rule);
1685		uma_zfree(rctl_rule_link_zone, newlink);
1686	}
1687
1688	goto again;
1689}
1690
1691/*
1692 * Assign RCTL rules to the newly created process.
1693 */
1694int
1695rctl_proc_fork(struct proc *parent, struct proc *child)
1696{
1697	int error;
1698	struct rctl_rule_link *link;
1699	struct rctl_rule *rule;
1700
1701	LIST_INIT(&child->p_racct->r_rule_links);
1702
1703	/*
1704	 * No limits for kernel processes.
1705	 */
1706	if (child->p_flag & P_SYSTEM)
1707		return (0);
1708
1709	/*
1710	 * Nothing to inherit from P_SYSTEM parents.
1711	 */
1712	if (parent->p_racct == NULL) {
1713		KASSERT(parent->p_flag & P_SYSTEM,
1714		    ("non-system process without racct; p = %p", parent));
1715		return (0);
1716	}
1717
1718	rw_wlock(&rctl_lock);
1719
1720	/*
1721	 * Go through limits applicable to the parent and assign them
1722	 * to the child.  Rules with 'process' subject have to be duplicated
1723	 * in order to make their rr_subject point to the new process.
1724	 */
1725	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1726		if (link->rrl_rule->rr_subject_type ==
1727		    RCTL_SUBJECT_TYPE_PROCESS) {
1728			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1729			if (rule == NULL)
1730				goto fail;
1731			KASSERT(rule->rr_subject.rs_proc == parent,
1732			    ("rule->rr_subject.rs_proc != parent"));
1733			rule->rr_subject.rs_proc = child;
1734			error = rctl_racct_add_rule_locked(child->p_racct,
1735			    rule);
1736			rctl_rule_release(rule);
1737			if (error != 0)
1738				goto fail;
1739		} else {
1740			error = rctl_racct_add_rule_locked(child->p_racct,
1741			    link->rrl_rule);
1742			if (error != 0)
1743				goto fail;
1744		}
1745	}
1746
1747	rw_wunlock(&rctl_lock);
1748	return (0);
1749
1750fail:
1751	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1752		link = LIST_FIRST(&child->p_racct->r_rule_links);
1753		LIST_REMOVE(link, rrl_next);
1754		rctl_rule_release(link->rrl_rule);
1755		uma_zfree(rctl_rule_link_zone, link);
1756	}
1757	rw_wunlock(&rctl_lock);
1758	return (EAGAIN);
1759}
1760
1761/*
1762 * Release rules attached to the racct.
1763 */
1764void
1765rctl_racct_release(struct racct *racct)
1766{
1767	struct rctl_rule_link *link;
1768
1769	rw_wlock(&rctl_lock);
1770	while (!LIST_EMPTY(&racct->r_rule_links)) {
1771		link = LIST_FIRST(&racct->r_rule_links);
1772		LIST_REMOVE(link, rrl_next);
1773		rctl_rule_release(link->rrl_rule);
1774		uma_zfree(rctl_rule_link_zone, link);
1775	}
1776	rw_wunlock(&rctl_lock);
1777}
1778
1779static void
1780rctl_init(void)
1781{
1782
1783	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1784	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1785	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1786	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1787	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1788}
1789
1790#else /* !RCTL */
1791
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{
	return (ENOSYS);
}
1798
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{
	return (ENOSYS);
}
1805
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{
	return (ENOSYS);
}
1812
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{
	return (ENOSYS);
}
1819
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{
	return (ENOSYS);
}
1826
1827#endif /* !RCTL */
1828