kern_rctl.c revision 220527
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/kern_rctl.c 220527 2011-04-10 18:35:43Z trasz $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_rctl.c 220527 2011-04-10 18:35:43Z trasz $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
70#define	HRF_DEFAULT		0
71#define	HRF_DONT_INHERIT	1
72#define	HRF_DONT_ACCUMULATE	2
73
74/* Default buffer size for rctl_get_rules(2). */
75#define	RCTL_DEFAULT_BUFSIZE	4096
76#define	RCTL_LOG_BUFSIZE	128
77
78/*
79 * 'rctl_rule_link' connects a rule with every racct it's related to.
80 * For example, rule 'user:X:openfiles:deny=N/process' is linked
81 * with uidinfo for user X, and to each process of that user.
82 */
83struct rctl_rule_link {
84	LIST_ENTRY(rctl_rule_link)	rrl_next;
85	struct rctl_rule		*rrl_rule;
86	int				rrl_exceeded;
87};
88
/*
 * One entry of a keyword table: a textual name and the integer constant
 * it stands for.  Tables are terminated by an entry with a NULL d_name.
 */
struct dict {
	const char	*d_name;	/* keyword */
	int		d_value;	/* constant the keyword maps to */
};
93
94static struct dict subjectnames[] = {
95	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
96	{ "user", RCTL_SUBJECT_TYPE_USER },
97	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
99	{ NULL, -1 }};
100
101static struct dict resourcenames[] = {
102	{ "cpu", RACCT_CPU },
103	{ "fsize", RACCT_FSIZE },
104	{ "data", RACCT_DATA },
105	{ "stack", RACCT_STACK },
106	{ "core", RACCT_CORE },
107	{ "rss", RACCT_RSS },
108	{ "memlock", RACCT_MEMLOCK },
109	{ "nproc", RACCT_NPROC },
110	{ "nofile", RACCT_NOFILE },
111	{ "sbsize", RACCT_SBSIZE },
112	{ "vmem", RACCT_VMEM },
113	{ "npts", RACCT_NPTS },
114	{ "swap", RACCT_SWAP },
115	{ "nthr", RACCT_NTHR },
116	{ "msgqqueued", RACCT_MSGQQUEUED },
117	{ "msgqsize", RACCT_MSGQSIZE },
118	{ "nmsgq", RACCT_NMSGQ },
119	{ "nsem", RACCT_NSEM },
120	{ "nsemop", RACCT_NSEMOP },
121	{ "nshm", RACCT_NSHM },
122	{ "shmsize", RACCT_SHMSIZE },
123	{ "wallclock", RACCT_WALLCLOCK },
124	{ NULL, -1 }};
125
126static struct dict actionnames[] = {
127	{ "sighup", RCTL_ACTION_SIGHUP },
128	{ "sigint", RCTL_ACTION_SIGINT },
129	{ "sigquit", RCTL_ACTION_SIGQUIT },
130	{ "sigill", RCTL_ACTION_SIGILL },
131	{ "sigtrap", RCTL_ACTION_SIGTRAP },
132	{ "sigabrt", RCTL_ACTION_SIGABRT },
133	{ "sigemt", RCTL_ACTION_SIGEMT },
134	{ "sigfpe", RCTL_ACTION_SIGFPE },
135	{ "sigkill", RCTL_ACTION_SIGKILL },
136	{ "sigbus", RCTL_ACTION_SIGBUS },
137	{ "sigsegv", RCTL_ACTION_SIGSEGV },
138	{ "sigsys", RCTL_ACTION_SIGSYS },
139	{ "sigpipe", RCTL_ACTION_SIGPIPE },
140	{ "sigalrm", RCTL_ACTION_SIGALRM },
141	{ "sigterm", RCTL_ACTION_SIGTERM },
142	{ "sigurg", RCTL_ACTION_SIGURG },
143	{ "sigstop", RCTL_ACTION_SIGSTOP },
144	{ "sigtstp", RCTL_ACTION_SIGTSTP },
145	{ "sigchld", RCTL_ACTION_SIGCHLD },
146	{ "sigttin", RCTL_ACTION_SIGTTIN },
147	{ "sigttou", RCTL_ACTION_SIGTTOU },
148	{ "sigio", RCTL_ACTION_SIGIO },
149	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
150	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
151	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
152	{ "sigprof", RCTL_ACTION_SIGPROF },
153	{ "sigwinch", RCTL_ACTION_SIGWINCH },
154	{ "siginfo", RCTL_ACTION_SIGINFO },
155	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
156	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
157	{ "sigthr", RCTL_ACTION_SIGTHR },
158	{ "deny", RCTL_ACTION_DENY },
159	{ "log", RCTL_ACTION_LOG },
160	{ "devctl", RCTL_ACTION_DEVCTL },
161	{ NULL, -1 }};
162
163static void rctl_init(void);
164SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
165
166static uma_zone_t rctl_rule_link_zone;
167static uma_zone_t rctl_rule_zone;
168static struct rwlock rctl_lock;
169RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
170
171static int rctl_rule_fully_specified(const struct rctl_rule *rule);
172static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
173
174MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
175
176static const char *
177rctl_subject_type_name(int subject)
178{
179	int i;
180
181	for (i = 0; subjectnames[i].d_name != NULL; i++) {
182		if (subjectnames[i].d_value == subject)
183			return (subjectnames[i].d_name);
184	}
185
186	panic("rctl_subject_type_name: unknown subject type %d", subject);
187}
188
189static const char *
190rctl_action_name(int action)
191{
192	int i;
193
194	for (i = 0; actionnames[i].d_name != NULL; i++) {
195		if (actionnames[i].d_value == action)
196			return (actionnames[i].d_name);
197	}
198
199	panic("rctl_action_name: unknown action %d", action);
200}
201
202const char *
203rctl_resource_name(int resource)
204{
205	int i;
206
207	for (i = 0; resourcenames[i].d_name != NULL; i++) {
208		if (resourcenames[i].d_value == resource)
209			return (resourcenames[i].d_name);
210	}
211
212	panic("rctl_resource_name: unknown resource %d", resource);
213}
214
215/*
216 * Return the amount of resource that can be allocated by 'p' before
217 * hitting 'rule'.
218 */
219static int64_t
220rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
221{
222	int resource;
223	int64_t available = INT64_MAX;
224	struct ucred *cred = p->p_ucred;
225
226	rw_assert(&rctl_lock, RA_LOCKED);
227
228	resource = rule->rr_resource;
229	switch (rule->rr_per) {
230	case RCTL_SUBJECT_TYPE_PROCESS:
231		available = rule->rr_amount -
232		    p->p_racct->r_resources[resource];
233		break;
234	case RCTL_SUBJECT_TYPE_USER:
235		available = rule->rr_amount -
236		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
237		break;
238	case RCTL_SUBJECT_TYPE_LOGINCLASS:
239		available = rule->rr_amount -
240		    cred->cr_loginclass->lc_racct->r_resources[resource];
241		break;
242	case RCTL_SUBJECT_TYPE_JAIL:
243		available = rule->rr_amount -
244		    cred->cr_prison->pr_racct->r_resources[resource];
245		break;
246	default:
247		panic("rctl_compute_available: unknown per %d",
248		    rule->rr_per);
249	}
250
251	return (available);
252}
253
254/*
255 * Return non-zero if allocating 'amount' by proc 'p' would exceed
256 * resource limit specified by 'rule'.
257 */
258static int
259rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260    int64_t amount)
261{
262	int64_t available;
263
264	rw_assert(&rctl_lock, RA_LOCKED);
265
266	available = rctl_available_resource(p, rule);
267	if (available >= amount)
268		return (0);
269
270	return (1);
271}
272
273/*
274 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
275 * to what it keeps allocated now.  Returns non-zero if the allocation should
276 * be denied, 0 otherwise.
277 */
278int
279rctl_enforce(struct proc *p, int resource, uint64_t amount)
280{
281	struct rctl_rule *rule;
282	struct rctl_rule_link *link;
283	struct sbuf sb;
284	int should_deny = 0;
285	char *buf;
286	static int curtime = 0;
287	static struct timeval lasttime;
288
289	rw_rlock(&rctl_lock);
290
291	/*
292	 * There may be more than one matching rule; go through all of them.
293	 * Denial should be done last, after logging and sending signals.
294	 */
295	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
296		rule = link->rrl_rule;
297		if (rule->rr_resource != resource)
298			continue;
299		if (!rctl_would_exceed(p, rule, amount)) {
300			link->rrl_exceeded = 0;
301			continue;
302		}
303
304		switch (rule->rr_action) {
305		case RCTL_ACTION_DENY:
306			should_deny = 1;
307			continue;
308		case RCTL_ACTION_LOG:
309			/*
310			 * If rrl_exceeded != 0, it means we've already
311			 * logged a warning for this process.
312			 */
313			if (link->rrl_exceeded != 0)
314				continue;
315
316			if (!ppsratecheck(&lasttime, &curtime, 10))
317				continue;
318
319			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
320			if (buf == NULL) {
321				printf("rctl_enforce: out of memory\n");
322				continue;
323			}
324			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
325			rctl_rule_to_sbuf(&sb, rule);
326			sbuf_finish(&sb);
327			printf("rctl: rule \"%s\" matched by pid %d "
328			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
329			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
330			    p->p_ucred->cr_prison->pr_name);
331			sbuf_delete(&sb);
332			free(buf, M_RCTL);
333			link->rrl_exceeded = 1;
334			continue;
335		case RCTL_ACTION_DEVCTL:
336			if (link->rrl_exceeded != 0)
337				continue;
338
339			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
340			if (buf == NULL) {
341				printf("rctl_enforce: out of memory\n");
342				continue;
343			}
344			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
345			sbuf_printf(&sb, "rule=");
346			rctl_rule_to_sbuf(&sb, rule);
347			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
348			    p->p_pid, p->p_ucred->cr_ruid,
349			    p->p_ucred->cr_prison->pr_name);
350			sbuf_finish(&sb);
351			devctl_notify_f("RCTL", "rule", "matched",
352			    sbuf_data(&sb), M_NOWAIT);
353			sbuf_delete(&sb);
354			free(buf, M_RCTL);
355			link->rrl_exceeded = 1;
356			continue;
357		default:
358			if (link->rrl_exceeded != 0)
359				continue;
360
361			KASSERT(rule->rr_action > 0 &&
362			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
363			    ("rctl_enforce: unknown action %d",
364			     rule->rr_action));
365
366			/*
367			 * We're using the fact that RCTL_ACTION_SIG* values
368			 * are equal to their counterparts from sys/signal.h.
369			 */
370			psignal(p, rule->rr_action);
371			link->rrl_exceeded = 1;
372			continue;
373		}
374	}
375
376	rw_runlock(&rctl_lock);
377
378	if (should_deny) {
379		/*
380		 * Return fake error code; the caller should change it
381		 * into one proper for the situation - EFSIZ, ENOMEM etc.
382		 */
383		return (EDOOFUS);
384	}
385
386	return (0);
387}
388
389uint64_t
390rctl_get_limit(struct proc *p, int resource)
391{
392	struct rctl_rule *rule;
393	struct rctl_rule_link *link;
394	uint64_t amount = UINT64_MAX;
395
396	rw_rlock(&rctl_lock);
397
398	/*
399	 * There may be more than one matching rule; go through all of them.
400	 * Denial should be done last, after logging and sending signals.
401	 */
402	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
403		rule = link->rrl_rule;
404		if (rule->rr_resource != resource)
405			continue;
406		if (rule->rr_action != RCTL_ACTION_DENY)
407			continue;
408		if (rule->rr_amount < amount)
409			amount = rule->rr_amount;
410	}
411
412	rw_runlock(&rctl_lock);
413
414	return (amount);
415}
416
417uint64_t
418rctl_get_available(struct proc *p, int resource)
419{
420	struct rctl_rule *rule;
421	struct rctl_rule_link *link;
422	int64_t available, minavailable, allocated;
423
424	minavailable = INT64_MAX;
425
426	rw_rlock(&rctl_lock);
427
428	/*
429	 * There may be more than one matching rule; go through all of them.
430	 * Denial should be done last, after logging and sending signals.
431	 */
432	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
433		rule = link->rrl_rule;
434		if (rule->rr_resource != resource)
435			continue;
436		if (rule->rr_action != RCTL_ACTION_DENY)
437			continue;
438		available = rctl_available_resource(p, rule);
439		if (available < minavailable)
440			minavailable = available;
441	}
442
443	rw_runlock(&rctl_lock);
444
445	/*
446	 * XXX: Think about this _hard_.
447	 */
448	allocated = p->p_racct->r_resources[resource];
449	if (minavailable < INT64_MAX - allocated)
450		minavailable += allocated;
451	if (minavailable < 0)
452		minavailable = 0;
453	return (minavailable);
454}
455
456static int
457rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
458{
459
460	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
461		if (rule->rr_subject_type != filter->rr_subject_type)
462			return (0);
463
464		switch (filter->rr_subject_type) {
465		case RCTL_SUBJECT_TYPE_PROCESS:
466			if (filter->rr_subject.rs_proc != NULL &&
467			    rule->rr_subject.rs_proc !=
468			    filter->rr_subject.rs_proc)
469				return (0);
470			break;
471		case RCTL_SUBJECT_TYPE_USER:
472			if (filter->rr_subject.rs_uip != NULL &&
473			    rule->rr_subject.rs_uip !=
474			    filter->rr_subject.rs_uip)
475				return (0);
476			break;
477		case RCTL_SUBJECT_TYPE_LOGINCLASS:
478			if (filter->rr_subject.rs_loginclass != NULL &&
479			    rule->rr_subject.rs_loginclass !=
480			    filter->rr_subject.rs_loginclass)
481				return (0);
482			break;
483		case RCTL_SUBJECT_TYPE_JAIL:
484			if (filter->rr_subject.rs_prison != NULL &&
485			    rule->rr_subject.rs_prison !=
486			    filter->rr_subject.rs_prison)
487				return (0);
488			break;
489		default:
490			panic("rctl_rule_matches: unknown subject type %d",
491			    filter->rr_subject_type);
492		}
493	}
494
495	if (filter->rr_resource != RACCT_UNDEFINED) {
496		if (rule->rr_resource != filter->rr_resource)
497			return (0);
498	}
499
500	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
501		if (rule->rr_action != filter->rr_action)
502			return (0);
503	}
504
505	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
506		if (rule->rr_amount != filter->rr_amount)
507			return (0);
508	}
509
510	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
511		if (rule->rr_per != filter->rr_per)
512			return (0);
513	}
514
515	return (1);
516}
517
518static int
519str2value(const char *str, int *value, struct dict *table)
520{
521	int i;
522
523	if (value == NULL)
524		return (EINVAL);
525
526	for (i = 0; table[i].d_name != NULL; i++) {
527		if (strcasecmp(table[i].d_name, str) == 0) {
528			*value =  table[i].d_value;
529			return (0);
530		}
531	}
532
533	return (EINVAL);
534}
535
536static int
537str2id(const char *str, id_t *value)
538{
539	char *end;
540
541	if (str == NULL)
542		return (EINVAL);
543
544	*value = strtoul(str, &end, 10);
545	if ((size_t)(end - str) != strlen(str))
546		return (EINVAL);
547
548	return (0);
549}
550
551static int
552str2int64(const char *str, int64_t *value)
553{
554	char *end;
555
556	if (str == NULL)
557		return (EINVAL);
558
559	*value = strtoul(str, &end, 10);
560	if ((size_t)(end - str) != strlen(str))
561		return (EINVAL);
562
563	return (0);
564}
565
566/*
567 * Connect the rule to the racct, increasing refcount for the rule.
568 */
569static void
570rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
571{
572	struct rctl_rule_link *link;
573
574	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
575
576	rctl_rule_acquire(rule);
577	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
578	link->rrl_rule = rule;
579	link->rrl_exceeded = 0;
580
581	rw_wlock(&rctl_lock);
582	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
583	rw_wunlock(&rctl_lock);
584}
585
586static int
587rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
588{
589	struct rctl_rule_link *link;
590
591	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
592	rw_assert(&rctl_lock, RA_WLOCKED);
593
594	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
595	if (link == NULL)
596		return (ENOMEM);
597	rctl_rule_acquire(rule);
598	link->rrl_rule = rule;
599	link->rrl_exceeded = 0;
600
601	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
602	return (0);
603}
604
605/*
606 * Remove limits for a rules matching the filter and release
607 * the refcounts for the rules, possibly freeing them.  Returns
608 * the number of limit structures removed.
609 */
610static int
611rctl_racct_remove_rules(struct racct *racct,
612    const struct rctl_rule *filter)
613{
614	int removed = 0;
615	struct rctl_rule_link *link, *linktmp;
616
617	rw_assert(&rctl_lock, RA_WLOCKED);
618
619	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
620		if (!rctl_rule_matches(link->rrl_rule, filter))
621			continue;
622
623		LIST_REMOVE(link, rrl_next);
624		rctl_rule_release(link->rrl_rule);
625		uma_zfree(rctl_rule_link_zone, link);
626		removed++;
627	}
628	return (removed);
629}
630
631static void
632rctl_rule_acquire_subject(struct rctl_rule *rule)
633{
634
635	switch (rule->rr_subject_type) {
636	case RCTL_SUBJECT_TYPE_UNDEFINED:
637	case RCTL_SUBJECT_TYPE_PROCESS:
638	case RCTL_SUBJECT_TYPE_JAIL:
639		break;
640	case RCTL_SUBJECT_TYPE_USER:
641		if (rule->rr_subject.rs_uip != NULL)
642			uihold(rule->rr_subject.rs_uip);
643		break;
644	case RCTL_SUBJECT_TYPE_LOGINCLASS:
645		if (rule->rr_subject.rs_loginclass != NULL)
646			loginclass_hold(rule->rr_subject.rs_loginclass);
647		break;
648	default:
649		panic("rctl_rule_acquire_subject: unknown subject type %d",
650		    rule->rr_subject_type);
651	}
652}
653
654static void
655rctl_rule_release_subject(struct rctl_rule *rule)
656{
657
658	switch (rule->rr_subject_type) {
659	case RCTL_SUBJECT_TYPE_UNDEFINED:
660	case RCTL_SUBJECT_TYPE_PROCESS:
661	case RCTL_SUBJECT_TYPE_JAIL:
662		break;
663	case RCTL_SUBJECT_TYPE_USER:
664		if (rule->rr_subject.rs_uip != NULL)
665			uifree(rule->rr_subject.rs_uip);
666		break;
667	case RCTL_SUBJECT_TYPE_LOGINCLASS:
668		if (rule->rr_subject.rs_loginclass != NULL)
669			loginclass_free(rule->rr_subject.rs_loginclass);
670		break;
671	default:
672		panic("rctl_rule_release_subject: unknown subject type %d",
673		    rule->rr_subject_type);
674	}
675}
676
677struct rctl_rule *
678rctl_rule_alloc(int flags)
679{
680	struct rctl_rule *rule;
681
682	rule = uma_zalloc(rctl_rule_zone, flags);
683	if (rule == NULL)
684		return (NULL);
685	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
686	rule->rr_subject.rs_proc = NULL;
687	rule->rr_subject.rs_uip = NULL;
688	rule->rr_subject.rs_loginclass = NULL;
689	rule->rr_subject.rs_prison = NULL;
690	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
691	rule->rr_resource = RACCT_UNDEFINED;
692	rule->rr_action = RCTL_ACTION_UNDEFINED;
693	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
694	refcount_init(&rule->rr_refcount, 1);
695
696	return (rule);
697}
698
699struct rctl_rule *
700rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
701{
702	struct rctl_rule *copy;
703
704	copy = uma_zalloc(rctl_rule_zone, flags);
705	if (copy == NULL)
706		return (NULL);
707	copy->rr_subject_type = rule->rr_subject_type;
708	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
709	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
710	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
711	copy->rr_subject.rs_prison = rule->rr_subject.rs_prison;
712	copy->rr_per = rule->rr_per;
713	copy->rr_resource = rule->rr_resource;
714	copy->rr_action = rule->rr_action;
715	copy->rr_amount = rule->rr_amount;
716	refcount_init(&copy->rr_refcount, 1);
717	rctl_rule_acquire_subject(copy);
718
719	return (copy);
720}
721
722void
723rctl_rule_acquire(struct rctl_rule *rule)
724{
725
726	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
727
728	refcount_acquire(&rule->rr_refcount);
729}
730
731static void
732rctl_rule_free(void *context, int pending)
733{
734	struct rctl_rule *rule;
735
736	rule = (struct rctl_rule *)context;
737
738	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
739
740	/*
741	 * We don't need locking here; rule is guaranteed to be inaccessible.
742	 */
743
744	rctl_rule_release_subject(rule);
745	uma_zfree(rctl_rule_zone, rule);
746}
747
748void
749rctl_rule_release(struct rctl_rule *rule)
750{
751
752	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
753
754	if (refcount_release(&rule->rr_refcount)) {
755		/*
756		 * rctl_rule_release() is often called when iterating
757		 * over all the uidinfo structures in the system,
758		 * holding uihashtbl_lock.  Since rctl_rule_free()
759		 * might end up calling uifree(), this would lead
760		 * to lock recursion.  Use taskqueue to avoid this.
761		 */
762		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
763		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
764	}
765}
766
767static int
768rctl_rule_fully_specified(const struct rctl_rule *rule)
769{
770
771	switch (rule->rr_subject_type) {
772	case RCTL_SUBJECT_TYPE_UNDEFINED:
773		return (0);
774	case RCTL_SUBJECT_TYPE_PROCESS:
775		if (rule->rr_subject.rs_proc == NULL)
776			return (0);
777		break;
778	case RCTL_SUBJECT_TYPE_USER:
779		if (rule->rr_subject.rs_uip == NULL)
780			return (0);
781		break;
782	case RCTL_SUBJECT_TYPE_LOGINCLASS:
783		if (rule->rr_subject.rs_loginclass == NULL)
784			return (0);
785		break;
786	case RCTL_SUBJECT_TYPE_JAIL:
787		if (rule->rr_subject.rs_prison == NULL)
788			return (0);
789		break;
790	default:
791		panic("rctl_rule_fully_specified: unknown subject type %d",
792		    rule->rr_subject_type);
793	}
794	if (rule->rr_resource == RACCT_UNDEFINED)
795		return (0);
796	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
797		return (0);
798	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
799		return (0);
800	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
801		return (0);
802
803	return (1);
804}
805
806static int
807rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
808{
809	int error = 0;
810	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
811	     *amountstr, *perstr;
812	struct rctl_rule *rule;
813	id_t id;
814
815	rule = rctl_rule_alloc(M_WAITOK);
816
817	subjectstr = strsep(&rulestr, ":");
818	subject_idstr = strsep(&rulestr, ":");
819	resourcestr = strsep(&rulestr, ":");
820	actionstr = strsep(&rulestr, "=/");
821	amountstr = strsep(&rulestr, "/");
822	perstr = rulestr;
823
824	if (subjectstr == NULL || subjectstr[0] == '\0')
825		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
826	else {
827		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
828		if (error != 0)
829			goto out;
830	}
831
832	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
833		rule->rr_subject.rs_proc = NULL;
834		rule->rr_subject.rs_uip = NULL;
835		rule->rr_subject.rs_loginclass = NULL;
836		rule->rr_subject.rs_prison = NULL;
837	} else {
838		switch (rule->rr_subject_type) {
839		case RCTL_SUBJECT_TYPE_UNDEFINED:
840			error = EINVAL;
841			goto out;
842		case RCTL_SUBJECT_TYPE_PROCESS:
843			error = str2id(subject_idstr, &id);
844			if (error != 0)
845				goto out;
846			sx_assert(&allproc_lock, SA_LOCKED);
847			rule->rr_subject.rs_proc = pfind(id);
848			if (rule->rr_subject.rs_proc == NULL) {
849				error = ESRCH;
850				goto out;
851			}
852			PROC_UNLOCK(rule->rr_subject.rs_proc);
853			break;
854		case RCTL_SUBJECT_TYPE_USER:
855			error = str2id(subject_idstr, &id);
856			if (error != 0)
857				goto out;
858			rule->rr_subject.rs_uip = uifind(id);
859			break;
860		case RCTL_SUBJECT_TYPE_LOGINCLASS:
861			rule->rr_subject.rs_loginclass =
862			    loginclass_find(subject_idstr);
863			if (rule->rr_subject.rs_loginclass == NULL) {
864				error = ENAMETOOLONG;
865				goto out;
866			}
867			break;
868		case RCTL_SUBJECT_TYPE_JAIL:
869			rule->rr_subject.rs_prison =
870			    prison_find_name(&prison0, subject_idstr);
871			if (rule->rr_subject.rs_prison == NULL) {
872				/*
873				 * No jail with that name; try with the JID.
874				 */
875				error = str2id(subject_idstr, &id);
876				if (error != 0)
877					goto out;
878				rule->rr_subject.rs_prison = prison_find(id);
879				if (rule->rr_subject.rs_prison == NULL) {
880					error = ESRCH;
881					goto out;
882				}
883			}
884			/* prison_find() returns with mutex held. */
885			mtx_unlock(&rule->rr_subject.rs_prison->pr_mtx);
886			break;
887               default:
888                       panic("rctl_string_to_rule: unknown subject type %d",
889                           rule->rr_subject_type);
890               }
891	}
892
893	if (resourcestr == NULL || resourcestr[0] == '\0')
894		rule->rr_resource = RACCT_UNDEFINED;
895	else {
896		error = str2value(resourcestr, &rule->rr_resource,
897		    resourcenames);
898		if (error != 0)
899			goto out;
900	}
901
902	if (actionstr == NULL || actionstr[0] == '\0')
903		rule->rr_action = RCTL_ACTION_UNDEFINED;
904	else {
905		error = str2value(actionstr, &rule->rr_action, actionnames);
906		if (error != 0)
907			goto out;
908	}
909
910	if (amountstr == NULL || amountstr[0] == '\0')
911		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
912	else {
913		error = str2int64(amountstr, &rule->rr_amount);
914		if (error != 0)
915			goto out;
916		if (racct_is_in_thousands(rule->rr_resource))
917			rule->rr_amount *= 1000;
918	}
919
920	if (perstr == NULL || perstr[0] == '\0')
921		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
922	else {
923		error = str2value(perstr, &rule->rr_per, subjectnames);
924		if (error != 0)
925			goto out;
926	}
927
928out:
929	if (error == 0)
930		*rulep = rule;
931	else
932		rctl_rule_release(rule);
933
934	return (error);
935}
936
937/*
938 * Link a rule with all the subjects it applies to.
939 */
940int
941rctl_rule_add(struct rctl_rule *rule)
942{
943	struct proc *p;
944	struct ucred *cred;
945	struct uidinfo *uip;
946	struct prison *pr;
947	struct loginclass *lc;
948	struct rctl_rule *rule2;
949	int match;
950
951	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
952
953	/*
954	 * Some rules just don't make sense.  Note that the one below
955	 * cannot be rewritten using racct_is_deniable(); the RACCT_PCTCPU,
956	 * for example, is not deniable in the racct sense, but the
957	 * limit is enforced in a different way, so "deny" rules for %CPU
958	 * do make sense.
959	 */
960	if (rule->rr_action == RCTL_ACTION_DENY &&
961	    (rule->rr_resource == RACCT_CPU ||
962	    rule->rr_resource == RACCT_WALLCLOCK))
963		return (EOPNOTSUPP);
964
965	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
966	    racct_is_sloppy(rule->rr_resource))
967		return (EOPNOTSUPP);
968
969	/*
970	 * Make sure there are no duplicated rules.  Also, for the "deny"
971	 * rules, remove ones differing only by "amount".
972	 */
973	if (rule->rr_action == RCTL_ACTION_DENY) {
974		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
975		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
976		rctl_rule_remove(rule2);
977		rctl_rule_release(rule2);
978	} else
979		rctl_rule_remove(rule);
980
981	switch (rule->rr_subject_type) {
982	case RCTL_SUBJECT_TYPE_PROCESS:
983		p = rule->rr_subject.rs_proc;
984		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
985		/*
986		 * No resource limits for system processes.
987		 */
988		if (p->p_flag & P_SYSTEM)
989			return (EPERM);
990
991		rctl_racct_add_rule(p->p_racct, rule);
992		/*
993		 * In case of per-process rule, we don't have anything more
994		 * to do.
995		 */
996		return (0);
997
998	case RCTL_SUBJECT_TYPE_USER:
999		uip = rule->rr_subject.rs_uip;
1000		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1001		rctl_racct_add_rule(uip->ui_racct, rule);
1002		break;
1003
1004	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1005		lc = rule->rr_subject.rs_loginclass;
1006		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1007		rctl_racct_add_rule(lc->lc_racct, rule);
1008		break;
1009
1010	case RCTL_SUBJECT_TYPE_JAIL:
1011		pr = rule->rr_subject.rs_prison;
1012		KASSERT(pr != NULL, ("rctl_rule_add: NULL pr"));
1013		rctl_racct_add_rule(pr->pr_racct, rule);
1014		break;
1015
1016	default:
1017		panic("rctl_rule_add: unknown subject type %d",
1018		    rule->rr_subject_type);
1019	}
1020
1021	/*
1022	 * Now go through all the processes and add the new rule to the ones
1023	 * it applies to.
1024	 */
1025	sx_assert(&allproc_lock, SA_LOCKED);
1026	FOREACH_PROC_IN_SYSTEM(p) {
1027		if (p->p_flag & P_SYSTEM)
1028			continue;
1029		cred = p->p_ucred;
1030		switch (rule->rr_subject_type) {
1031		case RCTL_SUBJECT_TYPE_USER:
1032			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1033			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1034				break;
1035			continue;
1036		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1037			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1038				break;
1039			continue;
1040		case RCTL_SUBJECT_TYPE_JAIL:
1041			match = 0;
1042			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1043				if (pr == rule->rr_subject.rs_prison) {
1044					match = 1;
1045					break;
1046				}
1047			}
1048			if (match)
1049				break;
1050			continue;
1051		default:
1052			panic("rctl_rule_add: unknown subject type %d",
1053			    rule->rr_subject_type);
1054		}
1055
1056		rctl_racct_add_rule(p->p_racct, rule);
1057	}
1058
1059	return (0);
1060}
1061
1062static void
1063rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1064{
1065	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1066	int found = 0;
1067
1068	rw_wlock(&rctl_lock);
1069	found += rctl_racct_remove_rules(racct, filter);
1070	rw_wunlock(&rctl_lock);
1071
1072	*((int *)arg3) += found;
1073}
1074
1075/*
1076 * Remove all rules that match the filter.
1077 */
1078int
1079rctl_rule_remove(struct rctl_rule *filter)
1080{
1081	int found = 0;
1082	struct proc *p;
1083
1084	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1085	    filter->rr_subject.rs_proc != NULL) {
1086		p = filter->rr_subject.rs_proc;
1087		rw_wlock(&rctl_lock);
1088		found = rctl_racct_remove_rules(p->p_racct, filter);
1089		rw_wunlock(&rctl_lock);
1090		if (found)
1091			return (0);
1092		return (ESRCH);
1093	}
1094
1095	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1096	    (void *)&found);
1097	ui_racct_foreach(rctl_rule_remove_callback, filter,
1098	    (void *)&found);
1099	prison_racct_foreach(rctl_rule_remove_callback, filter,
1100	    (void *)&found);
1101
1102	sx_assert(&allproc_lock, SA_LOCKED);
1103	rw_wlock(&rctl_lock);
1104	FOREACH_PROC_IN_SYSTEM(p) {
1105		found += rctl_racct_remove_rules(p->p_racct, filter);
1106	}
1107	rw_wunlock(&rctl_lock);
1108
1109	if (found)
1110		return (0);
1111	return (ESRCH);
1112}
1113
1114/*
1115 * Appends a rule to the sbuf.
1116 */
1117static void
1118rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1119{
1120	int64_t amount;
1121
1122	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1123
1124	switch (rule->rr_subject_type) {
1125	case RCTL_SUBJECT_TYPE_PROCESS:
1126		if (rule->rr_subject.rs_proc == NULL)
1127			sbuf_printf(sb, ":");
1128		else
1129			sbuf_printf(sb, "%d:",
1130			    rule->rr_subject.rs_proc->p_pid);
1131		break;
1132	case RCTL_SUBJECT_TYPE_USER:
1133		if (rule->rr_subject.rs_uip == NULL)
1134			sbuf_printf(sb, ":");
1135		else
1136			sbuf_printf(sb, "%d:",
1137			    rule->rr_subject.rs_uip->ui_uid);
1138		break;
1139	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1140		if (rule->rr_subject.rs_loginclass == NULL)
1141			sbuf_printf(sb, ":");
1142		else
1143			sbuf_printf(sb, "%s:",
1144			    rule->rr_subject.rs_loginclass->lc_name);
1145		break;
1146	case RCTL_SUBJECT_TYPE_JAIL:
1147		if (rule->rr_subject.rs_prison == NULL)
1148			sbuf_printf(sb, ":");
1149		else
1150			sbuf_printf(sb, "%s:",
1151			    rule->rr_subject.rs_prison->pr_name);
1152		break;
1153	default:
1154		panic("rctl_rule_to_sbuf: unknown subject type %d",
1155		    rule->rr_subject_type);
1156	}
1157
1158	amount = rule->rr_amount;
1159	if (amount != RCTL_AMOUNT_UNDEFINED &&
1160	    racct_is_in_thousands(rule->rr_resource))
1161		amount /= 1000;
1162
1163	sbuf_printf(sb, "%s:%s=%jd",
1164	    rctl_resource_name(rule->rr_resource),
1165	    rctl_action_name(rule->rr_action),
1166	    amount);
1167
1168	if (rule->rr_per != rule->rr_subject_type)
1169		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1170}
1171
1172/*
1173 * Routine used by RCTL syscalls to read in input string.
1174 */
1175static int
1176rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1177{
1178	int error;
1179	char *str;
1180
1181	if (inbuflen <= 0)
1182		return (EINVAL);
1183
1184	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1185	error = copyinstr(inbufp, str, inbuflen, NULL);
1186	if (error != 0) {
1187		free(str, M_RCTL);
1188		return (error);
1189	}
1190
1191	*inputstr = str;
1192
1193	return (0);
1194}
1195
/*
 * Routine used by RCTL syscalls to write out output string.
 *
 * Finishes the sbuf and copies its contents, including the terminating
 * NUL, out to the userland buffer outbufp.  Returns ERANGE without copying
 * anything if outbuflen is too small, otherwise the result of copyout().
 * The sbuf is always deleted before returning; a NULL sbuf is a no-op
 * that returns 0.
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	int error;

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);
	if (outbuflen >= sbuf_len(outputsbuf) + 1)
		error = copyout(sbuf_data(outputsbuf), outbufp,
		    sbuf_len(outputsbuf) + 1);
	else
		error = ERANGE;
	sbuf_delete(outputsbuf);

	return (error);
}
1217
1218static struct sbuf *
1219rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1220{
1221	int i;
1222	int64_t amount;
1223	struct sbuf *sb;
1224
1225	sb = sbuf_new_auto();
1226	for (i = 0; i <= RACCT_MAX; i++) {
1227		if (sloppy == 0 && racct_is_sloppy(i))
1228			continue;
1229		amount = racct->r_resources[i];
1230		if (racct_is_in_thousands(i))
1231			amount /= 1000;
1232		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1233	}
1234	sbuf_setpos(sb, sbuf_len(sb) - 1);
1235	return (sb);
1236}
1237
1238int
1239rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1240{
1241	int error;
1242	char *inputstr;
1243	struct rctl_rule *filter;
1244	struct sbuf *outputsbuf = NULL;
1245	struct proc *p;
1246	struct uidinfo *uip;
1247	struct loginclass *lc;
1248	struct prison *pr;
1249
1250	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1251	if (error != 0)
1252		return (error);
1253
1254	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1255	if (error != 0)
1256		return (error);
1257
1258	sx_slock(&allproc_lock);
1259	sx_slock(&allprison_lock);
1260	error = rctl_string_to_rule(inputstr, &filter);
1261	free(inputstr, M_RCTL);
1262	if (error != 0) {
1263		sx_sunlock(&allprison_lock);
1264		sx_sunlock(&allproc_lock);
1265		return (error);
1266	}
1267
1268	switch (filter->rr_subject_type) {
1269	case RCTL_SUBJECT_TYPE_PROCESS:
1270		p = filter->rr_subject.rs_proc;
1271		if (p == NULL) {
1272			error = EINVAL;
1273			goto out;
1274		}
1275		if (p->p_flag & P_SYSTEM) {
1276			error = EINVAL;
1277			goto out;
1278		}
1279		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1280		break;
1281	case RCTL_SUBJECT_TYPE_USER:
1282		uip = filter->rr_subject.rs_uip;
1283		if (uip == NULL) {
1284			error = EINVAL;
1285			goto out;
1286		}
1287		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1288		break;
1289	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1290		lc = filter->rr_subject.rs_loginclass;
1291		if (lc == NULL) {
1292			error = EINVAL;
1293			goto out;
1294		}
1295		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1296		break;
1297	case RCTL_SUBJECT_TYPE_JAIL:
1298		pr = filter->rr_subject.rs_prison;
1299		if (pr == NULL) {
1300			error = EINVAL;
1301			goto out;
1302		}
1303		outputsbuf = rctl_racct_to_sbuf(pr->pr_racct, 1);
1304		break;
1305	default:
1306		error = EINVAL;
1307	}
1308out:
1309	rctl_rule_release(filter);
1310	sx_sunlock(&allprison_lock);
1311	sx_sunlock(&allproc_lock);
1312	if (error != 0)
1313		return (error);
1314
1315	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1316
1317	return (error);
1318}
1319
1320static void
1321rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1322{
1323	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1324	struct rctl_rule_link *link;
1325	struct sbuf *sb = (struct sbuf *)arg3;
1326
1327	rw_rlock(&rctl_lock);
1328	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1329		if (!rctl_rule_matches(link->rrl_rule, filter))
1330			continue;
1331		rctl_rule_to_sbuf(sb, link->rrl_rule);
1332		sbuf_printf(sb, ",");
1333	}
1334	rw_runlock(&rctl_lock);
1335}
1336
1337int
1338rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1339{
1340	int error;
1341	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1342	char *inputstr, *buf;
1343	struct sbuf *sb;
1344	struct rctl_rule *filter;
1345	struct rctl_rule_link *link;
1346	struct proc *p;
1347
1348	error = priv_check(td, PRIV_RCTL_GET_RULES);
1349	if (error != 0)
1350		return (error);
1351
1352	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1353	if (error != 0)
1354		return (error);
1355
1356	sx_slock(&allproc_lock);
1357	sx_slock(&allprison_lock);
1358	error = rctl_string_to_rule(inputstr, &filter);
1359	free(inputstr, M_RCTL);
1360	if (error != 0) {
1361		sx_sunlock(&allprison_lock);
1362		sx_sunlock(&allproc_lock);
1363		return (error);
1364	}
1365
1366again:
1367	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1368	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1369	KASSERT(sb != NULL, ("sbuf_new failed"));
1370
1371	sx_assert(&allproc_lock, SA_LOCKED);
1372	FOREACH_PROC_IN_SYSTEM(p) {
1373		rw_rlock(&rctl_lock);
1374		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1375			/*
1376			 * Non-process rules will be added to the buffer later.
1377			 * Adding them here would result in duplicated output.
1378			 */
1379			if (link->rrl_rule->rr_subject_type !=
1380			    RCTL_SUBJECT_TYPE_PROCESS)
1381				continue;
1382			if (!rctl_rule_matches(link->rrl_rule, filter))
1383				continue;
1384			rctl_rule_to_sbuf(sb, link->rrl_rule);
1385			sbuf_printf(sb, ",");
1386		}
1387		rw_runlock(&rctl_lock);
1388	}
1389
1390	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1391	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1392	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1393	if (sbuf_error(sb) == ENOMEM) {
1394		sbuf_delete(sb);
1395		free(buf, M_RCTL);
1396		bufsize *= 4;
1397		goto again;
1398	}
1399
1400	/*
1401	 * Remove trailing ",".
1402	 */
1403	if (sbuf_len(sb) > 0)
1404		sbuf_setpos(sb, sbuf_len(sb) - 1);
1405
1406	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1407
1408	rctl_rule_release(filter);
1409	sx_sunlock(&allprison_lock);
1410	sx_sunlock(&allproc_lock);
1411	free(buf, M_RCTL);
1412	return (error);
1413}
1414
1415int
1416rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1417{
1418	int error;
1419	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1420	char *inputstr, *buf;
1421	struct sbuf *sb;
1422	struct rctl_rule *filter;
1423	struct rctl_rule_link *link;
1424
1425	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1426	if (error != 0)
1427		return (error);
1428
1429	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1430	if (error != 0)
1431		return (error);
1432
1433	sx_slock(&allproc_lock);
1434	sx_slock(&allprison_lock);
1435	error = rctl_string_to_rule(inputstr, &filter);
1436	free(inputstr, M_RCTL);
1437	if (error != 0) {
1438		sx_sunlock(&allprison_lock);
1439		sx_sunlock(&allproc_lock);
1440		return (error);
1441	}
1442
1443	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1444		rctl_rule_release(filter);
1445		sx_sunlock(&allprison_lock);
1446		sx_sunlock(&allproc_lock);
1447		return (EINVAL);
1448	}
1449	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1450		rctl_rule_release(filter);
1451		sx_sunlock(&allprison_lock);
1452		sx_sunlock(&allproc_lock);
1453		return (EOPNOTSUPP);
1454	}
1455	if (filter->rr_subject.rs_proc == NULL) {
1456		rctl_rule_release(filter);
1457		sx_sunlock(&allprison_lock);
1458		sx_sunlock(&allproc_lock);
1459		return (EINVAL);
1460	}
1461
1462again:
1463	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1464	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1465	KASSERT(sb != NULL, ("sbuf_new failed"));
1466
1467	rw_rlock(&rctl_lock);
1468	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1469	    rrl_next) {
1470		rctl_rule_to_sbuf(sb, link->rrl_rule);
1471		sbuf_printf(sb, ",");
1472	}
1473	rw_runlock(&rctl_lock);
1474	if (sbuf_error(sb) == ENOMEM) {
1475		sbuf_delete(sb);
1476		free(buf, M_RCTL);
1477		bufsize *= 4;
1478		goto again;
1479	}
1480
1481	/*
1482	 * Remove trailing ",".
1483	 */
1484	if (sbuf_len(sb) > 0)
1485		sbuf_setpos(sb, sbuf_len(sb) - 1);
1486
1487	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1488	rctl_rule_release(filter);
1489	sx_sunlock(&allprison_lock);
1490	sx_sunlock(&allproc_lock);
1491	free(buf, M_RCTL);
1492	return (error);
1493}
1494
1495int
1496rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1497{
1498	int error;
1499	struct rctl_rule *rule;
1500	char *inputstr;
1501
1502	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1503	if (error != 0)
1504		return (error);
1505
1506	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1507	if (error != 0)
1508		return (error);
1509
1510	sx_slock(&allproc_lock);
1511	sx_slock(&allprison_lock);
1512	error = rctl_string_to_rule(inputstr, &rule);
1513	free(inputstr, M_RCTL);
1514	if (error != 0) {
1515		sx_sunlock(&allprison_lock);
1516		sx_sunlock(&allproc_lock);
1517		return (error);
1518	}
1519	/*
1520	 * The 'per' part of a rule is optional.
1521	 */
1522	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1523	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1524		rule->rr_per = rule->rr_subject_type;
1525
1526	if (!rctl_rule_fully_specified(rule)) {
1527		error = EINVAL;
1528		goto out;
1529	}
1530
1531	error = rctl_rule_add(rule);
1532
1533out:
1534	rctl_rule_release(rule);
1535	sx_sunlock(&allprison_lock);
1536	sx_sunlock(&allproc_lock);
1537	return (error);
1538}
1539
1540int
1541rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1542{
1543	int error;
1544	struct rctl_rule *filter;
1545	char *inputstr;
1546
1547	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1548	if (error != 0)
1549		return (error);
1550
1551	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1552	if (error != 0)
1553		return (error);
1554
1555	sx_slock(&allproc_lock);
1556	sx_slock(&allprison_lock);
1557	error = rctl_string_to_rule(inputstr, &filter);
1558	free(inputstr, M_RCTL);
1559	if (error != 0) {
1560		sx_sunlock(&allprison_lock);
1561		sx_sunlock(&allproc_lock);
1562		return (error);
1563	}
1564
1565	error = rctl_rule_remove(filter);
1566	rctl_rule_release(filter);
1567	sx_sunlock(&allprison_lock);
1568	sx_sunlock(&allproc_lock);
1569
1570	return (error);
1571}
1572
/*
 * Update RCTL rule list after credential change.
 *
 * Rebuilds the rule list of process p so that it consists of the
 * process' own per-process rules plus all rules currently linked to the
 * user, login class and jail raccts referenced by newcred.
 *
 * The link structures are allocated with M_WAITOK, which is done without
 * holding the rctl_lock.  The function therefore counts the rules under
 * the read lock, allocates that many links unlocked, and then fills them
 * in under the write lock.  If the number of rules no longer equals the
 * number of links allocated, everything allocated so far is released and
 * the procedure is retried from the start.
 */
void
rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
{
	int rulecnt, i;
	struct rctl_rule_link *link, *newlink;
	struct uidinfo *newuip;
	struct loginclass *newlc;
	struct prison *newpr;
	LIST_HEAD(, rctl_rule_link) newrules;

	newuip = newcred->cr_ruidinfo;
	newlc = newcred->cr_loginclass;
	newpr = newcred->cr_prison;

	LIST_INIT(&newrules);

again:
	/*
	 * First, count the rules that apply to the process with new
	 * credentials.
	 */
	rulecnt = 0;
	rw_rlock(&rctl_lock);
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		/* Of the process' current rules only per-process ones are kept. */
		if (link->rrl_rule->rr_subject_type ==
		    RCTL_SUBJECT_TYPE_PROCESS)
			rulecnt++;
	}
	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
		rulecnt++;
	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
		rulecnt++;
	LIST_FOREACH(link, &newpr->pr_racct->r_rule_links, rrl_next)
		rulecnt++;
	rw_runlock(&rctl_lock);

	/*
	 * Create temporary list.  We've dropped the rctl_lock in order
	 * to use M_WAITOK.
	 */
	for (i = 0; i < rulecnt; i++) {
		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
		/* NULL marks a link that holds no rule reference yet. */
		newlink->rrl_rule = NULL;
		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
	}

	newlink = LIST_FIRST(&newrules);

	/*
	 * Assign rules to the newly allocated list entries.
	 *
	 * newlink walks the temporary list; running out of entries
	 * (newlink == NULL) means there are now more rules than were
	 * counted.  rulecnt is decremented for every rule assigned, so it
	 * is zero at the end only if exactly as many rules were assigned
	 * as links were allocated.
	 */
	rw_wlock(&rctl_lock);
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		if (link->rrl_rule->rr_subject_type ==
		    RCTL_SUBJECT_TYPE_PROCESS) {
			if (newlink == NULL)
				goto goaround;
			rctl_rule_acquire(link->rrl_rule);
			newlink->rrl_rule = link->rrl_rule;
			newlink = LIST_NEXT(newlink, rrl_next);
			rulecnt--;
		}
	}

	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	LIST_FOREACH(link, &newpr->pr_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	if (rulecnt == 0) {
		/*
		 * Free the old rule list.
		 */
		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
			link = LIST_FIRST(&p->p_racct->r_rule_links);
			LIST_REMOVE(link, rrl_next);
			rctl_rule_release(link->rrl_rule);
			uma_zfree(rctl_rule_link_zone, link);
		}

		/*
		 * Replace lists and we're done.
		 *
		 * XXX: Is there any way to switch list heads instead
		 *      of iterating here?
		 */
		while (!LIST_EMPTY(&newrules)) {
			newlink = LIST_FIRST(&newrules);
			LIST_REMOVE(newlink, rrl_next);
			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
			    newlink, rrl_next);
		}

		rw_wunlock(&rctl_lock);

		return;
	}

	/*
	 * Reached either by falling through (fewer rules than links, i.e.
	 * rulecnt != 0) or by jumping here (more rules than links).
	 */
goaround:
	rw_wunlock(&rctl_lock);

	/*
	 * Rule list changed while we were not holding the rctl_lock.
	 * Free the new list and try again.
	 */
	while (!LIST_EMPTY(&newrules)) {
		newlink = LIST_FIRST(&newrules);
		LIST_REMOVE(newlink, rrl_next);
		/* Drop the reference only for links that were filled in. */
		if (newlink->rrl_rule != NULL)
			rctl_rule_release(newlink->rrl_rule);
		uma_zfree(rctl_rule_link_zone, newlink);
	}

	goto again;
}
1713
1714/*
1715 * Assign RCTL rules to the newly created process.
1716 */
1717int
1718rctl_proc_fork(struct proc *parent, struct proc *child)
1719{
1720	int error;
1721	struct rctl_rule_link *link;
1722	struct rctl_rule *rule;
1723
1724	LIST_INIT(&child->p_racct->r_rule_links);
1725
1726	/*
1727	 * No limits for kernel processes.
1728	 */
1729	if (child->p_flag & P_SYSTEM)
1730		return (0);
1731
1732	/*
1733	 * Nothing to inherit from P_SYSTEM parents.
1734	 */
1735	if (parent->p_racct == NULL) {
1736		KASSERT(parent->p_flag & P_SYSTEM,
1737		    ("non-system process without racct; p = %p", parent));
1738		return (0);
1739	}
1740
1741	rw_wlock(&rctl_lock);
1742
1743	/*
1744	 * Go through limits applicable to the parent and assign them
1745	 * to the child.  Rules with 'process' subject have to be duplicated
1746	 * in order to make their rr_subject point to the new process.
1747	 */
1748	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1749		if (link->rrl_rule->rr_subject_type ==
1750		    RCTL_SUBJECT_TYPE_PROCESS) {
1751			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1752			if (rule == NULL)
1753				goto fail;
1754			KASSERT(rule->rr_subject.rs_proc == parent,
1755			    ("rule->rr_subject.rs_proc != parent"));
1756			rule->rr_subject.rs_proc = child;
1757			error = rctl_racct_add_rule_locked(child->p_racct,
1758			    rule);
1759			rctl_rule_release(rule);
1760			if (error != 0)
1761				goto fail;
1762		} else {
1763			error = rctl_racct_add_rule_locked(child->p_racct,
1764			    link->rrl_rule);
1765			if (error != 0)
1766				goto fail;
1767		}
1768	}
1769
1770	rw_wunlock(&rctl_lock);
1771	return (0);
1772
1773fail:
1774	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1775		link = LIST_FIRST(&child->p_racct->r_rule_links);
1776		LIST_REMOVE(link, rrl_next);
1777		rctl_rule_release(link->rrl_rule);
1778		uma_zfree(rctl_rule_link_zone, link);
1779	}
1780	rw_wunlock(&rctl_lock);
1781	return (EAGAIN);
1782}
1783
1784/*
1785 * Release rules attached to the racct.
1786 */
1787void
1788rctl_racct_release(struct racct *racct)
1789{
1790	struct rctl_rule_link *link;
1791
1792	rw_wlock(&rctl_lock);
1793	while (!LIST_EMPTY(&racct->r_rule_links)) {
1794		link = LIST_FIRST(&racct->r_rule_links);
1795		LIST_REMOVE(link, rrl_next);
1796		rctl_rule_release(link->rrl_rule);
1797		uma_zfree(rctl_rule_link_zone, link);
1798	}
1799	rw_wunlock(&rctl_lock);
1800}
1801
1802static void
1803rctl_init(void)
1804{
1805
1806	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1807	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1808	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1809	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1810	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1811}
1812
1813#else /* !RCTL */
1814
/*
 * Stub for kernels built without RCTL: the syscall is not implemented.
 */
int
rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	/* Neither argument is looked at. */
	return (ENOSYS);
}
1821
/*
 * Stub for kernels built without RCTL: the syscall is not implemented.
 */
int
rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	/* Neither argument is looked at. */
	return (ENOSYS);
}
1828
/*
 * Stub for kernels built without RCTL: the syscall is not implemented.
 */
int
rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	/* Neither argument is looked at. */
	return (ENOSYS);
}
1835
/*
 * Stub for kernels built without RCTL: the syscall is not implemented.
 */
int
rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	/* Neither argument is looked at. */
	return (ENOSYS);
}
1842
/*
 * Stub for kernels built without RCTL: the syscall is not implemented.
 */
int
rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	/* Neither argument is looked at. */
	return (ENOSYS);
}
1849
1850#endif /* !RCTL */
1851