kern_jail.c revision 166827
1139804Simp/*- 246197Sphk * ---------------------------------------------------------------------------- 346197Sphk * "THE BEER-WARE LICENSE" (Revision 42): 446197Sphk * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 546197Sphk * can do whatever you want with this stuff. If we meet some day, and you think 646197Sphk * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 746197Sphk * ---------------------------------------------------------------------------- 846197Sphk */ 946155Sphk 10116182Sobrien#include <sys/cdefs.h> 11116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 166827 2007-02-19 13:10:29Z rwatson $"); 12116182Sobrien 13131177Spjd#include "opt_mac.h" 14131177Spjd 1546155Sphk#include <sys/param.h> 1646155Sphk#include <sys/types.h> 1746155Sphk#include <sys/kernel.h> 1846155Sphk#include <sys/systm.h> 1946155Sphk#include <sys/errno.h> 2046155Sphk#include <sys/sysproto.h> 2146155Sphk#include <sys/malloc.h> 22164032Srwatson#include <sys/priv.h> 2346155Sphk#include <sys/proc.h> 24124882Srwatson#include <sys/taskqueue.h> 2546155Sphk#include <sys/jail.h> 2687275Srwatson#include <sys/lock.h> 2787275Srwatson#include <sys/mutex.h> 28113275Smike#include <sys/namei.h> 29147185Spjd#include <sys/mount.h> 30113275Smike#include <sys/queue.h> 3146155Sphk#include <sys/socket.h> 32113275Smike#include <sys/syscallsubr.h> 3357163Srwatson#include <sys/sysctl.h> 34113275Smike#include <sys/vnode.h> 3546155Sphk#include <net/if.h> 3646155Sphk#include <netinet/in.h> 3746155Sphk 38163606Srwatson#include <security/mac/mac_framework.h> 39163606Srwatson 4046155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 4146155Sphk 4289414SarrSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4357163Srwatson "Jail rules"); 4457163Srwatson 4557163Srwatsonint jail_set_hostname_allowed = 1; 4689414SarrSYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 4757163Srwatson &jail_set_hostname_allowed, 0, 4857163Srwatson "Processes in jail can set their hostnames"); 4957163Srwatson 5061235Srwatsonint jail_socket_unixiproute_only = 1; 5189414SarrSYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 5261235Srwatson &jail_socket_unixiproute_only, 0, 5361235Srwatson "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 5461235Srwatson 5568024Srwatsonint jail_sysvipc_allowed = 0; 5689414SarrSYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 5768024Srwatson &jail_sysvipc_allowed, 0, 5868024Srwatson "Processes in jail can use System V IPC primitives"); 5968024Srwatson 60147185Spjdstatic int jail_enforce_statfs = 2; 61147185SpjdSYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 62147185Spjd &jail_enforce_statfs, 0, 63147185Spjd "Processes in jail cannot see all mounted file systems"); 64125804Srwatson 65128664Sbmilekicint jail_allow_raw_sockets = 0; 66128664SbmilekicSYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 67128664Sbmilekic &jail_allow_raw_sockets, 0, 68128664Sbmilekic "Prison root can create raw sockets"); 69128664Sbmilekic 70141543Scpercivaint jail_chflags_allowed = 0; 71141543ScpercivaSYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 72141543Scperciva &jail_chflags_allowed, 0, 73141543Scperciva "Processes in jail can alter system file flags"); 74141543Scperciva 75113275Smike/* allprison, lastprid, and prisoncount are protected by allprison_mtx. */ 76113275Smikestruct prisonlist allprison; 77113275Smikestruct mtx allprison_mtx; 78113275Smikeint lastprid = 0; 79113275Smikeint prisoncount = 0; 80113275Smike 81113275Smikestatic void init_prison(void *); 82124882Srwatsonstatic void prison_complete(void *context, int pending); 83113275Smikestatic struct prison *prison_find(int); 84113275Smikestatic int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 85113275Smike 86113275Smikestatic void 87113275Smikeinit_prison(void *data __unused) 88113275Smike{ 89113275Smike 90113275Smike mtx_init(&allprison_mtx, "allprison", NULL, MTX_DEF); 91113275Smike LIST_INIT(&allprison); 92113275Smike} 93113275Smike 94113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 95113275Smike 9682710Sdillon/* 9782710Sdillon * MPSAFE 98114168Smike * 99114168Smike * struct jail_args { 100114168Smike * struct jail *jail; 101114168Smike * }; 10282710Sdillon */ 10346155Sphkint 104114168Smikejail(struct thread *td, struct jail_args *uap) 10546155Sphk{ 106113275Smike struct nameidata nd; 107113275Smike struct prison *pr, *tpr; 10846155Sphk struct jail j; 109113275Smike struct jail_attach_args jaa; 110150652Scsjp int vfslocked, error, tryprid; 11146155Sphk 112114168Smike error = copyin(uap->jail, &j, sizeof(j)); 11346155Sphk if (error) 11484828Sjhb return (error); 11584828Sjhb if (j.version != 0) 11684828Sjhb return (EINVAL); 11784828Sjhb 118114168Smike MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 11993818Sjhb mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 120113275Smike pr->pr_ref = 1; 121114168Smike error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 122113275Smike if (error) 123113275Smike goto e_killmtx; 124150652Scsjp NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 125150652Scsjp pr->pr_path, td); 126113275Smike error = namei(&nd); 127150652Scsjp if (error) 128113275Smike goto e_killmtx; 129150652Scsjp vfslocked = NDHASGIANT(&nd); 130113275Smike pr->pr_root = nd.ni_vp; 131113275Smike VOP_UNLOCK(nd.ni_vp, 0, td); 132113275Smike NDFREE(&nd, NDF_ONLY_PNBUF); 133150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 134114168Smike error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 13584828Sjhb if (error) 136113275Smike goto e_dropvnref; 137113275Smike pr->pr_ip = j.ip_number; 138113275Smike pr->pr_linux = NULL; 139113275Smike pr->pr_securelevel = securelevel; 140113275Smike 141113275Smike /* Determine next pr_id and add prison to allprison list. */ 142113275Smike mtx_lock(&allprison_mtx); 143113275Smike tryprid = lastprid + 1; 144113275Smike if (tryprid == JAIL_MAX) 145113275Smike tryprid = 1; 146113275Smikenext: 147113275Smike LIST_FOREACH(tpr, &allprison, pr_list) { 148113275Smike if (tpr->pr_id == tryprid) { 149113275Smike tryprid++; 150113275Smike if (tryprid == JAIL_MAX) { 151113275Smike mtx_unlock(&allprison_mtx); 152113275Smike error = EAGAIN; 153113275Smike goto e_dropvnref; 154113275Smike } 155113275Smike goto next; 156113275Smike } 157113275Smike } 158113275Smike pr->pr_id = jaa.jid = lastprid = tryprid; 159113275Smike LIST_INSERT_HEAD(&allprison, pr, pr_list); 160113275Smike prisoncount++; 161113275Smike mtx_unlock(&allprison_mtx); 162113275Smike 163113275Smike error = jail_attach(td, &jaa); 164113275Smike if (error) 165113275Smike goto e_dropprref; 166113275Smike mtx_lock(&pr->pr_mtx); 167113275Smike pr->pr_ref--; 168113275Smike mtx_unlock(&pr->pr_mtx); 169113275Smike td->td_retval[0] = jaa.jid; 170113275Smike return (0); 171113275Smikee_dropprref: 172113275Smike mtx_lock(&allprison_mtx); 173113275Smike LIST_REMOVE(pr, pr_list); 174113275Smike prisoncount--; 175113275Smike mtx_unlock(&allprison_mtx); 176113275Smikee_dropvnref: 177150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 178113275Smike vrele(pr->pr_root); 179150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 180113275Smikee_killmtx: 181113275Smike mtx_destroy(&pr->pr_mtx); 182113275Smike FREE(pr, M_PRISON); 183113275Smike return (error); 184113275Smike} 185113275Smike 186113275Smike/* 187113275Smike * MPSAFE 188114168Smike * 189114168Smike * struct jail_attach_args { 190114168Smike * int jid; 191114168Smike * }; 192113275Smike */ 193113275Smikeint 194114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap) 195113275Smike{ 196113275Smike struct proc *p; 197113275Smike struct ucred *newcred, *oldcred; 198113275Smike struct prison *pr; 199150652Scsjp int vfslocked, error; 200113275Smike 201126023Snectar /* 202126023Snectar * XXX: Note that there is a slight race here if two threads 203126023Snectar * in the same privileged process attempt to attach to two 204126023Snectar * different jails at the same time. It is important for 205126023Snectar * user processes not to do this, or they might end up with 206126023Snectar * a process root from one prison, but attached to the jail 207126023Snectar * of another. 208126023Snectar */ 209164032Srwatson error = priv_check(td, PRIV_JAIL_ATTACH); 210126023Snectar if (error) 211126023Snectar return (error); 212126023Snectar 213113275Smike p = td->td_proc; 214113275Smike mtx_lock(&allprison_mtx); 215113275Smike pr = prison_find(uap->jid); 216113275Smike if (pr == NULL) { 217113275Smike mtx_unlock(&allprison_mtx); 218113275Smike return (EINVAL); 219113275Smike } 220113275Smike pr->pr_ref++; 221113275Smike mtx_unlock(&pr->pr_mtx); 222113275Smike mtx_unlock(&allprison_mtx); 223113275Smike 224150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 225113275Smike vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td); 226113275Smike if ((error = change_dir(pr->pr_root, td)) != 0) 227113275Smike goto e_unlock; 228113275Smike#ifdef MAC 229113275Smike if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root))) 230113275Smike goto e_unlock; 231113275Smike#endif 232113275Smike VOP_UNLOCK(pr->pr_root, 0, td); 233113275Smike change_root(pr->pr_root, td); 234150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 235113275Smike 23684828Sjhb newcred = crget(); 23784828Sjhb PROC_LOCK(p); 23884828Sjhb oldcred = p->p_ucred; 239113275Smike setsugid(p); 24084828Sjhb crcopy(newcred, oldcred); 241113630Sjhb newcred->cr_prison = pr; 24284828Sjhb p->p_ucred = newcred; 24384828Sjhb PROC_UNLOCK(p); 24484828Sjhb crfree(oldcred); 24546155Sphk return (0); 246113275Smikee_unlock: 247113275Smike VOP_UNLOCK(pr->pr_root, 0, td); 248150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 249113275Smike mtx_lock(&pr->pr_mtx); 250113275Smike pr->pr_ref--; 251113275Smike mtx_unlock(&pr->pr_mtx); 25246155Sphk return (error); 25346155Sphk} 25446155Sphk 255113275Smike/* 256113275Smike * Returns a locked prison instance, or NULL on failure. 257113275Smike */ 258113275Smikestatic struct prison * 259113275Smikeprison_find(int prid) 260113275Smike{ 261113275Smike struct prison *pr; 262113275Smike 263113275Smike mtx_assert(&allprison_mtx, MA_OWNED); 264113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 265113275Smike if (pr->pr_id == prid) { 266113275Smike mtx_lock(&pr->pr_mtx); 267113275Smike return (pr); 268113275Smike } 269113275Smike } 270113275Smike return (NULL); 271113275Smike} 272113275Smike 27372786Srwatsonvoid 27472786Srwatsonprison_free(struct prison *pr) 27572786Srwatson{ 27672786Srwatson 277113275Smike mtx_lock(&allprison_mtx); 27887275Srwatson mtx_lock(&pr->pr_mtx); 27972786Srwatson pr->pr_ref--; 28072786Srwatson if (pr->pr_ref == 0) { 281113275Smike LIST_REMOVE(pr, pr_list); 28287275Srwatson mtx_unlock(&pr->pr_mtx); 283113275Smike prisoncount--; 284113275Smike mtx_unlock(&allprison_mtx); 285124882Srwatson 286124882Srwatson TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 287144660Sjeff taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 28887275Srwatson return; 28972786Srwatson } 29087275Srwatson mtx_unlock(&pr->pr_mtx); 291113275Smike mtx_unlock(&allprison_mtx); 29272786Srwatson} 29372786Srwatson 294124882Srwatsonstatic void 295124882Srwatsonprison_complete(void *context, int pending) 296124882Srwatson{ 297124882Srwatson struct prison *pr; 298150652Scsjp int vfslocked; 299124882Srwatson 300124882Srwatson pr = (struct prison *)context; 301124882Srwatson 302150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 303124882Srwatson vrele(pr->pr_root); 304150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 305124882Srwatson 306124882Srwatson mtx_destroy(&pr->pr_mtx); 307124882Srwatson if (pr->pr_linux != NULL) 308124882Srwatson FREE(pr->pr_linux, M_PRISON); 309124882Srwatson FREE(pr, M_PRISON); 310124882Srwatson} 311124882Srwatson 31272786Srwatsonvoid 31372786Srwatsonprison_hold(struct prison *pr) 31472786Srwatson{ 31572786Srwatson 31687275Srwatson mtx_lock(&pr->pr_mtx); 31772786Srwatson pr->pr_ref++; 31887275Srwatson mtx_unlock(&pr->pr_mtx); 31972786Srwatson} 32072786Srwatson 32187275Srwatsonu_int32_t 32287275Srwatsonprison_getip(struct ucred *cred) 32387275Srwatson{ 32487275Srwatson 32587275Srwatson return (cred->cr_prison->pr_ip); 32687275Srwatson} 32787275Srwatson 32846155Sphkint 32972786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip) 33046155Sphk{ 33146155Sphk u_int32_t tmp; 33246155Sphk 33372786Srwatson if (!jailed(cred)) 33446155Sphk return (0); 33546155Sphk if (flag) 33646155Sphk tmp = *ip; 33746155Sphk else 33846155Sphk tmp = ntohl(*ip); 33946155Sphk if (tmp == INADDR_ANY) { 34046155Sphk if (flag) 34172786Srwatson *ip = cred->cr_prison->pr_ip; 34246155Sphk else 34372786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 34446155Sphk return (0); 34546155Sphk } 34681114Srwatson if (tmp == INADDR_LOOPBACK) { 34781114Srwatson if (flag) 34881114Srwatson *ip = cred->cr_prison->pr_ip; 34981114Srwatson else 35081114Srwatson *ip = htonl(cred->cr_prison->pr_ip); 35181114Srwatson return (0); 35281114Srwatson } 35372786Srwatson if (cred->cr_prison->pr_ip != tmp) 35446155Sphk return (1); 35546155Sphk return (0); 35646155Sphk} 35746155Sphk 35846155Sphkvoid 35972786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 36046155Sphk{ 36146155Sphk u_int32_t tmp; 36246155Sphk 36372786Srwatson if (!jailed(cred)) 36446155Sphk return; 36546155Sphk if (flag) 36646155Sphk tmp = *ip; 36746155Sphk else 36846155Sphk tmp = ntohl(*ip); 36981114Srwatson if (tmp == INADDR_LOOPBACK) { 37046155Sphk if (flag) 37172786Srwatson *ip = cred->cr_prison->pr_ip; 37246155Sphk else 37372786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 37446155Sphk return; 37546155Sphk } 37646155Sphk return; 37746155Sphk} 37846155Sphk 37946155Sphkint 38072786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa) 38146155Sphk{ 382114168Smike struct sockaddr_in *sai; 38346155Sphk int ok; 38446155Sphk 385114168Smike sai = (struct sockaddr_in *)sa; 38661235Srwatson if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 38761235Srwatson ok = 1; 38861235Srwatson else if (sai->sin_family != AF_INET) 38946155Sphk ok = 0; 39072786Srwatson else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 39146155Sphk ok = 1; 39246155Sphk else 39346155Sphk ok = 0; 39446155Sphk return (ok); 39546155Sphk} 39672786Srwatson 39772786Srwatson/* 39872786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 39972786Srwatson */ 40072786Srwatsonint 401114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2) 40272786Srwatson{ 40372786Srwatson 40472786Srwatson if (jailed(cred1)) { 40572786Srwatson if (!jailed(cred2)) 40672786Srwatson return (ESRCH); 40772786Srwatson if (cred2->cr_prison != cred1->cr_prison) 40872786Srwatson return (ESRCH); 40972786Srwatson } 41072786Srwatson 41172786Srwatson return (0); 41272786Srwatson} 41372786Srwatson 41472786Srwatson/* 41572786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0. 41672786Srwatson */ 41772786Srwatsonint 418114168Smikejailed(struct ucred *cred) 41972786Srwatson{ 42072786Srwatson 42172786Srwatson return (cred->cr_prison != NULL); 42272786Srwatson} 42391384Srobert 42491384Srobert/* 42591384Srobert * Return the correct hostname for the passed credential. 42691384Srobert */ 42791391Srobertvoid 428114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size) 42991384Srobert{ 43091384Srobert 43191391Srobert if (jailed(cred)) { 43291391Srobert mtx_lock(&cred->cr_prison->pr_mtx); 433105354Srobert strlcpy(buf, cred->cr_prison->pr_host, size); 43491391Srobert mtx_unlock(&cred->cr_prison->pr_mtx); 435114168Smike } else 436105354Srobert strlcpy(buf, hostname, size); 43791384Srobert} 438113275Smike 439125804Srwatson/* 440147185Spjd * Determine whether the subject represented by cred can "see" 441147185Spjd * status of a mount point. 442147185Spjd * Returns: 0 for permitted, ENOENT otherwise. 443147185Spjd * XXX: This function should be called cr_canseemount() and should be 444147185Spjd * placed in kern_prot.c. 445125804Srwatson */ 446125804Srwatsonint 447147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp) 448125804Srwatson{ 449147185Spjd struct prison *pr; 450147185Spjd struct statfs *sp; 451147185Spjd size_t len; 452125804Srwatson 453147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 454147185Spjd return (0); 455147185Spjd pr = cred->cr_prison; 456147185Spjd if (pr->pr_root->v_mount == mp) 457147185Spjd return (0); 458147185Spjd if (jail_enforce_statfs == 2) 459147185Spjd return (ENOENT); 460147185Spjd /* 461147185Spjd * If jail's chroot directory is set to "/" we should be able to see 462147185Spjd * all mount-points from inside a jail. 463147185Spjd * This is ugly check, but this is the only situation when jail's 464147185Spjd * directory ends with '/'. 465147185Spjd */ 466147185Spjd if (strcmp(pr->pr_path, "/") == 0) 467147185Spjd return (0); 468147185Spjd len = strlen(pr->pr_path); 469147185Spjd sp = &mp->mnt_stat; 470147185Spjd if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 471147185Spjd return (ENOENT); 472147185Spjd /* 473147185Spjd * Be sure that we don't have situation where jail's root directory 474147185Spjd * is "/some/path" and mount point is "/some/pathpath". 475147185Spjd */ 476147185Spjd if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 477147185Spjd return (ENOENT); 478147185Spjd return (0); 479147185Spjd} 480147185Spjd 481147185Spjdvoid 482147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 483147185Spjd{ 484147185Spjd char jpath[MAXPATHLEN]; 485147185Spjd struct prison *pr; 486147185Spjd size_t len; 487147185Spjd 488147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 489147185Spjd return; 490147185Spjd pr = cred->cr_prison; 491147185Spjd if (prison_canseemount(cred, mp) != 0) { 492147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 493147185Spjd strlcpy(sp->f_mntonname, "[restricted]", 494147185Spjd sizeof(sp->f_mntonname)); 495147185Spjd return; 496125804Srwatson } 497147185Spjd if (pr->pr_root->v_mount == mp) { 498147185Spjd /* 499147185Spjd * Clear current buffer data, so we are sure nothing from 500147185Spjd * the valid path left there. 501147185Spjd */ 502147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 503147185Spjd *sp->f_mntonname = '/'; 504147185Spjd return; 505147185Spjd } 506147185Spjd /* 507147185Spjd * If jail's chroot directory is set to "/" we should be able to see 508147185Spjd * all mount-points from inside a jail. 509147185Spjd */ 510147185Spjd if (strcmp(pr->pr_path, "/") == 0) 511147185Spjd return; 512147185Spjd len = strlen(pr->pr_path); 513147185Spjd strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 514147185Spjd /* 515147185Spjd * Clear current buffer data, so we are sure nothing from 516147185Spjd * the valid path left there. 517147185Spjd */ 518147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 519147185Spjd if (*jpath == '\0') { 520147185Spjd /* Should never happen. */ 521147185Spjd *sp->f_mntonname = '/'; 522147185Spjd } else { 523147185Spjd strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 524147185Spjd } 525125804Srwatson} 526125804Srwatson 527164032Srwatson/* 528164032Srwatson * Check with permission for a specific privilege is granted within jail. We 529164032Srwatson * have a specific list of accepted privileges; the rest are denied. 530164032Srwatson */ 531164032Srwatsonint 532164032Srwatsonprison_priv_check(struct ucred *cred, int priv) 533164032Srwatson{ 534164032Srwatson 535164032Srwatson if (!jailed(cred)) 536164032Srwatson return (0); 537164032Srwatson 538164032Srwatson switch (priv) { 539164032Srwatson 540164032Srwatson /* 541164032Srwatson * Allow ktrace privileges for root in jail. 542164032Srwatson */ 543164032Srwatson case PRIV_KTRACE: 544164032Srwatson 545166827Srwatson#if 0 546164032Srwatson /* 547164032Srwatson * Allow jailed processes to configure audit identity and 548164032Srwatson * submit audit records (login, etc). In the future we may 549164032Srwatson * want to further refine the relationship between audit and 550164032Srwatson * jail. 551164032Srwatson */ 552164032Srwatson case PRIV_AUDIT_GETAUDIT: 553164032Srwatson case PRIV_AUDIT_SETAUDIT: 554164032Srwatson case PRIV_AUDIT_SUBMIT: 555166827Srwatson#endif 556164032Srwatson 557164032Srwatson /* 558164032Srwatson * Allow jailed processes to manipulate process UNIX 559164032Srwatson * credentials in any way they see fit. 560164032Srwatson */ 561164032Srwatson case PRIV_CRED_SETUID: 562164032Srwatson case PRIV_CRED_SETEUID: 563164032Srwatson case PRIV_CRED_SETGID: 564164032Srwatson case PRIV_CRED_SETEGID: 565164032Srwatson case PRIV_CRED_SETGROUPS: 566164032Srwatson case PRIV_CRED_SETREUID: 567164032Srwatson case PRIV_CRED_SETREGID: 568164032Srwatson case PRIV_CRED_SETRESUID: 569164032Srwatson case PRIV_CRED_SETRESGID: 570164032Srwatson 571164032Srwatson /* 572164032Srwatson * Jail implements visibility constraints already, so allow 573164032Srwatson * jailed root to override uid/gid-based constraints. 574164032Srwatson */ 575164032Srwatson case PRIV_SEEOTHERGIDS: 576164032Srwatson case PRIV_SEEOTHERUIDS: 577164032Srwatson 578164032Srwatson /* 579164032Srwatson * Jail implements inter-process debugging limits already, so 580164032Srwatson * allow jailed root various debugging privileges. 581164032Srwatson */ 582164032Srwatson case PRIV_DEBUG_DIFFCRED: 583164032Srwatson case PRIV_DEBUG_SUGID: 584164032Srwatson case PRIV_DEBUG_UNPRIV: 585164032Srwatson 586164032Srwatson /* 587164032Srwatson * Allow jail to set various resource limits and login 588164032Srwatson * properties, and for now, exceed process resource limits. 589164032Srwatson */ 590164032Srwatson case PRIV_PROC_LIMIT: 591164032Srwatson case PRIV_PROC_SETLOGIN: 592164032Srwatson case PRIV_PROC_SETRLIMIT: 593164032Srwatson 594164032Srwatson /* 595164032Srwatson * System V and POSIX IPC privileges are granted in jail. 596164032Srwatson */ 597164032Srwatson case PRIV_IPC_READ: 598164032Srwatson case PRIV_IPC_WRITE: 599164032Srwatson case PRIV_IPC_EXEC: 600164032Srwatson case PRIV_IPC_ADMIN: 601164032Srwatson case PRIV_IPC_MSGSIZE: 602164032Srwatson case PRIV_MQ_ADMIN: 603164032Srwatson 604164032Srwatson /* 605164032Srwatson * Jail implements its own inter-process limits, so allow 606164032Srwatson * root processes in jail to change scheduling on other 607164032Srwatson * processes in the same jail. Likewise for signalling. 608164032Srwatson */ 609164032Srwatson case PRIV_SCHED_DIFFCRED: 610164032Srwatson case PRIV_SIGNAL_DIFFCRED: 611164032Srwatson case PRIV_SIGNAL_SUGID: 612164032Srwatson 613164032Srwatson /* 614164032Srwatson * Allow jailed processes to write to sysctls marked as jail 615164032Srwatson * writable. 616164032Srwatson */ 617164032Srwatson case PRIV_SYSCTL_WRITEJAIL: 618164032Srwatson 619164032Srwatson /* 620164032Srwatson * Allow root in jail to manage a variety of quota 621164032Srwatson * properties. Some are a bit surprising and should be 622164032Srwatson * reconsidered. 623164032Srwatson */ 624164032Srwatson case PRIV_UFS_GETQUOTA: 625164032Srwatson case PRIV_UFS_QUOTAOFF: /* XXXRW: Slightly surprising. */ 626164032Srwatson case PRIV_UFS_QUOTAON: /* XXXRW: Slightly surprising. */ 627164032Srwatson case PRIV_UFS_SETQUOTA: 628164032Srwatson case PRIV_UFS_SETUSE: /* XXXRW: Slightly surprising. */ 629164032Srwatson 630164032Srwatson /* 631164032Srwatson * Since Jail relies on chroot() to implement file system 632164032Srwatson * protections, grant many VFS privileges to root in jail. 633164032Srwatson * Be careful to exclude mount-related and NFS-related 634164032Srwatson * privileges. 635164032Srwatson */ 636164032Srwatson case PRIV_VFS_READ: 637164032Srwatson case PRIV_VFS_WRITE: 638164032Srwatson case PRIV_VFS_ADMIN: 639164032Srwatson case PRIV_VFS_EXEC: 640164032Srwatson case PRIV_VFS_LOOKUP: 641164032Srwatson case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 642164032Srwatson case PRIV_VFS_CHFLAGS_DEV: 643164032Srwatson case PRIV_VFS_CHOWN: 644164032Srwatson case PRIV_VFS_CHROOT: 645164032Srwatson case PRIV_VFS_CLEARSUGID: 646164032Srwatson case PRIV_VFS_FCHROOT: 647164032Srwatson case PRIV_VFS_LINK: 648164032Srwatson case PRIV_VFS_SETGID: 649164032Srwatson case PRIV_VFS_STICKYFILE: 650164032Srwatson return (0); 651164032Srwatson 652164032Srwatson /* 653164032Srwatson * Depending on the global setting, allow privilege of 654164032Srwatson * setting system flags. 655164032Srwatson */ 656164032Srwatson case PRIV_VFS_SYSFLAGS: 657164032Srwatson if (jail_chflags_allowed) 658164032Srwatson return (0); 659164032Srwatson else 660164032Srwatson return (EPERM); 661164032Srwatson 662164032Srwatson /* 663164032Srwatson * Allow jailed root to bind reserved ports. 664164032Srwatson */ 665164032Srwatson case PRIV_NETINET_RESERVEDPORT: 666164032Srwatson return (0); 667164032Srwatson 668164032Srwatson /* 669164032Srwatson * Conditionally allow creating raw sockets in jail. 670164032Srwatson */ 671164032Srwatson case PRIV_NETINET_RAW: 672164032Srwatson if (jail_allow_raw_sockets) 673164032Srwatson return (0); 674164032Srwatson else 675164032Srwatson return (EPERM); 676164032Srwatson 677164032Srwatson /* 678164032Srwatson * Since jail implements its own visibility limits on netstat 679164032Srwatson * sysctls, allow getcred. This allows identd to work in 680164032Srwatson * jail. 681164032Srwatson */ 682164032Srwatson case PRIV_NETINET_GETCRED: 683164032Srwatson return (0); 684164032Srwatson 685164032Srwatson default: 686164032Srwatson /* 687164032Srwatson * In all remaining cases, deny the privilege request. This 688164032Srwatson * includes almost all network privileges, many system 689164032Srwatson * configuration privileges. 690164032Srwatson */ 691164032Srwatson return (EPERM); 692164032Srwatson } 693164032Srwatson} 694164032Srwatson 695113275Smikestatic int 696113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS) 697113275Smike{ 698113275Smike struct xprison *xp, *sxp; 699113275Smike struct prison *pr; 700113275Smike int count, error; 701113275Smike 702127020Spjd if (jailed(req->td->td_ucred)) 703125806Srwatson return (0); 704113275Smikeretry: 705113275Smike mtx_lock(&allprison_mtx); 706113275Smike count = prisoncount; 707113275Smike mtx_unlock(&allprison_mtx); 708113275Smike 709113275Smike if (count == 0) 710113275Smike return (0); 711113275Smike 712113275Smike sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 713113275Smike mtx_lock(&allprison_mtx); 714113275Smike if (count != prisoncount) { 715113275Smike mtx_unlock(&allprison_mtx); 716113275Smike free(sxp, M_TEMP); 717113275Smike goto retry; 718113275Smike } 719113275Smike 720113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 721113275Smike mtx_lock(&pr->pr_mtx); 722113275Smike xp->pr_version = XPRISON_VERSION; 723113275Smike xp->pr_id = pr->pr_id; 724113275Smike strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 725113275Smike strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 726113275Smike xp->pr_ip = pr->pr_ip; 727113275Smike mtx_unlock(&pr->pr_mtx); 728113275Smike xp++; 729113275Smike } 730113275Smike mtx_unlock(&allprison_mtx); 731113275Smike 732113275Smike error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 733113275Smike free(sxp, M_TEMP); 734113275Smike if (error) 735113275Smike return (error); 736113275Smike return (0); 737113275Smike} 738113275Smike 739113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 740113275Smike NULL, 0, sysctl_jail_list, "S", "List of active jails"); 741126004Spjd 742126004Spjdstatic int 743126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 744126004Spjd{ 745126004Spjd int error, injail; 746126004Spjd 747126004Spjd injail = jailed(req->td->td_ucred); 748126004Spjd error = SYSCTL_OUT(req, &injail, sizeof(injail)); 749126004Spjd 750126004Spjd return (error); 751126004Spjd} 752126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 753126004Spjd NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 754