kern_jail.c revision 177785
/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 177785 2008-03-31 12:01:21Z kib $");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <net/if.h>
#include <netinet/in.h>

#include <security/mac/mac_framework.h>

/* Malloc type used for prison structures and their per-service slot arrays. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Root of the security.jail sysctl tree; all tunables below hang off it. */
SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jail rules");

/* security.jail.set_hostname_allowed (default 1). */
int jail_set_hostname_allowed = 1;
SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

/* security.jail.socket_unixiproute_only (default 1); read by prison_if(). */
int jail_socket_unixiproute_only = 1;
SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");

/* security.jail.sysvipc_allowed (default 0). */
int jail_sysvipc_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

/*
 * security.jail.enforce_statfs (default 2), as interpreted by
 * prison_canseemount() and prison_enforce_statfs():
 *   0 - no restriction, every mount point is visible;
 *   2 - only the file system holding the jail's root is visible;
 *   any other value - mount points at or below the jail's path are visible.
 */
static int jail_enforce_statfs = 2;
SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
    &jail_enforce_statfs, 0,
    "Processes in jail cannot see all mounted file systems");

/* security.jail.allow_raw_sockets (default 0); gates PRIV_NETINET_RAW. */
int jail_allow_raw_sockets = 0;
SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &jail_allow_raw_sockets, 0,
    "Prison root can create raw sockets");

/* security.jail.chflags_allowed (default 0); gates PRIV_VFS_SYSFLAGS. */
int jail_chflags_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

/* security.jail.mount_allowed (default 0); gates the PRIV_VFS_*MOUNT* set. */
int jail_mount_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
    &jail_mount_allowed, 0,
    "Processes in jail can mount/unmount jail-friendly file systems");

/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
struct prisonlist allprison;
struct sx allprison_lock;
int lastprid = 0;
int prisoncount = 0;

/*
 * List of jail services.
Protected by allprison_lock. 90168401Spjd */ 91168401SpjdTAILQ_HEAD(prison_services_head, prison_service); 92168401Spjdstatic struct prison_services_head prison_services = 93168401Spjd TAILQ_HEAD_INITIALIZER(prison_services); 94168401Spjdstatic int prison_service_slots = 0; 95168401Spjd 96168401Spjdstruct prison_service { 97168401Spjd prison_create_t ps_create; 98168401Spjd prison_destroy_t ps_destroy; 99168401Spjd int ps_slotno; 100168401Spjd TAILQ_ENTRY(prison_service) ps_next; 101168401Spjd char ps_name[0]; 102168401Spjd}; 103168401Spjd 104113275Smikestatic void init_prison(void *); 105124882Srwatsonstatic void prison_complete(void *context, int pending); 106113275Smikestatic int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 107113275Smike 108113275Smikestatic void 109113275Smikeinit_prison(void *data __unused) 110113275Smike{ 111113275Smike 112168401Spjd sx_init(&allprison_lock, "allprison"); 113113275Smike LIST_INIT(&allprison); 114113275Smike} 115113275Smike 116113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 117113275Smike 11882710Sdillon/* 119114168Smike * struct jail_args { 120114168Smike * struct jail *jail; 121114168Smike * }; 12282710Sdillon */ 12346155Sphkint 124114168Smikejail(struct thread *td, struct jail_args *uap) 12546155Sphk{ 126113275Smike struct nameidata nd; 127113275Smike struct prison *pr, *tpr; 128168401Spjd struct prison_service *psrv; 12946155Sphk struct jail j; 130113275Smike struct jail_attach_args jaa; 131150652Scsjp int vfslocked, error, tryprid; 13246155Sphk 133114168Smike error = copyin(uap->jail, &j, sizeof(j)); 13446155Sphk if (error) 13584828Sjhb return (error); 13684828Sjhb if (j.version != 0) 13784828Sjhb return (EINVAL); 13884828Sjhb 139114168Smike MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 14093818Sjhb mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 141113275Smike pr->pr_ref = 1; 142114168Smike error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 
143113275Smike if (error) 144113275Smike goto e_killmtx; 145150652Scsjp NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 146150652Scsjp pr->pr_path, td); 147113275Smike error = namei(&nd); 148150652Scsjp if (error) 149113275Smike goto e_killmtx; 150150652Scsjp vfslocked = NDHASGIANT(&nd); 151113275Smike pr->pr_root = nd.ni_vp; 152175294Sattilio VOP_UNLOCK(nd.ni_vp, 0); 153113275Smike NDFREE(&nd, NDF_ONLY_PNBUF); 154150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 155114168Smike error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 15684828Sjhb if (error) 157113275Smike goto e_dropvnref; 158113275Smike pr->pr_ip = j.ip_number; 159113275Smike pr->pr_linux = NULL; 160113275Smike pr->pr_securelevel = securelevel; 161168401Spjd if (prison_service_slots == 0) 162168401Spjd pr->pr_slots = NULL; 163168401Spjd else { 164168401Spjd pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 165168401Spjd M_PRISON, M_ZERO | M_WAITOK); 166168401Spjd } 167113275Smike 168113275Smike /* Determine next pr_id and add prison to allprison list. 
*/ 169168401Spjd sx_xlock(&allprison_lock); 170113275Smike tryprid = lastprid + 1; 171113275Smike if (tryprid == JAIL_MAX) 172113275Smike tryprid = 1; 173113275Smikenext: 174113275Smike LIST_FOREACH(tpr, &allprison, pr_list) { 175113275Smike if (tpr->pr_id == tryprid) { 176113275Smike tryprid++; 177113275Smike if (tryprid == JAIL_MAX) { 178168401Spjd sx_xunlock(&allprison_lock); 179113275Smike error = EAGAIN; 180113275Smike goto e_dropvnref; 181113275Smike } 182113275Smike goto next; 183113275Smike } 184113275Smike } 185113275Smike pr->pr_id = jaa.jid = lastprid = tryprid; 186113275Smike LIST_INSERT_HEAD(&allprison, pr, pr_list); 187113275Smike prisoncount++; 188168401Spjd sx_downgrade(&allprison_lock); 189168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 190168401Spjd psrv->ps_create(psrv, pr); 191168401Spjd } 192168401Spjd sx_sunlock(&allprison_lock); 193113275Smike 194113275Smike error = jail_attach(td, &jaa); 195113275Smike if (error) 196113275Smike goto e_dropprref; 197113275Smike mtx_lock(&pr->pr_mtx); 198113275Smike pr->pr_ref--; 199113275Smike mtx_unlock(&pr->pr_mtx); 200113275Smike td->td_retval[0] = jaa.jid; 201113275Smike return (0); 202113275Smikee_dropprref: 203168401Spjd sx_xlock(&allprison_lock); 204113275Smike LIST_REMOVE(pr, pr_list); 205113275Smike prisoncount--; 206168401Spjd sx_downgrade(&allprison_lock); 207168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 208168401Spjd psrv->ps_destroy(psrv, pr); 209168401Spjd } 210168401Spjd sx_sunlock(&allprison_lock); 211113275Smikee_dropvnref: 212150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 213113275Smike vrele(pr->pr_root); 214150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 215113275Smikee_killmtx: 216113275Smike mtx_destroy(&pr->pr_mtx); 217113275Smike FREE(pr, M_PRISON); 218113275Smike return (error); 219113275Smike} 220113275Smike 221113275Smike/* 222114168Smike * struct jail_attach_args { 223114168Smike * int jid; 224114168Smike * }; 225113275Smike */ 226113275Smikeint 
227114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap) 228113275Smike{ 229113275Smike struct proc *p; 230113275Smike struct ucred *newcred, *oldcred; 231113275Smike struct prison *pr; 232150652Scsjp int vfslocked, error; 233167309Spjd 234126023Snectar /* 235126023Snectar * XXX: Note that there is a slight race here if two threads 236126023Snectar * in the same privileged process attempt to attach to two 237126023Snectar * different jails at the same time. It is important for 238126023Snectar * user processes not to do this, or they might end up with 239126023Snectar * a process root from one prison, but attached to the jail 240126023Snectar * of another. 241126023Snectar */ 242164032Srwatson error = priv_check(td, PRIV_JAIL_ATTACH); 243126023Snectar if (error) 244126023Snectar return (error); 245126023Snectar 246113275Smike p = td->td_proc; 247168401Spjd sx_slock(&allprison_lock); 248113275Smike pr = prison_find(uap->jid); 249113275Smike if (pr == NULL) { 250168401Spjd sx_sunlock(&allprison_lock); 251113275Smike return (EINVAL); 252113275Smike } 253113275Smike pr->pr_ref++; 254113275Smike mtx_unlock(&pr->pr_mtx); 255168401Spjd sx_sunlock(&allprison_lock); 256113275Smike 257150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 258175202Sattilio vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 259113275Smike if ((error = change_dir(pr->pr_root, td)) != 0) 260113275Smike goto e_unlock; 261113275Smike#ifdef MAC 262172930Srwatson if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 263113275Smike goto e_unlock; 264113275Smike#endif 265175294Sattilio VOP_UNLOCK(pr->pr_root, 0); 266113275Smike change_root(pr->pr_root, td); 267150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 268113275Smike 26984828Sjhb newcred = crget(); 27084828Sjhb PROC_LOCK(p); 27184828Sjhb oldcred = p->p_ucred; 272113275Smike setsugid(p); 27384828Sjhb crcopy(newcred, oldcred); 274113630Sjhb newcred->cr_prison = pr; 27584828Sjhb p->p_ucred = newcred; 27684828Sjhb 
PROC_UNLOCK(p); 27784828Sjhb crfree(oldcred); 27846155Sphk return (0); 279113275Smikee_unlock: 280175294Sattilio VOP_UNLOCK(pr->pr_root, 0); 281150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 282113275Smike mtx_lock(&pr->pr_mtx); 283113275Smike pr->pr_ref--; 284113275Smike mtx_unlock(&pr->pr_mtx); 28546155Sphk return (error); 28646155Sphk} 28746155Sphk 288113275Smike/* 289113275Smike * Returns a locked prison instance, or NULL on failure. 290113275Smike */ 291168399Spjdstruct prison * 292113275Smikeprison_find(int prid) 293113275Smike{ 294113275Smike struct prison *pr; 295113275Smike 296168401Spjd sx_assert(&allprison_lock, SX_LOCKED); 297113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 298113275Smike if (pr->pr_id == prid) { 299113275Smike mtx_lock(&pr->pr_mtx); 300168489Spjd if (pr->pr_ref == 0) { 301168489Spjd mtx_unlock(&pr->pr_mtx); 302168489Spjd break; 303168489Spjd } 304113275Smike return (pr); 305113275Smike } 306113275Smike } 307113275Smike return (NULL); 308113275Smike} 309113275Smike 31072786Srwatsonvoid 31172786Srwatsonprison_free(struct prison *pr) 31272786Srwatson{ 31372786Srwatson 31487275Srwatson mtx_lock(&pr->pr_mtx); 31572786Srwatson pr->pr_ref--; 31672786Srwatson if (pr->pr_ref == 0) { 317168483Spjd mtx_unlock(&pr->pr_mtx); 318124882Srwatson TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 319144660Sjeff taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 32087275Srwatson return; 32172786Srwatson } 32287275Srwatson mtx_unlock(&pr->pr_mtx); 32372786Srwatson} 32472786Srwatson 325124882Srwatsonstatic void 326124882Srwatsonprison_complete(void *context, int pending) 327124882Srwatson{ 328168489Spjd struct prison_service *psrv; 329124882Srwatson struct prison *pr; 330150652Scsjp int vfslocked; 331124882Srwatson 332124882Srwatson pr = (struct prison *)context; 333124882Srwatson 334168489Spjd sx_xlock(&allprison_lock); 335168489Spjd LIST_REMOVE(pr, pr_list); 336168489Spjd prisoncount--; 337168489Spjd sx_downgrade(&allprison_lock); 338168489Spjd 
TAILQ_FOREACH(psrv, &prison_services, ps_next) { 339168489Spjd psrv->ps_destroy(psrv, pr); 340168489Spjd } 341168489Spjd sx_sunlock(&allprison_lock); 342168489Spjd 343150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 344124882Srwatson vrele(pr->pr_root); 345150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 346124882Srwatson 347124882Srwatson mtx_destroy(&pr->pr_mtx); 348124882Srwatson if (pr->pr_linux != NULL) 349124882Srwatson FREE(pr->pr_linux, M_PRISON); 350124882Srwatson FREE(pr, M_PRISON); 351124882Srwatson} 352124882Srwatson 35372786Srwatsonvoid 35472786Srwatsonprison_hold(struct prison *pr) 35572786Srwatson{ 35672786Srwatson 35787275Srwatson mtx_lock(&pr->pr_mtx); 358168489Spjd KASSERT(pr->pr_ref > 0, 359168489Spjd ("Trying to hold dead prison (id=%d).", pr->pr_id)); 36072786Srwatson pr->pr_ref++; 36187275Srwatson mtx_unlock(&pr->pr_mtx); 36272786Srwatson} 36372786Srwatson 36487275Srwatsonu_int32_t 36587275Srwatsonprison_getip(struct ucred *cred) 36687275Srwatson{ 36787275Srwatson 36887275Srwatson return (cred->cr_prison->pr_ip); 36987275Srwatson} 37087275Srwatson 37146155Sphkint 37272786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip) 37346155Sphk{ 37446155Sphk u_int32_t tmp; 37546155Sphk 37672786Srwatson if (!jailed(cred)) 37746155Sphk return (0); 378167309Spjd if (flag) 37946155Sphk tmp = *ip; 38046155Sphk else 38146155Sphk tmp = ntohl(*ip); 38246155Sphk if (tmp == INADDR_ANY) { 383167309Spjd if (flag) 38472786Srwatson *ip = cred->cr_prison->pr_ip; 38546155Sphk else 38672786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 38746155Sphk return (0); 38846155Sphk } 38981114Srwatson if (tmp == INADDR_LOOPBACK) { 39081114Srwatson if (flag) 39181114Srwatson *ip = cred->cr_prison->pr_ip; 39281114Srwatson else 39381114Srwatson *ip = htonl(cred->cr_prison->pr_ip); 39481114Srwatson return (0); 39581114Srwatson } 39672786Srwatson if (cred->cr_prison->pr_ip != tmp) 39746155Sphk return (1); 39846155Sphk return (0); 39946155Sphk} 40046155Sphk 
40146155Sphkvoid 40272786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 40346155Sphk{ 40446155Sphk u_int32_t tmp; 40546155Sphk 40672786Srwatson if (!jailed(cred)) 40746155Sphk return; 40846155Sphk if (flag) 40946155Sphk tmp = *ip; 41046155Sphk else 41146155Sphk tmp = ntohl(*ip); 41281114Srwatson if (tmp == INADDR_LOOPBACK) { 41346155Sphk if (flag) 41472786Srwatson *ip = cred->cr_prison->pr_ip; 41546155Sphk else 41672786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 41746155Sphk return; 41846155Sphk } 41946155Sphk return; 42046155Sphk} 42146155Sphk 42246155Sphkint 42372786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa) 42446155Sphk{ 425114168Smike struct sockaddr_in *sai; 42646155Sphk int ok; 42746155Sphk 428114168Smike sai = (struct sockaddr_in *)sa; 42961235Srwatson if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 43061235Srwatson ok = 1; 43161235Srwatson else if (sai->sin_family != AF_INET) 43246155Sphk ok = 0; 43372786Srwatson else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 43446155Sphk ok = 1; 43546155Sphk else 43646155Sphk ok = 0; 43746155Sphk return (ok); 43846155Sphk} 43972786Srwatson 44072786Srwatson/* 44172786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 44272786Srwatson */ 44372786Srwatsonint 444114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2) 44572786Srwatson{ 44672786Srwatson 44772786Srwatson if (jailed(cred1)) { 44872786Srwatson if (!jailed(cred2)) 44972786Srwatson return (ESRCH); 45072786Srwatson if (cred2->cr_prison != cred1->cr_prison) 45172786Srwatson return (ESRCH); 45272786Srwatson } 45372786Srwatson 45472786Srwatson return (0); 45572786Srwatson} 45672786Srwatson 45772786Srwatson/* 45872786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0. 
45972786Srwatson */ 46072786Srwatsonint 461114168Smikejailed(struct ucred *cred) 46272786Srwatson{ 46372786Srwatson 46472786Srwatson return (cred->cr_prison != NULL); 46572786Srwatson} 46691384Srobert 46791384Srobert/* 46891384Srobert * Return the correct hostname for the passed credential. 46991384Srobert */ 47091391Srobertvoid 471114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size) 47291384Srobert{ 47391384Srobert 47491391Srobert if (jailed(cred)) { 47591391Srobert mtx_lock(&cred->cr_prison->pr_mtx); 476105354Srobert strlcpy(buf, cred->cr_prison->pr_host, size); 47791391Srobert mtx_unlock(&cred->cr_prison->pr_mtx); 478114168Smike } else 479105354Srobert strlcpy(buf, hostname, size); 48091384Srobert} 481113275Smike 482125804Srwatson/* 483147185Spjd * Determine whether the subject represented by cred can "see" 484147185Spjd * status of a mount point. 485147185Spjd * Returns: 0 for permitted, ENOENT otherwise. 486147185Spjd * XXX: This function should be called cr_canseemount() and should be 487147185Spjd * placed in kern_prot.c. 488125804Srwatson */ 489125804Srwatsonint 490147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp) 491125804Srwatson{ 492147185Spjd struct prison *pr; 493147185Spjd struct statfs *sp; 494147185Spjd size_t len; 495125804Srwatson 496147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 497147185Spjd return (0); 498147185Spjd pr = cred->cr_prison; 499147185Spjd if (pr->pr_root->v_mount == mp) 500147185Spjd return (0); 501147185Spjd if (jail_enforce_statfs == 2) 502147185Spjd return (ENOENT); 503147185Spjd /* 504147185Spjd * If jail's chroot directory is set to "/" we should be able to see 505147185Spjd * all mount-points from inside a jail. 506147185Spjd * This is ugly check, but this is the only situation when jail's 507147185Spjd * directory ends with '/'. 
508147185Spjd */ 509147185Spjd if (strcmp(pr->pr_path, "/") == 0) 510147185Spjd return (0); 511147185Spjd len = strlen(pr->pr_path); 512147185Spjd sp = &mp->mnt_stat; 513147185Spjd if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 514147185Spjd return (ENOENT); 515147185Spjd /* 516147185Spjd * Be sure that we don't have situation where jail's root directory 517147185Spjd * is "/some/path" and mount point is "/some/pathpath". 518147185Spjd */ 519147185Spjd if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 520147185Spjd return (ENOENT); 521147185Spjd return (0); 522147185Spjd} 523147185Spjd 524147185Spjdvoid 525147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 526147185Spjd{ 527147185Spjd char jpath[MAXPATHLEN]; 528147185Spjd struct prison *pr; 529147185Spjd size_t len; 530147185Spjd 531147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 532147185Spjd return; 533147185Spjd pr = cred->cr_prison; 534147185Spjd if (prison_canseemount(cred, mp) != 0) { 535147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 536147185Spjd strlcpy(sp->f_mntonname, "[restricted]", 537147185Spjd sizeof(sp->f_mntonname)); 538147185Spjd return; 539125804Srwatson } 540147185Spjd if (pr->pr_root->v_mount == mp) { 541147185Spjd /* 542147185Spjd * Clear current buffer data, so we are sure nothing from 543147185Spjd * the valid path left there. 544147185Spjd */ 545147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 546147185Spjd *sp->f_mntonname = '/'; 547147185Spjd return; 548147185Spjd } 549147185Spjd /* 550147185Spjd * If jail's chroot directory is set to "/" we should be able to see 551147185Spjd * all mount-points from inside a jail. 
552147185Spjd */ 553147185Spjd if (strcmp(pr->pr_path, "/") == 0) 554147185Spjd return; 555147185Spjd len = strlen(pr->pr_path); 556147185Spjd strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 557147185Spjd /* 558147185Spjd * Clear current buffer data, so we are sure nothing from 559147185Spjd * the valid path left there. 560147185Spjd */ 561147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 562147185Spjd if (*jpath == '\0') { 563147185Spjd /* Should never happen. */ 564147185Spjd *sp->f_mntonname = '/'; 565147185Spjd } else { 566147185Spjd strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 567147185Spjd } 568125804Srwatson} 569125804Srwatson 570164032Srwatson/* 571164032Srwatson * Check with permission for a specific privilege is granted within jail. We 572164032Srwatson * have a specific list of accepted privileges; the rest are denied. 573164032Srwatson */ 574164032Srwatsonint 575164032Srwatsonprison_priv_check(struct ucred *cred, int priv) 576164032Srwatson{ 577164032Srwatson 578164032Srwatson if (!jailed(cred)) 579164032Srwatson return (0); 580164032Srwatson 581164032Srwatson switch (priv) { 582164032Srwatson 583164032Srwatson /* 584164032Srwatson * Allow ktrace privileges for root in jail. 585164032Srwatson */ 586164032Srwatson case PRIV_KTRACE: 587164032Srwatson 588166827Srwatson#if 0 589164032Srwatson /* 590164032Srwatson * Allow jailed processes to configure audit identity and 591164032Srwatson * submit audit records (login, etc). In the future we may 592164032Srwatson * want to further refine the relationship between audit and 593164032Srwatson * jail. 594164032Srwatson */ 595164032Srwatson case PRIV_AUDIT_GETAUDIT: 596164032Srwatson case PRIV_AUDIT_SETAUDIT: 597164032Srwatson case PRIV_AUDIT_SUBMIT: 598166827Srwatson#endif 599164032Srwatson 600164032Srwatson /* 601164032Srwatson * Allow jailed processes to manipulate process UNIX 602164032Srwatson * credentials in any way they see fit. 
603164032Srwatson */ 604164032Srwatson case PRIV_CRED_SETUID: 605164032Srwatson case PRIV_CRED_SETEUID: 606164032Srwatson case PRIV_CRED_SETGID: 607164032Srwatson case PRIV_CRED_SETEGID: 608164032Srwatson case PRIV_CRED_SETGROUPS: 609164032Srwatson case PRIV_CRED_SETREUID: 610164032Srwatson case PRIV_CRED_SETREGID: 611164032Srwatson case PRIV_CRED_SETRESUID: 612164032Srwatson case PRIV_CRED_SETRESGID: 613164032Srwatson 614164032Srwatson /* 615164032Srwatson * Jail implements visibility constraints already, so allow 616164032Srwatson * jailed root to override uid/gid-based constraints. 617164032Srwatson */ 618164032Srwatson case PRIV_SEEOTHERGIDS: 619164032Srwatson case PRIV_SEEOTHERUIDS: 620164032Srwatson 621164032Srwatson /* 622164032Srwatson * Jail implements inter-process debugging limits already, so 623164032Srwatson * allow jailed root various debugging privileges. 624164032Srwatson */ 625164032Srwatson case PRIV_DEBUG_DIFFCRED: 626164032Srwatson case PRIV_DEBUG_SUGID: 627164032Srwatson case PRIV_DEBUG_UNPRIV: 628164032Srwatson 629164032Srwatson /* 630164032Srwatson * Allow jail to set various resource limits and login 631164032Srwatson * properties, and for now, exceed process resource limits. 632164032Srwatson */ 633164032Srwatson case PRIV_PROC_LIMIT: 634164032Srwatson case PRIV_PROC_SETLOGIN: 635164032Srwatson case PRIV_PROC_SETRLIMIT: 636164032Srwatson 637164032Srwatson /* 638164032Srwatson * System V and POSIX IPC privileges are granted in jail. 639164032Srwatson */ 640164032Srwatson case PRIV_IPC_READ: 641164032Srwatson case PRIV_IPC_WRITE: 642164032Srwatson case PRIV_IPC_ADMIN: 643164032Srwatson case PRIV_IPC_MSGSIZE: 644164032Srwatson case PRIV_MQ_ADMIN: 645164032Srwatson 646164032Srwatson /* 647164032Srwatson * Jail implements its own inter-process limits, so allow 648164032Srwatson * root processes in jail to change scheduling on other 649164032Srwatson * processes in the same jail. Likewise for signalling. 
650164032Srwatson */ 651164032Srwatson case PRIV_SCHED_DIFFCRED: 652164032Srwatson case PRIV_SIGNAL_DIFFCRED: 653164032Srwatson case PRIV_SIGNAL_SUGID: 654164032Srwatson 655164032Srwatson /* 656164032Srwatson * Allow jailed processes to write to sysctls marked as jail 657164032Srwatson * writable. 658164032Srwatson */ 659164032Srwatson case PRIV_SYSCTL_WRITEJAIL: 660164032Srwatson 661164032Srwatson /* 662164032Srwatson * Allow root in jail to manage a variety of quota 663166831Srwatson * properties. These should likely be conditional on a 664166831Srwatson * configuration option. 665164032Srwatson */ 666166832Srwatson case PRIV_VFS_GETQUOTA: 667166832Srwatson case PRIV_VFS_SETQUOTA: 668164032Srwatson 669164032Srwatson /* 670164032Srwatson * Since Jail relies on chroot() to implement file system 671164032Srwatson * protections, grant many VFS privileges to root in jail. 672164032Srwatson * Be careful to exclude mount-related and NFS-related 673164032Srwatson * privileges. 674164032Srwatson */ 675164032Srwatson case PRIV_VFS_READ: 676164032Srwatson case PRIV_VFS_WRITE: 677164032Srwatson case PRIV_VFS_ADMIN: 678164032Srwatson case PRIV_VFS_EXEC: 679164032Srwatson case PRIV_VFS_LOOKUP: 680164032Srwatson case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 681164032Srwatson case PRIV_VFS_CHFLAGS_DEV: 682164032Srwatson case PRIV_VFS_CHOWN: 683164032Srwatson case PRIV_VFS_CHROOT: 684167152Spjd case PRIV_VFS_RETAINSUGID: 685164032Srwatson case PRIV_VFS_FCHROOT: 686164032Srwatson case PRIV_VFS_LINK: 687164032Srwatson case PRIV_VFS_SETGID: 688172860Srwatson case PRIV_VFS_STAT: 689164032Srwatson case PRIV_VFS_STICKYFILE: 690164032Srwatson return (0); 691164032Srwatson 692164032Srwatson /* 693164032Srwatson * Depending on the global setting, allow privilege of 694164032Srwatson * setting system flags. 
695164032Srwatson */ 696164032Srwatson case PRIV_VFS_SYSFLAGS: 697164032Srwatson if (jail_chflags_allowed) 698164032Srwatson return (0); 699164032Srwatson else 700164032Srwatson return (EPERM); 701164032Srwatson 702164032Srwatson /* 703168396Spjd * Depending on the global setting, allow privilege of 704168396Spjd * mounting/unmounting file systems. 705168396Spjd */ 706168396Spjd case PRIV_VFS_MOUNT: 707168396Spjd case PRIV_VFS_UNMOUNT: 708168396Spjd case PRIV_VFS_MOUNT_NONUSER: 709168699Spjd case PRIV_VFS_MOUNT_OWNER: 710168396Spjd if (jail_mount_allowed) 711168396Spjd return (0); 712168396Spjd else 713168396Spjd return (EPERM); 714168396Spjd 715168396Spjd /* 716168591Srwatson * Allow jailed root to bind reserved ports and reuse in-use 717168591Srwatson * ports. 718164032Srwatson */ 719164032Srwatson case PRIV_NETINET_RESERVEDPORT: 720168591Srwatson case PRIV_NETINET_REUSEPORT: 721164032Srwatson return (0); 722164032Srwatson 723164032Srwatson /* 724175630Sbz * Allow jailed root to set certian IPv4/6 (option) headers. 725175630Sbz */ 726175630Sbz case PRIV_NETINET_SETHDROPTS: 727175630Sbz return (0); 728175630Sbz 729175630Sbz /* 730164032Srwatson * Conditionally allow creating raw sockets in jail. 731164032Srwatson */ 732164032Srwatson case PRIV_NETINET_RAW: 733164032Srwatson if (jail_allow_raw_sockets) 734164032Srwatson return (0); 735164032Srwatson else 736164032Srwatson return (EPERM); 737164032Srwatson 738164032Srwatson /* 739164032Srwatson * Since jail implements its own visibility limits on netstat 740164032Srwatson * sysctls, allow getcred. This allows identd to work in 741164032Srwatson * jail. 742164032Srwatson */ 743164032Srwatson case PRIV_NETINET_GETCRED: 744164032Srwatson return (0); 745164032Srwatson 746164032Srwatson default: 747164032Srwatson /* 748164032Srwatson * In all remaining cases, deny the privilege request. This 749164032Srwatson * includes almost all network privileges, many system 750164032Srwatson * configuration privileges. 
751164032Srwatson */ 752164032Srwatson return (EPERM); 753164032Srwatson } 754164032Srwatson} 755164032Srwatson 756168401Spjd/* 757168401Spjd * Register jail service. Provides 'create' and 'destroy' methods. 758168401Spjd * 'create' method will be called for every existing jail and all 759168401Spjd * jails in the future as they beeing created. 760168401Spjd * 'destroy' method will be called for every jail going away and 761168401Spjd * for all existing jails at the time of service deregistration. 762168401Spjd */ 763168401Spjdstruct prison_service * 764168401Spjdprison_service_register(const char *name, prison_create_t create, 765168401Spjd prison_destroy_t destroy) 766168401Spjd{ 767168401Spjd struct prison_service *psrv, *psrv2; 768168401Spjd struct prison *pr; 769168401Spjd int reallocate = 1, slotno = 0; 770168401Spjd void **slots, **oldslots; 771168401Spjd 772168401Spjd psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 773168401Spjd M_WAITOK | M_ZERO); 774168401Spjd psrv->ps_create = create; 775168401Spjd psrv->ps_destroy = destroy; 776168401Spjd strcpy(psrv->ps_name, name); 777168401Spjd /* 778168401Spjd * Grab the allprison_lock here, so we won't miss any jail 779168401Spjd * creation/destruction. 780168401Spjd */ 781168401Spjd sx_xlock(&allprison_lock); 782168401Spjd#ifdef INVARIANTS 783168401Spjd /* 784168401Spjd * Verify if service is not already registered. 785168401Spjd */ 786168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 787168401Spjd KASSERT(strcmp(psrv2->ps_name, name) != 0, 788168401Spjd ("jail service %s already registered", name)); 789168401Spjd } 790168401Spjd#endif 791168401Spjd /* 792168401Spjd * Find free slot. When there is no existing free slot available, 793168401Spjd * allocate one at the end. 
794168401Spjd */ 795168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 796168401Spjd if (psrv2->ps_slotno != slotno) { 797168401Spjd KASSERT(slotno < psrv2->ps_slotno, 798168401Spjd ("Invalid slotno (slotno=%d >= ps_slotno=%d", 799168401Spjd slotno, psrv2->ps_slotno)); 800168401Spjd /* We found free slot. */ 801168401Spjd reallocate = 0; 802168401Spjd break; 803168401Spjd } 804168401Spjd slotno++; 805168401Spjd } 806168401Spjd psrv->ps_slotno = slotno; 807168401Spjd /* 808168401Spjd * Keep the list sorted by slot number. 809168401Spjd */ 810168401Spjd if (psrv2 != NULL) { 811168401Spjd KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 812168401Spjd TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 813168401Spjd } else { 814168401Spjd KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 815168401Spjd TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 816168401Spjd } 817168401Spjd prison_service_slots++; 818168401Spjd sx_downgrade(&allprison_lock); 819168401Spjd /* 820168401Spjd * Allocate memory for new slot if we didn't found empty one. 821168401Spjd * Do not use realloc(9), because pr_slots is protected with a mutex, 822168401Spjd * so we can't sleep. 823168401Spjd */ 824168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 825168401Spjd if (reallocate) { 826168401Spjd /* First allocate memory with M_WAITOK. */ 827168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 828168401Spjd M_PRISON, M_WAITOK); 829168401Spjd /* Now grab the mutex and replace pr_slots. 
*/ 830168401Spjd mtx_lock(&pr->pr_mtx); 831168401Spjd oldslots = pr->pr_slots; 832168401Spjd if (psrv->ps_slotno > 0) { 833168401Spjd bcopy(oldslots, slots, 834168401Spjd sizeof(*slots) * (prison_service_slots - 1)); 835168401Spjd } 836168401Spjd slots[psrv->ps_slotno] = NULL; 837168401Spjd pr->pr_slots = slots; 838168401Spjd mtx_unlock(&pr->pr_mtx); 839168401Spjd if (oldslots != NULL) 840168401Spjd free(oldslots, M_PRISON); 841168401Spjd } 842168401Spjd /* 843168401Spjd * Call 'create' method for each existing jail. 844168401Spjd */ 845168401Spjd psrv->ps_create(psrv, pr); 846168401Spjd } 847168401Spjd sx_sunlock(&allprison_lock); 848168401Spjd 849168401Spjd return (psrv); 850168401Spjd} 851168401Spjd 852168401Spjdvoid 853168401Spjdprison_service_deregister(struct prison_service *psrv) 854168401Spjd{ 855168401Spjd struct prison *pr; 856168401Spjd void **slots, **oldslots; 857168401Spjd int last = 0; 858168401Spjd 859168401Spjd sx_xlock(&allprison_lock); 860168401Spjd if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 861168401Spjd last = 1; 862168401Spjd TAILQ_REMOVE(&prison_services, psrv, ps_next); 863168401Spjd prison_service_slots--; 864168401Spjd sx_downgrade(&allprison_lock); 865168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 866168401Spjd /* 867168401Spjd * Call 'destroy' method for every currently existing jail. 868168401Spjd */ 869168401Spjd psrv->ps_destroy(psrv, pr); 870168401Spjd /* 871168401Spjd * If this is the last slot, free the memory allocated for it. 872168401Spjd */ 873168401Spjd if (last) { 874168401Spjd if (prison_service_slots == 0) 875168401Spjd slots = NULL; 876168401Spjd else { 877168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 878168401Spjd M_PRISON, M_WAITOK); 879168401Spjd } 880168401Spjd mtx_lock(&pr->pr_mtx); 881168401Spjd oldslots = pr->pr_slots; 882168401Spjd /* 883168401Spjd * We require setting slot to NULL after freeing it, 884168401Spjd * this way we can check for memory leaks here. 
885168401Spjd */ 886168401Spjd KASSERT(oldslots[psrv->ps_slotno] == NULL, 887168401Spjd ("Slot %d (service %s, jailid=%d) still contains data?", 888168401Spjd psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 889168401Spjd if (psrv->ps_slotno > 0) { 890168401Spjd bcopy(oldslots, slots, 891168401Spjd sizeof(*slots) * prison_service_slots); 892168401Spjd } 893168401Spjd pr->pr_slots = slots; 894168401Spjd mtx_unlock(&pr->pr_mtx); 895168401Spjd KASSERT(oldslots != NULL, ("oldslots == NULL")); 896168401Spjd free(oldslots, M_PRISON); 897168401Spjd } 898168401Spjd } 899168401Spjd sx_sunlock(&allprison_lock); 900168401Spjd free(psrv, M_PRISON); 901168401Spjd} 902168401Spjd 903168401Spjd/* 904168401Spjd * Function sets data for the given jail in slot assigned for the given 905168401Spjd * jail service. 906168401Spjd */ 907168401Spjdvoid 908168401Spjdprison_service_data_set(struct prison_service *psrv, struct prison *pr, 909168401Spjd void *data) 910168401Spjd{ 911168401Spjd 912168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 913168401Spjd pr->pr_slots[psrv->ps_slotno] = data; 914168401Spjd} 915168401Spjd 916168401Spjd/* 917168401Spjd * Function clears slots assigned for the given jail service in the given 918168401Spjd * prison structure and returns current slot data. 919168401Spjd */ 920168401Spjdvoid * 921168401Spjdprison_service_data_del(struct prison_service *psrv, struct prison *pr) 922168401Spjd{ 923168401Spjd void *data; 924168401Spjd 925168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 926168401Spjd data = pr->pr_slots[psrv->ps_slotno]; 927168401Spjd pr->pr_slots[psrv->ps_slotno] = NULL; 928168401Spjd return (data); 929168401Spjd} 930168401Spjd 931168401Spjd/* 932168401Spjd * Function returns current data from the slot assigned to the given jail 933168401Spjd * service for the given jail. 
934168401Spjd */ 935168401Spjdvoid * 936168401Spjdprison_service_data_get(struct prison_service *psrv, struct prison *pr) 937168401Spjd{ 938168401Spjd 939168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 940168401Spjd return (pr->pr_slots[psrv->ps_slotno]); 941168401Spjd} 942168401Spjd 943113275Smikestatic int 944113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS) 945113275Smike{ 946113275Smike struct xprison *xp, *sxp; 947113275Smike struct prison *pr; 948113275Smike int count, error; 949113275Smike 950127020Spjd if (jailed(req->td->td_ucred)) 951125806Srwatson return (0); 952113275Smike 953168401Spjd sx_slock(&allprison_lock); 954168401Spjd if ((count = prisoncount) == 0) { 955168401Spjd sx_sunlock(&allprison_lock); 956113275Smike return (0); 957168401Spjd } 958113275Smike 959113275Smike sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 960167309Spjd 961113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 962113275Smike xp->pr_version = XPRISON_VERSION; 963113275Smike xp->pr_id = pr->pr_id; 964168487Spjd xp->pr_ip = pr->pr_ip; 965113275Smike strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 966168487Spjd mtx_lock(&pr->pr_mtx); 967113275Smike strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 968113275Smike mtx_unlock(&pr->pr_mtx); 969113275Smike xp++; 970113275Smike } 971168401Spjd sx_sunlock(&allprison_lock); 972113275Smike 973113275Smike error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 974113275Smike free(sxp, M_TEMP); 975167354Spjd return (error); 976113275Smike} 977113275Smike 978113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 979113275Smike NULL, 0, sysctl_jail_list, "S", "List of active jails"); 980126004Spjd 981126004Spjdstatic int 982126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 983126004Spjd{ 984126004Spjd int error, injail; 985126004Spjd 986126004Spjd injail = jailed(req->td->td_ucred); 987126004Spjd error = SYSCTL_OUT(req, &injail, sizeof(injail)); 988126004Spjd 989126004Spjd return (error); 
990126004Spjd} 991126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 992126004Spjd NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 993